Tokenize and normalize word pronunciations for stable Cued Speech alignment.
This helper converts per-word pronunciation strings (phones separated by '-') into normalized strings, using CsNormalizationRules as the single normalization layer.

Tokenize and normalize word pronunciations for stable Cued Speech alignment.
This helper converts per-word pronunciation strings (phones separated by '-') into normalized strings, using CsNormalizationRules as the single normalization layer.
Create a new instance.
def __init__(self, key_rules: CuedSpeechKeys):
    """Build the normalizer around the given cueing keys.

    The keys are wrapped in a _CueingRulesAdapter instance, which is kept
    as a private member of this object.

    :param key_rules: (CuedSpeechKeys) Keys handed over to the rules adapter.

    """
    adapter = _CueingRulesAdapter(key_rules)
    self.__rules = adapter
Return normalized pronunciations for each word.
Input example: ('w-aI-t', '@-dZ-OI-n', 'OI-l') Output example: ('w-a-I-t', '@-dZ-O-I-n', 'O-I-l')
def normalize_word_phonemes(self, word_phonemes: tuple) -> tuple:
    """Normalize the pronunciation of every word, one token at a time.

    Each pronunciation is split on '-', empty fragments are discarded, the
    remaining phones are given to the normalization rules, and the phones
    they return are joined back together with '-'.

    Input example: ('w-aI-t', '@-dZ-OI-n', 'OI-l')
    Output example: ('w-a-I-t', '@-dZ-O-I-n', 'O-I-l')

    :param word_phonemes: (tuple|list) Pronunciation of each token, as '-' separated phones.
    :raises: ValueError: Not a tuple/list, non-str item, empty item, or item holding no phone.
    :return: (tuple) Normalized pronunciations, one per input token, in input order.

    """
    if not isinstance(word_phonemes, (list, tuple)):
        raise ValueError('word_phonemes must be a tuple or list')

    results = []
    for pronunciation in word_phonemes:
        if not isinstance(pronunciation, str):
            raise ValueError('Each item of word_phonemes must be a str')
        if not pronunciation:
            raise ValueError('Empty pronunciation item in word_phonemes')

        # Doubled, leading or trailing separators produce empty fragments: drop them.
        kept = list(filter(None, pronunciation.split('-')))
        if not kept:
            # The item contained nothing but separators.
            raise ValueError('Empty phoneme list for a token.')

        results.append('-'.join(self.__rules.normalize_word_phones(kept)))

    return tuple(results)