Module CuedSpeech.whatkey

Class CueingKeysByToken

Description

Segment an already cued text into per-token keys and per-token phones.

The cueing result is a list of items: (keys_str, phones_str). Each string can contain multiple key pairs separated by '.'.

The segmentation rule assigns each key to the token that consumes its vowel phones or, when the key has no vowel, to the token that consumes its consonant phones.

Constructor

Create a new instance.

View Source

def __init__(self):
    """Create a new instance.

    The constructor does nothing: it sets no attribute on the instance.

    """

Public functions

segment

Return per-token keys and per-token phones.

Parameters

word_phonemes: (tuple) Pronunciation of each token ('-' separated phones).
key_items: (list) Cueing result: list of (keys_str, phones_str).

Returns

(tuple) (codes_by_token, phones_by_token), each is a tuple of strings ('.' separated).

View Source

def segment(self, word_phonemes: tuple, key_items: list) -> tuple:
    """Split the cued keys, and their phones, token by token.

    Each key is attached to the token index returned by the phones cursor
    for its vowels or, when the key has no vowel, for its consonants.
    A token whose "consumed" flag is True but which received no key gets
    the placeholder code and the placeholder phone instead.

    :param word_phonemes: (tuple) Pronunciation of each token ('-' separated phones).
    :param key_items: (list) Cueing result: list of (keys_str, phones_str).
    :return: (tuple) (codes_by_token, phones_by_token), each is a tuple of strings ('.' separated).
    :raises: ValueError: A key has neither consonant nor vowel phones.

    """
    tokens = self.__parse_words(word_phonemes)
    all_codes, all_phons = self.__flatten_key_items(key_items)

    nb_tokens = len(tokens)
    codes_of = [list() for _ in range(nb_tokens)]
    phons_of = [list() for _ in range(nb_tokens)]
    # One flag per token, shared with the cursor.
    # NOTE(review): presumably the cursor switches a flag on when the
    # phones of the token are consumed -- confirm in _PhonesCursor.
    consumed = [False] * nb_tokens
    cursor = _PhonesCursor(tokens, consumed)

    for code, phon in zip(all_codes, all_phons):
        consonants, vowels = self.__parse_key_pair(phon)
        owner = None
        if consonants:
            owner = cursor.consume(consonants, 'consonants')
        cursor.advance_if_done()
        if vowels:
            # The vowel token, when there is one, overrides the consonant one.
            owner = cursor.consume(vowels, 'vowels')
        if owner is None:
            raise ValueError('Key has neither consonant nor vowel phones: ' + phon)
        codes_of[owner].append(code)
        phons_of[owner].append(phon)

    # Fill in consumed tokens which did not get any key.
    for idx, codes in enumerate(codes_of):
        if consumed[idx] is True and not codes:
            codes.append(self.__PLACEHOLDER_CODE)
            phons_of[idx].append(self.__PLACEHOLDER_PHON)

    codes_by_token = tuple('.'.join(codes) for codes in codes_of)
    phons_by_token = tuple('.'.join(phons) for phons in phons_of)
    return (codes_by_token, phons_by_token)

Protected functions

__parse_words

Return a list of list of phones per token.

Parameters

word_phonemes: (tuple) Pronunciation of each token ('-' separated phones).

View Source

def __parse_words(self, word_phonemes: tuple) -> list:
    """Turn the pronunciation of each token into its list of phones.

    :param word_phonemes: (tuple|list) One string of '-' separated phones per token.
    :return: (list) One list of phones (str) per token.
    :raises: ValueError: Not a tuple/list, an item is not a str, a token
        has no phone, or there is no token at all.

    """
    if not isinstance(word_phonemes, (list, tuple)):
        raise ValueError('word_phonemes must be a tuple or list')

    parsed = []
    for pron in word_phonemes:
        if not isinstance(pron, str):
            raise ValueError('Each item of word_phonemes must be a str')
        token_phones = self.__split_non_empty(pron, '-')
        if not token_phones:
            raise ValueError('Empty phoneme list for a token')
        parsed.append(token_phones)

    if not parsed:
        raise ValueError('word_phonemes is empty')
    return parsed

__flatten_key_items

Return flat lists of codes and phones.

Parameters

key_items: (list) Cueing result: list of (keys_str, phones_str).

View Source

def __flatten_key_items(self, key_items: list) -> tuple:
    """Unfold the cueing result into a list of codes and a list of phones.

    Each item holds '.' separated keys; both strings of an item must
    contain the same number of keys so that the two returned lists stay
    aligned index by index.

    :param key_items: (list) Cueing result: list of (codes_str, phon_str).
    :return: (tuple) (codes, phones), two lists of str of the same length.
    :raises: ValueError: Not a list, an item is not a pair of str, the
        two strings of an item differ in number of keys, or no key at all.

    """
    if not isinstance(key_items, list):
        raise ValueError('key_items must be a list')

    all_codes = []
    all_phons = []
    for entry in key_items:
        if not (isinstance(entry, (list, tuple)) and len(entry) == 2):
            raise ValueError('Each item of key_items must be a (codes_str, phon_str) pair.')
        codes_str, phon_str = entry
        if not isinstance(codes_str, str):
            raise ValueError('codes_str must be a str')
        if not isinstance(phon_str, str):
            raise ValueError('phon_str must be a str')

        entry_codes = self.__split_non_empty(codes_str, '.')
        entry_phons = self.__split_non_empty(phon_str, '.')
        if len(entry_codes) != len(entry_phons):
            raise ValueError("codes_str '" + codes_str + "' and phon_str '" + phon_str + "' do not have the same number of keys.")
        all_codes.extend(entry_codes)
        all_phons.extend(entry_phons)

    if not all_codes:
        raise ValueError('No key found in key_items')
    return (all_codes, all_phons)

__parse_key_pair

Return (consonants, vowels) lists from a 'C-V' pair string.

Parameters

pair_str: (str) A 'C-V' key pair; 'cnil' stands for no consonant and 'vnil' for no vowel.

View Source

def __parse_key_pair(self, pair_str: str) -> tuple:
    """Split a 'C-V' key pair into its consonant and vowel phones.

    The pair is cut at its first '-'. The left side is the consonant part
    and the right side is the vowel part; 'cnil' and 'vnil' respectively
    stand for an empty part.

    :param pair_str: (str) The phones of a key, like 'C-V'.
    :return: (tuple) (consonants, vowels), two lists of str.
    :raises: ValueError: Not a str, or no '-' in the given string.

    """
    if not isinstance(pair_str, str):
        raise ValueError('Invalid key-phoneme pair: ' + str(pair_str))
    if '-' not in pair_str:
        raise ValueError('Invalid key-phoneme pair: ' + pair_str)

    # Only the first '-' separates the two sides of the key.
    head, _, tail = pair_str.partition('-')
    head = head.strip()
    tail = tail.strip()

    consonants = list() if head == 'cnil' else self.__split_non_empty(head, '-')
    vowels = list() if tail == 'vnil' else self.__split_non_empty(tail, '-')
    return (consonants, vowels)

__split_non_empty

Split and remove empty items.

Parameters

value: (str) The string to split.
sep: (str) The separator.

View Source

@staticmethod
def __split_non_empty(value: str, sep: str) -> list:
    """Cut a string at each separator and keep only the non-empty pieces.

    :param value: (str) The string to split.
    :param sep: (str) The separator.
    :return: (list) The non-empty parts, in their original order.

    """
    return [piece for piece in value.split(sep) if piece]