o
    mi                     @   s   U d Z ddlZddlZddlmZ ddlmZmZ ddlm	Z	m
Z
mZ ddlmZ e
d Ze	ed< d	ed
eddfddZG dd dejZG dd deZG dd deZG dd deZdS )z5Manages words count mismatches for the espeak backend    N)Logger)ListTuple)	TypeAliasLiteralUnion)	Separator)warnignoreWordMismatchmodeloggerreturnBaseWordsMismatchc                 C   sJ   t ttd}z||  |W S  ty$   td|  dd|  dw )aO  Returns a word count mismatch processor according to `mode`

    The `mode` can be one of the following:
    - `ignore` to ignore words mismatches
    - `warn` to display a warning on each mismatched utterance
    - `remove` to remove any utterance containing a words mismatch

    Raises a RuntimeError if the `mode` is unknown.

    )r
   r	   removezmode z invalid, must be in z, N)IgnoreWarnRemoveKeyErrorRuntimeErrorjoinkeys)r   r   Z
processors r   b/home/kim/smarthome/.venv/lib/python3.10/site-packages/phonemizer/backend/espeak/words_mismatch.pyget_words_mismatch_processor   s   r   c                	   @   s   e Zd ZdZedZdefddZe	efde
e deeejf de
e fd	d
Zde
eeeef  fddZdedefddZde
e fddZde
e defddZejde
e de
e fddZdS )r   z4The base class of all word count mismatch processorsz\s+r   c                 C   s   || _ g | _g | _d S N)_logger
_count_txt
_count_phn)selfr   r   r   r   __init__:   s   
zBaseWordsMismatch.__init__textwordsepr   c                    s(   t  tjst   fdd|D S )z;Return the number of words contained in each line of `text`c              	      s*   g | ]}t d d t | D qS )c                 S   s   g | ]}|r|qS r   r   ).0wr   r   r   
<listcomp>I       z=BaseWordsMismatch._count_words.<locals>.<listcomp>.<listcomp>)lenresplitstripr#   liner"   r   r   r%   H   s    z2BaseWordsMismatch._count_words.<locals>.<listcomp>)
isinstancer(   Patternescape)clsr!   r"   r   r-   r   _count_words?   s
   

zBaseWordsMismatch._count_wordsc                 C   sP   t | jt | jkrtdt | j dt | j dd tt| j| jD S )zReturns a list of (num_line, nwords_input, nwords_output)

        Consider only the lines where nwords_input != nwords_output. Raises a
        RuntimeError if input and output do not have the same number of lines.

        zBnumber of lines in input and output must be equal, we have: input=z	, output=c                 S   s&   g | ]\}\}}||kr|||fqS r   r   )r#   ntpr   r   r   r%   Y   s
    
z7BaseWordsMismatch._mismatched_lines.<locals>.<listcomp>)r'   r   r   r   	enumeratezip)r   r   r   r   _mismatched_linesL   s   z#BaseWordsMismatch._mismatched_lines	nmismatchnlinesc                 C   s,   |r| j dt|| dd || dS dS )z$Logs a high level undetailed warningz1words count mismatch on %s%% of the lines (%s/%s)   d   N)r   warninground)r   r9   r:   r   r   r   _resume^   s   zBaseWordsMismatch._resumec                 C   s   |  || _dS )z-Stores the number of words in each input lineN)r2   r   r   r!   r   r   r   
count_texte   s   zBaseWordsMismatch.count_text	separatorc                 C   s   |  ||j| _dS )z.Stores the number of words in each output lineN)r2   wordr   )r   r!   rB   r   r   r   count_phonemizedi   s   z"BaseWordsMismatch.count_phonemizedc                 C   s   dS )zDetects and process word count misatches according to the mode

        This method is called at the very end of phonemization, during
        post-processing.

        Nr   r@   r   r   r   processm   s    zBaseWordsMismatch.processN)__name__
__module____qualname____doc__r(   compileZ
_RE_SPACESr   r    classmethodr   strr   r/   intr2   r   r8   r?   rA   r   rD   abcabstractmethodrE   r   r   r   r   r   6   s&    
 c                   @   *   e Zd ZdZdee dee fddZdS )r   zIgnores word count mismatchesr!   r   c                 C   s   |  t|  t| |S r   )r?   r'   r8   r@   r   r   r   rE   z   s   zIgnore.processNrF   rG   rH   rI   r   rL   rE   r   r   r   r   r   w       r   c                   @   rP   )r   z Warns on every mismatch detectedr!   r   c                 C   sF   |   }|D ]\}}}| jd|d || q| t|t| |S )Nz>words count mismatch on line %s (expected %s words but get %s)   )r8   r   r=   r?   r'   )r   r!   mismatchnumZntxtZnphnr   r   r   rE      s   
zWarn.processNrQ   r   r   r   r   r      rR   r   c                   @   rP   )r   z6Removes any utterance containing a word count mismatchr!   r   c                 C   sH   dd |   D }| t|t| | jd |D ]}d||< q|S )Nc                 S   s   g | ]}|d  qS )r   r   r+   r   r   r   r%      r&   z"Remove.process.<locals>.<listcomp>zremoving the mismatched lines )r8   r?   r'   r   r=   )r   r!   rT   indexr   r   r   rE      s   
zRemove.processNrQ   r   r   r   r   r      rR   r   )rI   rN   r(   loggingr   typingr   r   Ztyping_extensionsr   r   r   Zphonemizer.separatorr   r   __annotations__r   ABCr   r   r   r   r   r   r   r   <module>   s   A