o
    mi#                     @   s   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZ ddlmZmZ ddlmZ G d	d
 d
ejZdS )z.Abstract base class for phonemization backends    N)Logger)OptionalListAnyDictTupleUnionPattern)
get_logger)Punctuation)	Separatordefault_separator)chunksc                   @   s  e Zd ZdZ			d,dedeeeef  dedee	 fdd	Z
ed
d Zedd Zedd Zeejdd Zeejdd Zeejdd Zeejdeeef fddZedefddZ			d-dee dee dededee f
d d!Zed"eee  fd#d$Zejdee d%edededee f
d&d'Zdee de eeee f ef fd(d)Z!d"ee dedefd*d+Z"dS ).BaseBackendaI  Abstract base class of all the phonemization backends

    Provides a common interface to all backends. The central method is
    `phonemize()`

    Parameters
    ----------
    language: str
        The language code of the input text, must be supported by
        the backend. If ``backend`` is 'segments', the language can be a file with
        a grapheme to phoneme mapping.

    preserve_punctuation: bool
        When True, will keep the punctuation in the
        phonemized output. Not supported by the 'espeak-mbrola' backend. Default
        to False and remove all the punctuation.

    punctuation_marks: str
        The punctuation marks to consider when dealing with punctuation, either for removal or preservation.
        Can be defined as a string or regular expression. Default to Punctuation.default_marks().

    logger: logging.Logger
        the logging instance where to send
        messages. If not specified, use the default system logger.

    Raises
    ------
    RuntimeError
        if the backend is not available of if the `language` cannot be initialized.

    NFlanguagepunctuation_markspreserve_punctuationloggerc              	   C   s   |d u rt  }|d u rt }|  std|  || _| jd|  d	dd | 
 D  | || _|| _t || _d S )Nz{} not installed on your systemzinitializing backend %s-%s.c                 s   s    | ]}t |V  qd S )N)str).0v r   Q/home/kim/smarthome/.venv/lib/python3.10/site-packages/phonemizer/backend/base.py	<genexpr>S   s    z'BaseBackend.__init__.<locals>.<genexpr>)r   Zdefault_marksr
   is_availableRuntimeErrorformatname_loggerinfojoinversion_init_language	_language_preserve_punctuation_punctuator)selfr   r   r   r   r   r   r   __init__@   s    zBaseBackend.__init__c                 C   s(   |  |std| d|   d|S )zpLanguage initialization

        This method may be overloaded in child classes (see Segments backend)

        z
language "z" is not supported by the z backend)is_supported_languager   r   clsr   r   r   r   r#   \   s   
zBaseBackend._init_languagec                 C      | j S )z0A logging.Logger instance where to send messages)r   r'   r   r   r   r   i      zBaseBackend.loggerc                 C   r,   )z9The language code configured to be used for phonemization)r$   r-   r   r   r   r   n   r.   zBaseBackend.languagec                   C      dS )zThe name of the backendNr   r   r   r   r   r   s       zBaseBackend.namec                 C   r/   )z9Returns True if the backend is installed, False otherwiseNr   r+   r   r   r   r   x   r0   zBaseBackend.is_availablec                 C   r/   )z;Return the backend version as a tuple (major, minor, patch)Nr   r1   r   r   r   r"   }   r0   zBaseBackend.versionreturnc                   C   r/   )z@Return a dict of language codes -> name supported by the backendNr   r   r   r   r   supported_languages   r0   zBaseBackend.supported_languagesc                 C   s   ||   v S )z6Returns True if `language` is supported by the backend)r3   r*   r   r   r   r)      s   z!BaseBackend.is_supported_language   text	separatorstripnjobsc                    s   t |tr	tddu rt |\}}|dkr# |d}n% jd  | t	j
|d fddtt|| D } |} ||S )	a  Returns the `text` phonemized for the given language

        Parameters
        ----------
        text: list of str
            The text to be phonemized. Each string in the list
            is considered as a separated line. Each line is considered as a text
            utterance. Any empty utterance will be ignored.

        separator: Separator
            string separators between phonemes, syllables
            and words, default to separator.default_separator. Syllable separator
            is considered only for the festival backend. Word separator is
            ignored by the 'espeak-mbrola' backend.

        strip: bool
            If True, don't output the last word and phone separators
            of a token, default to False.

        njobs : int
            The number of parallel jobs to launch. The input text is
            split in ``njobs`` parts, phonemized on parallel instances of the
            backend and the outputs are finally collapsed.

        Returns
        -------
        phonemized text: list of str
            The input ``text`` phonemized for the given ``language`` and ``backend``.

        Raises
        ------
        RuntimeError
            if something went wrong during the phonemization

        z;input text to phonemize() is str but it must be list of strNr4   r   zrunning %s on %s jobs)Zn_jobsc                 3   s.    | ]}t  j|d  |d V  qdS )r   r4   N)joblibZdelayed_phonemize_aux)r   chunkr'   r6   r7   r   r   r      s    

z(BaseBackend.phonemize.<locals>.<genexpr>)
isinstancer   r   r   _phonemize_preprocessr:   r   r    r   r9   ZParallelzipr   _flatten_phonemize_postprocess)r'   r5   r6   r7   r8   r   
phonemizedr   r<   r   	phonemize   s   
'
zBaseBackend.phonemizerB   c                 C   s   t tj|  S )zFlatten a list of lists into a single one

        From [[1, 2], [3], [4]] returns [1, 2, 3, 4]. This method is used to
        format the output as obtained using multiple jobs.

        )list	itertoolschain)rB   r   r   r   r@      s   zBaseBackend._flattenoffsetc                 C   r/   )a  The "concrete" phonemization method

        Must be implemented in child classes. `separator` and `strip`
        parameters are as given to the phonemize() method. `text` is as
        returned by _phonemize_preprocess(). `offset` is line number of the
        first line in `text` with respect to the original text (this is only
        usefull with running on chunks in multiple jobs. When using a single
        jobs the offset is 0).

        Nr   )r'   r5   rG   r6   r7   r   r   r   r:      r0   zBaseBackend._phonemize_auxc                 C   s"   | j r	| j|S | j|g fS )zPreprocess the text before phonemization

        Removes the punctuation (keep trace of punctuation marks for further
        restoration if required by the `preserve_punctuation` option).

        )r%   r&   Zpreserveremove)r'   r5   r   r   r   r>      s   z!BaseBackend._phonemize_preprocessc                 C   s   | j r| j||||S |S )z\Postprocess the raw phonemized output

        Restores the punctuation as needed.

        )r%   r&   Zrestore)r'   rB   r   r6   r7   r   r   r   rA      s   	z"BaseBackend._phonemize_postprocess)NFN)NFr4   )#__name__
__module____qualname____doc__r   r   r   r	   boolr   r(   classmethodr#   propertyr   r   staticmethodabcabstractmethodr   r   r"   r   r3   r)   r   r   intrC   r   r@   r:   r   r>   rA   r   r   r   r   r      sl    !




E	(*
r   )rL   rQ   rE   reloggingr   typingr   r   r   r   r   r   r	   r9   Zphonemizer.loggerr
   Zphonemizer.punctuationr   Zphonemizer.separatorr   r   Zphonemizer.utilsr   ABCr   r   r   r   r   <module>   s   $