o
    mi=                     @   s   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
 ddlmZ G dd dZdd Zd	d
 Zdd Zdd Zedd ZedkrLe  dS dS )zBCommand-line phonemizer tool, have a 'phonemizer --help' to get in    N)	phonemize	separatorversionloggerpunctuation)BACKENDSc                   @   s,   e Zd ZdZdd Zdd Zedd ZdS )	CatchExceptionsa  Decorator wrapping a function in a try/except block

    When an exception occurs, display a user friendly message on
    standard output before exiting with error code 1.

    The detected exceptions are ValueError, OSError, RuntimeError,
    AssertionError and KeyboardInterrupt.

    Parameters
    ----------
    function :
        The function to wrap in a try/except block

    c                 C   s
   || _ d S )N)function)selfr	    r   I/home/kim/smarthome/.venv/lib/python3.10/site-packages/phonemizer/main.py__init__*   s   
zCatchExceptions.__init__c                 C   sl   z|    W dS  tttttfy' } z| d| W Y d}~dS d}~w ty5   | d Y dS w )z9Executes the wrapped function and catch common exceptionszfatal error: {}Nzkeyboard interruption, exiting)	r	   IOError
ValueErrorOSErrorRuntimeErrorAssertionErrorexitformatKeyboardInterrupt)r
   errr   r   r   __call__-   s   zCatchExceptions.__call__c                 C   s"   t j|  d  t d dS )z0Write `msg` on stderr and exit with error code 1
   N)sysstderrwritestripr   )msgr   r   r   r   9   s   zCatchExceptions.exitN)__name__
__module____qualname____doc__r   r   staticmethodr   r   r   r   r   r      s    r   c                  C   s  t jt jddd} | jddddd |  }|jd	d
ddd |jddddd | jddtdddd | d}|jdtjdddd |jddtj	ddd |jdd d!dd"d#d$ |jd%dd&d | d'}|jd(d)d"d*g d+d,d- |jd.d/dd0d | d1}|jd2d3d4d5d6d7 | d8}|jd9d:d"t
jjd;d7 |jd<d=d"t
jjd>d7 |jd?d@d"t
jjdAd7 |jdBddCd | dD}ztdE  }W n ty   d*}Y nw |jdFd*tdGdH| dIdJ |jdKdd d!dLdMdN |jdOddPd |jdQdRg dSdTdU |jdVdWg dXdYdU | dZ}ztd[  }W n ty%   d*}Y nw |jd\d*td]d^| d_dJ | jd`dadb}|jdcdddd |jdetd"tj dfd |jdgddhd |  S )iz,Argument parser for the phonemization scripta  Multilingual text to phonemes converter

The 'phonemize' program allows simple phonemization of words and texts
in many language using four backends: espeak, espeak-mbrola, festival
and segments.

- espeak is a text-to-speech software supporting multiple languages
  and IPA (International Phonetic Alphabet) output. See
  http://espeak.sourceforge.net or
  https://github.com/espeak-ng/espeak-ng

- espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
  installed as well as additional mbrola voices. It does not support word or
  syllable tokenization. See
  https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md

- festival is also a text-to-speech software. Currently only American
  English is supported and festival uses a custom phoneset
  (http://www.festvox.org/bsv/c4711.html), but festival is the only
  backend supporting tokenization at the syllable
  level. See http://www.cstr.ed.ac.uk/projects/festival

- segments is a Unicode tokenizer that build a phonemization from a
  grapheme to phoneme mapping provided as a file by the user. See
  https://github.com/cldf/segments.

See the '--list-languages' option below for details on the languages
supported by each backend.

un  
Examples:

* Phonemize a US English text with espeak

   $ echo 'hello world' | phonemize -l en-us -b espeak
   həloʊ wɜːld

* Phonemize a US English text with festival

   $ echo 'hello world' | phonemize -l en-us -b festival
   hhaxlow werld

* Phonemize a Japanese text with segments

  $ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
  konnitʃiwa t͡sekai

* Add a separator between phones

  $ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
  hh-ax-l-ow w-er-l-d

* Phonemize some French text file using espeak

  $ phonemize -l fr-fr -b espeak text.txt -o phones.txt
        )formatter_classdescriptionepilogz-Vz	--version
store_truez"show version information and exit.)actionhelpz-vz	--verbosezEwrite all log messages to stderr (displays only warnings by default).z-qz--quietz.do not display any log message, even warnings.z-jz--njobsz<int>r   z0number of parallel jobs, default is %(default)s.)typemetavardefaultr)   zinput/outputinput?z<file>z?input text file to phonemize, if not specified read from stdin.)r,   nargsr+   r)   z-oz--outputz<output text file to write, if not specified write to stdout.)r,   r+   r)   z--prepend-textFTz<str>a:  prepend each line of the phonemized output text with its
        matching input text. If a string is specified as option value, use it
        as field separator, else use one of "|", "||", "|||", "||||" by
        selecting the first one that is not configured as a token separator
        (see -p/-s/-w options).)r,   constr/   r+   r)   z--preserve-empty-lineszUpreserve the empty lines in the phonemized output, default is
        to remove them.backendsz-bz	--backendN)espeakespeak-mbrolafestivalsegmentsztthe phonemization backend, must be 'espeak', 'espeak-mbrola',
        'festival' or 'segments'. Default is 'espeak'.)r+   r,   choicesr)   z-Lz--list-languageszllist available languages (and exit) for the specified backend,
        or for all backends if none selected.languagez-lz
--languagez
<str|file>zen-usz~the language code of the input text, use '--list-languages'
        for a list of supported languages. Default is %(default)s.)r+   r,   r)   ztoken separatorsz-pz--phone-separatorz*phone separator, default is "%(default)s".z-wz--word-separatorzVword separator, not valid for espeak-mbrola backend,
        default is "%(default)s".z-sz--syllable-separatorzsyllable separator, only valid for festival backend,
        this option has no effect if another backend is used.
        Default is "%(default)s".z--stripz0removes the end separators in phonemized tokens.zspecific to espeak backendr2   z--espeak-libraryz	<library>zthe path to the espeak shared library to use (*.so on Linux,
        *.dylib on Mac and *.dll on Windows, useful to overload the default
        espeak version installed on the system). Default to
        zc. This path can also be specified
        using the PHONEMIZER_ESPEAK_LIBRARY environment variable.)r,   r*   r+   r)   z--tiez<chr>u   when the option is set, use a tie character within multi-letter
        phoneme names, default to U+361 (as in d͡ʒ), 'z' means ZWJ character,
        only compatible with espeak>1.48 and incompatible with the
        -p/--phone-separator option)r/   r,   r0   r+   r)   z--with-stressu   when the option is set, the stresses on phonemes are present
        (stresses characters are ˈ'ˌ). By default stresses are removed.z--language-switch
keep-flags)r8   zremove-flagszremove-utterancea)  espeak can pronounce some words in another language (typically
        English) when phonemizing a text. This option setups the policy to use
        when such a language switch occurs. Three values are available:
        'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
        'keep-flags' policy keeps the language switching flags, for example
        (en) or (jp), in the output. The 'remove-flags' policy removes them and
        the 'remove-utterance' policy removes the whole line of text including
        a language switch.)r,   r6   r)   z--words-mismatchignore)r9   warnremovea  espeak can join two consecutive words or drop some words,
        yielding a word count mismatch between orthographic and phonemized
        text. This option setups the policy to use when such a words count
        mismatch occurs. Three values are available: 'ignore' (the default)
        which do nothing, 'warn' which issue a warning for each mismatched
        line, and 'remove' which remove the mismatched lines from the
        output.zspecific to festival backendr4   z--festival-executablez<executable>zthe path to the festival executable to use (useful to
        overload the default festival installed on the system). Default to
        zh. This path can also be specified using the
        PHONEMIZER_FESTIVAL_EXECUTABLE environment variable.zpunctuation processingz'not available for espeak-mbrola backend)r%   z--preserve-punctuationz[preserve the punctuation marks in the phonemized output,
        default is to remove them.z--punctuation-markszythe marks to consider during punctuation processing (either
        for removal or preservation). Default is %(default)s.z--punctuation-marks-is-regexzfinterpret the '--punctuation-marks' parameter as a regex.
        Default is to interpret as a string.)argparseArgumentParserRawDescriptionHelpFormatteradd_argumentadd_mutually_exclusive_groupintadd_argument_groupr   stdinstdoutr   Zdefault_separatorphonewordsyllabler   libraryr   str
executabler   PunctuationZdefault_marks
parse_args)parsergroupespeak_libraryfestival_executabler   r   r   rL   @   s4  =
	




rL   c              	   C   sP   | st  n| gD ]}td| dddd tt |   D   q	dS )z@Returns the available languages for the given `backend` as a strzsupported languages for z are:
r   c                 s   s$    | ]\}}d | d| V  qdS )	z	->	Nr   ).0kvr   r   r   	<genexpr>7  s   " z!list_languages.<locals>.<genexpr>N)r   keysprintjoinsortedZsupported_languagesitems)Zargs_backendbackendr   r   r   list_languages2  s   
r\   c                 C   s"   d}| rd}n|rd}t j|dS )zReturns a configured loggernormalverbosequiet)	verbosity)r   
get_logger)r^   r_   r`   r   r   r   ra   ;  s   ra   c                 C   s   t | trt| |ddS | S )z,If `stream` is a filename, open it as a fileutf8)encoding)
isinstancerI   open)streammoder   r   r   setup_streamE  s   
rh   c                     s  t  } | jrtd | j | jrtd | j | jr$tt  dS | jr0tt| j	 dS | j	p4d| _	t
| j| j}t| jd}|d|j t| jd}|d|j | j	dkrk|d	 tj| jddd
}ntj| j| j| jd
}|d| | jr|| j |d  nd | jrz|d| j t| j| _W n tjy   |  |  td| j w t |! | j"| j	|| j#| j| j$| j%| j| j&| j'| j(| j)| j*|d}|r r|+t,j-. fdd|D t,j-  dS |r|+t,j-.|t,j-  dS dS )z,Phonemize a text from command-line argumentsr2   r4   Nrzreading from %swzwriting to %sr3   z4using espeak-mbrola backend: ignoring word separator)rE   rG   rF   zseparator is %sz/prepend input text to output, separator is "%s"Fzpunctuation marks is regex %sz!can't compile regex pattern from )r7   r[   r   r   prepend_textpreserve_empty_linespreserve_punctuationpunctuation_markswith_stresstielanguage_switchwords_mismatchnjobsr   c                 3   s,    | ]}|d   d  d|d  V  qdS )r    r   Nr   )rR   lineinput_output_separatorr   r   rU     s
    
zmain.<locals>.<genexpr>)/rL   rO   r   Zset_libraryrP   Zset_executabler   rW   r\   r[   ra   r^   r_   rh   r-   debugnameoutputr   Z	SeparatorZphone_separatorZsyllable_separatorZword_separatorrk   rw   Zpunctuation_marks_is_regexrn   recompileerrorcloser   r   	readlinesr7   r   rl   rm   ro   rp   rq   rr   rs   r   oslineseprX   )argslogZstreaminZ	streamoutsepoutr   rv   r   mainM  s   

r   __main__)r"   r<   r   r   r{   Z
phonemizerr   r   r   r   r   Zphonemizer.backendr   r   rL   r\   ra   rh   r   r   r   r   r   r   <module>   s$   % s	

]
