o
    mi                     @   sx   d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZ G dd	 d	eZdS )
z#Segments backend for the phonemizer    N)Logger)OptionalDictListUnionPattern)BaseBackend)	Separator)get_package_resourceversion_as_tuplec                       s   e Zd ZdZ			ddedeeeef  dedee	 f fdd	Z
d
d Zedd Zedd Zedd Zedd ZededefddZededejfddZdee dedededee f
ddZ  ZS ) SegmentsBackendzSegments backends for the phonemizer

    The phonemize method will raise a ValueError when parsing an
    unknown morpheme.

    NFlanguagepunctuation_markspreserve_punctuationloggerc                    s   d | _ t j||||d d S )N)r   r   r   )
_tokenizersuper__init__)selfr   r   r   r   	__class__ U/home/kim/smarthome/.venv/lib/python3.10/site-packages/phonemizer/backend/segments.pyr   $   s   
zSegmentsBackend.__init__c                 C   s$   |  |}tj|d| _t|jS )N)profile)_load_g2p_profilesegments	Tokenizerr   pathlibPathstem)r   r   r   r   r   r   _init_language0   s   
zSegmentsBackend._init_languagec                   C      dS )Nr   r   r   r   r   r   name8      zSegmentsBackend.namec                 C   s
   t tjS )N)r   r   __version__clsr   r   r   version<   s   
zSegmentsBackend.versionc                 C   r!   )NTr   r%   r   r   r   is_available@   r#   zSegmentsBackend.is_availablec                  C   s   t d} dd |  D S )a  Returns a dict of language: file supported by the segments backend

        The supported languages have a grapheme to phoneme conversion file
        bundled with phonemizer. Users can also use their own file as
        parameter of the phonemize() function.

        r   c                 S   s   i | ]}|j d kr|j|qS )z.g2p)suffixr   ).0g2pr   r   r   
<dictcomp>Q   s    z7SegmentsBackend.supported_languages.<locals>.<dictcomp>)r
   iterdir)	directoryr   r   r   supported_languagesD   s   
z#SegmentsBackend.supported_languagesreturnc                 C   s@   t | rz| | W dS  ty   Y dS w ||  v S )NTF)r   r   is_filer   RuntimeErrorr/   )r&   r   r   r   r   is_supported_languageT   s   
z%SegmentsBackend.is_supported_languagec                 C   s   t | sz|  | }W n ty   td| dw i }t|ddd2}t|D ]%\}}| 	 }t
|dksJtd|d t
|||d ||d	 < q-W d   n1 s]w   Y  tjd
d | D  S )z,Returns a segments profile from a `language`z$grapheme to phoneme file not found: Nrutf8)encoding   zBgrapheme to phoneme file, line {} must have 2 rows but have {}: {}   r   c                 S   s   g | ]	\}}||d qS ))ZGraphememappingr   )r*   kvr   r   r   
<listcomp>x   s    z5SegmentsBackend._load_g2p_profile.<locals>.<listcomp>)r   r   r1   r/   KeyErrorr2   open	enumeratestripsplitlenformatr   Profileitems)r&   r   r+   Zflangnumlineeltsr   r   r   r   ^   s6   
z!SegmentsBackend._load_g2p_profiletextoffset	separatorr@   c                    sl    fdd|D }|sdd |D }dd |D }dd |D }fdd|D }fdd|D }t |S )Nc                 3   s     | ]} j |d ddV  qdS )r9   strict)columnerrorsN)r   )r*   rG   )r   r   r   	<genexpr>}   s
    
z1SegmentsBackend._phonemize_aux.<locals>.<genexpr>c                 s   s    | ]}|d  V  qdS ) # Nr   r*   pr   r   r   rO      s    c                 s       | ]	}| d dV  qdS )rP   z  # NreplacerQ   r   r   r   rO          c                 s   rS   )rP   #NrT   rQ   r   r   r   rO      rV   c                 3       | ]
}| d  jV  qdS ) N)rU   phonerQ   rK   r   r   rO          c                 3   rX   )rW   N)rU   wordrQ   r[   r   r   rO      r\   )list)r   rI   rJ   rK   r@   Z
phonemizedr   )r   rK   r   _phonemize_aux{   s   
zSegmentsBackend._phonemize_aux)NFN)__name__
__module____qualname____doc__strr   r   r   boolr   r   r    staticmethodr"   classmethodr'   r(   r/   r3   r   rD   r   r   intr	   r_   __classcell__r   r   r   r   r      s4    



	.r   )rc   r   loggingr   typingr   r   r   r   r   r   Zphonemizer.backend.baser   Zphonemizer.separatorr	   Zphonemizer.utilsr
   r   r   r   r   r   r   <module>   s   