o
    mi%                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
 d dlZd dlZd dlmZ ddlmZmZmZmZ ddlmZ ddlmZ ddlmZ G d	d
 d
ZdS )    N)AsyncGenerator)NDArray   )MAX_PHONEME_LENGTHSAMPLE_RATEEspeakConfigKoKoroConfig)log)	Tokenizer)trimc                   @   s  e Zd Z		d&dedededB deeB dB fddZe		d&dej	dededB deeB dB fd	d
Z
deeB dB defddZdedeej dedeeej ef fddZdedeej fddZdedee fddZ				d'dedeeej B dededededeeej ef fd d!Z				d'dedeeej B dededededeeeej ef df fd"d#Zdee fd$d%ZdS )(KokoroN
model_pathvoices_pathespeak_configvocab_configc           	   	   C   s   t dtjd dt  dt   t|||| _| j  dg}tj	
d}|r1t }td}|r;|g}t d|  tj||d	| _t|| _| |}t||d
| _d S )Nzkoko-onnx version zkokoro-onnxz on  ZCPUExecutionProviderzonnxruntime-gpuZONNX_PROVIDERzProviders: )	providersvocab)r	   debug	importlibmetadataversionplatformr   configvalidateutil	find_specrtZget_available_providersosgetenvInferenceSessionsessnploadvoices_load_vocabr
   	tokenizer)	selfr   r   r   r   r   Zgpu_enabledZenv_providerr    r)   N/home/kim/smarthome/.venv/lib/python3.10/site-packages/kokoro_onnx/__init__.py__init__   s"   $


zKokoro.__init__sessionc                 C   sR   |  | }||_t|j|||_|j  t||_|	|}t
||d|_|S )Nr   )__new__r"   r   Z_model_pathr   r   r#   r$   r%   r&   r
   r'   )clsr,   r   r   r   instancer   r)   r)   r*   from_session9   s   


zKokoro.from_sessionreturnc                 C   s^   t |tr$t|dd}t|}|d W  d   S 1 sw   Y  t |tr-|d S i S )a  Load vocabulary from config file or dictionary.

        Args:
            vocab_config: Path to vocab config file or dictionary containing vocab.

        Returns:
            Loaded vocabulary dictionary or empty dictionary if no config provided.
        zutf-8)encodingr   N)
isinstancestropenjsonr$   dict)r(   r   fpr   r)   r)   r*   r&   K   s   


 
zKokoro._load_vocabphonemesvoicespeedc                 C   sT  t d|  t|tkrt dt d |d t }t }tj| j	|tj
d}t|tks;J dt d|t| }dg|dg}dd	d
 | j D v ri|tj|tjdtj|gtjdd}n||tjdtjd| d}| jd |d }t|t }t | }	|	| }
t d|ddt| d|	dd|
d |tfS )Nz
Phonemes: z%Phonemes are too long, truncating to 	 phonemes)ZdtypezContext length is z7, but leave room for the pad token 0 at the start & endr   	input_idsc                 S   s   g | ]}|j qS r)   )name).0ir)   r)   r*   
<listcomp>n   s    z(Kokoro._create_audio.<locals>.<listcomp>)r=   styler;   r   )tokensrB   r;   zCreated audio in length of .2fzs for z phonemes in zs (RTF: )r	   r   lenr   warningtimer#   arrayr'   tokenizeZint64r"   Z
get_inputsfloat32Zint32Zonesrunr   )r(   r9   r:   r;   start_trC   ZinputsaudioZaudio_durationZcreate_durationZrtfr)   r)   r*   _create_audio]   s<   

$zKokoro._create_audior>   c                 C   s
   | j | S N)r%   )r(   r>   r)   r)   r*   get_voice_style   s   
zKokoro.get_voice_stylec                 C   s   t d|}g }d}|D ]1}| }|r=t|t| d tkr*||  |}q|dv r3||7 }q|r9|d7 }||7 }q|rG||  |S )zr
        Split phonemes into batches of MAX_PHONEME_LENGTH
        Prefer splitting at punctuation marks.
        z	([.,!?;]) r   z.,!?;r   )resplitstriprE   r   append)r(   r9   wordsbatched_phoenemesZcurrent_batchpartr)   r)   r*   _split_phonemes   s$   
zKokoro._split_phonemes      ?en-usFTtextlangis_phonemesr   c                 C   s   |dkr|dksJ dt |tr#|| jv sJ d| d| |}t }|r,|}n| j||}| |}	g }
t	dt
|	 dt
| d |	D ]}| |||\}}|r_t|\}}|
| qLt|
}
t	d	t | d
d |
tfS )zM
        Create audio from text using the specified voice and speed.
              ?       @#Speed should be between 0.5 and 2.0Voice  not found in available voiceszCreating audio for z batches for r<   zCreated audio in rD   s)r3   r4   r%   rP   rG   r'   	phonemizerY   r	   r   rE   rN   
trim_audiorU   r#   Zconcatenater   )r(   r\   r:   r;   r]   r^   r   rL   r9   rW   rM   
audio_part_r)   r)   r*   create   s*   



zKokoro.createc           
        s   dkr	dksJ dt tr$jv sJ d d|r)|}nj||}| t  fdd}t	|  	 
 I d	H }	|	d	u rXd	S |	V  qK)
zp
        Stream audio creation asynchronously in the background, yielding chunks as they are processed.
        r_   r`   ra   rb   rc   c                     s   t  D ]2\} }t }|dj|I dH \}}r$t|\}}td|  d ||fI dH  qdI dH  dS )z*Process phoneme batches in the background.NzProcessed chunk z
 of stream)		enumerateasyncioZget_event_loopZrun_in_executorrN   rf   r	   r   put)r@   r9   looprg   Zsample_raterh   Zbatched_phonemesqueuer(   r;   r   r:   r)   r*   process_batches   s   z-Kokoro.create_stream.<locals>.process_batchesTN)r3   r4   r%   rP   r'   re   rY   rk   QueueZcreate_taskget)
r(   r\   r:   r;   r]   r^   r   r9   rp   chunkr)   rn   r*   create_stream   s$   


zKokoro.create_streamc                 C   s   t t| j S rO   )listsortedr%   keys)r(   r)   r)   r*   
get_voices  s   zKokoro.get_voices)NN)rZ   r[   FT)__name__
__module____qualname__r4   r   r7   r+   classmethodr   r!   r0   r&   r   r#   rJ   floattupleintrN   rP   ru   rY   boolri   r   rt   rx   r)   r)   r)   r*   r      s    

"

(&
-
3r   )rk   r   importlib.metadataimportlib.utilr6   r   r   rR   rG   collections.abcr   numpyr#   Zonnxruntimer   Znumpy.typingr   r   r   r   r   r   r	   r'   r
   r   rf   r   r)   r)   r)   r*   <module>   s$    