
    riMB                        % S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKJ	r	  SSK
Jr  SSKJrJrJrJrJrJr  SSKrSSKrSSKJrJrJr  SSKJrJrJr  SS	KJr  SS
K J!r!J"r"  SSK#J$r$  Sq%\\"   \&S'   \RN                  " 5       r(\" 5       r)Sr*\RV                  " S5      r,\RZ                  " \.5      r/\	 " S S5      5       r0\	 " S S5      5       r1\	 " S S5      5       r2g)z&Phonemization and synthesis for Piper.    N)	dataclass)Path)AnyIterableOptionalSequenceTupleUnion   )PhonemeTypePiperConfigSynthesisConfig)BOSEOSPADphonemes_to_ids)ESPEAK_DATA_DIREspeakPhonemizer)TashkeelDiacritizer_ESPEAK_PHONEMIZERg    @z(\[\[.*?\]\])c                   :    \ rS rSr% \\S'   \\   \S'   \\S'   Srg)PhonemeAlignment!   phonemephoneme_idsnum_samples N)	__name__
__module____qualname____firstlineno__str__annotations__r   int__static_attributes__r       Q/home/kim/smarthome/piper_models/venv/lib/python3.13/site-packages/piper/voice.pyr   r   !   s    L#r'   r   c                   b   \ rS rSr% Sr\\S'    \\S'    \\S'    \R                  \S'    \	\
   \S'    \	\   \S'    S	r\\R                     \S
'    S	r\\	\      \S'    S	r\\R                     \S'   S	r\\   \S'   S	r\\	\      \S'   \S\R                  4S j5       r\S\4S j5       rSrg	)
AudioChunk(   zChunk of raw audio.sample_ratesample_widthsample_channelsaudio_float_arrayphonemesr   Nphoneme_id_samplesphoneme_alignments_audio_int16_array_audio_int16_bytes_phoneme_alignmentsreturnc                     U R                   cT  [        R                  " U R                  [        -  [        * [        5      R                  [        R                  5      U l         U R                   $ )zO
Get audio as an int16 numpy array.

:return: Audio data as int16 numpy array.
)r3   npclipr/   _MAX_WAV_VALUEastypeint16selfs    r(   audio_int16_arrayAudioChunk.audio_int16_arrayM   sT     ""*&(gg&&7..'fRXX # &&&r'   c                 6    U R                   R                  5       $ )zT
Get audio as 16-bit PCM bytes.

:return: Audio data as signed 16-bit sample bytes.
)r?   tobytesr=   s    r(   audio_int16_bytesAudioChunk.audio_int16_bytes[   s     %%--//r'   )r3   )r   r    r!   r"   __doc__r%   r$   r8   ndarraylistr#   r1   r   r2   r   r3   r4   bytesr5   propertyr?   rC   r&   r   r'   r(   r*   r*   (   s    )*.zz!53i2c5/3,3
 <@&6!78?8 04,3*..<@$'7"89@'2:: ' ' 05 0 0r'   r*   c                   f   \ rS rSr% Sr\R                  \S'    \\S'    \	r
\\S'    \R                  " 5       r\\S'    Sr\\S'   S	r\\   \S
'   Sr\\   \S'   \S	S\	S	4S\\\4   S\\\\4      S\S\\\4   S\\\\4      SS 4S jj5       rS\S\\\      4S jrS\\   S\\   4S jr  S S\S\\   S\S\\   4S jjr    S!S\S\!RD                  S\\   S\S\S\\\#      4S jjr$  S S\\   S\\   S\S\\%RL                  \'\%RL                  \\%RL                     4   4   4S jjr(Sr)g	)"
PiperVoicee   zA voice for Piper.sessionconfigespeak_data_dirdownload_dirTuse_tashkeelNtashkeel_diacritizierg?taskeen_thresholdF
model_pathconfig_pathuse_cudar6   c           	         Uc  U  S3n[         R                  SU5        [        USSS9 n[        R                  " U5      nSSS5        U(       a  SSS	04/n[         R                  S
5        OS/nUc  [
        R                  " 5       n[        [        R                  " W5      [        R                  " [        U 5      [        R                  " 5       US9[        U5      [        U5      S9$ ! , (       d  f       N= f)a  
Load an ONNX model and config.

:param model_path: Path to ONNX voice model.
:param config_path: Path to JSON voice config (defaults to model_path + ".json").
:param use_cuda: True if CUDA (GPU) should be used instead of CPU.
:param espeak_data_dir: Path to espeak-ng data dir (defaults to internal data).
:param download_dir: Path to download resources (defaults to current directory).
:return: Voice object.
Nz.jsonzGuessing voice config path: %srzutf-8)encodingCUDAExecutionProvidercudnn_conv_algo_search	HEURISTICz
Using CUDACPUExecutionProvider)sess_options	providers)rN   rM   rO   rP   )_LOGGERdebugopenjsonloadr   cwdrK   r   	from_dictonnxruntimeInferenceSessionr#   SessionOptions)rT   rU   rV   rO   rP   config_fileconfig_dictr_   s           r(   rd   PiperVoice.loadz   s    $ 'L.KMM:KH+sW5))K0K 6  ,-{;I MM,'/0I88:L((500J(779#
 !1l+	
 		
% 65s   C**
C8textc                 N   U R                   R                  [        R                  :X  a!  [	        [
        R                  " SU5      5      /$ U R                   R                  [        R                  :X  aI  SSKJ	n  [        U SS5      nUc"  U" U R                  S-  5      n[        U SU5        UR                  U5      $ U R                   R                  [        R                  :w  a"  [        SU R                   R                   35      e/ n[         R#                  U5      nSn[%        U5       GH  u  pxUR'                  S	5      (       a  S
nU(       d  UR)                  / 5        US:  a/  XWS-
     R+                  S5      (       a  US   R)                  S5        US   R-                  USS R/                  5       5        U[1        U5      S-
  :  a/  XWS-      R'                  S5      (       a  US   R)                  S5        M  U R                   R2                  S:X  aF  U R4                  (       a5  U R6                  c  [9        5       U l        U R7                  XR:                  S9n[<           [>        c  [A        U RB                  5      q[>        R                  U R                   R2                  U5      n	U(       a#  U	(       a  US   R-                  U	S   5        U	SS n	UR-                  U	5        SSS5        SnGM     U(       a  US   (       d  URE                  5         U$ ! , (       d  f       N8= f)zu
Text to phonemes grouped by sentence.

:param text: Text to phonemize.
:return: List of phonemes for each sentence.
NFDr   )ChinesePhonemizer_chinese_phonemizerNg2pWzUnexpected phoneme type: Fz[[Tr       ar)rS   )#rN   phoneme_typer   TEXTrG   unicodedata	normalizePINYINphonemize_chineserp   getattrrP   setattr	phonemizeESPEAK
ValueError_PHONEME_BLOCK_PATTERNsplit	enumerate
startswithappendendswithextendstriplenespeak_voicerQ   rR   r   rS   _ESPEAK_PHONEMIZER_LOCKr   r   rO   pop)
r>   rm   rp   
phonemizerr0   
text_partsprev_raw_phonemesi	text_parttext_part_phonemess
             r(   r   PiperVoice.phonemize   s    ;;##{'7'77..ud;<==;;##{'9'99< !'<dCJ!.t/@/@6/IJ
3Z@''--;;##{'9'9989Q9Q8RSTT$&+11$7
!%j1LA##D))$(!  OOB'E
q5 1 : :3 ? ?RL'',##IaO$9$9$;<ZA--J1u4E4P4PQT4U4URL'', ((D0d6G6G--51D1FD. 661G1G 7 	 )%-)9$:N:N)O&%7%A%AKK,,i&" %);RL''(:1(=>);AB)?& 23 ) !&W 2Z Xb\LLN+ )(s   "BL
L$	r0   c                     U R                   R                  [        R                  :X  a"  SSKJn  U" XR                   R                  5      $ [        XR                   R                  5      $ )zT
Phonemes to ids.

:param phonemes: List of phonemes.
:return: List of phoneme ids.
r   r   )rN   rx   r   r|   r}   r   phoneme_id_map)r>   r0   chinese_phonemes_to_idss      r(   r   PiperVoice.phonemes_to_ids   sI     ;;##{'9'99U*8[[5O5OPPx)C)CDDr'   
syn_configinclude_alignmentsc              #   J  #    Uc  [         nU R                  U5      n[        R                  SX5        U GHh  nU(       d  M  U R	                  U5      nSnU R                  XbUS9n[        U[        5      (       a  Uu  pOUn	UR                  (       aK  [        R                  " [        R                  " U	5      5      n
U
S:  a  [        R                  " U	5      n	OX-  n	UR                  S:w  a  XR                  -  n	[        R                  " U	SS5      R                  [        R                   5      n	SnUGbA  [#        U5      [#        U5      :X  Ga(  U R$                  R&                  R)                  [*        / 5      nSn/ nSn[,        R.                  " [0        /U[2        /5       H  nU R$                  R&                  R)                  U/ 5      nU[2        :w  a!  [5        [,        R.                  " UU5      5      nOUnUnU H'  nU[#        U5      :  a  S	n  OUXm   :w  a  S	n  O	US
-  nM)     U(       a    O*UR7                  [9        UU[;        UUU 5      S95        M     U(       a  Sn[        R                  S5        [=        U R$                  R>                  SS
U	UUUUS9v   GMk     g7f)z
Synthesize one audio chunk per sentence from from text.

:param text: Text to synthesize.
:param syn_config: Synthesis configuration.
:param include_alignments: If True and the model supports it, include phoneme/audio alignments.
Nztext=%s, phonemes=%s)r   g:0yE>g      ?g      r   FTr   )r   r   r   zPhoneme alignment failedru   )r,   r-   r.   r/   r0   r   r1   r2   ) _DEFAULT_SYNTHESIS_CONFIGr   r`   ra   r   phoneme_ids_to_audio
isinstancetuplenormalize_audior8   maxabs
zeros_likevolumer9   r;   float32r   rN   r   getr   	itertoolschainr   r   rG   r   r   sumr*   r,   )r>   rm   r   r   sentence_phonemesr0   r   r1   audio_resultaudiomax_valr2   pad_idsphoneme_id_idxalignment_failedr   expected_idsids_to_checkstart_phoneme_id_idx
phoneme_ids                       r(   
synthesizePiperVoice.synthesize  sw     2J NN40,dF)H..x8K7;44<N 5 L ,..,8)) %))&&/T>MM%0E!OE  C' 1 11GGE4-44RZZ@ECG".&'3{+;;
 ++4488bA!"%'"#( (uhFG#';;#=#=#A#A'2#NL #~'+IOOL',R'S'3+9(&2
)S-==/3,!%)DD/3,!&!+ '3 (&--($+(4(+ 23G W)5  GH $)-&MM"<= KK33 !"'!'#5#5	 	c *s   J!J#wav_fileset_wav_formatc                    / nSnU R                  XUS9 H  nU(       aZ  U(       aQ  UR                  UR                  5        UR                  UR                  5        UR                  UR                  5        SnUR                  UR                  5        U(       d  M  UR                  (       d  M  UR                  UR                  5        M     U(       a  U$ g)a  
Synthesize and write WAV audio from text.

:param text: Text to synthesize.
:param wav_file: WAV file writer.
:param syn_config: Synthesis configuration.
:param set_wav_format: True if the WAV format should be set automatically.
:param include_alignments: If True and the model supports it, return phoneme/audio alignments.

:return: Phoneme/audio alignments if include_alignments is True, otherwise None.
T)r   r   FN)r   setframerater,   setsampwidthr-   setnchannelsr.   writeframesrC   r2   r   )	r>   rm   r   r   r   r   
alignmentsfirst_chunkaudio_chunks	            r(   synthesize_wavPiperVoice.synthesize_wav|  s    & .0
??<N + 
K !))+*A*AB))+*B*BC))+*E*EF#  !>!>?!!k&D&D&D!!+"@"@A
" r'   r   c                 (   Uc  [         nUR                  nUR                  nUR                  nUR                  nUc  U R
                  R                  nUc  U R
                  R                  nUc  U R
                  R                  n[        R                  " [        R                  " U[        R                  S9S5      n[        R                  " UR                  S   /[        R                  S9n	[        R                  " XeU/[        R                  S9n
UU	U
S.nU R
                  R                  S::  a  SnU R
                  R                  S:  a  Uc  SnUb(  [        R                  " U/[        R                  S9nXS'   U R                  R                  SU5      nUS   R                  5       nU(       d  U$ [!        U5      S:X  a  US4$ US   R                  5       U R
                  R"                  -  R%                  [        R                  5      nX4$ )a  
Synthesize raw audio from phoneme ids.

:param phoneme_ids: List of phoneme ids.
:param syn_config: Synthesis configuration.
:param include_alignments: Return samples per phoneme id if True.
:return: Audio float numpy array from voice model (unnormalized, in range [-1, 1]).

If include_alignments is True and the voice model supports it, the return
value will be a tuple instead with (audio, phoneme_id_samples) where
phoneme_id_samples contains the number of audio samples per phoneme id.
N)dtyper   r   )inputinput_lengthsscalessid)r   
speaker_idlength_scalenoise_scalenoise_w_scalerN   r8   expand_dimsarrayint64shaper   num_speakersrM   runsqueezer   
hop_lengthr;   )r>   r   r   r   r   r   r   r   phoneme_ids_arrayphoneme_ids_lengthsr   argsr   resultr   r1   s                   r(   r   PiperVoice.phoneme_ids_to_audio  s   $ 2J**
!.. ,,"00;;33L++11K  KK55MNN288Krxx+PRST hh(9(?(?(B'C288T6**
 '0
 ;;##q(JKK$$q(z/AJ!((J<rxx8CK !!
 q	!!#!Lv;!$; %Qi//1DKK4J4JJRRHH
 ((r'   )rR   )NF)NTF)*r   r    r!   r"   rE   rg   rh   r$   r   r   rO   r   re   rP   rQ   boolrR   r   r   rS   floatstaticmethodr
   r#   rd   rG   r   r%   r   r   r   r*   r   wave
Wave_writer   r   r8   rF   r	   r   r&   r   r'   r(   rK   rK   e   s;   )))$+OT++L$#% L$;?8$78?),x, 37,;370
#t)$0
eCI./0
 0
 sDy)	0

 uS$Y/00
 
0
 0
dOc Od49o ObES	 Ed3i E$ 15#(	mm _-m !	m
 
*	mf 15##()) //) _-	)
 ) !) 
$'(	))\ 15#(	M)#YM) _-M) !	M)
 
rzz5Xbjj-A!ABB	CM) M)r'   rK   )3rE   r   rc   loggingre	threadingrz   r   dataclassesr   pathlibr   typingr   r   r   r   r	   r
   numpyr8   rg   rN   r   r   r   constr   r   r   r   r   phonemize_espeakr   r   tashkeelr   r   r$   Lockr   r   r:   compiler   	getLoggerr   r`   r   r*   rK   r   r'   r(   <module>r      s    ,    	    !  B B   = =     ( ? )15 H-. 5#..* +- $45 


H
%    90 90 90x N) N) N)r'   