
    ri #              	          % S r SSKrSSKrSSKrSSKJrJr  SSKJr  SSK	J
r
Jr  SSKJr  SSKJr  SSKJr  S	S
KJrJrJr  S	SKJr  \R0                  " \5      r/ SQr0 \\\   _\\\   _\\\   _SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS /_S!S"/_S#S$/_S%S&/_S'S(/_0 S)S*/_S+S,/_S-S./_S/S0/_S1S2/_S3S4/_S5S6/_S7S8/_S9S:/_S;S</_S=S>/_S?S@/_SASB/_SCSD/_SESF/_SGSH/_SISJ/_E0 SKSL/_SMSN/_SOSP/_SQSR/_SSST/_SUSV/_SWSX/_SYSZ/_S[S\/_S]S^/_S_S`/_SaSb/_ScSd/_SeSf/_SgSh/_SiSj/_SkSl/_E0 SmSn/_SoSp/_SqSr/_SsSt/_SuSv/_SwSx/_SySz/_S{S|/_S}S~/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_E0 SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_SS/_Er\\\\    4   \!S'   1 Skr"Sr#\RH                  " S5      r%\RH                  " S5      r& " S S5      r' SS\\   S\
\\\\    4      S\\    4S jjr(S\S\4S jr)S\4S jr*S\\\4   SS4S jr+g)zPinyin-based phonemization for Chinese using g2pW.

Partially written by ChatGPT (December 2025).

This code is Apache 2.0 licensed.
    N)MappingSequence)Path)OptionalUnion)urlopen)G2PWConverter)
RbnfEngine   )BOSEOSPAD)DEFAULT_PHONEME_ID_MAP)zhchshbpmfdtnlgkhjqxrzcsyw   Ø   r      r      r      r      r      r   	   r   
   r      r      r      r      r      r      r       r      r      r      r!      r"      r#      r$      r%      r&      a   o   e   ai   ei   ao    ou!   an"   en#   ang$   eng%   ong&   i'   ia(   ie)   iao*   iu+   ian,   in-   iang.   ing/   iong0   u1   ua2   uo3   uai4   ui5   uan6   un7   uang8   ueng9   v:   ve;   van<   vn=   er>   ue?   1@   2A   3B   4C   5D      。E   .   ？F   ?   ！G   !   —H      …   、   ，,   ：:   ；; PHONEME_TO_ID>   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   zthttps://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/zh/zh_CN/_resources/g2pw.tar.gz?download=trueu.   (?P<sign>[-−])?(?P<num>\d+)\s*(?:°\s*C|℃)u^   (?P<num>-?\d+(?:\.\d+)?|[零〇一二三四五六七八九十百千万亿两点]+)\s*(?:%|％)c                   v    \ rS rSrSrS\\\4   SS4S jrS\S\	\	\      4S jr
S\S\4S	 jrS\S\4S
 jrSrg)ChinesePhonemizer   z"Phonemize Chinese text using g2pW.	model_dirreturnNc                     [        U5        [        [        U5      SSS9U l        [        R
                  " S5      U l        g)zInitialize phonemizer.pinyinT)r   styleenable_non_tradional_chineser   N)download_modelr	   strg2pr
   for_languagenumber_engine)selfr   s     ]/home/kim/smarthome/piper_models/venv/lib/python3.13/site-packages/piper/phonemize_chinese.py__init__ChinesePhonemizer.__init__   s9     	y! )nHSW
 (44T:    textc                 
   SSK Jn  [        R                  " SSU5      n/ nU" U/5       H  nU R	                  U5      nU R                  U5      S   n/ n[        XT5       H  u  pxUc  U[        ;   a  UR                  U5        M%  [        U5      n[        U5      u  pnU
(       d  UR                  U5        MX  U	(       d  Sn	XU4 H"  nU(       d   U5       eUR                  U5        M$     M     UR                  U5        M     U$ )z%Turn text into phonemes per sentence.r   )stream_to_sentencesu	   [“”"] r'   )sentence_streamr   resub_numbers_to_wordsr   zipr   append_normalize_g2pw_syllable_split_initial_final_tone)r   r   r   all_phonemessentencesyllssentence_phonemessylsyl_charini_pfin_ptonesyms                r   	phonemizeChinesePhonemizer.phonemize   s    7vvk2t,(*+TF3H--h7HHHX&q)E "!$U!5;=0)00:.s3%>s%C"d%,,S1 E!$/COO3%,,S1 0% "6,  125 48 r   c                   ^  S[         R                  S[        4U 4S jjn[        R	                  X!5      nS[         R                  S[        4U 4S jjn[
        R	                  X15      n[         R                  " SU 4S jU5      $ )Nr   r   c                    > U R                  S5      nU R                  S5      nTR                  U5      nU(       a  SU S3$ U S3$ )Nsignnumu   零下u   度)group
_zh_number)r   r   num_str	num_wordsr   s       r   replace_temp9ChinesePhonemizer._numbers_to_words.<locals>.replace_temp  sJ    776?DggenG0I	{#..[$$r   c                    > U R                  S5      n[        R                  " SU5      (       a  TR                  U5      nOUnSU 3$ )Nr   -?\d+(?:\.\d+)?u	   百分之)r   r   	fullmatchr   )r   r   r   r   s      r   replace_percent<ChinesePhonemizer._numbers_to_words.<locals>.replace_percent  sB    ggenG||.88 OOG4	#	yk**r   r   c                 D   > TR                  U R                  S5      5      $ )Nr   )r   r   )r   r   s    r   <lambda>5ChinesePhonemizer._numbers_to_words.<locals>.<lambda>   s    dooaggaj1r   )r   Matchr   TEMP_PATTERNr   PERCENT_PATTERN)r   r   r   r   s   `   r   r   #ChinesePhonemizer._numbers_to_words   so    
	%BHH 
	% 
	% 3	+rxx 	+C 	+ ""?9vv1
 	
r   c                 L    U R                   R                  U5      R                  $ N)r   format_numberr   )r   r   s     r   r   ChinesePhonemizer._zh_number$  s    !!//5:::r   )r   r   )__name__
__module____qualname____firstlineno____doc__r   r   r   r   listr   r   r   __static_attributes__ r   r   r   r      sd    ,	;%T	"2 	;t 	;$c $d49o $L#
c #
c #
J;s ;s ;r   r   phonemesid_mapr   c                 >   U(       d  [         n/ nUR                  U[           5        U  HW  nX1;  a  [        R	                  SU5        M   UR                  X   5        U[
        ;   d  M?  UR                  U[           5        MY     UR                  U[           5        U$ )zGet phoneme ids for phonemes.

Padding is done after a group of phonemes representing one pinyin group
instead of between each phoneme.
zMissing phoneme from id map: %s)r   extendr   _LOGGERwarningGROUP_END_PHONEMESr   r   )r   r   idsphonemes       r   phonemes_to_idsr   (  s     CJJvc{ OO=wG

6?# ((JJvc{#  JJvc{Jr   r   c                     [         R                  " SU 5      nU(       d  U $ UR                  S5      UR                  S5      p2UR                  SS5      R                  SS5      nX#-   $ )u   
- Keep only syllables like [a-züv:]+[1-5]
- Convert g2pW's 'u:' and 'ü' -> 'v' (ü-family)
  nu:3   -> nv3        (n + v)
  lu:e4  -> lve4       (l + ve)
  ju:an3 -> jvan3      (j + van)
  ju:n3  -> jvn3       (j + vn)
u   ^([a-züv:]+?)([1-5])$r      zu:r~      ü)r   matchr   replace)r   r   baser   s       r   r   r   G  s[     	*C0A
QWWQZ$ <<c"**45D;r   c                 
   [         R                  " SU 5      nU(       d  gUR                  S5      UR                  S5      p2Sn[         H  nUR	                  U5      (       d  M  Un  O   U(       a  U[        U5      S OUnXFU4$ )z9
'hang2' -> ('h', 'ang', '2')
'ai3'   -> ('', 'ai', '3')
u   ^([a-zvü]+?)([1-5])$)r   r   Nr   r  r   N)r   r  r   PINYIN_INITIALS
startswithlen)r   r   r  r   inicandfins          r   r   r   \  sx    
 	)3/AQWWQZ$
C??4  C  
 "$s3xz
tCT>r   r   c                    [        U 5      n U S-  nUR                  5       (       a  [        R                  SU5        g[        R	                  S[
        U 5        U R                  SSS9  [        [
        5       n[        R                  " USS9 nUR                  U S	9  SSS5        SSS5        g! , (       d  f       N= f! , (       d  f       g= f)
z Ensure g2pW model is downloaded.z	g2pw.onnxzFound g2pW model at %sNz(Downloading g2pW model from '%s' to '%s'T)parentsexist_okzr|gz)fileobjmode)path)r   existsr   debuginfoG2PW_URLmkdirr   tarfileopen
extractall)r   
model_pathresponsetars       r   r   r   q  s    YI[(J.
;LL;XyQOOD4O0		h\\(8CNN	N* 9 
	88 
	s$   7B?B.B?.
B<	8B??
Cr   ),r   loggingr   r  collections.abcr   r   pathlibr   typingr   r   urllib.requestr   g2pwr	   unicode_rbnfr
   constr   r   r   phoneme_idsr   	getLoggerr   r   r  r   dictr   r   int__annotations__r   r  compiler   r   r   r   r   r   r   r   r   r   <module>r,     s    	  -  " "  #     /


H
%4k'		$k'		$k' 		$k' 	1#k' !k' !k' !k' !k' !k' !k' "k' "k'  "!k'" "#k'$ "%k'& "'k'( ")k'* "+k', 	2$-k'. 	2$/k'0 	2$1k'2 "3k'4 "5k'6 "7k'8 "9k': ";k'< "=k'D "Ek'F "Gk'H "Ik'J 	2$Kk'L 	2$Mk'N 	2$Ok'P 	2$Qk'R 	2$Sk'T 	2$Uk'V 
B4Wk'X 
B4Yk'Z 
B4[k'^ "_k'` 	2$ak'b 	2$ck'd 
B4ek'f 	2$gk'h 
B4ik'j 	2$kk'l RDmk'n 
B4ok'p RDqk't "uk'v 	2$wk'x 	2$yk'z 
B4{k'| 	2$}k'~ 
B4k'@ 	2$Ak'B RDCk'D RDEk'H "Ik'J 	2$Kk'L 
B4Mk'N 	2$Ok'R 	2$Sk'V 	2$Wk'^ "_k'` "ak'b "ck'd "ek'f "gk'n 
B4ok'p "qk'r 
B4sk't "uk'v 
B4wk'x "yk'@ 
B4Ak'B 
B4Ck'D 
B4Ek'F 
B4Gk'H "Ik'J 
B4Kk'L "Mk'N 
B4Ok'P "Qk'T "Uk'tCcN# kZ < Bzz5 **e
Z; Z;~ 593iWS(3-/01 
#Y># # *3 *+eCI. +4 +r   