
    ri                         S r SSKrSSKJr  SSKJrJr  SSKrSSK	J
r
  \" \5      R                  rSrSrSr\" S	5      r1 S
kr\" \" \/ SQ5      5      rSSSSSSS.r\" S5      r " S S\5      r " S S5      rg)zPython implementation of libtashkeel.

See: https://github.com/mush42/libtashkeel

Ported with the help of ChatGPT 2025-05-01.
    N)Path)OptionalUnion)InferenceSessioni.  _#u   0123456789٠١٢٣٤٥٦٧٨٩>      ٌ   ٍ   َ   ُ   ِ   ّ   ْ)R  iQ  iN  iO  iP  iK  iL  iM  u   َّu   ًّu   ُّu   ٌّu   ِّu   ٍّ)u   َّu   ًّu   ُّu   ٌّu   ِّu   ٍّr   c                       \ rS rSrSrSrg)TashkeelError   zError for tashkeel. N)__name__
__module____qualname____firstlineno____doc____static_attributes__r       ]/home/kim/smarthome/piper_models/venv/lib/python3.13/site-packages/piper/tashkeel/__init__.pyr   r      s    r   r   c                      \ rS rSrSr\4S\\\4   SS4S jjr	SS\S\
\   S\4S	 jjrSS\S\4S
 jjrS\\   S\\   S\S\\\   \\   4   4S jrS\S\\   S\\   S\4S jrS\S\\   S\\   S\\   S\S\4S jrS\4S jr S S\S\S\\\\   4   4S jjrS\S\\\\   4   4S jrS\S\\   4S jrS\\   S\\   4S jrS\\   S\\   4S jrSrg)!TashkeelDiacritizer   z0Add diacritics for Arabic text with libtashkeel.	model_dirreturnNc                 r   [        U5      n[        US-  5      U l        [        US-  SSS9 n[        R
                  " U5      U l        SSS5        [        US-  SSS9 n[        R
                  " U5      nUR                  5        VVs0 s H  u  pVXe_M	     snnU l        SSS5        [        4 Vs1 s H  nWU   iM
     snU l
        [        US-  SSS9 n[        R
                  " U5      U l        SSS5        g! , (       d  f       N= fs  snnf ! , (       d  f       Nx= fs  snf ! , (       d  f       g= f)	zInitialize diacritizer.z
model.onnxzinput_id_map.jsonrzutf-8)encodingNztarget_id_map.jsonzhint_id_map.json)r   r   sessionopenjsonloadinput_id_mapitemsid_target_mapPADtarget_id_meta_charshint_id_map)selfr    input_id_map_filetarget_id_map_filetarget_id_mapcihint_id_map_files           r   __init__TashkeelDiacritizer.__init__!   s%   O	'	L(@A ++S7
04		:K0LD

 ,,cG
,0II6H,IM!.!4!4!62!6!62D	
 KN.OA}Q/?.O!**C'
/3yy9I/JD
 

 
2	
 
 /P
 
s;   C;*D	D	D1D#D(;
D	D
D (
D6texttaskeen_thresholdc                 $    U R                  U5      $ )!Add diacritics using libtashkeel.)
diacritize)r/   r8   r9   s      r   __call__TashkeelDiacritizer.__call__;   s    t$$r   c                    UR                  5       n[        U5      [        :  a  [        S[         35      eU R	                  U5      u  p4U R                  USS9u  p5U R                  U5      nU R                  U5      n[        U5      nUS:X  a  U$ U R                  XgU5      u  pU R                  U	5      nUc  U R                  XU5      $ U R                  XXJU5      $ )r;   zText length cannot exceed T)normalize_diacriticsr   )striplen
CHAR_LIMITr   _to_valid_chars_extract_chars_and_diacritics_input_to_ids_hint_to_ids_infer_target_to_diacritics_annotate_text_with_diacritics&_annotate_text_with_diacritics_taskeen)r/   r8   r9   
input_textremoved_chars
diacritics	input_idsdiac_ids
seq_length
target_idslogitss              r   r<   TashkeelDiacritizer.diacritize?   s    zz|t9z!"<ZL IJJ$($8$8$>!
!%!C!CT "D "

 &&z2	$$Z0^
?K![[jI
//
;
$66tWW::m5F
 	
r   rO   rP   rQ   c                 v   [         R                  " U[         R                  S9R                  SU5      n[         R                  " U[         R                  S9R                  SU5      n[         R                  " U/[         R                  S9R                  S5      nUUUS.nU R                  R                  SU5      nUS   R                  5       R                  [         R                  5      R                  5       n	US   R                  5       R                  [         R                  5      R                  5       n
X4$ )zInfer target ids and logits.)dtype   )char_inputsdiac_inputsinput_lengthsNr   )nparrayint64reshaper%   runflattenastypeuint8tolistfloat32)r/   rO   rP   rQ   input_ids_arrdiac_ids_arrinput_len_arrinputsoutputsrR   rS   s              r   rH   TashkeelDiacritizer._infer\   s     "((;CCAzRxx9AA!ZP*RXX>FFqI )'*
 ,,""40 QZ'')00:AAC
##%,,RZZ8??A!!r   rL   rN   rM   c                    / n[        U5      nU H_  nU R                  U5      (       a  M  Xc;   a  UR                  U5        M3  UR                  U5        UR                  [        US5      5        Ma     SR	                  U5      $ N )iter_is_diacritic_charappendnextjoin)r/   rL   rN   rM   output	diac_iterr3   s          r   rJ   2TashkeelDiacritizer._annotate_text_with_diacriticss   sr     $	A&&q))!a a d9b12  wwvr   rS   	thresholdc                 H   / n[        X$5      nU H  nU R                  U5      (       a  M  X;   a  UR                  U5        M3  UR                  U5        [        US5      u  pX:  a  UR                  [        5        Mn  UR                  U	5        M     SR                  U5      $ )N)rm   g        rm   )zipro   rp   rq   SUKOONrr   )r/   rL   rN   rM   rS   rv   rs   rt   r3   diaclogits              r   rK   :TashkeelDiacritizer._annotate_text_with_diacritics_taskeen   s     
+	A&&q))!a a "9i8$MM&)MM$'  wwvr   c                     U[         ;   $ N)ARABIC_DIACRITICS)r/   r3   s     r   ro   &TashkeelDiacritizer._is_diacritic_char   s    %%%r   r@   c                    UR                  SR                  [        5      5      n/ n/ nSn[        U5      S/-    HC  nU R	                  U5      (       a  XV-  nM  UR                  U5        UR                  U5        SnME     U(       a  UR                  5         U(       a  UR                  S5        U(       a=  [        U5       H.  u  pxXR                  ;  d  M  [        R                  US5      XG'   M0     SR                  U5      U4$ )Nrm    r   )lstriprr   r   listro   rp   pop	enumerater.   NORMALIZED_DIAC_MAPget)	r/   r8   r@   clean_charsrN   pending_diacr3   r4   ds	            r   rE   1TashkeelDiacritizer._extract_chars_and_diacritics   s     {{277#456
dse#A&&q))!""1%!!,/! $ OONN1!*-,,,$7$;$;Ar$BJM . ww{#Z//r   c                    / n[        5       nU Ha  nX@R                  ;   d
  U[        ;   a  UR                  U5        M/  U[        ;   a  UR                  [
        5        MP  UR                  U5        Mc     SR                  U5      U4$ rl   )setr)   r   rp   NUMERALSNUMERAL_SYMBOLaddrr   )r/   r8   validinvalidr3   s        r   rD   #TashkeelDiacritizer._to_valid_chars   sj    EA&&&A1B,BQh^,A  wwu~w&&r   c                 J    U Vs/ s H  o R                   U   PM     sn$ s  snf r~   )r)   )r/   r8   r3   s      r   rF   !TashkeelDiacritizer._input_to_ids   s#    .23d!!!$d333    c                 J    U Vs/ s H  o R                   U   PM     sn$ s  snf r~   )r.   )r/   rN   r   s      r   rG    TashkeelDiacritizer._hint_to_ids   s#    -78Z  #Z888r   rR   c                 n    U Vs/ s H#  nX R                   ;  d  M  U R                  U   PM%     sn$ s  snf r~   )r-   r+   )r/   rR   r4   s      r   rI   )TashkeelDiacritizer._target_to_diacritics   sB      
111 "Dq!
 	
 
s   22)r.   r+   r)   r%   r-   r~   )T)r   r   r   r   r   TASHKEEL_DIRr   strr   r6   r   floatr=   r<   r   inttuplerH   r   rJ   rK   boolro   rE   rD   rF   rG   rI   r   r   r   r   r   r      s   :5A K%T	"2 Kd K4%S %Xe_ %PS %
s 
s 
:"c".23i"EH"	tCy$u+%	&".+/9EHX	" I 3x	
 U  
2&t & 7;00/30	sDI~	0:
'C 
'E#s3x-,@ 
'4# 4$s) 49tCy 9T#Y 9
S	 
d3i 
r   r   )r   r'   pathlibr   typingr   r   numpyr[   onnxruntimer   __file__parentr   rC   r,   r   r   r   HARAKAT_CHARSmapchrr   r   ry   	Exceptionr   r   r   r   r   <module>r      s      "  (H~$$
	/0VC!QRS %vvv_eouv 	UI v
 v
r   