o
    ¢ÄiG ã                   @   sv   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ e  e¡ZG dd„ deƒZG dd	„ d	eƒZdS )
é    N)ÚFusionAttention)ÚFusion)ÚFunctionProtoÚ	NodeProtoÚTensorProtoÚhelperÚnumpy_helper)Ú	OnnxModelc                       sœ   e Zd ZdZdededef‡ fdd„Z							dd	ed
edededededededededede	dB dedB fdd„Z
dd„ Zdd„ Zdd„ Z‡  ZS )ÚFusionRotaryAttentionze
    Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
    ÚmodelÚhidden_sizeÚ	num_headsc                    s   t ƒ j|||dg d¢d d S )NT)ZSimplifiedLayerNormalizationÚ SkipSimplifiedLayerNormalizationZLayerNormalizationÚSkipLayerNormalizationÚAdd)Zuse_multi_head_attentionZsearch_op_types)ÚsuperÚ__init__)Úselfr   r   r   ©Ú	__class__© új/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_rotary_attention.pyr      s   
ûzFusionRotaryAttention.__init__Ú NÚinputÚoutputÚq_rotaryÚk_rotaryÚv_matmulÚ	attn_maskÚadd_qkÚpast_kÚpast_vÚ	present_kÚ	present_vÚscaleÚreturnc                 C   s  | j dksJ ‚| jdkr#| j| j  dkr#t d| j› d| j › ¡ d S | j d¡}|jd |jd |jd d||||	g}|g}|
rJ|rJ| |
|g¡ tj	d|||d}d|_
|j t d| j ¡g¡ |d urq|j t d	|¡g¡ | jd ur„|j t d
t| jƒ¡g¡ |  d¡ |S )Nr   z)fuse_rotary_attention: input hidden size z# is not a multiple of num of heads ZMultiHeadAttentionr   ©ÚinputsÚoutputsÚnameúcom.microsoftr   r$   Úmask_filter_value)r   r   ÚloggerÚdebugr   Úcreate_node_namer   Úextendr   Ú	make_nodeÚdomainÚ	attributeÚmake_attributer+   ÚfloatÚincrease_counter)r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   Zmha_node_nameZ
mha_inputsZmha_outputsZmha_noder   r   r   Úcreate_mha_node(   sB   ÿøü

z%FusionRotaryAttention.create_mha_nodec	           1      C   s  | j  |dgdg¡}	| j  |dgdg¡}
|	d u s|
d u rdS |	d |
d }}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u sg|d u sg|d u sg|d u ridS |\}}}|\}}}|jd |ks|jd |krƒdS |d j|jks“|d j|jkr•dS | j  |dgdg¡}| j  |dgdg¡}|d u s±|d u r³dS |d |d }}| j  |g d	¢g d
¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u sü|d u sü|d u sü|d u rþdS |d j|jks"|d j|jks"|d j|jks"|d j|jkr$dS | j  |dgdg¡}|d u r5dS |d }| j  |g d	¢g d
¢¡} | j  |g d¢g d¢¡}!| d u s[|!d u r]dS | d j|jkso|!d j|jkrqdS | j  |dgdg¡}"|"d u r‚dS |"d }#| j  |#g d	¢g d
¢¡}$| j  |#g d¢g d¢¡}%|$d u s¨|%d u rªdS |$d j|jks¼|%d j|jkr¾dS |$d }&| d }'|d }(|jd })|&jd |)ksç|'jd |)ksç|(jd |)krédS | j  |g d¢g d¢¡}*| j  |g d¢g d¢¡}+|*d ur|*\}},}-n|+d ur|+\}}},}-ndS |-jd dvr$dS | j  |,g d¢g d¢¡}.| j  |-g d¢g d¢¡}/| j  |-dgdg¡}0|.d u sU|/d u sU|0d u rWdS |.d j|/d jksm|.d j|/d jkrodS |/d jd |0d jd kr€dS dS )NÚConcaté   Fr   ©Ú	UnsqueezeÚGatherÚShape©r   r   r   ©r8   r   r   )é   r   r   )r:   ÚMulr;   r<   ©r   r   r   r   )r:   r   r;   r<   ©r8   r   r   r   r?   )r?   r   r   r   ©r7   ÚSlicerD   ©ÚCastr7   rD   rD   >   r   Zattention_mask)r?   r   r8   r   r:   T)r   Úmatch_parent_pathr   r)   r   )1r   Úreshape_qkv_2Úreshape_qkv_1Úreshape_q_2Úreshape_k_2Úreshape_v_2Úreshape_v_1r   Ú
root_inputZconcat_qkv_2_pathZconcat_qkv_1_pathZconcat_qkv_2Zconcat_qkv_1Zreshape_qkv_2_path_1Zreshape_qkv_2_path_2Zreshape_qkv_1_path_1Zreshape_qkv_1_path_2Ú_Úgather_1Úshape_1Úgather_2Úshape_2Zconcat_v_2_pathZconcat_v_1_pathZ
concat_v_2Z
concat_v_1Zreshape_v_2_path_1Zreshape_v_2_path_2Zreshape_v_1_path_1Zreshape_v_1_path_2Zconcat_k_2_pathZ
concat_k_2Zreshape_k_2_path_1Zreshape_k_2_path_2Zconcat_q_2_pathZ
concat_q_2Zreshape_q_2_path_1Zreshape_q_2_path_2Zmul_qZmul_kZmul_vZgather_1_outZattn_mask_path_1Zattn_mask_path_2Z
slice_qk_2Z
slice_qk_1Zslice_qk_2_pathZslice_qk_1_path_1Zslice_qk_1_path_2r   r   r   Ú&check_runtime_shape_paths_for_function`   sÄ   

 ÿÿ
ÿÿ$
ÿ$
0

ÿÿ,z<FusionRotaryAttention.check_runtime_shape_paths_for_functionc                 C   s\  | j  |dgdg¡}|d u rdS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}	|d u s4|	d u r6dS |\}
}}|	\}
}}|jd |ksN|jd |krPdS | j  |dgdg¡}|d u r`dS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u s„|d u r†dS |d j|jks–|d j|jkr˜dS | j  |dgdg¡}|d u r¨dS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u sÌ|d u rÎdS |d j|jksÞ|d j|jkràdS | j  |dgdg¡}|d u rðdS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u s|d u rdS |d j|jks*|d j|jkr,dS dS )	Nr7   r8   Fr   r9   r=   r>   T)r   rG   r   r)   )r   Úreshape_qkvÚ	reshape_qÚ	reshape_kÚ	reshape_vrN   Zconcat_qkv_pathZ
concat_qkvZreshape_qkv_path_1Zreshape_qkv_path_2rO   rP   rQ   rR   rS   Úconcat_v_pathÚconcat_vZreshape_v_path_1Zreshape_v_path_2Úconcat_k_pathÚconcat_kZreshape_k_path_1Zreshape_k_path_2Zconcat_q_pathZconcat_qZreshape_q_path_1Zreshape_q_path_2r   r   r   Ú#check_runtime_shape_paths_for_nodesü   sV   	

  $z9FusionRotaryAttention.check_runtime_shape_paths_for_nodesc           W         sh  |j dvrd S d }ˆ j |g d¢g d¢¡}ˆ j |g d¢g d¢¡}ˆ j |g d¢g d¢¡}|d ur;|\}}	}}
}|}n"|d urH|\}}}}|}n|d urV|\}}}}}|}nt d¡ d S d\}}}d }d }ˆ j |g d	¢g d
¢¡}ˆ j |g d¢g d¢¡}ˆ j |g d¢g d¢¡}ˆ jj|g d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg	d d\}}}ˆ j |g d ¢g d!¢¡}|d ur|\}}}}}}|}ˆ j |d"d#gd$d%g¡}|d u rt d&¡ d S |d$ jd$ }|d' jd$ }|jd$ }nq|d ur4|\}}}}|}|jd$ }|jd$ }nY|d urF|\}}}|}|jd$ }nG|d urkt|ƒd(krk|d$ d)d … \}}}}|}|jd$ }|jd$ }n"|d ur†|\}}}}}|}|}|jd$ }|jd$ }nt d*¡ d S ˆ j |g d+¢g d,¢¡}d-\}} |d ur©|\}}}} nt d.¡ d S d/\}!}"ˆ j |g d0¢g d¢¡}#ˆ j |g d1¢g d¢¡}$ˆ j |g d2¢g d3¢¡}%ˆ j |g d4¢g d5¢¡}&ˆ j |g d6¢g d7¢¡}'ˆ j |g d8¢g d3¢¡}(ˆ j |g d9¢g d:¢¡})|#d ur|#\}}*}+|*jd$ }!nb|$d ur)|$\}}}*}+|*jd$ }!nQ|%d ur9ˆ  	|%d$ jd$ ¡}"nA|&d urIˆ  	|&d$ jd$ ¡}"n1|'d urV|'d$ jd$ }"n$|(d urc|(d$ jd$ }"n|)d ursˆ  	|)d$ jd$ ¡}"nt d;¡ d S d/\},}-d }.d }/d }0ˆ j | g d<¢g d
¢¡}1ˆ j | g d=¢g d¢¡}2ˆ j | g d>¢g d?¢¡}3ˆ jj| g d@¢g d:¢fg dA¢g dB¢fg dC¢g dD¢fg dE¢g dF¢fg dG¢g dH¢fg dI¢g dJ¢fg dK¢g dL¢fg dI¢g dM¢fg dI¢g dN¢fg	d d\}}4}ˆ j | g dO¢g dP¢¡}5|1d urA|1\}6}}7}}8}9|1}.ˆ j |7d"d#gd$d%g¡}:|:d u r&t dQ¡ d S |:d$ jd$ },|:d' jd$ };|7jd$ }-||;ks@J ‚n|2d urU|2\}}8}}<}9|2}.|8jd$ }-nk|3d uro|3\}}7}8}}<}9|3}.|7jd$ },|7jd$ }-nQ|4d urœt|4ƒd(krœ|4d$ dRd … \}<}9|4d$ dSdT… \}7}8|4}.|7jd$ },|7jd$ }-n$|5d ur¹|5\	}}7}0}8}/}}<}}9|5}.|7jd$ },|7jd$ }-nt dU¡ d S d }=d }>d }?ˆ j | g dV¢g d,¢¡}@ˆ j | g dW¢g d,¢¡}Aˆ j | g dX¢g dY¢¡}B|@d urø|@\}C}}D}E|@}=n&|Ad ur|A\}D}}F}E|A}=n|Bd ur|B\}?}D}>}}F}}E|B}=nt dZ¡ d S |Ejd$ |9jd$ kr;|9jd$ |jd$ kr;t d[¡ d S d\}G||kr_ˆ  
|	|
|C|6||||Ejd$ ¡sYt d]¡ d S |	jd$ }GnX|||fv r·ˆ  ||F|<||Ejd$ ¡szt d]¡ d S |jd$ }G|>r‡|>jd$ n|Ejd$ |Djd$< |/r—|/jd$ n|9jd$ |8jd$< |?d u r¬|8jd^ |8jd$< ||kr·|d_d … }‡ fd`da„}H|?rš|0ršˆ j db¡}I|Id^ }Jtjdb|0jd$ g|Jg|Idc}K|Kj t ddg de¢¡g¡ ˆ j db¡}L|Ld^ }Mtjdb|?jd$ g|Mg|Ldc}N|Nj t ddg de¢¡g¡ |H|<ƒ}O|Od u rt df¡ d S ˆ jjdgdhdi}Ptjdg|Kjd$ |Ojd$ g|Pd^ g|Pdc}Qˆ jjdgdjdi}Rtjdg|Njd$ |Ojd$ g|Rd^ g|Rdc}S|Q}8|S}Dˆ j |O¡ ˆ j |K¡ ˆ j |N¡ ˆ j |Q¡ ˆ j |S¡ ˆ jˆ j|Oj< ˆ jˆ j|Kj< ˆ jˆ j|Nj< ˆ jˆ j|Qj< ˆ jˆ j|Sj< ˆ  |Ejd$ |G|D|8||!|"|,||-|¡}T|Td u r¸t dk¡ d S ˆ j |T¡ ˆ jˆ j|Tj< ˆ j |d_d … ¡ ||krêˆ j |d u râ|d d'… n|d dR… ¡ n|d$ d' g}U|D ]	}Vˆ  |V|U¡ qóˆ j |¡ |.|1krˆ j |.d dR… ¡ nw|.|2kr1ˆ j |.d$ ¡ ˆ j |.d% ¡ ˆ j |.dl ¡ nY|.|3krWˆ j |.d$ ¡ ˆ j |.d_ ¡ ˆ j |.dl ¡ ˆ j |.dm ¡ n3|.|5krmˆ j |.d$ ¡ ˆ j |.d_ ¡ n|.|4krŠ|.d$ d' |.d$ d) g}U|.D ]	}Vˆ  |V|U¡ q€|=|@kršˆ j |=d dR… ¡ n|=|Akr¯ˆ j |=d_ ¡ ˆ j |=d% ¡ dnˆ _d S )oN>   r   r   r   )ÚMatMulÚReshapeÚ	Transposer_   r^   ©r8   r   r   r   r   )r^   r_   r`   r^   rB   )Z	AllReducer^   r_   r`   r^   z0fuse_rotary_attention: failed to match qkv nodes)r   r   r   )r_   r`   r7   r`   r_   r^   )r8   r   r   r8   r   r   )r7   r`   r_   r^   )r8   r8   r   r   )r`   r_   r^   r>   )r_   ÚExpandr:   r7   r`   r_   r^   )r8   r   r   r   r8   r   r   )r_   rb   ÚWhereÚEqualr_   r7   r:   r;   r<   r7   r`   r_   r^   )r8   r   r8   r   r   r   r   r   r   r   r8   r   r   )r_   rb   rc   rd   r@   ÚConstantOfShaper<   r_   r7   r:   r;   r<   r7   r`   r_   r^   )r8   r   r8   r   r8   r   r   r   r   r8   r   r   r   r8   r   r   )r_   rb   rc   re   r<   r_   r7   r:   r;   r<   r7   r`   r_   r^   )r8   r   r8   r8   r   r   r   é   r   r   r   r8   r   r   )r_   rb   rc   r_   r7   r:   r;   r<   r7   r`   r_   r^   )r8   r   r8   r?   r   é   r   r   r   r8   r   r   )	r_   r7   r:   r;   r<   r7   r`   r_   r^   )	r8   r8   r   r   r   r   r8   r   r   )
r_   r7   r:   r@   r;   r<   r7   r`   r_   r^   )
r8   r8   r8   r   r   r   r   r8   r   r   )	r8   r8   r?   r   r   r   r8   r   r   )	r8   r8   rf   r   r   r   r8   r   r   )Úoutput_name_to_node)r7   r`   r_   r   r^   )r8   r8   r   r   r8   rD   r:   r   r?   zDfuse_rotary_attention: failed to match past/present concat in v pathéÿÿÿÿé	   éüÿÿÿz-fuse_rotary_attention: failed to match v path)ZSoftmaxr   ÚDivr^   rA   ©NNz/fuse_rotary_attention: failed to match qk nodes)r   r   rC   rE   )r   rc   ÚSubrF   rb   r:   r:   )r8   r   r?   r8   r   r   r   )rc   rn   rF   rb   r:   r:   )r8   r?   r8   r   r   r   )rb   r   rc   rn   rF   rb   r:   r:   )r8   r   r   r?   r8   r   r   r   )rb   rc   rn   rF   rb   r:   r:   )	rc   rF   rc   rF   rn   rF   rb   r:   r:   )	r8   r   r   r   r   r8   r   r   r   z;fuse_rotary_attention: failed to match attention mask nodes)r_   r`   r7   r`   ÚRotaryEmbeddingr^   )r`   ro   r`   r_   r^   )r`   r7   ro   r`   r_   r^   )r8   r   r8   r   r   r   )	r`   r_   rb   r:   r7   ro   r`   r_   r^   )r`   r_   rb   rc   rd   r_   r7   r:   r;   r<   r7   ro   r`   r_   r^   )r8   r   r   r8   r   r   r   r   r   r   r   r8   r   r   r   )r`   r_   rb   rc   rd   r@   re   r<   r_   r7   r:   r;   r<   r7   ro   r`   r_   r^   )r8   r   r   r8   r   r8   r   r   r   r   r8   r   r   r   r8   r   r   r   )r`   r_   rb   rc   re   r<   r_   r7   r:   r;   r<   r7   ro   r`   r_   r^   )r8   r   r   r8   r8   r   r   r   rf   r   r   r   r8   r   r   r   )r`   r_   rb   rc   r_   r7   r:   r;   r<   r7   ro   r`   r_   r^   )r8   r   r   r8   r?   r   rg   r   r   r   r8   r   r   r   )r`   r_   r7   r:   r;   r<   r7   ro   r`   r_   r^   )r8   r   r8   r   r   r   r   r8   r   r   r   )r`   r_   r7   r:   r@   r;   r<   r7   ro   r`   r_   r^   )r8   r   r8   r8   r   r   r   r   r8   r   r   r   )r8   r   r8   r?   r   r   r   r8   r   r   r   )r8   r   r8   rf   r   r   r   r8   r   r   r   )	r`   r7   r7   ro   rD   r`   r_   r   r^   )	r8   r   r8   r   r   r   r   r   r8   zDfuse_rotary_attention: failed to match past/present concat in k pathéþÿÿÿéûÿÿÿéýÿÿÿz.fuse_rotary_attention: failed to match k nodes)r_   r`   ro   r^   )ro   r`   r_   r^   )r7   ro   rD   r`   r_   r   r^   )r   r   r   r   r   r   r8   z.fuse_rotary_attention: failed to match q nodeszKfuse_rotary_attention: failed to find the same root_input for q, k, v pathsr   z;fuse_rotary_attention: failed to verify runtime shape pathsZ	_output_0r8   c           
         s  ˆ j  | dd¡}|du rt d¡ dS ˆ j  |jd ¡}ˆ j  |jd ¡}|du s-|du r4t d¡ dS |d }|d }|| }ˆ j jd	d
d}ˆ j  |¡du r]ˆ j|t	j
dg|gdd ˆ j jddd}tjd|jd |jd |g|d g|d}	|	j t dd¡g¡ |	S )zþDetect num_heads and hidden_size for ONNX model from phi-2
            Args:
                reshape_q (NodeProto): reshape node for q
            Returns:
                hidden_size_concat_node(NodeProto): Concat node to be used by reshape
            r7   r8   NzEfuse_rotary_attention: failed to trace the concat node from reshape_qr?   rf   zMfuse_rotary_attention: failed to get constant nodes of num_heads or head_sizer   ZInitializerr   ©Zname_prefixF)r)   Ú	data_typeÚdimsÚvalsÚrawZhidden_size_concatZoutput_0r&   Zaxis)r   Zmatch_parentr,   r-   Zget_constant_valuer   r.   Úget_initializerÚadd_initializerr   ZINT64r   r0   r2   r/   r3   )
rV   ÚconcatZnum_head_constant_nodeZhead_size_constant_nodeZnum_head_valueZhead_size_valuer   Zhidden_size_initilizerZhidden_size_reshape_node_nameÚhidden_size_concat_node©r   r   r   Úcreate_hidden_size_concat_node¥  sB   

ûýø
zBFusionRotaryAttention.fuse.<locals>.create_hidden_size_concat_noder`   r&   Úperm)r   r?   r8   rf   z?fuse_rotary_attention: failed to create hidden_size_concat_noder_   Úconcat_k_halfrs   Úconcat_q_halfzSfuse_rotary_attention: failed to create multi-head attention with rotary embeddingsrf   rg   T)Úop_typer   rG   r,   r-   Zmatch_parent_paths_allr   r   ÚlenZreshape_add_qkrT   r]   r)   r.   r   r0   r2   r/   r3   Únodes_to_addÚappendÚthis_graph_nameÚnode_name_to_graph_namer6   Únodes_to_removeZ&add_nodes_to_remove_with_nodes_to_keepÚprune_graph)Wr   Znormalize_nodeÚinput_name_to_nodesrh   Z	qkv_nodesZqkv_nodes_1Zqkv_nodes_2Zqkv_nodes_3rO   rH   rI   Z
matmul_qkvrU   r!   r#   Zpast_seq_lenZv_nodesZadd_vZ	v_nodes_1Z	v_nodes_2Z	v_nodes_3Z	v_nodes_4Z	v_nodes_5rL   rZ   rM   Zmatmul_vrY   Ztranspose_vrX   Zqk_nodesr   Z	matmul_qkr   Z
add_qk_strZattn_mask_nodes_1Zattn_mask_nodes_2Zattn_mask_nodes_3Zattn_mask_nodes_4Zattn_mask_nodes_5Zattn_mask_nodes_6Zattn_mask_nodes_7Zslice_mask_1Zslice_mask_2r    r"   Zk_nodesZslice_kr   Z	k_nodes_1Z	k_nodes_2Z	k_nodes_3Z	k_nodes_4Z	k_nodes_5rK   r\   Zrotary_kZmatmul_kr[   Zshared_past_seq_lenrW   Zq_nodesZslice_qr€   Z	q_nodes_1Z	q_nodes_2Z	q_nodes_3rJ   Zrotary_qZmatmul_qrV   Zroot_outputr}   Zk_transpose_node_nameZk_tranpose_output_nameZk_transpose_nodeZq_transpose_node_nameZq_tranpose_output_nameZq_transpose_noder{   Zconcat_k_reshape_node_nameZconcat_k_reshape_nodeZconcat_q_reshape_node_nameZconcat_q_reshape_nodeÚnew_nodeZnodes_to_keepZ	temp_pathr   r|   r   ÚfuseE  sx  
ýýý

ýýýþðíïñþóþþ™l’pý
ý









ý

ýýýýýýý








ýýýôîëíïòñòò ì % Ù )ý
ý









ýýý



,

ø

û

  

5
ü
ü

üüõ


,







zFusionRotaryAttention.fuse)r   r   r   r   r   r   N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r	   Úintr   Ústrr   r4   r6   rT   r]   r‹   Ú__classcell__r   r   r   r   r
      s^    þýüóþýüûúùø	÷
öõôó
ò8 Ir
   c                
       sh   e Zd Zdef‡ fdd„Zdedefdd„Zdefd	d
„Zde	de	de	de	de	f
dd„Z
dd„ Z‡  ZS )ÚFusionRotaryEmbeddingsr   c                    s*   d| _ tƒ  || j | j | j d dg¡ d S )Nro   z.1r   )Ú	base_namer   r   )r   r   r   r   r   r   T  s   $zFusionRotaryEmbeddings.__init__Úrot_emb_nodeÚfunctionc                    sü   g g }}|j D ],}|jdkr4|jg kr4|jd |jv r4| |¡ t|jƒ |jd ¡}| |j| ¡ qg }|D ]}|jd j}	| j	 
d¡|	_| j	 |	¡ | |	j¡ q9t||ddD ]\‰ }
tt‡ fdd„| j	j	jj ƒƒ}|D ]	}t |ˆ |
¡ qqq\|S )NÚConstantr   F)Ústrictc                    s
   ˆ | j v S ©N)r   )Úentry©Zextra_outputr   r   Ú<lambda>n  s   
 z?FusionRotaryEmbeddings.reassign_extra_outputs.<locals>.<lambda>)Únoder   r   r   r„   ÚlistÚindexr2   Útr   r.   r)   ry   ÚzipÚfilterÚgraphr	   Zreplace_node_input)r   r•   r–   Zextra_constantsÚextra_outputsZfn_nodeZoutput_indexZextra_initializersZextra_constantZconstant_tensorprotoZextra_initializerZnodes_to_updateZnode_to_updater   r›   r   Úreassign_extra_outputs[  s&   

$
€ÿz-FusionRotaryEmbeddings.reassign_extra_outputsr   c                    s8  | j  | j¡}| j  ˆddgddg¡}|d ur|\}}nt d¡ d S |jd ˆjd g}tt	‡fdd„| j j j
jƒƒ}tt	‡fdd„| j j j
jƒƒ}d	\}	}
t|ƒdkrÃt|ƒdkrÃ| j  |	¡d u rÃ| j  |
¡d u rÃt |d jd j¡ ¡ }t |d jd j¡ ¡ }tj|	tjt|jƒ| ¡  ¡ d
}| j  || j¡ tj|
tjt|jƒ| ¡  ¡ d
}| j  || j¡ | j |d |d g¡ | |	|
g¡ ˆj}t|ƒdkrtt	‡fdd„| j j jƒƒ}t|ƒdksêJ ‚|  ˆ|d ¡‰ tt	‡ fdd„|ƒƒ}t|ƒdksJ ‚tj | j|||dd}d|_!| j "|¡ |S )Nr_   r^   r   z.fuse_rotary_embeddings: failed to match MatMulr8   c                    ó   | j d ˆ jd kS )Nr   r?   ©r   r   ©Zconstant©r   r   r   rœ   ˆ  ó    zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>c                    r¦   )Nr   rf   r§   r¨   r©   r   r   rœ   ‰  rª   ©Ú	cos_cacheÚ	sin_cache©r)   rt   ru   rv   c                    s   | j ˆ jkS r™   )r)   r   )Úfnr©   r   r   rœ   «  s    c                    s   | ˆ vS r™   r   )Zoutput_name)r¤   r   r   rœ   ®  s    ©r'   r(   r)   Zinterleavedr*   )#r   r.   r”   rG   r,   r-   r   r   rž   r¢   r£   r   r‚   rx   r   Úto_arrayr2   r    Úsqueezer   Úmake_tensorr   ÚFLOATÚshapeÚflattenÚtolistry   r…   r‡   r/   Z	functionsr¥   r0   r1   r„   )r   r   Úrotary_emb_node_nameZmatmul_pathZreshape_nodeZmatmul_nodeZrotary_emb_inputsÚcos_cache_nodeÚsin_cache_nodeÚcos_cache_nameÚsin_cache_namer¬   r­   Úcos_cache_tensorÚsin_cache_tensorZrotary_emb_outputsÚfuncÚrotary_emb_noder   )r¤   r   r   Ú&create_rotary_embeddings_from_functiont  sn   ý

þ
ü
üûz=FusionRotaryEmbeddings.create_rotary_embeddings_from_functionrN   Úposition_idsÚ	cos_sliceÚ	sin_slicer   c                    sž  | j  | j¡}tt‡ fdd„| j j jjƒƒ}tt‡fdd„| j j jjƒƒ}d\}	}
t|ƒdkrºt|ƒdkrº| j  |	¡d u rº| j  |
¡d u rºt	 
|d jd j¡ ¡ }t	 
|d jd j¡ ¡ }|jd }|d d …d |d …f }|d d …d |d …f }tj|	tjt|jƒ| ¡  ¡ d}| j  || j¡ tj|
tjt|jƒ| ¡  ¡ d}| j  || j¡ | j |d |d g¡ tj| j|||	|
g|g|dd	}d
|_|S )Nc                    ó   | j d ˆ kS ©Nr   ©r   r¨   )rÃ   r   r   rœ   É  ó    zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>c                    rÅ   rÆ   rÇ   r¨   )rÄ   r   r   rœ   Ê  rÈ   r«   r8   r   r?   r®   r°   r*   )r   r.   r”   rž   r¢   r£   r   r‚   rx   r   r±   r2   r    r²   rµ   r   r³   r   r´   r¶   r·   ry   r…   r‡   r/   r0   r1   )r   rN   rÂ   rÃ   rÄ   r   r¸   r¹   rº   r»   r¼   r¬   r­   Z	head_sizer½   r¾   rÀ   r   )rÃ   rÄ   r   Ú#create_rotary_embeddings_from_nodes¾  sJ   

ü
ü
ûz:FusionRotaryEmbeddings.create_rotary_embeddings_from_nodesc           %         sÖ  | j |jvr|jdkrd S d ‰ |jdkrct|jƒdvs"|jd dvr)t d¡ d S |  |¡‰ ˆ d u r9t d¡ d S | j |¡ t	t
‡ fdd„| jjjjƒƒ}t|ƒdksVJ ‚| jjjj |d	 ¡ np| j |g d
¢g d¢¡}| j |g d¢g d¢¡}|p~|}| j |g d¢g d¢¡}| j |g d¢g d¢¡}	|pš|	}
|d u s£|
d u rªt d¡ d S | j |g d¢g d¢¡}| j |g d¢g d¢¡}|pÅ|}| j |g d¢g d¢¡}| j |g d¢g d¢¡}|pá|}|d u sê|d u rñt d¡ d S |d j|d jks|d j|
d jks|d j|d jks|d j|
d jkr$t d¡ d S | j |ddgd	d	g¡}| j |ddgd	d	g¡}|p@|}|d u rMt d¡ d S d\}}}| j |g d¢g d ¢¡}| j |g d!¢g d"¢¡}| j |g d#¢g d$¢¡}| j |g d%¢g d&¢¡}|d ur‘|}|d' jd	 }nB|d ur |}|d( jd	 }n3|d ur¶|}|d' jd	 }|d) jd }n|d urÌ|}|d( jd	 }|d) jd }nt d*¡ d S d+\}}| j |g d¢g d,¢¡}| j |g d!¢g d-¢¡}| j |g d#¢g d.¢¡}| j |g d%¢g d/¢¡} |d ur|}|d' jd	 }nB|d ur%|}|d( jd	 }n3|d ur;|}|d' jd	 }|d) jd }n| d urQ| }|d( jd	 }|d) jd }nt d*¡ d S |d0kr™| j |d) d1gdg¡}!| j |d) d1gdg¡}"|!d u sŠ|"d u sŠ|!d	 j|"d	 jkr‘t d2¡ d S |"d	 jd	 }ng }!g }"d3\}#}$||kr«||ksµ||krÓ||krÓ|d4 j|d4 jksË|d j|d jkrÒt d5¡ d S no||krÝ||ksç||kr=|| kr=|d j|d jkrùt d6¡ d S | j |d d7d8gdd	g¡}#| j |d g d9¢g d:¢¡}$|#d u s5|$d u s5| j |#d jd	 ¡d u s5|$d jdkr<t d;¡ d S nt d<¡ |  |d jd	 ||||jd	 ¡‰ ˆ d u r_t d¡ d S |  |g¡ |  |d d… ¡ |  |d d… ¡ |  |d d… ¡ |  |
d d… ¡ |  |d d… ¡ |  |¡ |  |¡ |  |!d d… ¡ |  |"d d… ¡ |#d urÅt| j |#d	 ¡ƒdkrÅ|  |#¡ |$d urÓ|  |$d d… ¡ |  | j ¡ | j| jˆ j< | j ˆ ¡ d=| _d S )>Nr   >   rg   é   r8   >   rÂ   ÚposZposition_idZpos_idZpos_idszLfuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding functionz=fuse_rotary_embeddings: failed to create RotaryEmbedding nodec                    s   | j ˆ jd kS rÆ   )r)   r   r©   ©rÀ   r   r   rœ     s    z-FusionRotaryEmbeddings.fuse.<locals>.<lambda>r   )r@   r7   ÚNegrD   r`   ra   )r@   r7   rÍ   rD   rD   )	r@   r7   rÍ   rD   r:   rl   r;   r<   r`   )	r8   r   r   r   r8   r   r   r   r   )	r@   r7   rÍ   rD   r:   rl   r;   r<   rD   z9fuse_rotary_embeddings: failed to match x2 in rotate_half)r@   r7   rD   r`   )r8   r   r8   r   )r@   r7   rD   rD   )r@   r7   rD   r:   rl   r;   r<   r`   )r8   r   r8   r?   r   r   r   r   )r@   r7   rD   r:   rl   r;   r<   rD   z9fuse_rotary_embeddings: failed to match x1 in rotate_halfri   zCfuse_rotary_embeddings: failed to match common input in rotate_halfr@   r`   rD   z8fuse_rotary_embeddings: failed to match x in rotate_half)Nr   r   )	r@   r:   r;   ÚSqueezerÎ   rD   r:   r;   r<   )	r8   r8   r   r   r   r   r?   r   r   )r@   r:   r;   rÎ   rÎ   rD   r:   r   )r8   r8   r   r   r   r   r?   r   )r@   r:   r;   rD   r:   r;   r<   )r8   r8   r   r   r?   r   r   )r@   r:   r;   rD   r:   r   )r8   r8   r   r   r?   r   rk   rr   r?   z>fuse_rotary_embeddings: failed to match sin path in apply_rope)Nr   )	r   r8   r   r   r   r   r?   r   r   )r   r8   r   r   r   r   r?   r   )r   r8   r   r   r?   r   r   )r   r8   r   r   r?   r   r   r_   zGfuse_rotary_embeddings: failed to match position ids path in apply_roperm   rp   zdfuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cachezRfuse_rotary_embeddings: failed to match common Add node in sin cache and cos cacher;   r<   )r;   r<   r`   r=   zKfuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len pathsz:fuse_rotary_embeddings: failed to match common cache pathsT)r”   r   r‚   r   r,   r-   rÁ   r‡   r„   rž   r¢   r   r£   Z
value_infoÚremoverG   r)   Zfind_graph_inputrÉ   r   Zadd_nodes_to_removeZget_childrenr5   r…   r†   rƒ   rˆ   )%r   r   r‰   rh   Zold_shape_inferZrotate_half_x2_path_1_1Zrotate_half_x2_path_1_2Zrotate_half_x2_path_1Zrotate_half_x2_path_2_1Zrotate_half_x2_path_2_2Zrotate_half_x2_path_2Zrotate_half_x1_path_1_1Zrotate_half_x1_path_1_2Zrotate_half_x1_path_1Zrotate_half_x1_path_2_1Zrotate_half_x1_path_2_2Zrotate_half_x1_path_2Zx_path_1Zx_path_2Zx_pathZsin_pathr­   rÂ   Z
sin_path_1Z
sin_path_2Z
sin_path_3Z
sin_path_4Zcos_pathr¬   Z
cos_path_1Z
cos_path_2Z
cos_path_3Z
cos_path_4Zposition_ids_from_sin_pathZposition_ids_from_cos_pathZpast_seq_len_pathZcurr_seq_len_pathr   rÌ   r   r‹   ö  sà  



ÿýýýý
ýýýý

ýý



ýýýý




ýýýý





ýý


,ÿü
ýý


ý
û



$


zFusionRotaryEmbeddings.fuse)rŒ   r   rŽ   r	   r   r   r   r¥   rÁ   r‘   rÉ   r‹   r’   r   r   r   r   r“   S  s     Jþýüû
ú8r“   )ÚloggingZfusion_attentionr   Zfusion_baser   Zonnxr   r   r   r   r   Z
onnx_modelr	   Ú	getLoggerrŒ   r,   r
   r“   r   r   r   r   Ú<module>   s    
        L