o
    ¢ÄiÊU  ã                   @   sp   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ eeƒZG dd„ deƒZG d	d
„ d
eƒZdS )é    )Ú	getLoggerN)ÚFusion)ÚFusionUtils)Úhelper)Ú	OnnxModelc                       sJ   e Zd ZdZdedef‡ fdd„Zdd„ Zdd	„ Zd
d„ Z	dd„ Z
‡  ZS )ÚFusionGptAttentionPastBasez3Base class for GPT Attention Fusion with past stateÚmodelÚ	num_headsc                    s6   t ƒ  |dddgd¡ || _t|ƒ| _i | _d | _d S )NÚ	AttentionÚLayerNormalizationÚSkipLayerNormalizationz	with past)ÚsuperÚ__init__r	   r   ÚutilsÚcasted_attention_maskÚmask_filter_value©Úselfr   r	   ©Ú	__class__© úg/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_gpt_attention.pyr      s
   

z#FusionGptAttentionPastBase.__init__c           
      C   sø   | j  |d|¡}|d u s|jdkrt d¡ d S | j  |d¡dkr(t d¡ d S |jd }| j  |d|¡}|r?|jdkr?|}n| j  |ddgddg¡}|d u rVt d¡ d S |d }| j  |d¡dkrjt d	¡ d S |jd }	||	krzt d
¡ d S |S )Nr   ZGatherz,match_past_pattern_1: expect Gather for pasté   z9match_past_pattern_1: expect indices=1 for Gather of pastÚ	Transposez7match_past_pattern_1: failed match Transpose and Gatheréÿÿÿÿz;match_past_pattern_1: expect indices=0 for Gather k of pastz,match_past_pattern_1: expect past to be same)r   Ú
get_parentÚop_typeÚloggerÚdebugZfind_constant_inputÚinputÚmatch_parent_path)
r   Úconcat_kÚconcat_vÚoutput_name_to_nodeZgatherÚpastÚparentZgather_past_kÚpast_k_nodesÚpast_kr   r   r   Úmatch_past_pattern_1   s0   






z/FusionGptAttentionPastBase.match_past_pattern_1c           
      C   st  | j  |d|¡}|d u s|jdkrt d¡ d S | j  |d|¡}|d u s)|jdkr0t d¡ d S | j  ¡ }|dk rYt |ddg¡sHt d¡ d S t |d	d
d
g¡sXt d¡ d S n!| j 	|d
dg¡sit d¡ d S | j 	|d
d
d
g¡szt d¡ d S tj|ddddsŠt d¡ d S |j
d }| j  |ddgddg¡}|d u r¦t d¡ d S |d j
d }	||	kr¸t d¡ d S |S )Nr   ÚSqueezez:match_past_pattern_2: expect Squeeze as parent of concat_vÚSplitz0match_past_pattern_2: expect Split for past pathé   Zaxesz:match_past_pattern_2: axes != [0] for Squeeze in past pathÚsplitr   z<match_past_pattern_2: split != [1, 1] for Split in past pathZaxis)Údefault_valuezKmatch_past_pattern_2: attribute axis of Split are not expected in past pathz7match_past_pattern_2: failed to match past_k_nodes pathr   z,match_past_pattern_2: expect past to be same)r   r   r   r   r   Zget_opset_versionr   Zcheck_node_attributer   Zcheck_node_input_valuer   r    Úinfo)
r   r!   r"   r#   Zsqueezer,   Zopset_versionr$   r&   r'   r   r   r   Úmatch_past_pattern_2K   sH   




þ





z/FusionGptAttentionPastBase.match_past_pattern_2c                 C   sZ   | j j|d|dd}|st d¡ d S | j j|d|dd}|s&t d¡ d S |jd }|S )NÚ	UnsqueezeF)Ú	recursivezexpect unsqueeze for presentÚConcatzexpect concat for presentr   )r   Zfind_first_child_by_typer   r.   Úoutput)r   r"   Úinput_name_to_nodesZunsqueeze_present_vZconcat_presentÚpresentr   r   r   Úmatch_presentŽ   s   ÿ
ÿ

z(FusionGptAttentionPastBase.match_presentc                 C   s`   || j v r| j | }|S | j |¡r!| j |¡\}}|| j |< |S | j |¡\}}|| j |< |S ©N)r   r   Úfind_graph_inputr   Zcast_graph_input_to_int32Zcast_input_to_int32)r   Ú
input_nameÚattention_mask_input_nameZcastedZ	cast_noder   r   r   Úcast_attention_maskŸ   s   

ú
þ
z.FusionGptAttentionPastBase.cast_attention_mask)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   Úintr   r(   r/   r6   r;   Ú__classcell__r   r   r   r   r      s    1Cr   c                       s:   e Zd ZdZdedef‡ fdd„Zdd„ Zdd	„ Z‡  Z	S )
ÚFusionGptAttentionzP
    Fuse GPT-2 Attention with past state subgraph into one Attention node.
    r   r	   c                    s   t ƒ  ||¡ d S r7   )r   r   r   r   r   r   r   °   s   zFusionGptAttention.__init__c
                 C   s  | j  d¡}
tjd|||||g|
d |g|
d}d|_|j t d| j¡t d|	r+dnd	¡g¡ | j	d urC|j t d
t
| j	ƒ¡g¡ tjd|
d |jd g|
d g|
d d}tjd|
d |jd g|g|
d d}| j |||g¡ | j| j|j< | j| j|j< | j| j|j< d S )NZGptAttentionr
   Ú_output)ZinputsZoutputsÚnamezcom.microsoftr	   Zunidirectionalr   r   r   ÚMatMulZ_matmul_outputZ_matmulÚAddé   Ú_add)r   Zcreate_node_namer   Z	make_nodeÚdomainÚ	attributeÚextendZmake_attributer	   r   Úfloatr   Znodes_to_addZthis_graph_nameZnode_name_to_graph_namerD   )r   Ú	fc_weightÚfc_biasÚgemm_qkvr$   r5   r   r3   ÚmaskÚis_unidirectionalZattention_node_nameZattention_nodeZmatmul_nodeÚadd_noder   r   r   Úcreate_attention_node³   s>   
üþÿ
üüz(FusionGptAttention.create_attention_nodec           8      C   s  d }d }g }|j dk}d }|s| jj|g d¢g d¢||d}n| jj|g d¢g d¢||d}|d u r4d S d }	|sK|\}
}}}}}}|
jd|d   }	n|\}}}}}}| j |g d	¢g d
¢¡}|d u rjt d¡ d S |\}}}}| j |g d¢g d¢|¡}|d u rŽ| j |g d¢g d¢|¡}|d u rÔ| j |g d¢g d¢|¡}|d u r°| j |g d¢g d¢|¡}|d u r»t d¡ d S |d jd }| j |d ¡\}}|d j| }n|d jd }|d jd }|d }|	d urö|	|jvröt d¡ d S d}d }d }d }| j |g d¢g d¢¡} | d ure| \}!}"}#}$}%| j |"g d¢g d¢¡}&|&d u r.t d¡ d S |&d }'|&d }|$|'krBt d¡ d S t|&ƒdkrd|&d j dkrd| j |&d ¡\}}(|(dkrd|( | _nä| j 	|g d ¢g d!¢fg d"¢g d#¢fg|¡\}} }| d u rŠt d$¡ d S | d% })| d& }$| d }%|dkrì| d }*| j 	|*g d'¢g d(¢fg d)¢g d*¢fg d+¢g d,¢fg|¡\}}}|d u rËt d-¡ d S t|ƒdkrì|d j dkrì| j |d ¡\}}(|(dkrì|(| _| j 	|)g d.¢g d/¢fg d0¢g d1¢fg|¡\}}&}|&d u rt d2¡ d S |&|dkrdnd }| j 
|&d d|¡}+|+j d3kr;|+}'|$|'kr:t d¡ d S n|+j d4krD|+}nt d2¡ | j |jd ¡},t|,tjƒrvt|,jƒd5krv|,jd d… d6krv|,jd |,jd ks}t d7¡ d S t |,t |,¡¡rŠd8}nt |,t t |,¡¡¡sžt d9¡ d S | j |%g d:¢g d;¢¡}-|-d u r¶t d<¡ d S |-\}.}/}0||0krÇt d=¡ d S | j |%g d	¢g d
¢¡}1|1d u rø| j |%g d>¢g d?¢¡}1|1d u rðt d@¡ d S |1\}}2}3}4}5n|1\}2}3}4}5||5kr
t dA¡ d S |r|2|krt dB¡ d S dC}6|d ur,|d jd }7|  |7¡}6|  |2||¡p:|  |2||¡}|d u rGt dD¡ d S | j |¡sSt dE¡ |  ||¡}|d u ret dF¡ d S | j |¡sst dG¡ d S |  ||||||jd |jd |6|¡	 d| _d S )HNr   )rF   ÚReshapeÚGemmrT   rT   r   rE   )r   Nr   r   r   r   r   )r#   Úreturn_indice)rT   rU   rT   rT   r   rE   )Nr   r   r   r   r   r   r   )r2   r   rT   r*   )r   r   r   r   z&fuse_attention: failed to match v path)rT   rU   rT   r   )r   r   r   r   )rT   rU   rT   r   )rF   rE   r   )r   Nr   )rF   rE   r   z'fuse_attention: failed to match fc pathrG   r   zCUpstream Add and (Skip)LayerNormalization shall have one same inputT)ÚSoftmaxÚSubÚMulÚDivrE   )r   r   r   r   r   )
rY   rX   ÚSlicer[   r0   rX   r)   r[   ÚShaperZ   )
r   r   r   r   r   r   r   r   r   r   z8fuse_attention: failed to match unidirectional mask pathé   z-fuse_attention: skip since div_qk != div_maskrY   iðØÿÿ)rW   ÚWhererZ   rE   )r   r   r   r   )rW   rF   r^   rZ   rE   )r   r   Nr   r   z(fuse_attention: failed to match qk nodeséýÿÿÿéþÿÿÿ)rY   rX   ÚCastr0   r0   rT   )Nr   r   r   r   r   )rY   rX   r0   r0   rT   )Nr   r   r   r   )rY   rX   r0   r0   )Nr   r   r   z9fuse_attention: failed to match input attention mask path)ra   r[   r[   r0   rX   r)   r[   r\   )r   r   r   r   r   r   r   r   )r[   r[   r0   rX   r)   r[   r\   )r   r   r   r   r   r   r   z)fuse_attention: failed to match mask pathrZ   r2   é   )r   r   z4fuse_attention: skip since mask shape is not 1x1xWxWFzDfuse_attention: skip since mask is neither lower triangular nor ones)r   rT   r*   )r   r   r   z&fuse_attention: failed to match q pathz.fuse_attention: skip since split_fc != split_q)r   r2   r   rT   r*   )r   r   r   r   r   z&fuse_attention: failed to match k pathz.fuse_attention: skip since split_fc != split_kz8fuse_attention: skip since concat_k != concat_k_to_matchÚ z)fuse_attention: failed to match past pathzpast is not graph input.z,fuse_attention: failed to match present pathz!expect present to be graph output)r   r   r    r   r   r   Zget_constant_inputÚlenr   Zmatch_parent_pathsr   Zget_constant_valueÚ
isinstanceÚnpZndarrayÚshapeZallcloseZ	ones_likeZtrilr;   r(   r/   r.   r8   r6   Zfind_graph_outputrS   r3   Zprune_graph)8r   Znormalize_noder4   r#   r$   r5   rV   Zis_normalize_node_skiplayernormZ	qkv_nodesZanother_inputZadd_qkvZreshape_qkvrO   Z	reshape_1Z	reshape_2Ztranspose_qkvZ
matmul_qkvZv_nodesr"   Ztranspose_vZ	reshape_vZsplit_fcZfc_nodesrM   ÚiÚ_rN   Zlayernorm_before_attentionrQ   Z
slice_maskZinput_mask_nodesZconcat_k_to_matchZqk_nodesZ
softmax_qkZsub_qkZmul_qkZdiv_qkZ	matmul_qkZ
mask_nodesZdiv_maskZmul_valZwhere_qkZadd_qkZdiv_or_concatZ	mask_dataZq_nodesZtranspose_qZ	reshape_qZsplit_qZk_nodesr!   Ztranspose_kZ	reshape_kZsplit_kr:   r9   r   r   r   Úfuseã   sä  
ûû	ø	ù
üüüü


ò




€þ
ú


þþþ÷
ð


þþú
ó



þ

ÿ







ý






ÿ





÷
zFusionGptAttention.fuse)
r<   r=   r>   r?   r   r@   r   rS   rj   rA   r   r   r   r   rB   «   s
    0rB   )Úloggingr   Únumpyrf   Zfusion_baser   Zfusion_utilsr   Zonnxr   Z
onnx_modelr   r<   r   r   rB   r   r   r   r   Ú<module>   s    