o
    ¢Äiä3  ć                   @   s\   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 ee
Zdd ZG dd	 d	eZdS )
é    )Ś	getLoggerN)ŚFusionGptAttentionPastBase)Śhelper)Ś	OnnxModelc                 C   s   t | | dkS )Ngķµ ÷Ę°>)Śabs)ŚvalueŚexpected_value© r	   śp/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_gpt_attention_megatron.pyŚis_close   s   r   c                       sB   e Zd ZdZdedef fddZdd Zdd	 Zd
d Z	  Z
S )ŚFusionGptAttentionMegatronz^
    Fuse GPT-2 Attention with past state subgraph from Megatron into one Attention node.
    ŚmodelŚ	num_headsc                    s   t   ||” d S )N)ŚsuperŚ__init__)Śselfr   r   ©Ś	__class__r	   r
   r      s   z#FusionGptAttentionMegatron.__init__c                 C   sö   | j  d”}|  |”}	|jd }
|jd |jd krdnd}tjd||jd |j| |	|g|
|g|d}d|_|j 	t 
d| j”t 
dd”g” | jd ur[|j 	t 
d	t| j”g” |g}| j 	|” |D ]	}| j| j|j< qf| j |” d
| _d S )NZGptAttentionr   é   Z	Attention)ZinputsZoutputsŚnamezcom.microsoftr   ZunidirectionalŚmask_filter_valueT)r   Zcreate_node_nameZcast_attention_maskŚoutputŚinputr   Z	make_nodeŚdomainŚ	attributeŚextendZmake_attributer   r   ŚfloatŚnodes_to_addZthis_graph_nameZnode_name_to_graph_namer   Znodes_to_removeŚappendZprune_graph)r   Śmatmul_before_splitŚadd_before_splitŚpastŚpresentr   Śreshape_qkvŚmaskZattention_node_nameZ
int32_maskr   ŚiZattention_noder   Śnoder	   r	   r
   Śfuse_attention_node   s:   


ūö
ž’

z.FusionGptAttentionMegatron.fuse_attention_nodec                 C   s“  | j  |g d¢g d¢”}|d u rt d” d S |\}}}}	t|dkr<|d jdkr<| j  |d ”\}
}|dkr<| | _|jd |j	d krMt d” d S | j
 |dd	”s\t d
” d S | j
 |dd”skt d” d S | j  |	jd ”s{t d” d S | j
 |ddg”st d” d S | j
 |ddg”st d” dS | j
 |ddg”s«t d” dS | j
 |	ddg”s»t d” d S | j
 |	ddg”sĖt d” d S | j  |g d¢g d¢”}|d u sį|d |krčt d” d S | j  |	g d¢g d¢”}|d u s’|d |krt d” d S | j  |	g d¢g d¢”}|d u s|d |kr%t d” d S | j  |	g d¢g d ¢”}|d u rB| j  |	g d!¢g d ¢”}|d u sN|d |krUt d"” d S |	jd S )#N)ŚMulŚSubŚSlicer*   )r   r   r   r   z8fuse_attention: failed to match unidirectional mask pathr   r   r(   i'  zCfuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]g     Ć@z?fuse_attention failed: mul_mask input 1 is not constant 10000.0g      š?z;fuse_attention failed: sub_mask input 0 is not constant 1.0z+expect slick_mask input 0 to be graph inputzKfuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]é   zIfuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]Fé   zJfuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]é   zDfuse_attention failed: slice_mask input 3 (axes) is not constant [2]zEfuse_attention failed: slice_mask input 4 (steps) is not constant [1])Ś	UnsqueezeŚGatherŚShapeŚMatMul)r-   r   r   r   é’’’’z/fuse_attention: failed to match last slice pathz0fuse_attention: failed to match first slice path)r.   r)   r/   r0   r1   )r   r   r   r   r   z3fuse_attention: failed to match last slice sub path)r.   r)   r/   r0   ŚLayerNormalization)r   r   r   r   r   )r.   r)   r/   r0   ŚSkipLayerNormalizationz5fuse_attention: failed to match last slice sub path 1)r   Śmatch_parent_pathŚloggerŚdebugŚlenŚop_typeŚget_constant_inputr   r   r   ŚutilsZcheck_node_input_valueŚfind_graph_inputŚinfo)r   Śsub_qkŚmul_qkŚ	matmul_qkŚlayernorm_before_attentionZ
mask_nodesZmul_maskZsub_maskZlast_slice_maskZ
slice_maskŚ_Zmul_valZlast_slice_pathZfirst_slice_pathZfirst_slice_subZfirst_slice_sub_1r	   r	   r
   Ś
match_maskJ   s   









’
’
ż
ż
ż

z%FusionGptAttentionMegatron.match_maskc           .   	   C   s*  d }d }|j dk}d }|s| jj|g d¢g d¢|d}n| jj|g d¢g d¢|d}|d u r0d S d }|sB|\}	}
}}}}|	jd }n|\}
}}}}|jd }| j |g d¢g d	¢”}|d u rj| j |g d
¢g d	¢”}|d u rut d” d S |\}}}}}}}|j dkr||jd krt d” d S |j dkr¤||jd kr¤t d” d S | j |g d¢g d¢”}|d u r»t d” d S |\}}}}| j |d”dkrŃt d” d S |  ||||”}| j |g d¢g d¢”}|d u ršt d” d S |\}}} }!||!krt d” d S | j |g d¢g d¢”}"|"d u rt d” d S |"\}#}$}%}&}'}(||(kr.t d” d S | j 	|'”\})}*t
|*tjrbt|*jdgkrb|*d dkrb|*d dkrb|*d dkrb|*d dksit d” d S |*d }+|+| jkrt d|+ d | j ” |+| _|*d },| j 	|#”\})}*tt t |,””}-t|*|-s«t d!|* d"|- ” d S | j 	|”\})}*t|*|-sĘt d#|* d"|- ” d S |  |%||”}|d u rŁt d$” d S | j |”såt d%” |  ||”}|d u r÷t d&” d S | j |”st d'” d S |  |||||jd ||” d S )(Nr4   )ŚAddrD   r1   ŚReshapeŚ	Transposer1   )r   r   Nr   r   r   )Śoutput_name_to_node)rD   r1   rE   rF   r1   )r   Nr   r   r   r   )ŚConcatrF   rE   ŚSplitrD   r1   r3   )r   r   r   r   r   Nr   )rH   rF   rE   rI   rD   r1   r4   z&fuse_attention: failed to match v pathr3   zAfuse_attention: skip_input != layernorm_before_attention.input[0]r+   )ZSoftmaxr)   r(   r1   )r   r   r   r   z'fuse_attention: failed to match qk pathZaxisz+fuse_attention failed: softmax_qk axis != 3)ŚDivrF   rE   rI   z&fuse_attention: failed to match q pathz-fuse_attention: skip since split_v != split_q)rJ   rF   rH   rF   rE   rI   )r   r   r   r   r   r   z&fuse_attention: failed to match k pathz-fuse_attention: skip since split_v != split_kr,   r   r-   z:fuse_attention: reshape constant input is not [0, 0, N, H]zDetected num_heads=z. Ignore user specified value zfuse_attention: div_k value=z
 expected=zfuse_attention: div_q value=z!fuse_attention: match past failedz(fuse_attention: past is not graph input.z$fuse_attention: match present failedz1fuse_attention: expect present to be graph output)r9   r   r5   r   r6   r7   r   Zget_node_attributerC   r:   Ś
isinstanceŚnpZndarrayŚlistŚshaper   r=   r   Śsqrtr   Zmatch_past_pattern_2r<   Zmatch_presentZfind_graph_outputr'   ).r   Znormalize_nodeZinput_name_to_nodesrG   r!   r"   Zis_normalize_node_skiplayernormZ	qkv_nodesZ
skip_inputZadd_skipZadd_after_attentionZmatmul_after_attentionr#   Ztranspose_qkvZ
matmul_qkvZv_nodesZconcat_vZtranspose_vZ	reshape_vZsplit_vr    r   rA   Zqk_nodesZ
softmax_qkr>   r?   r@   Zattention_maskZq_nodesZdiv_qZtranspose_qZ	reshape_qZsplit_qZk_nodesZdiv_krB   Zconcat_kZtranspose_kZ	reshape_kZsplit_kr%   r   r   Zhidden_size_per_headr   r	   r	   r
   Śfuse¤   s   
üüłś
	õ	õ
	ų








ż




’






łzFusionGptAttentionMegatron.fuse)Ś__name__Ś
__module__Ś__qualname__Ś__doc__r   Śintr   r'   rC   rP   Ś__classcell__r	   r	   r   r
   r      s    /Zr   )Śloggingr   ŚnumpyrL   Zfusion_gpt_attentionr   Zonnxr   Z
onnx_modelr   rQ   r6   r   r   r	   r	   r	   r
   Ś<module>   s   