o
    iM                     @   sV   d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	 e 
eZG dd deZdS )    N)AttentionMaskFusionAttention)helper)	OnnxModelc                       s:   e Zd ZdZdedededef fddZdd	 Z  Z	S )
FusionBartAttentionz?
    Fuse Bart Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s   t  |||| d S )N)super__init__)selfr   r   r	   r
   	__class__ h/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_bart_attention.pyr      s   zFusionBartAttention.__init__c           Y      C   s  | j |g dg d}|d ur|\}}}}}	ntd d S g }
|jD ]}||vr+q$||d jd kr5q$|
| q$t|
dkrCd S |
d }	 || }|jdv rY| j 	|d }|jD ]}|saq\|| }dd |D }|
d	dkrw|} nq\d
d | j  jD }dd | j  jD }| j |	g dg d}| j |	g dg d}| j |	g dg d}d\}}g d d }}}|d ur%|}|\}}}}||jd  }|D ]'}|jdkr| j |ddgddg}|d ur|d jd }||jd  } nq|D ]%}||jd  } | D ]}!|!jd |v r|!jd } nq	|dkr# nqn=|d ur>|}|\}"}}}}|"jd }|"jd }n$|	jd |v rL|	jd }n|d ur[|}|d jd }ntd d S ||v ri|nd}||v rr|nd}| j |	dd	gddg}#| j |	g dg d}$g d }%}&|#d ur|#\}'}(|#}%n|$d ur|$\}'}&}(|$}%ntd d S | j |(g dg d})| j |(g d g d}*g }+|)d ur|)}+|+\},}-}.}/}0n|*d ur|*}+|+\}.},}-}/}0ntd! d S | j |(g d"g d#}1| j |(g d$g d%}2| j |(g d&g d}3| j |(g d'g d(}4d\}5}6g d d }7}8}9|1d urU|1}7|7\}:};}9||;jd  }<|<D ]}=|=jd |v rR|=jd }6 nqAn|2d urn|2}7|7\}'}>}:};}9|>jd }5|>jd }6n||(jd  jd |v r||(jd  g}7|7d jd }5n|3d ur|3}7|7\}'}:};}9||9jd  }|D ]*}|jdkr| j |ddgddg}?|?d ur|?d jd }5||jd  } nq|D ]&}||jd  } | D ]}!|!jd |v r|!jd }6 nq|6dkr nqn|4d ur|4}7|7d jd }5ntd) d S |5|v r|5nd}5|6|v r|6nd}6|9d urp|8d u rp| j |jd }@|@jd }A|@j}Bd*}C| j |C}D|Dd u r[| j|C|B|Agtjd+g|A t|Bd,d- | j d.}Etd.|C|9jd g|;jg|E}8t|5ot|o|9d u o|d u }F|F o|0jd |ko|9jd |ko|jd |k}G|F o|0jd |ko|9jd |jd ko|9jd |0jd k}H|Go|%|#k}I|Go|%|$k}J|Ho|%|#k}K|Jot|5ot|}L|Fo|%|#k}M|%|$k}Ng }O|NrN| j |&d/gdg}P| j |&g d0g d1}Q| j |&g d2g d3}R| j |&d4d4gddg}S|Qd ur&|Q}On|Rd ur.|R}On|Sd ur6|S}On|Pd ur>|P}Ontd5 d S t|OdksNJ |Is]|Js]|Ks]|Ls]|Mr|}T| |-\}U}V|Udksw|Vdksw|V|U dkr~td6 d S d }W|Ls|Ks|Mr| jr| j|0|Ks|Lr|9n|5|Ks|Lr|n||/|Ks|Lr|8nd |Ks|Lr|nd |U|V|Tjd |N|Lr|5nd|Lr|nd|6|d7nd }WnB| j}Xd8| _| jdKi d9d d:|0d;|9d<|d=|/d>|8d?|d@|UdA|VdB|dC|Tjd dD|NdE|5dF|dG|6dH|}W|X| _|Wd u rtdI d S | j|W | j| j|Wj< | j |T||	g | j |% |LsC|KsC|Mrt|+dkrV|+d jd	krV|+!  t|7dkri|7d jd	kri|7!  t|dkr||d jd	kr||!  | j"rt|+dkr|+d jd.kr|+!  t|7dkr|7d jd.kr|7!  t|dkr|d jd.kr|!  | j |+ | j |7 | j | dJ| _#d S d S )LN)AddMatMulReshape	Transposer   )   r   r   r   r   z(fuse_attention: failed to match qkv pathr   r   >   r   ZClipc                 S   s   g | ]}|j qS r   )op_type).0childr   r   r   
<listcomp>O       z,FusionBartAttention.fuse.<locals>.<listcomp>r   c                 S      h | ]}|j qS r   namer   noder   r   r   	<setcomp>T   r   z+FusionBartAttention.fuse.<locals>.<setcomp>c                 S   r   r   r   r   r   r   r   r!   U   r   )r   r   r   r   )r   r   r   N)Concatr   r   r   r   )r   r   r   r   N)r   r   r   r   )r   r   r   r   ) r#   r"   r   r   r#   z&fuse_attention: failed to match v pathSoftmax)r%   r   r   )r   r   r   z'fuse_attention: failed to match qk path)r   r   Mulr   r   )r   r   r   r   r   )r&   r   r   r   r   z&fuse_attention: failed to match q path)r   r   r   )r   r   r   )r   r"   r   r   r   )r   r   r   r   r   )r&   r   r   r   )r&   r   r   r   r   )r   r   r   r   r   z&fuse_attention: failed to match k pathZ
empty_biasg        )dtype)dimsvalsr   Where)SliceZExpandr*   )r   r   r   )r+   Z	UnsqueezeZGatherZShaper   )r      r   r   r   r+   z*fuse_attention: failed to match mask nodesz9fuse_attention: failed to detect num_heads or hidden_size)q_matmulk_matmulv_matmulq_addk_addv_addr	   r   outputZunidirectionalpast_kpast_v	present_k	present_vFZ
mask_indexr-   r.   r/   r0   r1   r2   r	   r   Zfirst_inputr3   Zcausalr4   r5   r6   r7   z+fuse_attention: failed to create fused nodeTr   )$r   Zmatch_parent_pathloggerdebuginputr3   appendlenr   Zget_childrencountgraphZget_initializerr(   Z	data_typeZadd_initializernparrayr   Ztensor_dtype_to_np_dtypeZcreate_node_nameZ	make_noder   boolZget_num_heads_and_hidden_sizeZuse_multi_head_attentionZcreate_multihead_attention_nodeZcreate_attention_nodeZnodes_to_addZthis_graph_nameZnode_name_to_graph_nameZnodes_to_removeextendpopZ!disable_multi_head_attention_biasZprune_graph)Yr   Znormalize_nodeZinput_name_to_nodesZoutput_name_to_nodeZ	qkv_nodesZadd_outZ
matmul_outZreshape_qkvZtranspose_qkvZ
matmul_qkvZother_inputsZinput_Z
root_inputZskip_layernormr3   childrenZchildren_typesZgraph_input_namesZgraph_output_namesZv_nodes_past_or_presentZv_nodes_with_pastZv_nodes_past_only_oair5   r7   Zv_nodesZadd_vZmatmul_vZtranspose_vZ	reshape_vZstart_child_nodesZstart_child_nodeZconcat_v_nodesZstart_grandchild_nodesZstart_grandchild_nodeZconcat_vZqk_nodes_no_maskZqk_nodes_with_maskZqk_nodesZadd_qk_Z	matmul_qkZ
q_nodes_hfZq_nodes_oaiZq_nodesZtranspose_qZ	reshape_qZmul_qZadd_qZmatmul_qZk_nodes_no_past_hfZk_nodes_with_past_hfZk_nodes_past_or_present_oaiZk_nodes_past_only_oair4   r6   Zk_nodesZadd_kZmatmul_kZtranspose_kZ	reshape_kZtranspose_k_nodesZtranspose_k_nodeZconcat_kZconcat_k_nodesZadd_v_tensorZbias_dimr'   Zempty_bias_nameZempty_tensorZadd_nameZthree_root_inputsZone_root_inputZtwo_root_inputsZencoder_attentionZdecoder_self_attentionZdecoder_cross_attentionZ decoder_self_attention_with_pastZ!decoder_cross_attention_with_pastZcausal_maskZ
mask_nodesZmask_nodes_bartZmask_nodes_whisper_hfZmask_nodes_whisper_oaiZ mask_nodes_whisper_oai_unit_testZattention_last_noder	   r   new_nodeZ%use_multi_head_attention_ground_truthr   r   r   fuse   s  
































&




"

	




zFusionBartAttention.fuse)
__name__
__module____qualname____doc__r   intr   r   rG   __classcell__r   r   r   r   r      s    	r   )loggingnumpyr?   Zfusion_attentionr   r   Zonnxr   Z
onnx_modelr   	getLoggerrH   r8   r   r   r   r   r   <module>   s   
