o
    iϐ                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ e eZG d	d
 d
eZG dd deZG dd deZdS )    N)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                       s   e Zd ZdZdedededef fddZded	B d
e	de	de	dededededed	B de
de	d	B fddZdedededed	B ded	B ded	B ded	B deded	B ded	B dedede	d	B fddZdd Zdd  Zd!d" Z  ZS )#FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s$   t  j||||ddgd d| _d S )NFSoftmax)Zuse_multi_head_attentionZsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__ `/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_t5.pyr      s   
zFusionT5Attention.__init__
mask_indexNq_matmulk_matmulv_matmulinputoutput	attn_biasscalereturnc                 C   s  |dksJ |dkr|| dkrt d| d|  dS | j|jd }| j|jd }| j|jd }|du sD|du sD|du r^|du rJ|n|du rP|n|}t|jd  d dS t|}t|}t|}|j|jksuJ |jd }|jd }|jd }||  kr|ksJ  J |dkr||krt 	d| d| d	 t
|jdd }t
j|||fdd
}d| }| jd}tj|d tj||g| dd}| j|| j ||d dg}|r|| n|d |	r|d ||	 |r|d dkr|  |r|d dkstjd||g|d}d|_|jtd|g |
dur=|jtd|
g | jdurQ|jtdt| jg |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   Z	AttentionZ_qkv_weightTname	data_typedimsvalsraw inputsoutputsr*   com.microsoftr   r#   mask_filter_value)loggerdebugr   get_initializerr    printr   to_arrayshapewarningnpprodstackcreate_node_namer
   make_tensorr	   FLOATtobytesadd_initializerthis_graph_nameappendpop	make_nodedomain	attributeextendmake_attributer5   float)r   r   r   r   r   r   r   r    r!   r"   r#   Zq_weightZk_weightZv_weightmatmulZqwkwZvwZ
qw_in_sizeZ
kw_in_sizeZ
vw_in_sizeZqw_out_sizeZ
qkv_weightZqkv_weight_dimattention_node_nameweightattention_inputsattention_noder   r   r   make_attention_node)   s~   









z%FusionT5Attention.make_attention_nodequerykeyvaluepast_key
past_valuepresent_keypresent_valuec                 C   s  |dkr|dkr|r|r|sJ || dkr#t d| d|  d S | jd}|||dg}|r7|| n|d |rD|| n|d |rY|sOJ || || |rm|d dkrm|  |rm|d dksa|g}|	r|
svJ ||	 ||
 td|d|d	| tjd|||d
}d|_	|j
td|g |j
tddg | jd ur|j
tdt| jg | d |S )Nr   r%   r&   ZMultiHeadAttentionr/   r0   zattention_inputs=z, attention_outputs=z, attention_node_name=r1   r4   r   r#         ?r5   )r6   r7   r   r@   rF   rG   r9   r
   rH   rI   rJ   rK   rL   r5   rM   Zincrease_counter)r   rU   rV   rW   r   r"   rX   rY   r!   rZ   r[   r   r   rP   rR   Zattention_outputsrS   r   r   r   create_mha_node   sT    







z!FusionT5Attention.create_mha_nodec                 C   s$   |  |||r	d S | ||| d S N)fuse_t5_encoderfuse_t5_decoder)r   nodeinput_name_to_nodesoutput_name_to_noder   r   r   fuse   s   zFusionT5Attention.fusec                     s  |j dksJ | jj|g dg d|d}|d u rdS |\}}}| j|g dg d|}|d u r3dS |d }	| j|g d	g d
|}
|
d u rJdS |
\}}}| j|g dg d|}|d u rbdS |\}}}| j|g dg d|}|d u }|d ur|d }n(| j|g dg d|}|d u r| j|g dg d|}|d u rdS |d }| j|\}}|d u rdS |dkrt|| _| j|d g dg d| | j|d g dg d|} d urt fdd| j j	D r|d ur d j	d |d j	d krt
 d j	dkrd}n| j|d j	d }d }| j|ddgddg}|d u rA|rA| j|g dg d
}|d u rHdS |d jd }| j|g d	g d
}|d u rbdS |\}}}| j|g d	g d}|d u rzdS |\}}}|j	d |	j	d krdS | |\}}| j|||||||	j	d |jd |d d!
}|d u rdS | j| | j| j|j< | j| d"| _d"S )#Nr   MatMul	TransposeReshaper   r   rj   rj   edgesrb   FConcat	UnsqueezeGatherShaper   r   r   r   r0   rg   rh   rf   r   r   r   r   Addrf   r   r   r   rv   MulSubCastro   ro   r   r   r   r   r   r   r   )rv   Slicery   rz   ro   ro   )r   r   r   r   r   r   rv   r}   ry   rz   r{   ro   ro   r   r   r   r   r   r   r      )ConstantOfShapern   ro   rp   rq   r   r   r   r   r   )r   r   r   r   r   c                 3   s$    | ]}|j  d  jd kV  qdS )r0   r   N)r*   r    .0r    Zmask_nodes_2r   r   	<genexpr>F  s   " z4FusionT5Attention.fuse_t5_encoder.<locals>.<genexpr>r   r/   rv   RelativePositionBias)rv   r}   r   r\   )r   r   r    r!   r"   r#   T)op_typer   match_child_pathmatch_parent_pathget_constant_inputrM   r5   anygraphr    lenr   process_maskr!   get_num_heads_and_hidden_sizerT   nodes_to_addrF   rE   node_name_to_graph_namer*   nodes_to_removeprune_graph) r   softmax_noderb   rc   	qkv_nodes
matmul_qkv_reshape_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qk
mask_nodesZis_pattern_for_one_graph_inputmul_nodemul_valZmask_nodes_3r   res_pos_bias	rpb_nodesk_nodesZmatmul_kq_nodes	reshape_qmatmul_qq_num_headsq_hidden_sizenew_noder   r   r   r_      s  












z!FusionT5Attention.fuse_t5_encoderc           0      C   s  |j dksJ | jj|g dg d|d}|d u rd S |\}}}| j|g dg d}|d u r2d S |d }	d }
d }d }| j|g dg d	}|d u r| j|g d
g d}|d ur|\}}}|jd }
|jd }d|vrqd S |jd |	jd krd| _nCd| _n?|jd }||v rd S d|vrd S d| _n*|\}}}}|jd }||v rd S d|vrd S |jd }d|vrd S |jd }
d| _| j|g dg d}|d u rd S |\}}}d }d }| jdkr+| j|g dg d}|d ur|d }n| j|g dg d}|d u rd S |d }| j|\}}|dkr|| _| j	
|d jd }n*| j|ddgddgfddgddgfg|\}}}|dk rPtd d S |jd }d }d }d }| jdkr| j|g d
g d}|d ur|\} }!}|!jd }||!jd  }"|"D ]}#| j|#jd }$|$d ur|$j} nq|d u rd S d|vrd S n| j|dgdg}|d u rd S |d } | jd }||v rd S d |vrd S n| j|g d!g d"fg d#g d$fg|\}%}}d }&d }"|d urd|d |d% }'}!|!jd }|%dkr||'jd  }&|&jd }n|'jd }||v r#d S d&|vr*d S |%dkrP||'jd  }"|"D ]}#| j|#jd }$|$d urM|$j} nq8n|'jd }|d u r\d S d'|vrcd S nK| j|g d
g d}|d u rwd S |\}}!}|!jd }||!jd  }"|"D ]}#| j|#jd }$|$d ur|$j} nq|d u rd S d'|vrd S | j|g d
g d}(|(d u rd S |(\})}*}+|+jd |	jd krd S | |*\},}-| jdkr|d ur|}|}
d }d }|r|
r|,dkr|-dks d S | j|+jd ||
|||||jd |||,|-d(}.|.rn| j|. | j| j|.j< |s-|rc||fD ]1}/|/r=| j|/sHtd)|/d*  d S |/|v sOJ |/d+ ||/ jd< | j|/|/d+  q1| j| d,| _d S d S )-Nr   re   ri   rk   rm   rr   r0   )rn   rg   rh   rf   )r   r   r   r   rs   rt   r   r[   r   Zpast_value_crossZpast_value_selfZpresent_value_selfru   rw   rx   r|   r~   r   r   r   rv   r}   r   zGSkip MultiHeadAttention fusion since attention bias pattern not matchedZpresent_key_crossrg   Zpast_key_cross)rg   rn   rh   rf   )r   r   r   r   )rg   rn   rg   rh   rf   )r   r   r   r   r   Zpast_key_selfZpresent_key_self)rU   rV   rW   r   r"   rX   rY   r!   rZ   r[   r   r   zgraph_output=z does not exist in graph outputZ_copyF)r   r   r   r   r    r!   r   r   r5   r   r   Zmatch_parent_pathsr6   r7   Zfind_graph_outputr*   r   r]   r   rF   rE   r   r9   Zreplace_input_of_all_nodesr   r   )0r   r   rb   rc   r   r   Z_transpose_qkvr   r   r   rW   rY   r[   r   Ztranspose_vr   r   Zconcat_vr   r   r   r   r   r   r   r   r   Zmatched_path_indexrV   rX   rZ   r   Ztranspose_kZ	reshape_kZpresent_key_transpose_nodesZpresent_key_transpose_nodeZpresent_key_candidateidxZpast_key_transpose_nodeZconcat_kr   Ztranspose_qr   r   r   r   r   Zgraph_outputr   r   r   r`     s  













































 
z!FusionT5Attention.fuse_t5_decoder)__name__
__module____qualname____doc__r   intr   r   strr   rM   rT   r]   rd   r_   r`   __classcell__r   r   r   r   r      s    	

q	

F ,r   c                       s*   e Zd Zdef fddZdd Z  ZS )FusionRelativePositionBiasBlockr   c                    s   t  |ddg d S )Nr   r   )r   r   )r   r   r   r   r   r     s   z(FusionRelativePositionBiasBlock.__init__c                 C   sx  | j |g dg d|}|d u r$| j |g dg d|}|d u r$d S |d }|d }|d }|d }|| jv r;d S | j |g d	g d
|}	|	d u rNd S | j |	d jd }
|	d }| j |g dg d|}d}|d u r| j |g dg d|}d}|d u rd S |d }ttt|
d|rdnd  }|dkrt	
d| d | j jdd|rdnd d}| j |jd }|d u rd S t|}t|}tj|d tjt|d t|d g| dd}| j || j |j|jd |jd g}|d  }||jd< tjd||g|d!}d"|_|jtd#|g |jtd$|g | j| j|j< | j | d| _!d S )%N)rv   rv   r}   ro   rg   rp   Where)r   r   r   r   r   r   r   )rv   rv   r}   ro   rg   rp   rv   r   )r   r   r   r   r   r   r   r      r0   r   r(   )	Minr   rq   rv   r{   ry   DivZLogr   )	r   r   r   r   r   r   r   r   r   r   )r{   ZNegr   r   rq   rz   ro   Range)r   r   r   r   r   r   r   r   F)r{   ZAbsrz   ro   r   r   T          zmax_distance is z], which is different from the default value 128. Please double check the model configuration.r   ZRelPosBias_encoderdecoderZname_prefixr   Z_bias_table_weightr)   Z_rel_pos_biasr1   r4   max_distanceis_bidirectional)"r   r   r   Zget_constant_valuer    r   r=   roundexpr6   r<   r@   r8   r   r:   Z	transposer
   rA   r	   rB   r;   rC   rD   rE   r*   rH   rI   rJ   rK   rL   r   r   rF   r   )r   ra   rb   rc   Zcompute_bias_nodesgatherwheresliceZ	unsqueezeZcompute_buckets_nodesZlog_maxdivZrange_nodesr   Z
range_noder   	node_nameZtable_weight_iZtable_weightZtable_weight_tZ
bias_tabler2   Zbias_outputrpb_noder   r   r   rd     s   
$	


(

z$FusionRelativePositionBiasBlock.fuse)r   r   r   r   r   rd   r   r   r   r   r   r     s    r   c                       sj   e Zd Zddedef fddZdd Zdd	 ZdddZdd Zdd Z	dd Z
dd Zdd Z  ZS )T5OnnxModelr   r   r   c                    sz   t  ||| t| | _t| jjjdkr!ddlm	} |j
| j_t| | j| j| j| _t| | _t| | _t| | _d S )Nr   r   )AttentionMaskFormat)r   r   r   r   r   r   r   r    Zfusion_optionsr   ZNoMaskZmask_formatr   r   r   attention_fusionr   layer_norm_fusionr   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r   r   r   r   r   7  s   



zT5OnnxModel.__init__c                 C      | j   d S r^   )r   applyr   r   r   r   fuse_attentionF     zT5OnnxModel.fuse_attentionc                 C   r   r^   )r   r   r   r   r   r   fuse_layer_normI  r   zT5OnnxModel.fuse_layer_normTc                 C   r   r^   )r   r   )r   Zshape_inferr   r   r   fuse_skip_layer_normL  r   z T5OnnxModel.fuse_skip_layer_normc              	   C   s  |   D ]}|jdkr| |g dg d}|d urdd | jjjD }|d jd |v r| jdd	d
}tjd|d jd g|d g|d}tj	dt
jdgdgd}| | tjd|d dg|d g| jddd
dd}| | | | |d |jd< |d |jd<  d S qd S )Nr   )
rp   rq   rg   rh   rn   ro   rp   rq   ZSimplifiedLayerNormalizationrp   )
r   r   r   r   r   r   r   r   r   r   c                 S   s   g | ]}|j qS r   )r*   r   r   r   r   
<listcomp>f  s    z?T5OnnxModel.adjust_rel_pos_bis_length_input.<locals>.<listcomp>r0   r   rq   ZAdded_Shape_r   Z_Outputr1   ZConstant_Index_1)r*   r+   r,   r-   rp   Z_Output_Gather_1ZAdded_Gather_r   )r2   r3   r*   r'   r   )nodesr   r   r   r   r    r@   r
   rH   rA   r	   ZINT64rD   add_node)r   ra   r   Zgraph_input_namesr   Z
shape_nodeZ	indices_1r   r   r   r   adjust_rel_pos_bis_length_inputO  sL   




z+T5OnnxModel.adjust_rel_pos_bis_length_inputc                 C   s   g }|   D ]@}|jdkrF| |g dg d}|d u rq| |dgdg}|d u r+q|d }|jd |jd< || || | | qd S )Nrv   )ry   rz   ry   ro   r{   LessOrEqualTilern   ro   rp   rq   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r!   rK   rF   Zremove_nodesr   r   ra   Zextended_mask_nodesr   r   r   r   r   !remove_extended_mask_decoder_init  s(   



z-T5OnnxModel.remove_extended_mask_decoder_initc                 C   s   g }|   D ]B}|jdkrH| |g dg d}|d u rq| |ddgddg}|d u r-q|d }|jd |jd< || || | | qd S )Nrv   )ry   rz   ry   ro   rn   r{   r   r   rn   ro   rp   rq   )r   r   r   r   r   r   r   r   r   r   r   r   r}   r   r   r   r   r   r   r   remove_extended_mask_decoder  s(   



z(T5OnnxModel.remove_extended_mask_decoderc                 C   s   |    | j  d S r^   )Zadjust_reshape_and_expandr   r   r   r   r   r   
preprocess  s   zT5OnnxModel.preprocessc                 C   s$   |    |   |   |   d S r^   )r   r   r   r   r   r   r   r   postprocess  s   zT5OnnxModel.postprocessrj   )T)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   6  s    
9#$r   )loggingnumpyr=   Zfusion_attentionr   r   Zfusion_baser   Zfusion_simplified_layernormr   r   Zfusion_utilsr   Zonnxr   r	   r
   Z
onnx_modelr   Zonnx_model_bertr   	getLoggerr   r6   r   r   r   r   r   r   r   <module>   s&   
      