o
    i                     @   sz   d dl Z d dlmZmZ d dlmZ d dlmZmZ d dl	m
Z
 d dlmZ e eZG dd deZG d	d
 d
eZdS )    N)AttentionMaskFusionAttention)NumpyHelper)	NodeProtohelper)	OnnxModel)BertOnnxModelc                       sl   e Zd ZdZdedededef fddZded	e	d
e	dededededede	dB fddZ
dd Z  ZS )FusionTnlrAttentionz
    Fuse TNLR Attention subgraph into one Attention node.
    TNLR Attention has extra addition after qk nodes and adopts [S, B, NH] as I/O shape.
    modelhidden_size	num_headsattention_maskc                    s   t  |||| d S N)super__init__)selfr
   r   r   r   	__class__ b/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_tnlr.pyr      s   zFusionTnlrAttention.__init__
mask_indexmatmuladdinputoutput
add_qk_strreturnNc	                 C   s  |dksJ |dkr|| dkrt d| d|  d S | j|jd }	| j|jd p7| j|jd }
|	d u s@|
d u rBd S t|	}t|
}| jd}|	j}t	
|}t	j|d ||d| g|| dd	}	| j|	| j t	j|d
 |d| g|| dd	}
| j|
| j ||d |d
 g}|d ur|| n|d |d ur|d || t	jd||g|d}d|_|jt	d|g |S )Nr   zinput hidden size z# is not a multiple of num of heads    Z	AttentionZ_qkv_weight   T)name	data_typedimsvalsrawZ	_qkv_bias )ZinputsZoutputsr   zcom.microsoftr   )loggerdebugr
   Zget_initializerr   r   Zto_arrayZcreate_node_namer    r   Ztensor_dtype_to_np_dtypeZmake_tensorZastypetobytesZadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendZmake_attribute)r   r   r   r   r   r   r   r   r   weightZbiasZ
qkv_weightZqkv_biasZattention_node_nameZtensor_dtypeZnp_typeZattention_inputsZattention_noder   r   r   create_attention_node   s`   $






z)FusionTnlrAttention.create_attention_nodec                 C   s  |}|j dkr	d S | j|g dg d}|d ur"|\}}}}}	}
nd S g }t|jD ]\}}||vr4q+||d jd kr>q+|| q+t|dkrLd S |d }| j|
g dg d}|d u rbd S |\}}}}}| j|dgdg}|d }| j|
g d	g d
}|d u rd S |\}}}| j|g dg d}|d u rd S |d }|d }| j|g dg d}|d u rd S |d }|d }| j|ddgddg}|d u rd S |jd |kr^d }|}| |||| j	| j
||jd |d jd }|d u rd S | j| | j| j|j< tjdd|j g|jd gd|j g dd}| j|| j |jd |jd< d|j |jd< | j||	|
g | j| | j| | j| | j| d| _d S d S )NZSkipLayerNormalization)WhereAddMatMulReshape	Transposer2   )r   r   r   r   r   r   r   r   )r4   r3   Slicer1   r2   )r   r   r   r   r   r4   )ZSoftmaxr1   r2   )r   r   r   )ZMulr4   r3   r5   r1   r2   )r   r   r   r   r   r   r3   r0   Zback_transpose_in_Zback_transpose_)r   r      )permT)Zop_typer
   Zmatch_parent_path	enumerater   r   r)   lenr/   r   r   Znodes_to_addr(   Znode_name_to_graph_namer   r   r*   add_nodeZnodes_to_remover-   Zprune_graph)r   Znormalize_nodeZinput_name_to_nodesZoutput_name_to_nodeZ
start_nodeZ	qkv_nodes_Zmatmul_belowZreshape_qkvZtranspose_qkvZ
matmul_qkvZother_inputsZ_ir   Z
root_inputZv_nodesr   r   Zupper_nodesZ	transposeZqk_nodesZadd_qkZ	matmul_qkZq_nodesZk_nodesZrelative_position_bias_nodesr   Zattention_last_nodenew_nodeZback_transposer   r   r   fuseg   s   





zFusionTnlrAttention.fuse)__name__
__module____qualname____doc__r   intr   r   strr   r/   r?   __classcell__r   r   r   r   r	      s>    		

Hr	   c                       s$   e Zd Z fddZdd Z  ZS )TnlrOnnxModelc                    s4   t  ||| t| | _t| | j| j| j| _d S r   )r   r   r   r   r	   r   r   attention_fusion)r   r
   r   r   r   r   r   r      s   
zTnlrOnnxModel.__init__c                 C   s   | j   d S r   )rH   apply)r   r   r   r   fuse_attention   s   zTnlrOnnxModel.fuse_attention)r@   rA   rB   r   rJ   rF   r   r   r   r   rG      s    rG   )loggingZfusion_attentionr   r   Zfusion_utilsr   Zonnxr   r   Z
onnx_modelr   Zonnx_model_bertr   	getLoggerr@   r%   r	   rG   r   r   r   r   <module>   s   
 L