o
    ¢ÄiMA  ã                   @   st   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZ eeƒZG dd	„ d	eƒZdS )
é    )Ú	getLoggerN)ÚAttentionMask)ÚFusion)ÚFusionUtilsÚNumpyHelper)Ú	NodeProtoÚhelper)Ú	OnnxModelc                       sP   e Zd Zdedededef‡ fdd„Zdedeeef fd	d
„Z	dd„ Z
‡  ZS )ÚFusionQOrderedAttentionÚmodelÚhidden_sizeÚ	num_headsÚattention_maskc                    s&   || _ || _|| _tƒ  |dd¡ d S )NÚQOrderedAttentionZQOrderedLayerNormalization)r   r   r   ÚsuperÚ__init__)Úselfr   r   r   r   ©Ú	__class__© úl/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_qordered_attention.pyr      s   z FusionQOrderedAttention.__init__Ú	reshape_qÚreturnc                 C   sf  | j  |jd ¡}|du rCt |jd › d¡ | j  |dgdg¡}|du r,| j| jfS |d }t|j	ƒdkr=| j| jfS |j	d j
}t |¡}t|ƒdksZ|d dksZ|d dkrit d	|› d
¡ | j| jfS |d }|d }|| }| jdkr’|| jkr’| jr’t d| j› d|› d¡ d| _| jdkr¯|| jkr¯| jr¯t d| j› d|› d¡ d| _||fS )zÊDetect num_heads and hidden_size from a reshape node.
        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        é   Nz is not initializer.ÚConstantr   é   é   é   zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   Úget_initializerÚinputÚloggerÚdebugÚmatch_parent_pathr   r   ÚlenÚ	attributeÚtr   Úto_arrayZnum_heads_warningÚwarningZhidden_size_warning)r   r   Zq_shapeZconstant_nodeZq_shape_valuer   Z	head_sizer   r   r   r   Úget_num_heads_and_hidden_size    s8   	
$ÿz5FusionQOrderedAttention.get_num_heads_and_hidden_sizec           B      C   sr  | j  |ddgddg¡}|d ur|d }nd S | j  |dgd g¡}|d u r,t d¡ d S |d }| j  |g d¢g d¢¡}|d u rGt d	¡ d S |\}}	}
}}}}t || j ¡sYd S t || j ¡sbd S g }t|jƒD ]\}}||vrrqi||d jd kr|qi| 	|¡ qit
|ƒd
krŠd S |d }| j  |g d¢g d¢¡}|d u r¥t d¡ d S |\}}}}}}t || j ¡s¶d S t || j ¡s¿d S | j  |dgd
g¡}|d u rÔt d¡ d S |d }| j  |jd ¡d u råd S t || j d¡sïd S | j  |g d¢g d¢¡}|d u rt d¡ d S |\}}}}}}} }!t || j ¡sd S t || j ¡s%d S t | | j ¡s/d S t || j ¡s9d S | j  |!g d¢g d¢¡}"|"d u rQt d¡ d S |"\}}#}$}%}&}'t |%| j ¡scd S t |$| j ¡smd S | j  |'dgd
g¡}(|(d u rƒt d¡ d S |(d }(| j  |(jd ¡d u r•d S t |(| j d¡s d S | j  |!g d¢g d¢¡})|)d u r¸t d¡ d S |)\}}}*}+},}-t |+| j ¡sÊd S t |*| j ¡sÔd S | j  |-dgd
g¡}.|.d u rêt d¡ d S |.d }.| j  |.jd ¡d u rüd S t |.| j d¡sd S | j  |g d¢g d¢¡}/|/d u rt d¡ d S | j  |(jd ¡}0| j  |.jd ¡}1| j  |jd ¡}2t |0¡}3t |1¡}4t |2¡}5t |3jd
d … ¡}6t |4jd
d … ¡}7t |5jd
d … ¡}8|jd |kr3|'jd |kr5|-jd |kr7| j |/d jd ¡}9|  |#¡\}:};|jd g}<|< 	|jd
 ¡ |< 	|$jd
 ¡ |< 	|*jd
 ¡ |< 	|jd
 ¡ |< 	|(jd ¡ |< 	|.jd ¡ |< 	|jd ¡ |< 	|(jd
 ¡ |< 	|.jd
 ¡ |< 	|jd
 ¡ | j  |&jd ¡rú|< 	|&jd ¡ n|< 	|&jd
 ¡ | j  |,jd ¡r|< 	|,jd ¡ n|< 	|,jd
 ¡ | j  |jd ¡r0|< 	|jd ¡ n|< 	|jd
 ¡ |< 	| jd
 ¡ |< 	|jd
 ¡ |< 	|jd
 ¡ |9d ur[|< 	|9¡ n|< 	d¡ | j  |(jd ¡}=t |=¡ | j  |.jd ¡}>t |>¡ | j  |jd ¡}?t |?¡ | j  d¡}@tjd|<|
jd g|@d}A| j  ||jd |Ajd ¡ | j  |	|	jd |jd ¡ |Aj t d|:¡g¡ |Aj t dd
¡g¡ |Aj t dd¡g¡ |Aj t dd
¡g¡ |Aj t d|6|7|8g¡g¡ d |A_| j 	|A¡ | j| j |Aj!< | j" |
|||g¡ | j" |¡ | j" |"¡ | j" |)¡ | j" |¡ | j" |(|.|g¡ d!| _#d S d S d S d S )"NÚQuantizeLinearÚAddr   éÿÿÿÿÚDequantizeLinearz=fuse_qordered_attention: failed to match input qdq nodes path)r*   ÚMatMulÚReshapeÚ	Transposer,   r)   r-   )NNr   r   r   r   r   z1fuse_qordered_attention: failed to match qkv pathr   )r/   r.   r,   r)   r*   r-   )r   r   r   r   r   Nz/fuse_qordered_attention: failed to match v pathF)r,   r)   ZSoftmaxr*   ÚDivr,   r)   r-   )r   r   r   r   Nr   r   r   z0fuse_qordered_attention: failed to match qk path)r   r   r   r   r   Nz/fuse_qordered_attention: failed to match q pathz/fuse_qordered_attention: failed to match k path)ZMulÚSubZCastÚ	Unsqueezer2   )Nr   r   r   r   z8fuse_qordered_attention: failed to match mask_nodes pathÚ r   )ZinputsZoutputsÚnamer   Zorder_inputZorder_weightZorder_outputZqkv_hidden_sizeszcom.microsoftT)$r   r"   r    r!   r   Zcheck_qdq_node_for_fusionÚ	enumerater   ÚoutputÚappendr#   Zget_constant_valuer   r   r&   ÚnpÚprodÚshaper   Zprocess_maskr(   Ztranspose_2d_int8_tensorZcreate_node_namer   Z	make_nodeZreplace_node_inputr$   ÚextendZmake_attributeÚdomainZnodes_to_addZthis_graph_nameZnode_name_to_graph_namer4   Znodes_to_removeZprune_graph)Br   Znormalize_nodeZinput_name_to_nodesZoutput_name_to_nodeZadd_before_layernormZ
start_nodeZdequantize_inputZ	qkv_nodesÚ_Zprojection_matmulZreshape_qkvZtranspose_qkvZdequantize_qkvZquantize_qkvZ
matmul_qkvZother_inputsZ_ir   Z
root_inputZv_nodesZdequantize_vZ
quantize_vZadd_vZmatmul_vZdequantize_v_matmul_weightZqk_nodesZdequantize_qk_softmaxZquantize_qk_softmaxZ
softmax_qkZadd_qkZdiv_qkZdequantize_qkZquantize_qkZ	matmul_qkZq_nodesr   Zdequantize_qZ
quantize_qZadd_qZmatmul_qZdequantize_q_matmul_weightZk_nodesZdequantize_kZ
quantize_kZadd_kZmatmul_kZdequantize_k_matmul_weightZ
mask_nodesZq_weightZk_weightZv_weightZqwÚkwZvwZqw_out_sizeZkw_out_sizeZvw_out_sizeZ
mask_indexr   r   Zattention_inputsZq_weight_tensorZk_weight_tensorZv_weight_tensorZattention_node_nameZattention_noder   r   r   ÚfuseS   s¦  ý
ý
ý
ý


ô

÷ý



ý



ÿ




0





üÿÿ
žzFusionQOrderedAttention.fuse)Ú__name__Ú
__module__Ú__qualname__r	   Úintr   r   r   Útupler(   r?   Ú__classcell__r   r   r   r   r
      s    þýüû3r
   )Úloggingr   Únumpyr8   Zfusion_attentionr   Zfusion_baser   Zfusion_utilsr   r   Zonnxr   r   Z
onnx_modelr	   r@   r    r
   r   r   r   r   Ú<module>   s   