o
    i=b                     @   sl   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlmZ eeZG dd deZdS )	    )	getLoggerN)Fusion)FusionUtils)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                       s.  e Zd ZdZdef fddZd.dedefdd	Zd
ede	defddZ
dededefddZdededB fddZdededB fddZdedefddZdedeeef de	fddZdededB fdd ZdededB fd!d"Zd#ededB fd$d%Zd#ed&ed'ed(ed)edefd*d+Zd,d- Z  ZS )/FusionMultiHeadAttentionMMDitzO
    Fuse MultiHeadAttention for Multimodal Diffusion Transformer (MMDiT).
    modelc                    s   t  j|ddgd i | _d S )NMultiHeadAttentionSoftmax)Zfused_op_typeZsearch_op_types)super__init__unsqueeze_update_map)selfr   	__class__ c/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_mha_mmdit.pyr      s   
z&FusionMultiHeadAttentionMMDit.__init__r   
start_nodereturnc                 C   s   | j j|g d|ddg|d}|du rdS |d }t|jdkr"dS | j |jd }|du r1dS t|jdkr:dS t|d S )	a  
        Detect num_heads from Reshape & Transpose of q/k/v for both Stable Diffusion 3.x and Flux 1.x:

                MatMul    .. [-1] [24] ..
                 |        |  |  /   /
                Add     Concat(axis=0)
                  |      /
                  Reshape
                     |
                 Transpose(perm=0,1,3,2)
                     |
               (start_node)
        )	TransposeReshapeConcatr      output_name_to_nodeN      )r   match_parent_pathleninputZget_constant_valueshapeint)r   r   r   input_indexnodesZconcat_shapevaluer   r   r   get_num_heads   s   z+FusionMultiHeadAttentionMMDit.get_num_headstranspose_kconcat_before_transposec                 C   sh   |r| j j|ddgddg|d}|r| |d |S dS | j j|dgdg|d}|r2| |d |S dS )a  
                Detect num_heads from subgraph like the following (num_heads=24 in this example):
                               MatMu    .. [-1] [24] ..
                                 |       |  |  /   /
                                Add     Concat
                                  |      /
                                 Reshape
                                    |
                             Transpose(perm=0,2,1,3)
                                    |
                             SimplifiedLayerNormalization
                                    |
                            Transpose(perm=0,1,3,2)

                Another variant is to an extra Concat node to join two symmetrical subgraphs:

                           |              |
                          MatMul        MatMul   .. [-1] [24] ..
                           |              |       |  |  /   /
                          Add  Concat    Add      Concat
                            |  /          |      /
                          Reshape         Reshape
                            |              |
                         Transpose     Transpose(perm=0,2,1,3)
                            |              |
        SimplifiedLayerNormalization  SimplifiedLayerNormalization
                                |     /
                               Concat
                                 |
                            Transpose(perm=0,1,3,2)

                    Both patterns are used in stable diffusion 3.5 model.
        r   SimplifiedLayerNormalizationr   r   r   )r   r!   r)   )r   r*   r   r+   r'   r   r   r   get_num_heads_from_k:   s   "z2FusionMultiHeadAttentionMMDit.get_num_heads_from_k
input_nameoutput_namec                 C   s   d}| j |}|du r"tjtjg ddd|d}| j || j tj	d||g|g| j 
dd}| j| | j| j|j< |jd	 S )
a+  Add a Reshape node to convert 4D BxSxNxH to 3D BxSxD.

        Args:
            input_name (str): input name for the 4D tensor of shape BxSxNxH.
            output_name (str): output name for the 3D tensor of shape BxSxD, where D = N * H.

        Returns:
            str: the output name
        Zbsnh_to_bsd_reshape_dimsN)r   r   r   Zint64)Zdtype)namer   inputsoutputsr0   r   )r   get_initializerr   Z
from_arraynparrayadd_initializerthis_graph_namer   	make_nodecreate_node_namenodes_to_addappendnode_name_to_graph_namer0   output)r   r.   r/   Znew_dims_nameZnew_dimsZ	reshape_qr   r   r   reshape_to_3dk   s   

z+FusionMultiHeadAttentionMMDit.reshape_to_3dmul_qNc                 C   s   | j |ddgddg}|du rdS |\}}t|dg ds!dS |jd |jd< |jd }|d |jd< | |jd |d S )	a  
        MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

        Before:
                               MatMul
                                 |
                               Add      Concat
                                 |      /
                                 Reshape
                                  |
                               Transpose(perm=0,2,1,3)
                                  |
                       SimplifiedLayerNorm
                                  |
                                 Mul

        After:
                               MatMul
                                 |
                                Add      Concat
                                 |      /
                                 Reshape
                                   |
                           SimplifiedLayerNorm
                                   |
                        Reshape (shape=[0, 0, -1])
        r,   r   r   Npermr   r    r      _BSNH_BSD)r   r!   r   check_node_attributer#   r>   r?   )r   r@   r   pathsln_atranspose_aZ
sln_outputr   r   r   'adjust_query_from_bnsh_to_bsd_no_concat   s   
zEFusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd_no_concatc           
      C   s8  | j |g dg d}|du rdS |\}}}t|jdkr dS | j |ddgddg}|du r2dS |\}}t|d	g d
sAdS t|d	g d
sLdS t|ddsUdS |jd |jd< |jd |jd< tjd|jd |jd g|jd d g| j 	ddd}	| j
|	 | j| j|	j< | |	jd |jd d S )a  
        MultiHeadAttenion requires query in BSD format. This function adjusts query from BNSH to BSD format.

            Before:
                      MatMul      MatMul
                        |            |
                        Add Concat  Add    Concat
                         |    /      |      /
                         Reshape     Reshape
                            |           |
        Transpose(perm=0,2,1,3)      Transpose(perm=0,2,1,3)
                            |           |
            SimplifiedLayerNorm  SimplifiedLayerNorm
                            |     /
                            Concat(axis=2)
                             |
                            Mul

            After:
                   MatMul        MatMul
                     |              |
                    Add Concat     Add     Concat
                     |    /         |     /
                     Reshape       Reshape
                        |            |
           SimplifiedLayerNorm  SimplifiedLayerNorm
                        |       /
                      Concat(axis=1)
                         |
                      Reshape (shape=[0, 0, -1])
        )r   r,   r   )r   r   r   Nr    r,   r   r   r   rA   rB   axisr   rD   r2   r3   r0   rK   rE   )r   r!   r"   r#   r   rF   r   r9   r>   r:   r;   r<   r8   r=   r0   r?   )
r   r@   r   rG   concatrH   rI   sln_btranspose_bnew_concat_noder   r   r   adjust_query_from_bnsh_to_bsd   sH   !

z;FusionMultiHeadAttentionMMDit.adjust_query_from_bnsh_to_bsd	unsqueezec                 C   s   | j |j}|d u rwt|jdkr)tjd|j|jd d g| j	ddgd}n6d}| j
|d u rGtj|tjdgdgd}| j|| j tjd|jd |g|jd d g| j	dd	}| j| | j| j|j< |jd }|| j |j< |S )
Nr   	Unsqueezer   rD   r    )r2   r3   r0   Zaxesunsqueeze_axes_2)r0   Z	data_typedimsvalsr1   )r   getr0   r"   r#   r   r9   r>   r   r:   r4   Zmake_tensorr   ZINT64r7   r8   r;   r<   r=   )r   rR   Zupdated_unsqueeze_outputnew_nodeZinitializer_namerT   r   r   r   update_unsqueeze_axes_1_to_2  s<   


z:FusionMultiHeadAttentionMMDit.update_unsqueeze_axes_1_to_2addr   c                 C   s,  t |jdkr	dS | j|g dg d|}|du rdS t| j}||d }|du s1|dgkr3dS ||d }|du sC|dgkrEdS | j|g dg d|}|du rXdS ||d }|du sh|dgkrjdS ||d }|du sz|dgkr|dS | |d |d jd< | |d |d jd< d	S )
a  
        Update axes of Unsqueeze from [1] to [2] in the following pattern:
                  Unsqueeze        Unsqueeze
                  (axes=[0])       (axes=[0])
                     |              |
                  Unsqueeze        Unsqueeze
              ... (axes=[1])  ...  (axes=[1])
                |     /        |   /
                   Mul         Mul
                    |       /
                     Add
        Args:
            add (NodeProto): the Add node
            output_name_to_node (Dict[str, NodeProto]): mapping from output name to node

        Returns:
            bool: True if the pattern is matched and updated successfully, False otherwise.
        r    F)MulrS   rS   )r   r   r   Nr   r   )r   r   r   T)r"   r#   r   r!   r   Zget_squeeze_or_unsqueeze_axesrY   )r   rZ   r   Znodes_bfusion_utilsZaxes_1Zaxes_0Znodes_ar   r   r   update_unsqueeze_axes(  s0   
z3FusionMultiHeadAttentionMMDit.update_unsqueeze_axesc                 C   sf  | j |g dg d}|du rdS |\}}}}}t|jdkr"dS | j |ddgddg}|du r4dS |\}	}
t|d	g d
sCdS t|
d	g d
sNdS t|ddsWdS | ||s_dS |jd |jd< |
jd |	jd< tjd|j	d |	j	d g|j	d d g| j 
ddd}| j| | j| j|j< | j |j	d |j	d  | |j	d |j	d d S )a3  
        Adjust graph to change query format from BNSH to BSD for Flux model.
        Note that the graph pattern is complex, and we only do a shallow match here.

        Before:
                       |               |
        Transpose(perm=0,2,1,3)    Transpose(perm=0,2,1,3)
                        |              |
        SimplifiedLayerNorm  SimplifiedLayerNorm
                        |             /
                        Concat(axis=2)
                         |
                        Mul     Mul
                         |    /
                          Add
                           |
                          Mul

        After (Transpose nods are removed, and a Reshape is added):

                        |           |
            SimplifiedLayerNorm  SimplifiedLayerNorm
                        |         /
                    Concat(axis=1)
                        |
                        Mul    Mul
                         |    /
                          Add
                           |
                       Reshape (shape=[0, 0, -1])
        )Addr[   r   r,   r   )r   r   r   r   r   Nr    r,   r   r   r   rA   rB   rK   r   rD   rL   rE   )r   r!   r"   r#   r   rF   r]   r   r9   r>   r:   r;   r<   r8   r=   r0   Zreplace_input_of_all_nodesr?   )r   r@   r   rG   rZ   _mul_arM   rH   rI   rN   rO   rP   r   r   r   "adjust_flux_query_from_bnsh_to_bsd]  sN   !
z@FusionMultiHeadAttentionMMDit.adjust_flux_query_from_bnsh_to_bsdc                 C   s   | j |g dg d}|du rdS |\}}}}t|dg ds#dS | ||s+dS |jd |jd< |jd d |jd< | |jd |jd d S )	a0  
        Adjust graph to change query format from BNSH to BSD for Flux model.
        Note that the graph pattern is complex, and we only do a shallow match here.

        Before:
                      |
                    Transpose(perm=0,2,1,3)
                      |
                    SimplifiedLayerNorm
                      |
                     Mul     Mul
                       |   /
                       Add
                        |
                       Mul

        After (Transpose is removed, and a Reshape is added):

                        |
                      SimplifiedLayerNorm
                        |
                        Mul   Mul
                         |   /
                         Add
                          |
                       Reshape (shape=[0, 0, -1])
        )r^   r[   r,   r   )r   r   r   r   NrA   rB   r   rD   rE   )r   r!   r   rF   r]   r#   r>   r?   )r   r@   r   rG   rZ   r_   rH   rI   r   r   r   )adjust_flux_single_query_from_bnsh_to_bsd  s   zGFusionMultiHeadAttentionMMDit.adjust_flux_single_query_from_bnsh_to_bsdqc                 C   sZ   t jd|g|d g| jjdddg dd}| j| | j| j|j< | 	|d |d S )Nr   rD   ZTranspose_BNSH_to_BSNH)Zname_prefixrB   )r0   rA   rE   )
r   r9   r   r:   r;   r<   r8   r=   r0   r?   )r   rb   r   Ztranspose_qr   r   r   transpose_reshape_bnsh_to_bsd  s   z;FusionMultiHeadAttentionMMDit.transpose_reshape_bnsh_to_bsdkvr>   	num_headsc           	      C   sV   |dksJ |||g}|g}t jd||| jdd}d|_|jt d|g |S )a~  
        Create a MultiHeadAttention node.

        Args:
            q (str): name of q
            k (str): name of k
            v (str): name of v
            output (str): output name of MHA
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the node created.
        r   r   r1   zcom.microsoftrf   )r   r9   r   r:   domain	attributeextendZmake_attribute)	r   rb   rd   re   r>   rf   Z
mha_inputsZmha_outputsZmha_noder   r   r   create_multihead_attention_node  s   

z=FusionMultiHeadAttentionMMDit.create_multihead_attention_nodec                 C   s  |j dksJ |}| j|jd rd S | j|g dg d|}|d u r'd S |\}}}t|dg ds7d S | j|g dg d}	|	d u rId S |	\}
}}}}}}}|jd }||jd krad S | j|
d	d
gddg}|d u rsd S |\}}|jd }t|dg dsd S | j|ddgddg}|d u rd S |d jd |jd krd S |jd }| jj	|dd|d}|d ur| jj	|d
d|d}|d u rd S t|dg dsd S | jj	|d
d|d}|d u rd S t|dg dsd S n| jj	|d
d|d}|d u rd S t|dg dsd S |r| 
||n| j
||dd}|dkr4| |||d u}|dkr4d S |d ur@| ||}n| ||}|d u rg| ||}|d u rg| ||}|d u rg| ||}| j||||jd |d}| j| | j| j|j< | j|||g d| _d S )Nr   r   )MatMulr   r   )r   r   rl   rl   rA   rB   )rk   r[   SqrtDivrm   ZCastSliceZShape)r   r   r   r   r   r   r   r   r[   r   r   )r   r   rC   r    rm   rn   r   )r&   r   )r&   )rb   rd   re   r>   rf   T)Zop_typer   Zfind_graph_outputr>   Zmatch_child_pathr   rF   r!   r#   Zmatch_parentr)   r-   rQ   rJ   r`   ra   rc   rj   r;   r<   r8   r=   r0   Znodes_to_removeri   Zprune_graph)r   nodeZinput_name_to_nodesr   Zsoftmaxr'   Z
matmul_s_vZtranspose_outZreshape_outZq_nodesZ	matmul_qkr@   Zsqrt_q_2Zdiv_qZsqrt_q_Zshape_qZq_bnshZk_nodesZmul_kr*   rd   Zk_scale_nodesre   Zconcat_vZtranspose_1Ztranspose_2rf   queryrX   r   r   r   fuse  s   











z"FusionMultiHeadAttentionMMDit.fuse)r   )__name__
__module____qualname____doc__r	   r   r   r%   r)   boolr-   strr?   rJ   rQ   rY   dictr]   r`   ra   rc   rj   rs   __classcell__r   r   r   r   r
      s6    !10O$5T3
+r
   )loggingr   numpyr5   Zfusion_baser   r\   r   Zonnxr   r   r   r   Z
onnx_modelr	   rt   loggerr
   r   r   r   r   <module>   s   