from logging import getLogger

import numpy as np
from fusion_base import Fusion
from fusion_utils import NumpyHelper
from onnx import NodeProto, helper, numpy_helper

from onnx_model import OnnxModel

logger = getLogger(__name__)


class FusionMultiHeadAttentionSam2(Fusion):
    """
    Fuse MultiHeadAttention subgraph of Segment Anything v2 (SAM2).
    """

    def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int):
        super().__init__(model, "MultiHeadAttention", ["LayerNormalization"])
        self.hidden_size = hidden_size
        self.num_heads = num_heads

        # Flags so that each warning is shown only once.
        self.num_heads_warning = True
        self.hidden_size_warning = True

    def get_decoder_num_heads(self, reshape_q: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        # Assume reshape fusion has run, so the shape is a constant like [0, 0, num_heads, head_size].
        shape_value = self.model.get_constant_value(reshape_q.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [4]:
                num_heads = int(shape_value[2])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0
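
    # Example for get_decoder_num_heads above (illustrative values only): a fused
    # Reshape whose constant shape input is [0, 0, 8, 64] keeps the batch and
    # sequence dimensions and declares 8 heads of size 64, so 8 is returned.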

    def get_encoder_num_heads(self, reshape_in: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_in (NodeProto): reshape node before the Q, K and V paths
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        shape_value = self.model.get_constant_value(reshape_in.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [5]:
                num_heads = int(shape_value[3])
        else:
            concat_shape = self.model.match_parent(reshape_in, "Concat", 1)
            if concat_shape is not None and len(concat_shape.input) == 5:
                # The shape is a Concat of five elements; the fourth one is num_heads.
                shape_value = self.model.get_constant_value(concat_shape.input[3])
                if shape_value is not None:
                    if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [1]:
                        num_heads = int(shape_value[0])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0

    def get_hidden_size(self, layernorm_node):
        """Detect hidden_size from LayerNormalization node.
        Args:
            layernorm_node (NodeProto): LayerNormalization node before Q, K and V
        Returns:
            int: hidden_size, or 0 if not found
        """
        layernorm_bias = self.model.get_initializer(layernorm_node.input[2])
        if layernorm_bias:
            return NumpyHelper.to_array(layernorm_bias).shape[0]

        return 0

    def get_num_heads_and_hidden_size(
        self, reshape_q: NodeProto, layernorm_node: NodeProto, is_encoder: bool = False
    ) -> tuple[int, int]:
        """Detect num_heads and hidden_size.

        Args:
            reshape_q (NodeProto): reshape node for Q
            layernorm_node (NodeProto): LayerNormalization node before Q, K, V
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        """
        if is_encoder:
            num_heads = self.get_encoder_num_heads(reshape_q)
        else:
            num_heads = self.get_decoder_num_heads(reshape_q)
        if num_heads == 0:
            num_heads = self.num_heads  # Fall back to the value from command line arguments.

        if self.num_heads > 0 and num_heads != self.num_heads and self.num_heads_warning:
            logger.warning(f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value.")
            self.num_heads_warning = False  # Do not show the warning more than once.

        hidden_size = self.get_hidden_size(layernorm_node)
        if hidden_size == 0:
            hidden_size = self.hidden_size  # Fall back to the value from command line arguments.

        if self.hidden_size > 0 and hidden_size != self.hidden_size and self.hidden_size_warning:
            logger.warning(
                f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value."
            )
            self.hidden_size_warning = False  # Do not show the warning more than once.

        return num_heads, hidden_size
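
    # Example for get_num_heads_and_hidden_size above (illustrative values only):
    # if the tool was launched with --num_heads 16 but the graph encodes 8 heads,
    # the detected value 8 wins and a warning is logged once.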

    def create_attention_node(
        self,
        q_matmul: NodeProto,
        q_add: NodeProto,
        k_matmul: NodeProto,
        k_add: NodeProto,
        v_matmul: NodeProto,
        v_add: NodeProto,
        num_heads: int,
        hidden_size: int,
        output: str,
    ) -> NodeProto | None:
        """Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connected layer for Q
            q_add (NodeProto): Add bias node in fully connected layer for Q
            k_matmul (NodeProto): MatMul node in fully connected layer for K
            k_add (NodeProto): Add bias node in fully connected layer for K
            v_matmul (NodeProto): MatMul node in fully connected layer for V
            v_add (NodeProto): Add bias node in fully connected layer for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        """
        if hidden_size > 0 and (hidden_size % num_heads) != 0:
            logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}")
            return None

        q_weight = self.model.get_initializer(q_matmul.input[1])
        k_weight = self.model.get_initializer(k_matmul.input[1])
        v_weight = self.model.get_initializer(v_matmul.input[1])
        if not (q_weight and k_weight and v_weight):
            return None

        qw = NumpyHelper.to_array(q_weight)
        kw = NumpyHelper.to_array(k_weight)
        vw = NumpyHelper.to_array(v_weight)
        logger.debug(f"qw={qw.shape} kw={kw.shape} vw={vw.shape} hidden_size={hidden_size}")

        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        attention_inputs = [
            q_add.output[0],
            k_add.output[0],
            v_add.output[0],
        ]

        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=attention_inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("cross attention")
        self.increase_counter(counter_name)
        return attention_node
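
    # The fused decoder node consumes the Q/K/V bias-add outputs directly:
    #   MultiHeadAttention(q_add_out, k_add_out, v_add_out) -> attention output
    # with a single num_heads attribute, registered in the com.microsoft domain.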

    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
        if self.fuse_sam_encoder_pattern(normalize_node, input_name_to_nodes, output_name_to_node):
            return

        match_qkv = self.match_attention_subgraph(normalize_node)
        if match_qkv is None:
            if normalize_node.input[0] not in output_name_to_node:
                return
            skip_add = output_name_to_node[normalize_node.input[0]]
            if skip_add.op_type != "Add":
                return
            match_qkv = self.match_attention_subgraph(skip_add)
            if match_qkv is None:
                return

        reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v = match_qkv

        attention_last_node = reshape_qkv

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q, normalize_node, False)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return

        new_node = self.create_attention_node(
            matmul_q,
            add_q,
            matmul_k,
            add_k,
            matmul_v,
            add_v,
            q_num_heads,
            q_hidden_size,
            output=attention_last_node.output[0],
        )
        if new_node is None:
            return

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([attention_last_node, transpose_qkv])

        # Use prune_graph to remove the remaining nodes, since they are shared by all attention nodes.
        self.prune_graph = True
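
    # Decoder attention subgraph matched by match_attention_subgraph below (a
    # simplified sketch of the PyTorch 2.* export; data flows left to right):
    #
    #   q: MatMul -> Add -> Reshape -> Transpose -> Mul ---\
    #   k: MatMul -> Add -> Reshape -> Transpose -> Mul ----> MatMul -> Softmax ---\
    #   v: MatMul -> Add -> Reshape -> Transpose ------------------------------------> MatMul
    #                                                                                    |
    #   output projection: Transpose -> Reshape -> MatMul -> Add  <----------------------/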

    def match_attention_subgraph(self, node_after_output_projection):
        """Match Q, K and V paths exported by PyTorch 2.*"""
        qkv_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [None, None, None, 0, 0],
        )
        if qkv_nodes is None:
            return None

        (_, _, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes

        v_nodes = self.model.match_parent_path(matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None])
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return None
        (_, _, add_v, matmul_v) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is not None:
            (_softmax_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("fuse_attention: failed to match qk path")
            return None

        q_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [0, None, 0, 0, None]
        )
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return None
        (mul_q, _transpose_q, reshape_q, add_q, matmul_q) = q_nodes

        k_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [1, None, 0, 0, None]
        )
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return None
        (_mul_k, _, _, add_k, matmul_k) = k_nodes

        # Sanity check: the scale feeding mul_q is derived (via Shape -> Slice ->
        # Cast -> Sqrt -> Div -> Sqrt) from the tensor produced by reshape_q.
        mul_q_nodes = self.model.match_parent_path(
            mul_q,
            ["Sqrt", "Div", "Sqrt", "Cast", "Slice", "Shape", "Transpose", "Reshape"],
            [None, 0, 1, 0, 0, 0, 0, 0],
        )
        if mul_q_nodes is None or mul_q_nodes[-1] != reshape_q:
            logger.debug("fuse_attention: failed to match mul_q path")
            return None

        return reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v
}|d u rIdS |\}}}	}
}}t|
d}t|trb|g dkrddS t|d}t|tru|g dkrwdS t|d}t|tr|g dkrdS | j |	g dg d}|d u rdS |\}}}| ||d\}}|dkrt	
d dS d}| j |}|d u rtjtjg ddd|d}| j || j | j d}tjd|
jd |g|
jd d g|d}| j| | j| j|j< |
}|jd |jd< |jd d |jd< t	
d|d| | ||||}|d u r,dS t| j ||d	ks:J |jd |jd< | j| | j| j|j< | j|g d| _dS )N)rN   ri   rj   ri   r   Nr   r   )rN   ro   ro   ri   rj   ri   )r   Nr   r   r   r   rN   r   Frp   r   )input_indexperm)r   r   r   r)   )r   r   r)   r   )ri   rN   rh   )r   r   NTrO   Zbsnh_to_bsd_reshape_dims)r   r   rp   Zint64)Zdtype)r=   ri   Z_BSDr;   Z_BNSHzFound MHA: q_num_heads=z q_hidden_size=) r
   rr   $match_sam_encoder_attention_subgraphr*   r   Zget_node_attributer   r"   r3   r1   r@   r,   r   Z
from_arrayr    arrayZadd_initializerrT   rA   r   rB   r   rR   rS   rU   r=   r:   create_mha_nodeZget_childrenrV   rE   rW   )r   rX   rY   rZ   nodesrs   Zmatched_sdpareshape_outtranspose_out	split_qkvtranspose_qtranspose_ktranspose_vZpermutation_qZpermutation_kZpermutation_vZinput_projection_nodesr'   Zadd_inZ	matmul_inrc   rd   Znew_dims_nameZnew_dimsZreshape_q_namer   Ztranspose_k_bnshre   r   r   r   rP   1  s   


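
    # Encoder SDPA subgraph matched by match_sam_encoder_attention_subgraph below
    # (a simplified sketch; a single input projection produces packed QKV, which is
    # reshaped to 5D [batch, sequence, 3, num_heads, head_size] and then split):
    #
    #   MatMul -> Add -> Reshape(5D) -> Split -+-> Squeeze -> Transpose -> Mul --\
    #                                          +-> Squeeze -> Transpose -> Mul ---> MatMul -> Softmax --\
    #                                          +-> Squeeze -> Transpose -------------------------------> MatMul
    #                                                                                                      |
    #   output projection: MatMul <- Reshape <- Transpose <------------------------------------------------/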

    def match_sam_encoder_attention_subgraph(self, node_after_output_projection, input_index=None):
        """Match SDPA pattern in SAM2 encoder."""
        # Nodes between the output projection and the second MatMul (scores x V) of SDPA.
        out_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["MatMul", "Reshape", "Transpose", "MatMul"],
            [input_index, None, 0, 0],
        )
        if out_nodes is None:
            return None
        (_, reshape_out, transpose_out, matmul_qk_v) = out_nodes

        v_nodes = self.model.match_parent_path(matmul_qk_v, ["Transpose", "Squeeze", "Split"], [1, 0, 0])
        if v_nodes is None:
            logger.debug("failed to match v path")
            return None
        (transpose_v, _squeeze_v, split_qkv) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qk_v, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is not None:
            (_softmax_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("failed to match qk path")
            return None

        q_nodes = self.model.match_parent_path(matmul_qk, ["Mul", "Transpose", "Squeeze", "Split"], [0, None, 0, 0])
        if q_nodes is None:
            # Match the variant with query pooling (MaxPool) between stages of the SAM2 encoder.
            q_nodes = self.model.match_parent_path(
                matmul_qk,
                ["Mul", "Transpose", "Reshape", "Transpose", "MaxPool", "Transpose", "Reshape", "Squeeze", "Split"],
                [0, None, 0, 0, 0, 0, 0, 0, 0],
            )
            if q_nodes is None:
                logger.debug("failed to match q path")
                return None
        if q_nodes[-1] != split_qkv:
            return None
        transpose_q = q_nodes[1]

        k_nodes = self.model.match_parent_path(matmul_qk, ["Mul", "Transpose", "Squeeze", "Split"], [1, None, 0, 0])
        if k_nodes is None:
            logger.debug("failed to match k path")
            return None
        if k_nodes[-1] != split_qkv:
            return None
        (_mul_k, transpose_k, _squeeze_k, _) = k_nodes

        return reshape_out, transpose_out, split_qkv, transpose_q, transpose_k, transpose_v
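
    # Note: the com.microsoft MultiHeadAttention contrib op accepts query as a 3D
    # (batch, sequence, hidden) tensor while key and value may be given as 4D BNSH
    # tensors, which is why fuse_sam_encoder_pattern above feeds Q through a new
    # 3D Reshape while K and V stay as 4D BNSH transposes.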

    def create_mha_node(
        self, reshape_q: NodeProto, transpose_k: NodeProto, transpose_v: NodeProto, num_heads: int
    ) -> NodeProto:
        """Create a MultiHeadAttention node for SAM2 encoder.

        Args:
            reshape_q (NodeProto): Reshape node for Q, output is 3D BxSxNH format
            transpose_k (NodeProto): Transpose node for K, output is BNSH format
            transpose_v (NodeProto): Transpose node for V, output is BNSH format
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the MultiHeadAttention node created.
        """
        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        inputs = [
            reshape_q.output[0],
            transpose_k.output[0],
            transpose_v.output[0],
        ]

        output = attention_node_name + "_out"
        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("self attention")
        self.increase_counter(counter_name)
        return attention_node
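
# A minimal usage sketch (assumptions: this module is used through onnxruntime's
# transformers optimizer tooling so fusion_base/onnx_model are importable, and
# "sam2.onnx" is a hypothetical exported SAM2 model; passing 0 for hidden_size
# and num_heads relies on auto-detection from the graph):
#
#     import onnx
#     from onnx_model import OnnxModel
#
#     model = OnnxModel(onnx.load("sam2.onnx"))
#     fusion = FusionMultiHeadAttentionSam2(model, hidden_size=0, num_heads=0)
#     fusion.apply()  # visits LayerNormalization nodes and calls fuse() on each
#     model.save_model_to_file("sam2_fused.onnx")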