o
    iM                     @   sp  d dl mZ d dlmZ d dlmZmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7m8Z8 d dl9m:Z: ee;Z<G dd de:Z=dS )    )	getLogger)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionConstantFold)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                       sB  e Zd ZdJdededef fddZdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd ZdKd d!Zd"d# Zd$d% Zd&d' Zd(ed)ee d*efd+d,Zd*efd-d.Zd/d0 ZdLd3d4Zd5d6 Zd7d8 Zd9d: Zd;d< Z dMd?e!d=B d@efdAdBZ"dCdD Z#dNdEdFZ$dOdGefdHdIZ%  Z&S )PBertOnnxModelr   model	num_headshidden_sizec                    s   |dkr|dks|dkr|| dksJ t  | || _|| _t| | _t| | j| j| j| _t| | j| j| j| _	t
| | _dS )aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)super__init__r%   r&   r   attention_maskr   attention_fusionr   qordered_attention_fusionr   utils)selfr$   r%   r&   	__class__ b/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_bert.pyr(   &   s   (
zBertOnnxModel.__init__c                 C      t | }|  d S N)r   applyr-   fusionr0   r0   r1   fuse_constant_fold;      z BertOnnxModel.fuse_constant_foldc                 C   s   | j   | j  d S r3   )r*   r4   r+   r-   r0   r0   r1   fuse_attention?   s   
zBertOnnxModel.fuse_attentionc                 C   sD   t | }|  t| }|  t| }|  t| }|  d S r3   )r   r4   r
   r   r   r5   r0   r0   r1   	fuse_geluD   s   zBertOnnxModel.fuse_geluc                 C      t | |}|  d S r3   )r   r4   )r-   is_fastgelur6   r0   r0   r1   fuse_bias_geluO      
zBertOnnxModel.fuse_bias_geluc                 C   r2   r3   )r   r4   r5   r0   r0   r1   gelu_approximationS   r8   z BertOnnxModel.gelu_approximationc                 C   r2   r3   )r   r4   r5   r0   r0   r1   fuse_gemm_fast_geluW   r8   z!BertOnnxModel.fuse_gemm_fast_geluc                 C   r2   r3   )r   r4   r5   r0   r0   r1   fuse_add_bias_skip_layer_norm[   r8   z+BertOnnxModel.fuse_add_bias_skip_layer_normc                 C   r2   r3   )r   r4   r5   r0   r0   r1   fuse_reshape_   r8   zBertOnnxModel.fuse_reshapec                 C   r2   r3   )r   r4   r5   r0   r0   r1   
fuse_shapec   r8   zBertOnnxModel.fuse_shapec                 C   r<   r3   )r	   r4   )r-   use_mask_indexr6   r0   r0   r1   fuse_embed_layerg   r?   zBertOnnxModel.fuse_embed_layerc                 C   s4   t | }|  t| }|  t| }|  d S r3   )r   r4   r   r   r5   r0   r0   r1   fuse_layer_normk   s   zBertOnnxModel.fuse_layer_normc                 C   r2   r3   )r   r4   r5   r0   r0   r1   fuse_simplified_layer_normv   r8   z(BertOnnxModel.fuse_simplified_layer_normTc                 C   s   t | |d}|  d S )N)shape_infer)r   r4   )r-   rI   r6   r0   r0   r1   fuse_skip_layer_normz   s   z"BertOnnxModel.fuse_skip_layer_normc                 C   r2   r3   )r   r4   r5   r0   r0   r1   fuse_skip_simplified_layer_norm~   r8   z-BertOnnxModel.fuse_skip_simplified_layer_normc                 C   s   t | }|  ttdd | jjj}dd |D }d}|t| jjk rK| jj| }d|j	v r=|j
|vr=| jj| n|d7 }|t| jjk s%d S d S )Nc                 S   s   | j dko	| jdkS )NRotaryEmbeddingcom.microsoft)op_typedomainnoder0   r0   r1   <lambda>   s    z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>c                 S   s   h | ]}|j qS r0   )rO   ).0rQ   r0   r0   r1   	<setcomp>   s    z7BertOnnxModel.fuse_rotary_embeddings.<locals>.<setcomp>r   rL      )r   r4   listfilterr$   graphrQ   lenZ	functionsnamerO   remove)r-   r6   Zrot_emb_nodesZnon_ms_domains_to_keepifnr0   r0   r1   fuse_rotary_embeddings   s    z$BertOnnxModel.fuse_rotary_embeddingsc                 C   r2   r3   )r   r4   r5   r0   r0   r1   fuse_qordered_mamtul   r8   z"BertOnnxModel.fuse_qordered_mamtulrN   input_indicescastedc           
         s   g }|   }| |}|D ]>  fdd|D }|D ]0}| |r)|s(|| q||v rJ|| }	|	jdkrJ| |	jd durJ|rJ||	jd  qq|S )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        c                    s$   g | ]}|t  jk r j| qS r0   )rY   input)rS   r\   rP   r0   r1   
<listcomp>   s   $ zABertOnnxModel.get_graph_inputs_from_node_type.<locals>.<listcomp>Castr   N)output_name_to_nodeget_nodes_by_op_typeZfind_graph_inputappendrN   rb   )
r-   rN   r`   ra   Zgraph_inputsre   nodesZbert_inputsZ
bert_inputparentr0   rP   r1   get_graph_inputs_from_node_type   s$   


	z-BertOnnxModel.get_graph_inputs_from_node_typec                 C   s*   |  dg d|}||  ddg|7 }|S )NEmbedLayerNormalization)r   rU      	Attention   )rj   )r-   ra   inputsr0   r0   r1   !get_graph_inputs_from_fused_nodes   s   z/BertOnnxModel.get_graph_inputs_from_fused_nodesc                 C   sb   |   }d}d}|jD ]}| |tj\}}|r|d7 }|t|7 }qtd| d| d dS )zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   rU   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rX   rb   Zchange_graph_input_typer    ZINT32rY   loggerinfo)r-   rX   Zadd_cast_countZremove_cast_countZgraph_inputnew_nodeZremoved_nodesr0   r0   r1   change_graph_inputs_to_int32   s   
z*BertOnnxModel.change_graph_inputs_to_int32
batch_sizemax_seq_lenc                 C   s   | j dd| j dd }| jjjD ]!}|j|v r2|jjjjd }||_	|dur2|jjjjd }||_	q| jjj
D ]}|jjjjd }||_	q8dS )zD
        Update input and output shape to use dynamic axes.
        T)ra   Fr   NrU   )rp   r$   rX   rb   rZ   typeZtensor_typeshapedimZ	dim_paramoutput)r-   Zdynamic_batch_dimZdynamic_seq_lenZbert_graph_inputsrb   Z	dim_protorz   r0   r0   r1   use_dynamic_axes   s"   

zBertOnnxModel.use_dynamic_axesc                 C   s   |    d S r3   )adjust_reshape_and_expandr9   r0   r0   r1   
preprocess   s   zBertOnnxModel.preprocessc           
      C   s2  g }|   D ]}}|jdkr| |jd }|d ur1|jdkr1||g | |jd |jd  q| |g dg d| 	 }|d ur|d }| |jd }|d }| |jd }|d }	|d ur|d urt
|d	krt
|dkr|d |d kr|	jd |jd< q|r| | td
t
|  d S d S )NReshaperU   r   )Expandr   r~   Slice)r   r   r   r      z"Removed Reshape and Expand count: )rh   rN   Zget_constant_valuerb   sizeextendZreplace_input_of_all_nodesrz   match_parent_pathre   rY   remove_nodesrq   rr   )
r-   nodes_to_removerQ   Zreshape_shapeZreshape_pathZexpand_nodeZexpand_shape_valueZreshape_before_expandZshape_valueZ
slice_noder0   r0   r1   r|      s>   

z'BertOnnxModel.adjust_reshape_and_expandc                 C   sd  |   }g }|  D ]}dddd}|j|v rQ||j }| |g d|dddddg|}|d urQ|\}}}	}
}}|jd |  jd jkrQ|jd |jd< |   }|jdkr| |g dg d|}|d ur|d	 jd |  jd jkrtj	d|jdt
|jd  |j|jd
 d}d|_|jtd| jg | || |j || q
| | d S )NrU   r   rn   )rk   	ReduceSumrm   )rd   ConstantOfShapeZConcatZ	UnsqueezeZGatherShaperm   )r   rd   r   r   )rn   r   r   r   r   Z_remove_mask)ro   ZoutputsrZ   rM   r%   )re   rh   rN   r   rb   rX   rZ   rz   r!   Z	make_noderY   rO   	attributer   Zmake_attributer%   add_nodeZget_graph_by_noderg   r   )r-   re   r   rQ   Zop_input_idr\   Zparent_nodescastZconstantOfShapeconcatZ	unsqueezeZgatherrx   Zattention_noder0   r0   r1   clean_graph  sZ   	



zBertOnnxModel.clean_graphc                 C   s   |    |   d S r3   )r   Zprune_graphr9   r0   r0   r1   postprocessF  r8   zBertOnnxModel.postprocessNFoptionsadd_dynamic_axesc                 C   s  |d ur|j s|   | j  | j  |   |d u s |jr(|   |   |d u s/|j	r3| 
  |   |   |d u sB|jrL| |j  |   |d u sS|jrW|   |d urx| j|j |jrxt| jtsxt| | j| j| j|j| _|d u s|jr|   |d u s|jr|   |    |d u s|j!r|jt"j#k}| $| | j%  | &  |d u s|j'r| j(dd | j(dd |d u s|j)r| *  |d ur|j+r| ,  |d ur|j-r| .  | /  |r| 0  t12d| 3   d S )NT)r=   Fzopset version: )4Zenable_shape_inferenceZdisable_shape_inferencer,   Zremove_identity_nodesZremove_useless_cast_nodesr7   Zenable_layer_normrG   rH   Zenable_gelur;   r}   rC   Zenable_skip_layer_normrJ   rK   Zenable_rotary_embeddingsr^   r)   Zset_mask_formatZattention_mask_formatZuse_multi_head_attention
isinstancer*   r   r   r&   r%   Zenable_attentionr:   Zenable_qordered_matmulr_   rD   Zenable_embed_layer_normr   ZMaskIndexEndrF   Zremove_useless_reshape_nodesr   Zenable_bias_gelur>   Zenable_bias_skip_layer_normrB   Zenable_gelu_approximationr@   Zenable_gemm_fast_gelurA   Zremove_unused_constantr{   rq   rr   Zget_opset_version)r-   r   r   rE   r0   r0   r1   optimizeJ  sd   



zBertOnnxModel.optimizec                 C   sL   i }g d}g d}|| D ]}|  |}t|||< qtd|  |S )z8
        Returns node count of fused operators.
        )rk   rm   MultiHeadAttentionGeluFastGeluBiasGeluZGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationrL   )QOrderedAttentionZQOrderedGeluZQOrderedLayerNormalizationZQOrderedMatMulzOptimized operators: )rf   rY   rq   rr   )r-   op_countopsZq_opsoprh   r0   r0   r1   get_fused_operator_statistics  s   
z+BertOnnxModel.get_fused_operator_statisticsc           	         s   du r|    dtf fdd}|d}|d|d |d }|d	|d
 |d }|d|d }|d|d }|dkoT|dkoT||koT|d| kpT|d| k}|dkr^td |dkrgtd |dkrptd |dkrytd |dkrtd |S )zA
        Returns True when the model is fully optimized.
        Nop_namec                    s     | pdS )Nr   )get)r   fused_op_countr0   r1   r     s   z2BertOnnxModel.is_fully_optimized.<locals>.op_countrk   rm   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r   strrq   debugwarning)	r-   r   r   ZembedZ	attentionZgeluZ
layer_normZsimple_layer_normZ
is_perfectr0   r   r1   is_fully_optimized  s4   




z BertOnnxModel.is_fully_optimizeduse_symbolic_shape_inferc                 C   s   t | }|| d S r3   )r   convert)r-   r   Zpacking_moder0   r0   r1   convert_to_packing_mode  s   z%BertOnnxModel.convert_to_packing_mode)r   r   )T)ru   rv   )NFr3   )F)'__name__
__module____qualname__r   intr(   r7   r:   r;   r>   r@   rA   rB   rC   rD   rF   rG   rH   rJ   rK   r^   r_   r   rV   boolrj   rp   rt   r{   r}   r|   r   r   r   r   r   r   r   __classcell__r0   r0   r.   r1   r#   %   s<    

)BT
 (r#   N)>loggingr   r   r   Zfusion_attentionr   r   Zfusion_bart_attentionr   Zfusion_biasgelur   Zfusion_constant_foldr   Zfusion_embedlayerr	   Zfusion_fastgelur
   Zfusion_gelur   Zfusion_gelu_approximationr   Zfusion_gemmfastgelur   Zfusion_layernormr   r   Zfusion_optionsr   r   Zfusion_qordered_attentionr   Zfusion_qordered_gelur   Zfusion_qordered_layernormr   Zfusion_qordered_matmulr   Zfusion_quickgelur   Zfusion_reshaper   Zfusion_rotary_attentionr   Zfusion_shaper   Zfusion_simplified_layernormr   r   Zfusion_skiplayernormr   r   Zfusion_utilsr   Zonnxr   r    r!   Z
onnx_modelr"   r   rq   r#   r0   r0   r0   r1   <module>   s8   