o
    iL0                     @   sH   d dl mZ d dlmZ G dd dZG dd deZG dd dZd	S )
    )ArgumentParser)Enumc                   @   s   e Zd ZdZdZdZdZdS )AttentionMaskFormatr            N)__name__
__module____qualname__MaskIndexEndZMaskIndexEndAndStartAttentionMaskNoMask r   r   a/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_options.pyr   	   s
    r   c                   @   s4   e Zd ZdZdZdZdZdd Zdd Zd	d
 Z	dS )AttentionOpType	AttentionMultiHeadAttentionGroupQueryAttentionPagedAttentionc                 C   s   | j S Nvalueselfr   r   r   __str__   s   zAttentionOpType.__str__c                 C   s
   t | jS r   )hashr   r   r   r   r   __hash__!      
zAttentionOpType.__hash__c                 C   s   |j | j kS r   r   )r   otherr   r   r   __eq__$      zAttentionOpType.__eq__N)
r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r      s    r   c                   @   sV   e Zd ZdZdd ZdddZdd Zd	efd
dZe	dd Z
e	defddZdS )FusionOptionsz'Options of fusion in graph optimizationc                 C   s   d| _ d| _d| _d| _d| _d| _d| _d| _d| _d| _	d| _
d| _d| _d| _d| _|dkr4d| _tj| _|dkrAtj| _n|dkrItj| _d | _|dv rgd| _d| _d| _d| _d| _d| _d| _d S d S )NTFclipZbertZvitZunetZvaer"   )enable_geluenable_layer_normenable_attentionenable_rotary_embeddingsuse_multi_head_attentionZ!disable_multi_head_attention_biasenable_skip_layer_normenable_embed_layer_normenable_bias_skip_layer_normenable_bias_geluenable_gelu_approximationZenable_qordered_matmulenable_shape_inferenceenable_gemm_fast_gelugroup_norm_channels_lastr   r   attention_mask_formatr   r   attention_op_typeenable_nhwc_convenable_group_normenable_skip_group_normenable_bias_splitgeluenable_packed_qkvenable_packed_kvenable_bias_add)r   
model_typer   r   r   __init__+   s@   

zFusionOptions.__init__Tc                 C   s   |rt j| _d S t j| _d S r   )r   r   r1   r   )r   Zuse_raw_maskr   r   r   use_raw_attention_maskZ   s   z$FusionOptions.use_raw_attention_maskc                 C   s   t j| _d S r   )r   r   r1   r   r   r   r   disable_attention_mask`   r    z$FusionOptions.disable_attention_maskattn_op_typec                 C   s
   || _ d S r   )r2   )r   r>   r   r   r   set_attention_op_typec   r   z#FusionOptions.set_attention_op_typec                 C   s6  t | j}| jrd|_| jrd|_| jrd|_| jrd|_	| j
r#d|_
| jr)d|_| jr/d|_| jr5d|_| jr;d|_| jrAd|_| jrGd|_| jrMd|_| jrU|d | jr]|d | jrd|  | jdv r| jrod|_| jrud|_| jr{d|_ | j!rd|_"| j#rd|_$| j%rd|_&| j'rd|_(| j)rd|_*|S )NFTr#   )+r!   r:   disable_gelur$   disable_layer_normr%   Zdisable_rotary_embeddingsr'   disable_attentionr&   r(   disable_skip_layer_normr)   disable_embed_layer_normr*   disable_bias_skip_layer_normr+   disable_bias_gelur,   r-   disable_shape_inferencer.   r/   use_mask_indexr<   no_attention_maskr=   use_group_norm_channels_firstr0   disable_nhwc_convr3   disable_group_normr4   disable_skip_group_normr5   disable_bias_splitgelur6   disable_packed_qkvr7   disable_packed_kvr8   disable_bias_addr9   )argsoptionsr   r   r   parsef   sb   



zFusionOptions.parseparserc                 C   s  | j ddddd | jdd | j ddddd | jdd	 | j d
dddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j ddddd | jdd | j dddd d | jdd! | j d"ddd#d | jdd$ | j d%ddd&d | jdd' | j d(ddd)d | jdd* | j d+ddd,d | jdd- | j d.ddd/d | jdd0 | j d1ddd2d | jdd3 | j d4ddd5d | jdd6 | j d7ddd8d | jdd9 | j d:ddd;d | jdd< | j d=ddd>d | jdd? | j d@dddAd | jddB | j dCdddDd | jddE | j dFdddGd d S )HNz--disable_attentionF
store_truezdisable Attention fusion)requiredactionhelp)rB   z--disable_skip_layer_normz%disable SkipLayerNormalization fusion)rC   z--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)rD   z--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)rE   z--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)rF   z--disable_layer_normz!disable LayerNormalization fusion)rA   z--disable_geluzdisable Gelu fusion)r@   z--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)r-   z--disable_shape_inferencez disable symbolic shape inference)rG   z--enable_gemm_fast_geluzenable GemmfastGelu fusion)r/   z--use_mask_indexzWuse mask index to activate fused attention to speed up. It requires right-side padding!)rH   z--use_raw_attention_maskzuse raw attention mask. Use this option if your input is not right-side padding. This might deactivate fused attention and get worse performance.)r<   z--no_attention_maskz1no attention mask. Only works for model_type=bert)rI   z--use_multi_head_attentionzUse MultiHeadAttention instead of Attention operator for testing purpose. Note that MultiHeadAttention might be slower than Attention when qkv are not packed. )r(   z--disable_group_normz9not fuse GroupNorm. Only works for model_type=unet or vae)rL   z--disable_skip_group_normzPnot fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae)rM   z--disable_packed_kvz[not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet)rP   z--disable_packed_qkvz[not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet)rO   z--disable_bias_addz0not fuse BiasAdd. Only works for model_type=unet)rQ   z--disable_bias_splitgeluz6not fuse BiasSplitGelu. Only works for model_type=unet)rN   z--disable_nhwc_convz:Do not use NhwcConv. Only works for model_type=unet or vae)rK   z--use_group_norm_channels_firstznUse channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae)rJ   z--disable_rotary_embeddingsz5Do not fuse rotary embeddings into RotaryEmbedding op)add_argumentset_defaults)rU   r   r   r   add_arguments   s@  
zFusionOptions.add_argumentsN)T)r   r	   r
   __doc__r;   r<   r=   r   r?   staticmethodrT   r   r\   r   r   r   r   r!   (   s    
/
5r!   N)argparser   enumr   r   r   r!   r   r   r   r   <module>   s
   