o
    i@                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	 d dl
mZmZ d dlmZmZ d dlmZ eeZG dd dZG dd	 d	eZG d
d deZG dd dZdd Zdd Zdd Zedkrle  dS dS )    N)AttentionInputIDsAttentionOutputIDsMultiHeadAttentionInputIDsMultiHeadAttentionOutputIDs	Operators)helper
load_model)	NodeProto	OnnxModel)SymbolicShapeInferenceHelperc                   @   s   e Zd ZdedefddZdedB fddZdedB fd	d
ZdedB fddZ	de
fddZdee dee ddfddZdee dee ddfddZdededdfddZdedB fddZdde
ddfddZdS ) PackingAttentionBasemodelattention_op_typec                 C   sD   || _ g | _g | _d| _i | _| j j jj| _|| _| j 	|| _
d S )NF)r   nodes_to_removenodes_to_addprune_graphnode_name_to_graph_namegraphnamethis_graph_namer   get_nodes_by_op_typeattention_nodes)selfr   r    r   j/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/convert_to_packing_mode.py__init__   s   zPackingAttentionBase.__init__returnNc                 C   sr   | j tjkr	tjntj}|  }|rt|j	|krd S |j	| }| j
D ]}t|j	|ks3|j	| |kr6 d S q#|S N)r   r   	ATTENTIONr   Z
MASK_INDEXr   ZKEY_PADDING_MASK_try_getting_first_attentionleninputr   )r   Z
mask_indexfirst_attention_nodeattention_masknoder   r   r   _try_getting_attention_mask$   s   

z0PackingAttentionBase._try_getting_attention_maskc                 C   s   t | jdkr	d S | jd S )Nr   )r    r   r   r   r   r   r   8   s   
z1PackingAttentionBase._try_getting_first_attentionc                 C   s4   d }| j  D ]}|jtjks|jtjkr|}q|S r   )r   nodesop_typer   Z	LAYERNORMZSKIPLAYERNORM)r   last_layernorm_noder$   r   r   r   _try_getting_last_layernorm>   s   z0PackingAttentionBase._try_getting_last_layernormc                 C      t  r   NotImplementedErrorr&   r   r   r   _are_attentions_supportedE      z.PackingAttentionBase._are_attentions_supportedinputsoutputsc                 C   B   t jtj||| jtjd}d|_| j| | j	| j
|j< d S Nr0   r1   r   com.microsoft)r   	make_noder   ZREMOVEPADDINGr   create_node_namedomainr   appendr   r   r   r   r0   r1   new_noder   r   r   _insert_removepadding_nodeH      z/PackingAttentionBase._insert_removepadding_nodec                 C   r2   r3   )r   r6   r   ZRESTOREPADDINGr   r7   r8   r   r9   r   r   r   r:   r   r   r   _insert_restorepadding_nodeT   r=   z0PackingAttentionBase._insert_restorepadding_nodetoken_offsetcumulative_sequence_lengthc                 C   r+   r   r,   )r   r?   r@   r   r   r   )_replace_attention_with_packing_attention`   r/   z>PackingAttentionBase._replace_attention_with_packing_attentionc                 C   s   | j tjkr|jtj S d S r   )r   r   r   r!   r   INPUT)r   r"   r   r   r   _get_input_to_remove_paddingc   s   z1PackingAttentionBase._get_input_to_remove_paddingTuse_symbolic_shape_inferc                 C   s  t d |  sd S |  }|sd S |  }|  }|sd S | |}|s(d S |d }|d }|d }|d }	| ||g||||	g | j	|| t d |j
d d }
| |
|g|j
d g | j|j
d |
 t d	|j d
 | || t d| j d| j  | j| j | j| j| j | jr| j  n| js| jr| j  | j  |rt| jjdd}|j| jjddd}|r|| j_d S d S d S )Nz$start converting to packing model...Z_no_paddingZ_token_offsetZ_cumulated_seq_lenZ_max_seq_lenz'inserted RemovePadding before Attentionr   Z_restore_inputz#inserted RestorePadding after last z layerz	replaced z with PackedverboseTF)Z
auto_mergeZguess_output_rank)loggerdebugr.   r%   r   r*   rC   r<   r   Zreplace_input_of_all_nodesoutputr>   Zreplace_output_of_all_nodesr(   rA   r   Zremove_nodesr   Z	add_nodesr   r   r   Zupdate_graphZclean_shape_inferr   Zinfer_shapes)r   rD   r#   r"   r)   Zinput_to_remove_paddingZoutput_without_paddingr?   Zcumulated_seq_lenZmax_seq_lenZrestorepadding_inputshape_infer_helperZinferred_modelr   r   r   converth   sV   





zPackingAttentionBase.convertT)__name__
__module____qualname__r
   strr   r%   r	   r   r*   boolr.   listr<   r>   rA   rC   rK   r   r   r   r   r      s    
r   c                       sF   e Zd Zdef fddZdefddZdededd	fd
dZ  Z	S )PackingAttentionr   c                       t  |tj d S r   )superr   r   r   r   r   	__class__r   r   r         zPackingAttention.__init__r   c                 C   s   | j D ]K}t|dd ur dS t|dd ur dS t|d}|d ur,|dkr, dS t|jtjkr=|jtj s= dS t|jtjkrN|jtj sN dS qdS )NZpast_present_share_bufferFZ	do_rotaryZunidirectionalr   T)r   r
   Zget_node_attributer    r!   r   ZPASTZPAST_SEQUENCE_LENGTH)r   r$   Zunidirection_attrr   r   r   r.      s    

z*PackingAttention._are_attentions_supportedr?   r@   Nc              	   C   s   | j D ]f}t|jtjkr|jtj nd}tjtj|jtj	 |jtj
 |jtj |||g|jtj g| jtjd}g }|jD ]}|jdv rL|| q@|j| d|_| j| | j| | j| j|j< qtdt| j  d S )N r4   )	num_headsZqkv_hidden_sizesscaler5   z0Converted %d Attention nodes to PackedAttention.)r   r    r!   r   ATTENTION_BIASr   r6   r   ZPACKEDATTENTIONrB   ZWEIGHTSBIASrI   r   OUTPUTr   r7   	attributer   r9   extendr8   r   r   r   r   rG   info)r   r?   r@   Z	attentionattention_biasZpacked_attention
attributesattrr   r   r   rA      s8   






z:PackingAttention._replace_attention_with_packing_attention)
rM   rN   rO   r
   r   rQ   r.   rP   rA   __classcell__r   r   rW   r   rS      s    rS   c                       s|   e Zd Zdef fddZdedefddZdedefdd	Zd
e	fddZ
deded
dfddZd
edB fddZ  ZS )PackingMultiHeadAttentionr   c                    rT   r   )rU   r   r   MULTI_HEAD_ATTENTIONrV   rW   r   r   r      rY   z"PackingMultiHeadAttention.__init__indexr   c                 C   D   t |j|kr t |j| dkr td| d| d|  dS dS )'Check a node does not have given input.r   znode input  (0) is not supported in PackedMultiHeadAttention: FT)r    r!   rG   errorr   r$   ri   r   r   r   r   _check_empty_input   
   z,PackingMultiHeadAttention._check_empty_inputc                 C   rj   )rk   r   znode output rl   rm   FT)r    rI   rG   rn   ro   r   r   r   _check_empty_output   rq   z-PackingMultiHeadAttention._check_empty_outputr   c                 C   s   | j D ]T}|jD ]}|jdvrtd|j d|    dS q|jtj r4|jtj s4td  dS | 	|tj
drT| 	|tjdrT| |tjdrT| |tjdsW dS qdS )	Nr[   Zmask_filter_valuer\   znode attribute z/ is not supported in PackedMultiHeadAttention: Fz=packed kv format is not supported in PackedMultiHeadAttentionZpast_keyZpresent_keyT)r   r`   r   rG   rn   r!   r   KEYVALUErp   ZPAST_KEYZ
PAST_VALUErr   r   ZPRESENT_KEYZPRESENT_VALUE)r   r$   re   r   r   r   r.      s(   



z3PackingMultiHeadAttention._are_attentions_supportedr?   r@   Nc           
   
   C   sH  d}| j D ]}t|jtjkr|jtj nd}tjtj|jtj	 |jtj
 |jtj |jtj |||g|jtj g| jtjd}g }|jD ]}|jdv rS|| qG|j| d|_| j| | j| | j| j|j< |r| j|tj}	|	r|	jdkrt|	jdkr|	j| |d7 }qtd	t| j  td
| d S )Nr   rZ   r4   rs   r5   ZGatedRelativePositionBias      zBConverted %d MultiHeadAttention nodes to PackedMultiHeadAttention.z=Converted %d GatedRelativePositionBias nodes to packing mode.)r   r    r!   r   r]   r   r6   r   ZPACKED_MULTI_HEAD_ATTENTIONZQUERYrt   ru   r^   rI   r   r_   r   r7   r`   r   r9   ra   r8   r   r   r   r   
get_parentr(   rG   rb   )
r   r?   r@   Zgated_relative_pos_bias_countZmharc   Z
packed_mhard   re   Zrel_pos_bias_noder   r   r   rA     sP   




	



zCPackingMultiHeadAttention._replace_attention_with_packing_attentionc                 C   s*   | j |d}|r|jdkr|jd S d S )Nr   ZMatMul)r   rx   r(   r!   )r   r"   matmulr   r   r   rC   4  s   
z6PackingMultiHeadAttention._get_input_to_remove_padding)rM   rN   rO   r
   r   intrP   rp   rr   rQ   r.   rA   rC   rf   r   r   rW   r   rg      s    0rg   c                   @   s.   e Zd ZdefddZd
deddfdd	ZdS )PackingModer   c                 C   s
   || _ d S r   )r   rV   r   r   r   r   =  s   
zPackingMode.__init__TrD   r   Nc                 C   sn   | j tjr| j tjrtd d S t| j }||S | j tjr0t	| j }||S td d S )NzRPacking mode does not support both Attention and MultiHeadAttention in same graph.zPPacking mode requires either Attention or MultiHeadAttention node in onnx graph.)
r   r   r   r   rh   rG   rn   rS   rK   rg   )r   rD   Zpackingr   r   r   rK   @  s   





zPackingMode.convertrL   )rM   rN   rO   r
   r   rQ   rK   r   r   r   r   r{   <  s    r{   c                  C   sx   t jdd} | jddtdd | jddtdd | jd	d
ddd | jd
d | jdd
ddd | jd
d |  }|S )Nz_Convert to packing mode tool for ONNX Runtime. It converts BERT like model to use packing mode.)descriptionz--inputTzinput onnx model path)requiredtypehelpz--outputzoptimized onnx model pathz	--verboseF
store_truezshow debug information.)r}   actionr   rE   z--use_external_data_formatz4use external data format to store large model (>2GB)use_external_data_format)argparseArgumentParseradd_argumentrP   set_defaults
parse_args)parserargsr   r   r   _parse_argumentsO  s    r   c                 C   s&   | rt jddd d S t jdd d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(funcName)20s: %(message)s)r   )coloredlogsinstallrE   r   r   r   _setup_loggerg  s   
r   c                  C   s|   t  } t| j td|   tj| jtj| j	kr#t
d t| j}tt|}|  |jj| j	| jd d S )Nz
arguments:zYSpecified the same input and output path. Note that this may overwrite the original modelr   )r   r   rF   rG   rH   ospathrealpathr!   rI   warningr   r{   r
   rK   r   Zsave_model_to_filer   )r   r   Zpacking_moder   r   r   mainq  s   


r   __main__)r   loggingr   r   	constantsr   r   r   r   r   Zonnxr   r   Z
onnx_modelr	   r
   rJ   r   	getLoggerrM   rG   r   rS   rg   r{   r   r   r   r   r   r   r   <module>   s(   
 
9a

