o
    ioA                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ e eZG d
d dejjZG dd dejjZG dd dZG dd dZdS )    N)Path)
TypeHelper)	OnnxModel)PastKeyValuesHelper)T5EncoderInputs)torch_onnx_export)	MT5ConfigT5Config)InferenceSessionc                
       sb   e Zd ZdZ	ddejjdejjdeeB de	dB f fddZ
d	ejd
ejdejfddZ  ZS )T5DecoderInitz~A T5 decoder with LM head to create initial past key values.
    This model is only called once during starting decoding.
    Ndecoderlm_headconfigdecoder_start_token_idc                    sV   t    || _|| _|| _|d ur|n| jj| _t| jdr&| jj| _d S d| _d S Ntie_word_embeddingsT)super__init__r   r   r   r   hasattrr   )selfr   r   r   r   	__class__ g/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/t5/t5_decoder.pyr       s   
zT5DecoderInit.__init__decoder_input_idsencoder_attention_maskencoder_hidden_statesc                 C   s   |d u r|j d }tj|dftj|jd| j }| j|||ddd}|j}|j}| j	r3|| j
jd  }| |}t|\}	}
||	|
fS )Nr      dtypedeviceT)	input_idsr   r   	use_cachereturn_dict      )shapetorchZoneslongr    r   r   last_hidden_statepast_key_valuesr   r   d_modelr   r   group_by_self_or_cross)r   r   r   r   
batch_sizedecoder_outputssequence_outputpresent_key_values	lm_logitsZ	past_selfZ
past_crossr   r   r   forward2   s0   
	

zT5DecoderInit.forwardN)__name__
__module____qualname____doc__r&   nnModuler	   r   intr   ZTensorZFloatTensorr1   __classcell__r   r   r   r   r      s&    	r   c                       s(   e Zd ZdZ fddZdd Z  ZS )	T5Decoderz-A T5 decoder with LM head and past key valuesc                    s@   t    || _|| _|| _t| jdr| jj| _d S d| _d S r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   Y   s   
zT5Decoder.__init__c                 G   sv   | j j}t||}|d}| j||||ddd}|j}|j}	| jr+|| j j	d  }| 
|}
t|	\}}|
|fS )N   T)r!   r)   r   r   r"   r#   r$   )r   num_decoder_layersr   Zgroup_by_layerZ	unsqueezer   r(   r)   r   r*   r   r+   )r   r   r   pastr=   r)   Zdummy_encoder_hidden_statesr-   r.   r/   r0   Zpresent_self_r   r   r   r1   b   s$   
	
zT5Decoder.forward)r3   r4   r5   r6   r   r1   r:   r   r   r   r   r;   V   s    	r;   c                   @   sd   e Zd Z	dddZe		ddeeB dededed	ej	d
e
de
fddZdefddZdd ZdS )T5DecoderInputsNc                 C   s   || _ || _|| _d S r2   )r   r   r)   )r   r   r   r)   r   r   r   r      s   
zT5DecoderInputs.__init__Fr   r,   encode_sequence_lengthpast_decode_sequence_lengthr    float16use_int32_inputsc                 C   s   | j }| j}| j}	| j}
d}tjd|	d ||f|rtjntj|d}tj	|||	||d}|r1tj
ntj}|dkro||||
g}||||
g}g }td| D ]}|tj|||d qLtd| D ]}|tj|||d q`nd}t||j|S )aZ  Create dummy inputs for T5Decoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            T5DecoderInputs: dummy inputs for decoder
        r   r   )lowhighsizer   r    )rD   r<   r   N)Z	num_headsr=   
vocab_sizeZd_kvr&   randintZint32Zint64r   create_dummyrC   float32rangeappendZrandr@   Zattention_mask)r   r,   rA   rB   r    rC   rD   Znum_attention_heads
num_layersrH   Z	head_sizeZsequence_lengthr   Zencoder_inputsZ
float_typeZself_attention_past_shapeZcross_attention_past_shaper>   r?   r   r   r   rJ      sN   zT5DecoderInputs.create_dummyreturnc                 C   s"   | j | jg}| jr|| j |S r2   )r   r   r)   extend)r   
input_listr   r   r   to_list   s   zT5DecoderInputs.to_listc                 C   s2   | j rdd | j D nd }t| j | j |S )Nc                 S   s   g | ]	}|j tjd qS ))r   )tor&   rK   ).0pr   r   r   
<listcomp>   s    z+T5DecoderInputs.to_fp32.<locals>.<listcomp>)r)   r@   r   cloner   )r   r>   r   r   r   to_fp32   s   zT5DecoderInputs.to_fp32r2   )FF)r3   r4   r5   r   staticmethodr	   r   r9   r&   r    boolrJ   listrR   rX   r   r   r   r   r@   ~   s.    

K	r@   c                   @   s~   e Zd Ze			ddeeB dejdede	de	de	fd	d
Z
edefddZe	ddeeB dedejde	def
ddZdS )T5DecoderHelperTFr   r    onnx_model_pathverboseuse_external_data_formatrD   c                 C   s  t | ttfs	J tj| jddt | trdnd||d}| }| jj}tj	|dd}	tj	|dd}
|
d	d|  }t | trA|	ng }t | trJ|n|
}d
g|}dg}|
d || ddiddddddddid}|D ]}dd|v rzdndd||< qq|D ]!}d|v rddd||< qt | trddd||< qddi||< qt|jjddd t I}tj|d}t|jjddd t| t||r|n|d|||dd||d |rtj|dd}tj||ddd W d	   d	S W d	   d	S 1 sw   Y  d	S )a  Export decoder to ONNX

        Args:
            decoder (Union[T5Decoder, T5DecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        r<         r   )r,   rA   rB   r    rD   F)ZpresentTNlogitsr!   r   r,   rA   )r   r   )r!   r   r   rb   r   rB   )r   r<   crosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfZexport_paramsinput_namesoutput_namesdynamic_axesZopset_versionZdo_constant_foldingr_   r^   )Zload_external_data)Zsave_as_external_dataZall_tensors_to_one_file)
isinstancer;   r   r@   rJ   r   rR   r=   r   get_past_namesrM   rP   r   parentmkdirtempfileTemporaryDirectoryospathjoinr   tupleonnxZ
load_modelr   save)r   r    r]   r^   r_   rD   inputsrQ   r=   
past_namesZpresent_namesZpresent_self_namesZinput_past_namesZoutput_present_namesrj   ri   rk   nameZtmp_dir_nameZtemp_onnx_model_pathmodelr   r   r   export_onnx   s   






"zT5DecoderHelper.export_onnxrx   c                 C   s   t d t|j  t|j  d}|jrMt|jd dks(J t	t|jd }t
|}t|jD ]\}}t|  ||| < q;| d|}|S )zRun inference of ONNX model.zstart onnxruntime_inference)r!   r      r   N)loggerdebugnumpyZascontiguousarrayr   cpur   r)   lenr9   r   rm   	enumeraterun)ort_sessionrx   Z
ort_inputsrN   ry   iZpast_tensorort_outputsr   r   r   onnxruntime_inference^  s   

z%T5DecoderHelper.onnxruntime_inferencer}   r{   r   	max_casesc                 C   s  t |ddk}g d}g }|d| D ]\}}	}
t| tr d}
tj| j||	|
|||d}|  }t	
  | | }W d   n1 sFw   Y  t||}| jj}tt|d   |d  }|}td|  td| D ](}tt|d	 |   |d	|   }td
| d|  t||}qwt| trtd| D ],}tt|d |   |d	d|  |   }td| d|  t||}q|| td||	|
| q|S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.Zpast_key_self_0ztensor(float16)))r}      r`   )r   r<   ra   )r`   r   r   )   ra   r<   Nr   )r    rC   rD   zlogits max_diff=r<   r   zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   Zget_input_typerl   r   r@   rJ   r   rX   rR   r&   Zno_gradr\   r   r=   r   Zamaxabsr   r~   r   rL   maxrM   info)r{   r   r    rD   r   rC   Z
test_casesZtest_cases_max_diffr,   rA   rB   rx   rQ   Ztorch_outputsr   r=   Zmax_diffZmax_diff_allr   r   r   r   verify_onnxr  s`   	



$,
,
zT5DecoderHelper.verify_onnxN)TFF)r}   )r3   r4   r5   rY   r;   r   r&   r    strrZ   r|   r@   r   r
   r9   r   r   r   r   r   r\      sB    ur\   ) loggingrr   rp   pathlibr   r   rv   r&   Zio_binding_helperr   Z
onnx_modelr   Zpast_helperr   Z
t5_encoderr   Ztorch_onnx_export_helperr   Ztransformersr   r	   Zonnxruntimer
   	getLoggerr3   r~   r7   r8   r   r;   r@   r\   r   r   r   r   <module>   s&   
;(i