o
    i/                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZmZ d d	lmZ e eZg d
Zg dZG dd dZdS )    N)Path)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSession)zt5-smallzt5-basezt5-largezt5-3bzt5-11b)zgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                   @   s<  e Zd Ze		d.dededededef
dd	Ze	
		d/dededejdededede	ee
eB f fddZe				d0dee
B dejdededededefddZe			d1dedee dB dedefd d!Ze				d2ded"ed#ed$ed%eded&ed'ed(efd)d*Zedee
B d+edejdefd,d-ZdS )3T5Helper F
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 C   s^   |}t j|rt|jd }n|dd  ||7 }|r$t j| |n| }t j||d S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   Z
model_name	directory r   f/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_path!   s   zT5Helper.get_onnx_patht5	cache_dirdevice
model_typestate_dict_pathencoder_decoder_initc           
      C   s   |dkrt j| |d}n|dkrtj| |d}ntd|r&|t| t|j|j	|j
}| | t|j|j|j	|j
d| d}|rIdnd}	|	|d	|iS )
a{  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            model_type (str, optional): model type "t5" or "mt5"
            state_dict_path(str, optional): state dictionary path
            encoder_decoder_init (bool, optional): combine encoder and decoder kv cache initialization into one model.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        r    )r!   Zmt5z only support mode_type=t5 or mt5N)Zdecoder_start_token_idZoutput_cross_onlyr%   encoderdecoder)r   Zfrom_pretrainedr
   
ValueErrorZload_state_dicttorchloadr   r'   Zlm_headconfigevaltor   r&   )
r   r!   r"   r#   r$   r%   modelr'   r&   Zencoder_namer   r   r   
load_model>   s&   	zT5Helper.load_modelTr.   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc              	   C   s<   t | trt| |||||| d S t| ||||| d S )N)
isinstancer   r	   export_onnxr   )r.   r"   r0   r1   r2   r3   r4   r   r   r   r6   l   s$   


zT5Helper.export_onnxN
onnx_modelop_block_listforce_fp16_logitsuse_symbolic_shape_inferc                    sp  du rg ddd |   D }t}||}td| d|  |  jd j}d}|  }	||	v s9J |	| }
d}|
j	d	krw|
}td
|
j  d}|
j
D ]}| |}|dura nqTt|}td|
j d|  |dk }ntd|
j	 d|
j  g }g  |s|dur|s|g}|jg dvr|  }d}d}| d}|rd}|D ]=}
|
j vr| |
|	}| |
|} fdd|D } fdd|D }t|t| dkr |
j |d7 }d}q|st|| }td| d|  td   | |d}td|  |r%| jdddi| |S ddlm} || jfddi| |S )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): operators need to run in fp32.
            force_fp16_logits (bool, optional): force logits and last MatMul node to be in float16. Defaults to False.
            use_symbolic_shape_infer (bool, optional): use symbolic shape inference to convert float to float16. Defaults to True.
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        N)ZSimplifiedLayerNormalizationZ SkipSimplifiedLayerNormalizationZReluAddc                 S   s   h | ]}|j qS r   op_type).0noder   r   r   	<setcomp>   s    z0T5Helper.auto_mixed_precision.<locals>.<setcomp>z	fp32 op: z
 fp16 op: r   FZMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node r;   Tc                    "   g | ]}|j v s| v r|qS r   r<   )r>   childnode_block_listr8   r   r   
<listcomp>       z1T5Helper.auto_mixed_precision.<locals>.<listcomp>c                    rA   r   r<   )r>   parentrC   r   r   rE      rF      z#node counter of Add operator: fp32=z fp16=znode_block_list: )keep_io_typesr8   rD   Zforce_fp16_initializersz!auto_mixed_precision parameters: r:   )convert_float_to_float16Zdisable_shape_inferr   )nodesset
differenceloggerinfographoutputnameoutput_name_to_noder=   inputZget_initializerr   debugwarninginput_name_to_nodesZget_nodes_by_op_typeZget_parentsZget_childrenlenappendrJ   float16r.   )r7   r8   r9   r:   Zop_full_setZfp32_op_setZfp16_op_setZlogits_output_nameZis_weight_fp16_precisionrS   r?   Zlast_matmul_nodeZinitializerrT   Zmax_diffrI   rW   Zfp32_addchangedZ	add_nodesparentschildrenZblocked_childrenZblocked_parentsZfp16_add
parametersrJ   r   rC   r   auto_mixed_precision   s   






zT5Helper.auto_mixed_precisionoptimized_model_path
is_float16num_attention_headshidden_sizer_   use_gpuforce_fp16_ioc	              	   C   sr   ddl m}	 d}
|r|	d}
| |
_t| d||d|
|d}|r/|r)tj||d n|j|d |j||dd	 dS )
zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr    )r#   Z	num_headsrc   	opt_leveloptimization_optionsrd   )r9   )Zcast_input_outputT)Zall_tensors_to_one_file)Zfusion_optionsrf   Zenable_skip_layer_normr   r   r_   Z convert_model_float32_to_float16Zsave_model_to_file)r0   r`   ra   rb   rc   r2   r_   rd   re   rf   rh   mr   r   r   optimize_onnx   s&   
zT5Helper.optimize_onnxort_sessionc                 C   s*   t | trt| |||S t| |||S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)r5   r   r	   verify_onnxr   )r.   rk   r"   r4   r   r   r   rl   #  s   
zT5Helper.verify_onnx)r   F)r    r   F)TFTF)NFT)FTFF)__name__
__module____qualname__staticmethodstrboolr   r)   r"   dictr   r   r/   r6   r   listr_   intrj   r   rl   r   r   r   r   r       s    -
p	'r   )loggingr   pathlibr   r)   rZ   r   r7   r   Z	optimizerr   Z
t5_decoderr   r   Zt5_encoder_decoder_initr   r	   Ztransformersr
   r   Zonnxruntimer   	getLoggerrm   rN   ZPRETRAINED_T5_MODELSZPRETRAINED_MT5_MODELSr   r   r   r   r   <module>   s   
	