o
    ¢Äi#  ã                   @   s`   d dl Z d dlZd dlZd dlZd dlmZ e  e¡Zdd„ Z	dd„ Z
dd„ ZG d	d
„ d
ƒZdS )é    N)ÚConv1Dc                 C   s<   | j j\}}tj ||¡}| j jj ¡ |j _| jj|j_|S )N)	ÚweightÚshapeÚtorchÚnnÚLinearÚdataÚTÚ
contiguousZbias)ÚmoduleZin_sizeZout_sizeÚlinear© r   úb/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/quantize_helper.pyÚ_conv1d_to_linear   s
   r   c                 C   sN   t  d¡ t| jƒD ]}| j| }t|tƒr t|ƒ}|| j|< q
t|ƒ q
dS )zsin-place
    This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
    zreplace Conv1D with LinearN)ÚloggerÚdebugÚlistZ_modulesÚ
isinstancer   r   Úconv1d_to_linear)ÚmodelÚnamer   r   r   r   r   r      s   



úr   c                 C   s.   t  |  ¡ d¡ tj d¡d }t d¡ |S )Nztemp.pé   )r   ÚsaveZ
state_dictÚosÚpathÚgetsizeÚremove)r   Úsizer   r   r   Ú_get_size_of_pytorch_model'   s   
r   c                   @   s,   e Zd Zeejfdd„ƒZeddd„ƒZdS )ÚQuantizeHelperc                 C   sL   t | ƒ tjj| tjjh|d}t dt| ƒ› ¡ t dt|ƒ› ¡ |S )z{
        Usage: model = quantize_model(model)

        TODO: mix of in-place and return, but results are different
        )Údtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   ZquantizationÚquantize_dynamicr   r   r   Úinfor   )r   r    Zquantized_modelr   r   r   Úquantize_torch_model/   s
   z#QuantizeHelper.quantize_torch_modelFc                 C   s   ddl m} ddlm} ||ƒjjddd t dtj	 
| ¡d › ¡ || ||dtjjid	 t d
|› ¡ t dtj	 
|¡d › ¡ d S )Nr   )ÚPath)r!   T)ÚparentsÚexist_okz&Size of full precision ONNX model(MB):r   ZDefaultTensorType)Úuse_external_data_formatZextra_optionszquantized model saved to:z!Size of quantized ONNX model(MB):)Úpathlibr$   Zonnxruntime.quantizationr!   ÚparentÚmkdirr   r"   r   r   r   ÚonnxZTensorProtoÚFLOAT)Zonnx_model_pathZquantized_model_pathr'   r$   r!   r   r   r   Úquantize_onnx_model<   s   
ü z"QuantizeHelper.quantize_onnx_modelN)F)Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr   Zqint8r#   r-   r   r   r   r   r   .   s
    r   )Úloggingr   r+   r   Ztransformers.modeling_utilsr   Ú	getLoggerr.   r   r   r   r   r   r   r   r   r   Ú<module>   s   
