o
    i2d                     @   s   d dl Z d dlmZ d dlZd dlZd dlZzd dlmZ W n e	y)   dZY nw ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ G d	d
 d
ZG dd dZdS )    N)Any)to_array_extended   )
TensorData)	ONNXModel)DEQUANT_OP_NAMEONNX_TYPE_TO_NP_TYPEQUANT_OP_NAMETENSOR_NAME_QUANT_SUFFIXfind_by_nameget_opset_versionmodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   @   sL   e Zd Zdeeef fddZdddZdd Zd	d
 Z	dd Z
dd ZdS )QuantizationParamsdatac                 K   s   i | _ | D ]e\}}t|tstdt| d|d|dkr8t|tttjt	fs8tdt| d|d|dkrOt|tsO|d urOtdt| d|dkrg|j
tjtjfvrgtd|j
 d||| j |< qd S )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarrayfloatdtypefloat32float16
ValueError)selfr   kv r*   a/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/quantization/base_quantizer.py__init__(   s   
zQuantizationParams.__init__Nc                 C   s   | j ||S N)r   get)r'   keydefault_valuer*   r*   r+   r.   5      zQuantizationParams.getc                 c   s    | j E d H  d S r-   r   r'   r*   r*   r+   __iter__8   s   zQuantizationParams.__iter__c                 C   s
   | j | S r-   r2   )r'   r/   r*   r*   r+   __getitem__;      
zQuantizationParams.__getitem__c                 C   s   || j |< d S r-   r2   )r'   r/   valuer*   r*   r+   __setitem__>   r1   zQuantizationParams.__setitem__c                 C   s
   t | jS r-   )lenr   r3   r*   r*   r+   __len__A   r6   zQuantizationParams.__len__r-   )__name__
__module____qualname__dictr   r   r,   r.   r4   r5   r8   r:   r*   r*   r*   r+   r   '   s    
r   c                   @   s   e Zd Z	dddZdejjdefddZdd	 Z	d
d Z
dd Zdd Zdd ZdddZdddZ		d ddZdd ZdS )!BaseQuantizerNc                 C   s  t |st|}dd |jjD | _| jdd |jjD  | jdd |jjD  t|| _	t
|| _|| _|| _|
r@|
ni | _d| jv oL| jd | _d | _d| jv oZ| jd | _| jdd | _| jdd	| _| jd
| _t|d|| _t|d|| _	 |d urtdd | D rtddd | D  d|| _|| _|| _|	| _t | jdi | _!dd | j	" D | _#| j!$| j#| j% |\}}|st&|| j!' | _(d S )Nc                 S      i | ]}|j |qS r*   name).0vir*   r*   r+   
<dictcomp>U       z*BaseQuantizer.__init__.<locals>.<dictcomp>c                 S   r@   r*   rA   )rC   Zotr*   r*   r+   rE   V   rF   c                 S   r@   r*   rA   )rC   itr*   r*   r+   rE   W   rF   ZEnableSubgraphZForceQuantizeNoInputCheckZWeightSymmetricZActivationSymmetricFZMinimumRealRangetensor_typec                 s   s    | ]	}t |t V  qd S r-   )r   r   )rC   tr*   r*   r+   	<genexpr>{   s    z)BaseQuantizer.__init__.<locals>.<genexpr>z(tensors_range contains unexpected types c                 S   s   h | ]}t |qS r*   )r   )rC   r)   r*   r*   r+   	<setcomp>}   rF   z)BaseQuantizer.__init__.<locals>.<setcomp>z, not TensorData.ZTensorQuantOverridesc                 S   r@   r*   rA   )rC   Zinitzerr*   r*   r+   rE      rF   ))r   r   graphZ
value_infoZvalue_infosupdateoutputinputr   modelr   Zopset_versionper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentZforce_quantize_no_input_checkr.   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanyvaluesr   tensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer   tensor_quant_overridesinitializerZinitializersZis_validkeysr&   Zget_quant_typesZtensor_quant_override_qtypes)r'   rP   rQ   rR   r[   rZ   r^   r_   r`   ra   rS   Zoverrides_validZoverrides_errr*   r*   r+   r,   F   sJ   

zBaseQuantizer.__init__weight_quant_typereturnc                 C   s0   | j d ur| j S |tjjtjjtjjtjjfv S r-   )rV   onnxTensorProtoINT4ZINT8ZINT16FLOAT8E4M3FN)r'   re   r*   r*   r+   is_weight_symmetric   s   
z!BaseQuantizer.is_weight_symmetricc                 C   s   t r-   )NotImplementedErrorr3   r*   r*   r+   quantize_model   s   zBaseQuantizer.quantize_modelc                 C   s   t || j }|d uS r-   )r   rP   rc   )r'   Z
input_namerc   r*   r*   r+   is_input_a_initializer   s   z$BaseQuantizer.is_input_a_initializerc                 C   s   | j S r-   )rQ   r3   r*   r*   r+   is_per_channel   s   zBaseQuantizer.is_per_channelc                 C   sN   t || j }|d ur|jtjjtjjfv S | jr| j	d u r!dS | j	
|S )NF)r   rP   rc   	data_typerg   rh   FLOATFLOAT16rT   rU   is_valid_quantize_weight)r'   weight_nameweightr*   r*   r+   rs      s   z&BaseQuantizer.is_valid_quantize_weightc                 C   sh   | j d urt| j dkr|j| j vrdS |j| jvrdS |jttfv r%dS | jd ur2|j| jv r2dS dS )Nr   FT)r_   r9   rB   op_typera   r   r	   r`   )r'   noder*   r*   r+   should_quantize_node   s   
z"BaseQuantizer.should_quantize_node      ?c                 C   s  t || j }t|}|t }| jtjjkr`t	
|}|jt	jkr'tjj}	n|jt	jkr2tjj}	n	td|j d|t	j}
t	jdg|
jd}|d}tj|
|}| j|g d}nz|| | }t	j
|t	jdt	j
|t	jd }
|
 }
t	t	t	jj}t	t	t	jj}t	|
|k st	|
|krtd| d t	 |
||t	j}
t	j
|
t	jd|j!}tj||}| j|g t	j
||jdd}d	}| j}	|d
 }tj||}| j|g | jtjjkr| j}ntjj"}|d }| jtjjkrtj#$|| jdgdg}n$|j%dkr.t	j&|j't	jdd}tj||}n
tj#$||g dg}| j|g ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r#   ZCastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.ZDequantizeLinear_scale_zero_point        r   )(r   rP   rc   r   r
   r[   rg   rh   rj   r    asarrayr#   r%   rr   r$   rq   r   Zastypearrayreshapenumpy_helper
from_arrayinitializer_extendZfloat64roundZiinfoZint32minmaxr\   loggingwarningZclipdimsZINT32helpermake_tensorsizeZzerosshape)r'   Z	bias_nameZinput_scaleZweight_scalebetaZbias_initializerZ	bias_dataZquantized_bias_namer   Z
node_qtypeZquantized_dataZ
bias_scaleZbias_scale_dataZpacked_bias_initializerZ	node_typeZ	int32_minZ	int32_maxZbias_np_dataZquantized_bias_scale_nameZpacked_bias_scale_initializerrH   Zquantized_bias_zp_nameZpacked_bias_zp_initializerZbias_zp_datar*   r*   r+   quantize_bias_static_impl   sj   



 
z'BaseQuantizer.quantize_bias_static_implFc                 C   s~  |j t }|j d }|j d }t|}| jj|j i d}	d|	v r%|	d j}d|	v r{d|	v r{tj|	d t| d}
t|	d }t	||
 ||
}t|
tjsWJ dt|
 |
jtjkrc|
jtjkskJ d	|
j t|tjszJ dt| na|| jkr| |n| j}t|
 ||	d
||	d| jo|| j|	d|	dd\}
}}t|
tjsJ dt|
 |
jtjkr|
jtjksJ d	|
j t|tjsJ dt| |j}tj||g |d }tj||g |
d }| j||g |s| jtj j!krkt  }| j|_|j"#|j" ||_ |
 $ % |_&t'durjt'|}|j(|j(ksB|% |% krjt)d|j( d|% dd  d|% dd  d|j( dt*|dd  dnH|tj j+tj j,fv r|jtj-tj.fvrt)d| dt/t0|% }tjj|||j"|dd}ntj1|tj2|d|j"}tj34||}| j|g |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r}   r|   Zdefault_val
quant_typer   
zero_pointrz   Unexpected type Unexpected dtype 	symmetricrR   rminrmaxrR   rX   Zrmin_overrideZrmax_override)r{   NzThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5rB   r
   r   rb   Zget_per_tensor_overridesrH   r    r   r   r   flattenr   r!   r   r#   r$   r%   r[   rk   rW   r   r.   rR   rX   rp   rg   r   r   r   tolistrP   r   rh   rj   r   extendcopytobytesraw_datar   r   RuntimeErrorr   ri   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r'   ru   ZqTyperR   keep_float_weightq_weight_namezp_name
scale_nameZweight_dataZquant_overridesr   r   Zq_weight_datar   Zscale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datar*   r*   r+   quantize_initializer_impl  s   
	



 



 


z'BaseQuantizer.quantize_initializer_implTc           &      C   s  t || j }|d u rtd|t|}t|j}t||\}	}
|	s0td| d| d| |
}|j| }| jj	|d|igd}t|}|dkrY||krYtd| d	| d
t|d d |\}}|rj||kr|td| d| d|d d  dd|d v r|d d j
}|d d| |}|d d| jo|}g }g }g }t|j}t|}d||< t|D ]}|||}||k r|nd}|| }d|v r5d|v r5tj|d t| d}t|d }t|| ||}t|tjsJ dt| |jtjkr|jtjksJ d|j t|tjs$J dt| t|tjs4J dt| n]t| |||| j|d|dd\}}}t|tjs\J dt| |jtjkrj|jtjksrJ d|j t|tjsJ dt| t|tjsJ dt| || || |t| | qt!||}|t" }|d }|d } |j#| g}!t$j%&| |j'|!t(|) }"t$j%&|||!t(|) }#| j*|"|#g |s=|t$j+j,t$j+j-fv r |jtj.tj/fvrt0d| dt1t2|3 }$t$j%j&||||$dd }%| j*|%g ntj|t$j%4|d |j#}t$j56||}%| j*|%g ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   rR   r   r   rz   r   r   r   r   r   r}   r|   r   r   Tr   )7r   rP   rc   r&   r   r9   r   r   rb   Zget_per_channel_overridesrH   r.   rk   rR   listrangeZtaker    r   r   r   r   r   r!   r   r#   r$   r%   r   rX   appendr   r   Zconcatenater
   r   rg   r   r   rp   Zhstackr   r   rh   ri   r   r   r   r   r   r   r   r   r   r   )&r'   rt   r[   Zchannel_axisrR   r   rc   weightsZweights_rankZis_axis_validZ	axis_normZchannel_countZquant_overrides_for_channelsZnum_channel_overridesZis_axis_override_validZaxis_overrider   Zzero_point_listZ
scale_listZquantized_per_channel_data_listZweights_shapeZreshape_dimsiZper_channel_dataZchannel_override_indexZchannel_quant_overridesr   r   Zquantized_per_channel_dataZquantized_weightsr   r   r   Zzero_scale_shaper   r   r   r   r*   r*   r+    quantize_weight_per_channel_impln  s   	






  

 





z.BaseQuantizer.quantize_weight_per_channel_implc                 C   s   | j d u rd S | j D ]p}|jdv r_| |sqt| j |jd  dkr(q|jd | j vs8|jd | j vr9q| j |jd  }t	|t
sVtdt| d|jd d|| j |jd < q|jdkr|| |sjqt
tdtd	d
| j |jd < qd S )N)ZClipZRelur   r   r   z for r   ZSoftmaxr~   ry   )ZlowestZhighest)r^   rP   nodesrv   rx   r9   Zinput_name_to_nodesrO   rN   r   r   r   r   r    r$   )r'   rw   tdr*   r*   r+   adjust_tensor_ranges  s(   


 
 

$z"BaseQuantizer.adjust_tensor_rangesr-   )ry   )FF)TF)r;   r<   r=   r,   rg   rh   ZDataTypeboolrk   rm   rn   ro   rs   rx   r   r   r   r   r*   r*   r*   r+   r?   E   s     
J


T`
 r?   ) r   typingr   numpyr    rg   Zonnx.numpy_helperZonnx.reference.op_runr   ImportErrorZ	calibrater   Z
onnx_modelr   Zquant_utilsr   r   r	   r
   r   r   r   r   r   r   r   r   r   rb   r   r   r?   r*   r*   r*   r+   <module>   s    <