o
    iF                     @   s   d dl Z d dlZd dlZd dlZd dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddl m!Z! G d	d
 d
eZ"dS )    N)onnx_pb   )BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domainquantize_onnx_initializer&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                   @   sd  e Zd Z	dGddZdd Zdd Zdd	 Zd
d Zdd Zdd Z	dd Z
dHddZdd Zdd Zdd Zdd ZdIddZ	dJdd Zd!d" Z	dGd#d$Zd%ejd&ejd'ed(ejd)ed*eeejdB f fd+d,Zd'ed-ejd*dfd.d/ZdKd1d2Zd3d4 ZdHd5d6Z			7	dLd8d9Z 	:			7	dMd;d<Z!dNd=d>Z"	:	dOd?d@Z#dAdB Z$dCdD Z%dEdF Z&dS )PONNXQuantizerNc                 C   sx  t | |||||||	|
|| |sE| j  t| jj}dd |jjD | _| jdd |jj	D  | jdd |jj
D  t|| _|| _|| _| jdk| _d| jv oZ| jd | _g | _d| _i | _| jdd |jj	D  | jd	d |jj
D  | jjjjD ]}| jt|j	d
 q| jtvrtd| j |  | _d| _d| _d| _d| _i | _ | j! | _"d S )Nc                 S      i | ]}|j |qS  name).0vir   r   a/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/quantization/onnx_quantizer.py
<dictcomp>H       z*ONNXQuantizer.__init__.<locals>.<dictcomp>c                 S   r   r   r   r   Zotr   r   r!   r"   I   r#   c                 S   r   r   r   r   itr   r   r!   r"   J   r#   
   ZMatMulConstBOnly/c                 S      i | ]}|j d qS r   r   r$   r   r   r!   r"   V   r#   c                 S   r)   r*   r   r%   r   r   r!   r"   W   r#   r   zunsupported quantization mode Zfixed_quantization_range_uint8Zfixed_quantization_range_int8Z
fixed_zeroZfixed_zero_zp)#r   __init__modelZreplace_gemm_with_matmulr   graph
value_infovalue_infosupdateoutputinputr   modestaticZopset_versionfuse_dynamic_quantextra_optionsZq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnodedictfromkeysr	   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapZget_non_initializer_inputsgenerated_value_names)selfr,   per_channelreduce_ranger3   r4   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer6   r:   r   r   r!   r+   '   sP   



zONNXQuantizer.__init__c                 C   s~   t jj|d| jjjd}t| t|| j| j| j	| j
| j| j| j| j| j| j| j}| |_| j | d|_|  |jjjS )z
        generate submodel for the subgraph, so that we re-utilize current quantization implementation.
        quantize the submodel
        update subgraph and set it back to node
        onnx-quantizer)producer_nameZopset_importsr(   )onnxhelperZ
make_modelr,   opset_importr   r   rG   rH   r3   r4   rI   rJ   rK   rL   rM   rN   r6   parentr8   quantize_modelr-   )rF   subgraphZ	graph_keyZwarped_modelZsub_quantizerr   r   r!   quantize_subgraphp   s0   
zONNXQuantizer.quantize_subgraphc           	      C   s  dd |j D }t|dkr|S |jr|jn
|j dt| j }i }|j D ]M}|jtjjkr@|j| 	|j
| d|j i}n.|jtjjkrjg }|jD ]}|| 	|| d|j dt| g qL|j|i}nt|}|| q&tjj|j|j|jfd|ji|S )z|
        Check subgraph, if any, quantize it and replace it.
        return new_nodes added for quantizing subgraph
        c                 S   s,   g | ]}|j tjjks|j tjjkr|qS r   )typerQ   AttributeProtoGRAPHGRAPHS)r   attrr   r   r!   
<listcomp>   s
    z>ONNXQuantizer.quantize_node_with_sub_graph.<locals>.<listcomp>r   Z_node_count_:r   )	attributelenr   op_typer7   rX   rQ   rY   rZ   rW   gr[   Zgraphsextendr   r0   rR   	make_noder2   r1   )	rF   r:   Zgraph_attrsZ	node_namekwargsr\   kvvaluerV   r   r   r!   quantize_node_with_sub_graph   s0   "
"
$z*ONNXQuantizer.quantize_node_with_sub_graphc                 C   s   t dd | j D S )zQ
        Detect if model already has QuantizeLinear or DequantizeLinear.
        c                 s   s$    | ]}|j d kp|j dkV  qdS )QuantizeLinearDequantizeLinearN)ra   r   r:   r   r   r!   	<genexpr>   s    
z.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>)anyr,   nodes)rF   r   r   r!   has_QDQ_nodes   s   zONNXQuantizer.has_QDQ_nodesc                 C   s2   t || j d urdS | jd ur| j|S dS )NTF)r   r,   initializerrT   find_initializer_in_path)rF   Zinitializer_namer   r   r!   rq      s
   
z&ONNXQuantizer.find_initializer_in_pathc                 C   s2   | j | |D ]}|jD ]}| j| qqd S N)r7   rc   r1   rE   add)rF   rn   r:   output_namer   r   r!   add_new_nodes   s   
zONNXQuantizer.add_new_nodesc                 C   sD  |   r	td | j D ]2}| jr| |}t| j}t	| |}|
  t|t| jD ]}| j| jD ]}| j| q6q.q|   | j d | j j| j | jd u rq| j \}}t|dkrqtdt| t| jj_t| jj_dd | jjjD }|sdd | jD }	|	r| jjj }
d|
_t|
_| jjS )	NzPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.r:   r   z0Invalid model with unknown initializers/tensors.c                 S   s   g | ]	}|j tkr|qS r   )domainr   )r   opsetr   r   r!   r]          z0ONNXQuantizer.quantize_model.<locals>.<listcomp>c                 S   s   g | ]	}|j d kr|qS )zcom.microsoft)rv   rk   r   r   r!   r]      rx   r   ) ro   loggingwarningr,   rn   enable_subgraph_quantizationrh   r`   r7   r   quantizeranger1   rE   rs   _dequantize_outputsr-   Z
ClearFieldr:   rc   rT   Zclean_initializersRuntimeErrorstrr   rP   r   Zproducer_versionrS   versionr   rv   )rF   r:   Znumber_of_existing_new_nodesZop_quantizerirt   _Zinitializers_not_foundZms_opsetZms_nodesrw   r   r   r!   rU      s@   





zONNXQuantizer.quantize_modelc                 C   s8   d| j v rtd|| j d  | j d S td|d)NZDefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)r6   ry   infor   rF   tensor_namer   r   r!   _get_default_tensor_type   s   


z&ONNXQuantizer._get_default_tensor_typeFc                 C   s   t || j }|d ur|jS || jv r2| j| }|jdr2|r-|jjjdkr-| 	|S |jjjS | j
r:| jd u rC|rA| 	|S d S | j|}|d urO|S | j
ra| jra| j|}|d ura|S |rh| 	|S d S )Ntensor_typer   )r   r,   rp   	data_typer/   rX   HasFieldr   	elem_typer   r{   rT   is_valid_quantize_weightget_tensor_type)rF   r   	mandatoryweightr    Zotyperesr   r   r!   r     s.   





zONNXQuantizer.get_tensor_typec                 C   s   |  |r
| |S || jv r8| j| }|jdr)|jjjtjj	tjj
fv r)dS td|d|j d dS | jrD| jrD| j|S td|d dS )	Nr   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)Zis_input_a_initializerr   r/   rX   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16ry   rz   r{   rT   is_float_tensor)rF   r   r    r   r   r!   r     s&   




zONNXQuantizer.is_float_tensorc                 C   sD   |t jjkr| |||S |t jjkr| |||S td| d)a  
        Create nodes for dynamic quantization of input and add them to nodes_list.
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter qType: type to quantize to.
            parameter initial_type: type to quantize from
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8r=   )rF   
input_name
nodes_listqTypeinitial_typer   r   r!   &_get_dynamic_input_quantization_params6  s
   	z4ONNXQuantizer._get_dynamic_input_quantization_paramsc                 C   s  t jj}|d }|d }tjjd|g|d g|dd}|| |d }tjjd|g|d g|dd}	||	 |d	 }
tjd
|jd g|
d g|
}|| |d	 }tjd
|	jd g|d g|}|| |d }tjd|jd |jd g|d g|}|| tj| j	|g t
|d g}| j| |d }tjd|jd | j	g|g|}|| tj| j|g dg}| j| || jg g fS )az  
        Create nodes for dynamic quantization of input to int8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLOAT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        _scale
_ReduceMin	ReduceMin:0r   Zkeepdims
_ReduceMax	ReduceMaxZ_AbsZAbsZ_Abs_MaxZMax       @Z	scale_DivDiv)r   r   r   rQ   rR   rd   appendr1   make_tensorrA   r   r,   add_initializerrC   )rF   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodeZreduce_min_abs_nameZreduce_min_abs_nodeZreduce_max_abs_nameZreduce_max_abs_nodeZabs_max_nameZabs_max_nodeZinitializer_divscale_div_namescale_div_nodeZinitializer_zpr   r   r!   r   E  s|   







z9ONNXQuantizer._get_dynamic_input_quantization_params_int8c                 C   s  t jj}|d }|d }|d }tjjd|g|d g|dd}|| |d }	tjjd	|g|	d g|	dd}
||
 tj| j|g t	|g}| j
| tj| j|g d
g}| j
| |d }tjd|
jd |jd g|d g|}|| |d }tjd|jd | jg|g|}|| |d }tjd| j|jd g|d g|}|| |d }tjd|jd |g|d g|}|| |d }tjd|j|d g|}|| |d }tjjd|j|g||d}|| ||g g fS )a{  
        Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLAOT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        r   _zero_pointr   r   r   r   r   r   r           Z
_scale_SubSubZ
_scale_Divr   Z_zero_point_SubZ_zero_point_DivZ_zero_point_FloorZFloorZ_zero_point_CastZCast)to)r   r   r   rQ   rR   rd   r   r   r@   r   r,   r   rB   r1   )rF   r   r   r   r   r   Zinput_zp_namer   r   r   r   Zinitializer_qrangeZinitializer_qvalueZscale_sub_nameZscale_sub_noder   r   Zzp_sub_nameZzp_sub_nodeZzp_div_nameZzp_div_nodeZzp_floor_nameZzp_floor_nodeZzp_cast_nameZzp_cast_noder   r   r!   r     s   







z:ONNXQuantizer._get_dynamic_input_quantization_params_uint8c                 C   s  | j }|du s|du r| jdu s|| jvr td| d dS | j| }t|ts7tdt| d|d|du sAt|dkrKt	d	| d
| t
|d g}t|d dre|d jt
jt
jfvrst	dt|d  d|t
|d g}|jt
jksJ |d }n't
|g}t
|g}| j| }d|v r|d j}||}|jt
jksJ g }	|d }
g }|d }tj|
||	|  }| j| |jt
jkrtjj}n|jt
jkrtjj}nt	d|j d|tj||||d }| j| d||
||	fS )a\  
        Create initializers and inputs in the graph for zero point and scale of output.
        Zero point and scale values are obtained from self.quantization_params if specified.
            parameter param_name: Name of the quantization parameter.
            return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
        Nz$Quantization parameters for tensor:"z" not specified)F r   r   r   Unexpected type  for r      zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=)T)rJ   r?   ry   r   
isinstancer   	TypeErrorrX   r`   r=   nparrayhasattrr   Zfloat32Zfloat16float64astyperQ   rR   r   Zraveltolistr,   r   r   r   r   r   reshape)rF   
param_nameZ	use_scaleZuse_zeropointZzero_point_typeparamsZzero_point_valuesZscale_valuesr   Zzero_point_shapeZzero_point_namescale_shape
scale_nameZinit_zpZ
scale_typeZ
init_scaler   r   r!   _get_quantization_params  sZ   

$





z&ONNXQuantizer._get_quantization_paramsc              	   C   sJ  |j | }|dksJ d|t }|d }	|dur&|dur&d||}
}}n
| |\}
}}}}g }|
rBtjd|||g|g|	}nR| jrGdS | jrf|tj	j
krf|d }|d }tjd	|g|||g|	}n.|duszJ d
|d| d| d| | j||||d\}}}}tjd|||g|g|	}t|||||| j|< g ||S )a  
        Given an input for a node (which is not a initializer), this function

        - add nodes to compute zero point and scale for this input if they don't exist.
        - add new QuantizeLinear node to quantize the input.

        :param node: node being quantized in NodeProto format.
        :param input_index: index of input in node.input.
        :param qType: type to quantize to.
        :param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
        :param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
        :param initial_type: type of the weight to quantize
        :return: List of newly created nodes in NodeProto format.
        r   z*Cannot access undefined variable in graph._QuantizeLinearNTri   r   r   ZDynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r2   r   r   rQ   rR   rd   r4   r5   r   r   r   r   r
   rD   )rF   r:   input_indexr   Zgiven_scale_nameZgiven_zp_namer   r   rt   Zql_node_nameZ
data_foundr   zp_namer   rn   qlinear_noder   Zzp_shaper   r   r!   _get_quantize_input_nodes2  sf   

	z'ONNXQuantizer._get_quantize_input_nodesc                 C   s.   || j v r
| j | S | jd ur| j|S d S rr   )rD   rT   find_quantized_value)rF   r   r   r   r!   r   x  s
   


z"ONNXQuantizer.find_quantized_valuec
                 C   s   t |}
|d|
  | }t j| t jd}t j| t jd}|| }||k rj|dkrj|| }|| }|	du rQtd| d| d| d d	t j||dfS td
|	 d| d| d| d	 d	||fS d|fS )zHAdjust a single weight scale to ensure the int32 bias does not overflow.r   r   r   NzIncreasing scale for weight `z` by the ratio z to ensure bias `z` has a valid scale.TzIncreased scale[z] for weight `z` by ratio F)r   absr   itemr   ry   r   r   )rF   Zbias_valinput_scaleweight_scaleweight_scale_dtypeweight_name	bias_nameqrangemultiplicative_epsilonidxabsmaxZbias_smallest_valid_scaleZinput_scale_fp64Zweight_scale_fp64Zbias_candidate_scaleratio	new_scaler   r   r!   $adjust_single_weight_scale_if_needed  s,   
z2ONNXQuantizer.adjust_single_weight_scale_if_neededr   r   r   bias_tpis_per_channelreturnc                 C   sD  |j sdS t|}ttj}d}tj|jtjdtj|jd tjd }	|j	}
d}|smt
| tjdtjd}t| tjdtjd}tt|t|}| ||||
||j|	|\}}|ri|}d}||fS |jrt|jdkrt|jd D ]}| j|| ||| |
||j|	||d	\}}|r|||< d}q~||fS )	zOChecks if the bias scale is too small and increases the weight scale if needed.)FNgqh ?r   r   Fr   T)r   )sizer   r   ZiinfoZint32r   maxr   minr   minimummaximumr   r   r   shaper`   r}   )rF   r   r   r   r   r   Zbias_float_dataZ
int32_infor   r   r   updatedrminrmaxr   changedr   r   r   r   r!   #_adjust_weight_scale_for_int32_bias  sX   
(

z1ONNXQuantizer._adjust_weight_scale_for_int32_biasr   c                 C   s  || j vrdS | j | }t|| j }t|j| j }t|j| j }t|j| j }|du s?|du s?|du s?|du rAdS | j| | j| tj	
|}|j}	tj|tj|jd}
tj	|
|j|j}| j| t|| j||
|	|jd}| j| dS )zCRe-quantizes the given weight initializer using the provided scale.Nr   )Zquant_weight_name)rD   r   r,   rp   r   r   q_nameZremove_initializerrQ   numpy_helperto_arrayaxisr   ZasarrayrR   Ztensor_dtype_to_np_dtyper   Z
from_arrayr   dimsr   r   rI   )rF   r   r   ZqvZ	weight_tp
scale_initZzp_initZq_weight_initweight_zero_pointr   Zscale_npZnew_scale_initZnew_q_weightr   r   r!   _requantize_weight  s2   

 z ONNXQuantizer._requantize_weight      ?c              
   C   s  || j v r| j | jS | j | j}t|| j }t|}|| j v r)| j | j}n|| jv r9| |\}	}}	}	}	nt	d| dt|| j }
t|
}| j | j
}t|| j }|duretj|nd}| j}|dur|jr| s| jtjjfv rt|| j }| |||||\}}|r| || |}| ||||\}}}}}}|| j vsJ t||||tj|jdkrdnd||d}|| j |< |S )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        z	Expected z5 to be in quantized value map for static quantizationNr   r   )	node_type
node_qtype)rD   r   r   r   r,   rp   r   r?   r   r=   r   rQ   r   r   rG   r   rm   rI   r   r   r   r   r   Zquantize_bias_static_implr
   r   Initializer)rF   r   r   r   betaZweight_scale_nameZweight_initializerr   r   r   Zinputscale_initializerr   Zweight_zp_nameZweight_zp_initr   r   Zbias_initializerZ
did_updateZnew_weight_scaleZquantized_bias_nameZquantized_bias_scale_nameZquantized_bias_zp_nameZbias_scale_datar   r   quantized_valuer   r   r!   quantize_bias_static  sl   


	

z"ONNXQuantizer.quantize_bias_staticc                 C   s   || j v p|| jv p|| jv S )zq
        only check for value info and newly generated tensor names, initializers are checked separately
        )r/   r9   rE   r   r   r   r!   contains_tensorJ  s
   
zONNXQuantizer.contains_tensorc              	   C   s   | j ||dddd|dS )NFr   r:   indicesinitializer_use_weight_qTyperH   op_level_per_channelr   from_subgraph_ONNXQuantizer__quantize_inputs)rF   r:   r  r  r   r   r!   quantize_activationT  s   z!ONNXQuantizer.quantize_activationr   c              	   C   s   | j ||d||||dS )NTr  r  )rF   r:   r  rH   r  r   r  r   r   r!   quantize_weighta  s   	zONNXQuantizer.quantize_weightTc              
   C   s  g }g }	g }
g }|D ]e}|j | }|| jv r/| j| }||j |	|j |
|j q
|sA|
d |d |	d q
t|| j }|dur| j	re|re| 
|j|r[| jn| j||\}}}n| ||rm| jn| j|\}}}|
| |	| || q
| |r8| j|d | j| j }|du r|j | }|| jv r| j| }|dsJ d| d|jdsJ d| d|jjj}n|| jv sJ d	|d
| j| }| j||| j|d}|du r dS |r| | n|| |d }|jdkr|
|j ||j d  |	|j d  q
|
|jd  ||jd  |	|jd  q
| jdurf| jj||g||||dd\}}}}|
|d  ||d  |	|d  q
t d| d| j! |
|	||fS )a  
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        r   Nr   rX   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r   )NNNNr   ri   r      r   T)r  rH   r  r   r  z!Invalid tensor name to quantize: z @graph scope)"r2   rD   r   r   r   r   r   r,   rp   rG   quantize_weight_per_channelr   rI   rJ   quantize_initializerr  find_node_by_namer7   r-   r/   r   rX   r   r   r9   r   ru   rc   ra   r1   rT   r  r=   r8   )rF   r:   r  r  rH   r  r   r  Zscale_namesZzero_point_namesZquantized_input_namesrn   r   Z
node_inputr   rp   q_weight_namer   r   r   r   r.   r   Zquantize_input_nodesZparent_quantized_input_namesZparent_zero_point_namesZparent_scale_namesr   r   r   r!   Z__quantize_inputst  s   



















zONNXQuantizer.__quantize_inputsc           	      C   sj   |j | jv r| j|j  }|j|j|jfS | ||||\}}}t|j |||tjd}|| j|j < |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        N)	r   rD   r   r   r   Zquantize_initializer_implr
   r   r   )	rF   r   r   rH   keep_float_weightr   r  r   r   r   r   r!   r    s&   	

z"ONNXQuantizer.quantize_initializerc           
      C   sd   || j v r| j | }|j|j|jfS | |||||\}}}	t|||	|tjd }|| j |< |||	fS rr   )rD   r   r   r   Z quantize_weight_per_channel_implr
   r   r   )
rF   r   rI   Zchannel_axisrH   r  r   r  r   r   r   r   r!   r    s&   
	




z)ONNXQuantizer.quantize_weight_per_channelc                 C   s   || j v rj|| jvrj| j | }t|j| j }| jjjdks*| jjjdkr9|dur9|du s9tj	|j
dks9J |d }| j|| j| j }|du ra|j|j|jg}tjd||g|}|S ||jd ksjJ dS )a  
        Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
        it back to float32 or float16
            parameter value_name: value to dequantize
            parameter new_nodes_list: List of new nodes created before processing current node
            return: None if there is already a DequantizeLinear node that dequantizes it
                    A DequantizeLinear node otherwise
        rO   Nr   Z_DequantizeLinearrj   r   )rD   rE   r   r   r,   rp   rP   rQ   r   r   r   r  r7   r-   r   r   rR   rd   r1   )rF   Z
value_namer   r   Zdqlinear_nameZdqlinear_nodeZdqlinear_inputsdequantize_noder   r   r!   _dequantize_value8  s&   	

zONNXQuantizer._dequantize_valuec                 C   s6   | j  jD ]}| |j}|dur| j| qdS )z
        Dequantize output if it is quantized
            parameter new_nodes_list: List of new nodes created before processing current node
            return: List of new nodes created
        N)r,   r-   r1   r  r   r7   r   )rF   r1   r  r   r   r!   r~   _  s   z!ONNXQuantizer._dequantize_outputsc                 C   s@  | j d u rd S |   i }| j D ]}| j | }t|ts)tdt| d|d| jj|i d}| j}d|v r=|d j	}d|v rOd|v rO|d |d }}nE|t
jjkr`t||jd \}}n4|d	|jd
 }|d|jd }	|d| j}
|dd}t|||
d\}}t||	|||
| j\}}t|||d||< q|S )Nr   r   r   )Zdefault_valr   r   r   r   r   r   r   	symmetricrH   F)rH   r  )r   r   r   )rK   Zadjust_tensor_rangesr   r   r   rX   Ztensor_quant_overridesZget_per_tensor_overridesrJ   r   rQ   r   ZFLOAT8E4M3FNr   Zavg_stdgetZrange_valueZis_activation_symmetricr   r   Zmin_real_ranger   )rF   r?   r   tdZquant_overridesr   zeror   r   r   r  rH   ZqminZqmaxr   r   r!   r>   k  s0   




z+ONNXQuantizer.calculate_quantization_paramsrr   )F)NN)NNN)r   )FFr   F)TFFr   F)FF)TF)'__name__
__module____qualname__r+   rW   rh   ro   rq   ru   rU   r   r   r   r   r   r   r   r   r   r   r   Zndarrayr   rQ   r   booltupler   r   r   r  r	  r
  r  r  r  r  r~   r>   r   r   r   r!   r   &   sr    
I"-
T
^<
F
%
8
&H



 
&
 'r   )#ry   numpyr   rQ   Zonnx.numpy_helperr   r   Zbase_quantizerr   r   Z	calibrater   Z
onnx_modelr   Zquant_utilsr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r   r   r   r!   <module>   s   L