o
    i3+                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
mZ d dlmZmZ G dd deZdd Zdd	 Zd
d ZedkrIe  dS dS )    N)QuantFormat	QuantTypeStaticQuantConfigquantize)CalibrationDataReaderCalibrationMethodc                   @   s*   e Zd Zdd ZdefddZdd ZdS )	OnnxModelCalibrationDataReaderc           
         s   t j|_fddt jD }t| }g }|D ]1 i } fddtt  D }fdd|D }t	||ddD ]	\}}	|	||j
< qA|| qt|t|ks[J t|d t|ksgJ t|_d S )Nc                    s&   g | ]}| d rtj j|qS )Ztest_data_set_)
startswithospathjoin	model_dir.0aself i/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/quantization/static_quantize_runner.py
<listcomp>   s
    
z;OnnxModelCalibrationDataReader.__init__.<locals>.<listcomp>c                    s   g | ]	}t j |qS r   )r
   r   r   r   )data_dirr   r   r      s    c                    s   g | ]}  |qS r   )read_onnx_pb_data)r   Z	data_pathr   r   r   r      s    F)strictr   )r
   r   dirnamer   listdironnxruntimeZInferenceSessionZ
get_inputssortedzipnameappendlenitercalibration_data)
r   
model_pathZ	data_dirsZmodel_inputsZname2tensorsZname2tensorZ
data_pathsZdata_ndarraysmodel_inputZdata_ndarrayr   )r   r   r   __init__   s    

z'OnnxModelCalibrationDataReader.__init__returnc                 C   s   t | jdS )z9generate the input data dict for ONNXinferenceSession runN)nextr"   r   r   r   r   get_next!   s   z'OnnxModelCalibrationDataReader.get_nextc                 C   sP   t  }t|d}||  W d    n1 sw   Y  t j|}|S )Nrb)onnxZTensorProtoopenZParseFromStringreadZnumpy_helperZto_array)r   Zfile_pbtensorfretr   r   r   r   %   s   z0OnnxModelCalibrationDataReader.read_onnx_pb_dataN)__name__
__module____qualname__r%   dictr(   r   r   r   r   r   r      s    r   c                  C   s  t jdd} | jddddd | jdd	dd
d | jdg dddd | jdg dddd | jdddd | jdddd | jdddd | jdddd | jdddd | jdd g d!d" | jd#d$g d%d&d' | jd(d)d)d*gd+d' | jd,dd-d | jd.dd/d | jd0dd1d | jd2dd3d | jd4td5d6d7 | jd8dd9d | jd:dd;d | jd<dd=d | jd>d d d?d" | jd@d d dAd" | jdBdCdDdEg dFdG | jdHdIdJ |  S )KNz%The arguments for static quantization)descriptionz-iz--input_model_pathTzPath to the input onnx model)requiredhelpz-oz--output_quantized_model_pathz'Path to the output quantized onnx modelz--activation_typeqint8quint8Zqint16Zquint16Zqint4Zquint4Zqfloat8e4m3fnr9   z!Activation quantization type used)choicesdefaultr6   z--weight_typer8   zWeight quantization type usedz--enable_subgraph
store_truez#If set, subgraph will be quantized.)actionr6   z--force_quantize_no_input_checka   By default, some latent operators like maxpool, transpose, do not quantize if their input is not quantized already. Setting to True to force such operator always quantize input and so generate quantized output. Also the True behavior could be disabled per node using the nodes_to_exclude.z--matmul_const_b_onlyz3If set, only MatMul with const B will be quantized.z--add_qdq_pair_to_weightzjIf set, it remains floating-point weight and inserts both QuantizeLinear/DeQuantizeLinear nodes to weight.z--dedicated_qdq_pairzFIf set, it will create identical and dedicated QDQ pair for each node.z)--op_types_to_exclude_output_quantization+z]If any op type is specified, it won't quantize the output of ops with this specific op types.)nargsr;   r6   z--calibration_methodminmaxr@   ZentropyZ
percentiledistributionzCalibration method used)r;   r:   r6   z--quant_formatqdq	qoperatorzQuantization format usedz--calib_tensor_range_symmetriczoIf enabled, the final range of tensor during calibration will be explicitly set to symmetric to central point 0z--calib_moving_averagezIf enabled, the moving average of the minimum and maximum values will be computed when the calibration method selected is MinMax.z--disable_quantize_biaszWhether to quantize floating-point biases by solely inserting a DeQuantizeLinear node If not set, it remains floating-point bias and does not insert any quantization nodes associated with biases.z--use_qdq_contrib_opszIf set, the inserted QuantizeLinear and DequantizeLinear ops will have the com.microsoft domain, which forces use of ONNX Runtime's QuantizeLinear and DequantizeLinear contrib op implementations.z--minimum_real_rangeg-C6?a  If set to a floating-point value, the calculation of the quantization parameters (i.e., scale and zero point) will enforce a minimum range between rmin and rmax. If (rmax-rmin) is less than the specified minimum range, rmax will be set to rmin + MinimumRealRange. This is necessary for EPs like QNN that require a minimum floating-point range when determining  quantization parameters.)typer;   r6   z --qdq_keep_removable_activationsz|If set, removable activations (e.g., Clip or Relu) will not be removed, and will be explicitly represented in the QDQ model.z*--qdq_disable_weight_adjust_for_int32_biaszIf set, QDQ quantizer will not adjust the weight's scale when the bias has a scale (input_scale * weight_scale) that is too small.z--per_channelz&Whether using per-channel quantizationz--nodes_to_quantizezfList of nodes names to quantize. When this list is not None only the nodes in this list are quantized.z--nodes_to_excludeznList of nodes names to exclude. The nodes in this list will be excluded from quantization when it is not None.z--op_per_channel_axis   r   )ZOP_TYPEZPER_CHANNEL_AXISa8  Set channel axis for specific op type, for example: --op_per_channel_axis MatMul 1, and it's effective only when per channel quantization is supported and per_channel is True. If specific op type supports per channel quantization but not explicitly specified with channel axis, default channel axis will be used.)r?   r=   metavarr;   r6   z--tensor_quant_overridesz4Set the json file for tensor quantization overrides.)r6   )argparseArgumentParseradd_argumentfloat
parse_args)parserr   r   r   parse_arguments-   s   	

rN   c                 C   s   | si S t | }t|}W d    n1 sw   Y  |D ]}|| D ]}tj|d tjd|d< t|d |d< q%q|S )Nscale)ZdtypeZ
zero_point)r+   jsonloadnparrayZfloat32)filer.   Zquant_override_dictr-   Zenc_dictr   r   r   get_tensor_quant_overrides   s   
rU   c            
      C   s  t  } t| jd}tjtjtjtjtjtj	tj
d}|| j }|| j }t| j}| j| j| j| j| j| j|| j| j| j | j| j| j| jt| jd}tjtj tj!tj"d}t#j$t#j%d}t&||| j' || j( ||d | j)| j*| j+ddd |d}	t,| j| j-|	d d S )	N)r#   r7   )ZEnableSubgraphZForceQuantizeNoInputCheckZMatMulConstBOnlyZAddQDQPairToWeightZ"OpTypesToExcludeOutputQuantizationZDedicatedQDQPairZ QDQOpTypePerChannelSupportToAxisZCalibTensorRangeSymmetricZCalibMovingAverageZQuantizeBiasZUseQDQContribOpsZMinimumRealRangeZQDQKeepRemovableActivationsZ"QDQDisableWeightAdjustForInt32BiasZTensorQuantOverridesrA   )rC   rD   F)Zcalibration_data_readerZcalibrate_methodquant_formatactivation_typeweight_typeZop_types_to_quantizenodes_to_quantizenodes_to_excludeper_channelZreduce_rangeZuse_external_data_formatZcalibration_providersextra_options)r$   Zmodel_outputZquant_config).rN   r   Zinput_model_pathr   ZQInt8ZQUInt8ZQInt16ZQUInt16ZQInt4ZQUInt4ZQFLOAT8E4M3FNrW   rX   r3   Zop_per_channel_axisZenable_subgraphZforce_quantize_no_input_checkZmatmul_const_b_onlyZadd_qdq_pair_to_weightZ'op_types_to_exclude_output_quantizationZdedicated_qdq_pairZcalib_tensor_range_symmetricZcalib_moving_averageZdisable_quantize_biasZuse_qdq_contrib_opsZminimum_real_rangeZqdq_keep_removable_activationsZ(qdq_disable_weight_adjust_for_int32_biasrU   Ztensor_quant_overridesr   ZMinMaxZEntropyZ
PercentileDistributionr   ZQDQZ	QOperatorr   Zcalibration_methodrV   rY   rZ   r[   r   Zoutput_quantized_model_path)
argsZdata_readerZarg2quant_typerW   rX   Z'qdq_op_type_per_channel_support_to_axisr\   Zarg2calib_methodZarg2quant_formatZsqcr   r   r   main   sj   
	

r_   __main__)rH   rP   r
   numpyrR   r*   r   Zonnxruntime.quantizationr   r   r   r   Z"onnxruntime.quantization.calibrater   r   r   rN   rU   r_   r0   r   r   r   r   <module>   s       	=
