o
    iL                      @   s   d dl Z d dlZd dlZd dlmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ G dd	 d	eZ	 G d
d deZ	 G dd deZG dd deZdS )    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypefind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                       $   e Zd Z fddZdd Z  ZS )	QOpMatMulc                       t  || d S Nsuper__init__selfZonnx_quantizerZ	onnx_node	__class__ c/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/quantization/operators/matmul.pyr         zQOpMatMul.__init__c                 C   s   | j | jstd| jj d dS | j | jjd s5| j | jjd s5td| jj d dS | j j	rU| j 
| jjd sUtd| j j d| jj d dS d	S )
NzIgnore MatMul ]Fr	   r   z&Ignore MatMul due to non float inputs z%Ignore MatMul due to non constant B: [T)	quantizerZshould_quantize_nodenodeloggingdebugnameZis_float_tensorinputinfoZq_matmul_const_b_onlyZfind_initializer_in_pathZgraph_scope)r   r   r   r   should_quantize   s    zQOpMatMul.should_quantize)__name__
__module____qualname__r   r#   __classcell__r   r   r   r   r          r   c                       r   )MatMulIntegerc                    r   r   r   r   r   r   r   r   )   r   zMatMulInteger.__init__c                 C   s  | j }|jdks
J | j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 |jd d }
|jrH|jd nd}tj	
d	|| |
g|}|| |
d
 }| jj|jd dd}tj	j
d|
g|g|
d |d}|| t|dksJ |r|d n|d d |d  d }t|| jj}|d u rt||d |}|| |jd }d}|r|d }|t||g|jd | | j j|7  _d S )NMatMulr   r	   TZreduce_rangeZop_level_per_channelZ_output_quantized_quant r)   Z_cast_output)	mandatoryZCastZ_cast)tor   Z_scales_mul__mulz:0Z_output_scale_mul)r   op_typer   quantize_activationquantize_weightextendoutputr    onnxhelper	make_nodeappendZget_tensor_typelenr   	new_nodesr   )r   r   quantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightZmatmul_integer_outputZmatmul_integer_nameZmatmul_integer_nodeZcast_op_outputZotypeZ	cast_nodeZscales_mul_opZscales_mul_nodeZscales_mul_op_outputZoutput_scale_mul_opr   r   r   quantize,   st   








zMatMulInteger.quantizer$   r%   r&   r   rE   r'   r   r   r   r   r)   (   r(   r)   c                       s(   e Zd Z fddZ fddZ  ZS )QLinearMatMulc                    r   r   r   r   r   r   r   r   |   r   zQLinearMatMul.__init__c                    s  | j }|jdks
J | j|dg\}}}}| jj|dgddd\}}}}	|| || || ||	 | j|jd \}
}}}}|
rM|d u rRt 	 S |jd t
 }|jra|jd nd}g }||d  ||d  ||d  ||d  ||d  ||d  || || | jjtjjtjjtjjtjjhv rdnd}tjjd	||g||d
}|| t|jd |||tj}|| jj|jd < | j j|7  _d S )Nr*   r   r	   Tr+   r,   r-   zcom.microsoftrG   )domain)r   r2   r   r3   r4   r5   Z_get_quantization_paramsr6   r   rE   r   r    r:   Zweight_qType
onnx_protoZTensorProtoZFLOAT8E4M3FNZFLOAT8E4M3FNUZZ
FLOAT8E5M2ZFLOAT8E5M2FNUZr7   r8   r9   r   r   ZInputZquantized_value_mapr<   )r   r   r=   r>   r?   r@   rA   rB   rC   rD   Z
data_foundZoutput_scale_nameZoutput_zp_namer0   Zqlinear_matmul_outputZqlinear_matmul_nameZqlinear_matmul_inputsrH   Zqlinear_matmul_nodeZq_outputr   r   r   rE      s   







zQLinearMatMul.quantizerF   r   r   r   r   rG   {   s    rG   c                       r   )	QDQMatMulc                    r   r   r   r   r   r   r   r      r   zQDQMatMul.__init__c                 C   s   | j }|jdks
J | jr|j}nt|j|j}|D ].}t|| jj	
 rC| jj|d|jd\}}|r<| j|| q| j| q| j| qd S )Nr*   r	   )Zdefault_axisr2   )r   r2   Zdisable_qdq_for_node_outputr!   	itertoolschainr6   r   r   modelZinitializerZis_tensor_per_channelZ"quantize_weight_tensor_per_channelZquantize_weight_tensorZquantize_activation_tensor)r   r   Znodes_to_iterateZtensor_nameZis_per_channelZchannel_axisr   r   r   rE      s   
zQDQMatMul.quantizerF   r   r   r   r   rJ      r(   rJ   )rK   r   r7   r   rI   Zquant_utilsr   r   r   r   r   Zbase_operatorr
   Zqdq_base_operatorr   r   r)   rG   rJ   r   r   r   r   <module>   s    NU