o
    i@#                     @   s(  d dl Z d dlZd dlZd dlZd dlmZ d dlZd dl	m
Z
mZmZmZ d dlmZ ddlmZ ddlmZ eeZG dd dZd	d
 Zedkre ZejrXeej ejZejZ ej!"e rue#de  d e$de  de%eZ&ee&ej'ej(ej)dZ*e*+  e*j&,e d dS dS )    N)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4   )	ONNXModel)attribute_to_kwargc                   @   s   e Zd ZdZdZdZddededefdd	Ze	d
e
e deeef fddZdejdejfddZdede
e defddZde
e fddZdd ZdS )MatMulBnb4QuantizerzMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typer   r   Nmodel
quant_type
block_sizec                 C   s@   |pg }|t jt jfv sJ t|| _|| _|| _t|| _d S N)	r
   FP4NF4r   r   r   r   setnodes_to_exclude)selfr   r   r   r    r   h/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/quantization/matmul_bnb4_quantizer.py__init__%   s   
zMatMulBnb4Quantizer.__init__
graph_pathreturnc                 C   sL   t t|d ddD ]}|| }|jD ]}|j| kr"||f    S qq
dS )Nr   )NN)rangeleninitializername)r   r   gidgraphZtensorr   r   r   Z__get_initializer-   s   

z%MatMulBnb4Quantizer.__get_initializerfpweightc                 C   s   t |jdkrtd|  }|j\}}|| }| j}|| d | }|d d }tj|dd}	tj||jd}
t	|	||
|| j
|| |	|
fS )z4b quantize fp32/fp16 weight   z9Current bnb4 block quantization only supports 2D tensors!r   Zuint8)dtype)r   shape
ValueErrorZ	transposecopyr   npZzerosr"   r   r   )r   r    Z
fpweight_trowscolsZnumelr   Z
num_blocksZquantized_numelpackedabsmaxr   r   r   bnb4_block_quant6   s   
z$MatMulBnb4Quantizer.bnb4_block_quantnodegraph_stackc                 C   s  |j dkr|S td|j d |j| jv r#td|j d |S |jd }t||\}}|du r;td |S tj	
|}t|jd	krOtd
 |S | |\}}tj	|}	|jd |	_|jD ]}
|
j|krt|j|
  nqetj	|}|jd |_|j|	|g i }|j\}}||d< ||d< | j|d< | j|d< tjj	d|jd |	j|jg|jd g|jr|jd nddd|}td|j d |S )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeZMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...r   Nz2MatMul doesn't have const weight. Skip to quantizer!   z)MatMul weight is not 2D. Skip to quantizeZ_Bnb4Z_absmaxKNr   r   
MatMulBnb4r    com.microsoft)ZinputsZoutputsr   domainzcomplete quantization of )r0   )op_typeloggerdebugr   r   inputr
   %_MatMulBnb4Quantizer__get_initializeronnxZnumpy_helperZto_arrayr   r#   r+   Z
from_arrayremover   extendr   r   helper	make_nodeoutput)r   r,   r-   ZinputBBZBs_graphZB_arrayr)   r*   ZB_quantr7   Zabsmax_tensorkwargsr'   r(   Zmatmul_bnb4_noder   r   r   _bnb4_matmul_node_weightL   sX   









	z,MatMulBnb4Quantizer._bnb4_matmul_node_weightc                 C   s  g }|d }|j D ]q}dd |jD }|rqi }|jD ]D}|jtjjkr2||j |j| 	|i}n'|jtjj
krUg }	|jD ]}
||
 |	| 	|g q>|j|	i}nt|}|| qtjj|j|j|jfd|ji|}|| || q	|d |j | |  |S )Nr   c                 S   s,   g | ]}|j tjjks|j tjjkr|qS r   )typer9   AttributeProtoGRAPHGRAPHS).0attrr   r   r   
<listcomp>   s
    z9MatMulBnb4Quantizer._process_subgraph.<locals>.<listcomp>r   r,   )r,   	attributerB   r9   rC   rD   appendgr   _process_subgraphrE   Zgraphsr;   r	   updater<   r=   r4   r7   r>   rA   Z
ClearFieldpop)r   r-   Z	new_nodesr   r,   Zgraph_attrsr@   rG   kvvalueZsubgraphr   r   r   rL      s@   




z%MatMulBnb4Quantizer._process_subgraphc                 C   sd   | j  g}| j  }d}|D ]	}|jdkrd}q|s&|tjddg | | | j 	  d S )NFr2   Tr   )
r   r   opset_importr3   r;   r9   r<   Zmake_opsetidrL   Zclean_initializers)r   r-   rQ   Zhas_ms_domainZopsetr   r   r   process   s   


zMatMulBnb4Quantizer.processr   )__name__
__module____qualname____doc__r   r   r   intr   staticmethodlistr   tupler   r8   nptZ	ArrayLiker&   Zndarrayr+   r   rA   rL   rR   r   r   r   r   r
      s     7&r
   c                  C   s   t jdd} | jdddd | jdddd | jd	d
dtjtjgdd | jdd
ddd | jddd
dd | jd
d | jddtd
g dd |  S )Na  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFr   z&Quantization data type. 0: FP4, 1: NF4)r]   defaultchoicesr^   z--block_size@   zVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r]   r_   r^   z-vz	--verbose
store_true)r]   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrB   r]   r_   r^   )	argparseArgumentParseradd_argumentr
   r   r   set_defaultsstr
parse_args)parserr   r   r   rl      s:   	
	rl   __main__zfile z already exists)r   T)-rg   loggingosnumpyr&   Znumpy.typingtypingr[   r9   Zonnx.onnx_pbr   r   r   r   Zonnxruntime.capi._pybind_stater   Z
onnx_modelr   Zquant_utilsr	   	getLoggerrS   r5   r
   rl   argsrd   setLevelDEBUGZinput_modelZinput_model_pathZoutput_modelZoutput_model_pathpathexistserror	Exceptionloadr   r   r   r   ZquantrR   Zsave_model_to_filer   r   r   r   <module>   s8   
 "'
