o
    ¢Äi3Ä  ã                   @   sx   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	Z	ddl
Z	ddlmZmZ e d¡ZG dd	„ d	ƒZdS )
zClass for ONNX model.é    N)Údeque)ÚPathé   )ÚMAXIMUM_PROTOBUFÚfind_by_nameZneural_compressorc                   @   sÀ  e Zd ZdZdd„ Zdd„ Zedd„ ƒZedd	„ ƒZej	d
d	„ ƒZdd„ Z
edd„ ƒZej	dd„ ƒZedd„ ƒZedd„ ƒZej	dd„ ƒZdd„ Zdd„ Zdd„ Zedd„ ƒZdd„ Zdd „ Zd!d"„ Zd#d$„ Zd%d&„ Zd'd(„ Zd)d*„ Zd+d,„ Zd-d.„ Zd/d0„ Zd1d2„ Zd3d4„ Zd5d6„ Zd7d8„ Zd9d:„ Z d;d<„ Z!d=d>„ Z"d?d@„ Z#d•dBdC„Z$edDdE„ ƒZ%dFdG„ Z&edHdI„ ƒZ'dJdK„ Z(dLdM„ Z)d–dOdP„Z*d–dQdR„Z+d–dSdT„Z,dUdV„ Z-dWdX„ Z.dYdZ„ Z/d•d[d\„Z0e1d]d^„ ƒZ2d—d_d`„Z3e1dadb„ ƒZ4d—dcdd„Z5dedf„ Z6d•dgdh„Z7d–didj„Z8dkdl„ Z9d•dmdn„Z:dodp„ Z;dqdr„ Z<dsdt„ Z=dudv„ Z>d–dwdx„Z?	N	N	N	Nd˜dydz„Z@	N	Nd—d{d|„ZAd}d~„ ZBdd€„ ZC	d™d‚dƒ„ZDd„d…„ ZEd†d‡„ ZFdˆd‰„ ZGdŠd‹„ ZHd–dŒd„ZIdšdd„ZJd‘d’„ ZKd“d”„ ZLdNS )›Ú	ONNXModelzBuild ONNX model.c                 K   s.  t |tƒs|ntj|dd| _t |tƒsdn|| _|  ¡  | jr0| jdu r0| dd¡s0t	 
d¡ | jrKt |tƒrK| dd¡rKtj | jtj | j¡¡ d| _t |tƒrrtj t|ƒj d¡ ¡ ¡rrd	d
lm} | t|ƒj ¡ ¡| _i | _i | _i | _|  | jjj¡ |  | jjj¡ i | _ |  !¡  d| _"dS )a;  Initialize an ONNX model.

        Args:
            model (str or ModelProto): path to onnx model or loaded ModelProto model object.
            ignore_warning (bool): ignore large model warning. Default is False.
            load_external_data (bool): load external data for large model. Default is True.
        F)Úload_external_dataNÚignore_warningzPModel size > 2GB. Please use model path instead of onnx model object to quantizer   Túconfig.jsonr   )Ú
AutoConfig)#Ú
isinstanceÚstrÚonnxÚloadÚ_modelÚ_model_pathÚcheck_is_large_modelÚ_is_large_modelÚgetÚloggerÚwarningÚexternal_data_helperÚload_external_data_for_modelÚosÚpathÚdirnameÚ_configÚexistsr   ÚparentÚjoinpathÚas_posixZtransformersr   Zfrom_pretrainedZnode_name_counterÚ_output_name_to_nodeÚ_input_name_to_nodesÚ_get_input_name_to_nodesÚgraphÚnodeÚ_get_output_name_to_nodeÚ_graph_infoÚ_get_graph_infoÚ	_q_config)ÚselfÚmodelÚkwargsr   © r-   úo/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/quantization/neural_compressor/onnx_model.pyÚ__init__)   s&   
&
zONNXModel.__init__c                 C   s°   d}| j jjD ]K}| d¡r|jtjjkrd| _ dS z| 	¡ }|t
 |¡7 }W n tyG } zdt|ƒv rAd| _W Y d}~ dS |‚d}~ww |tkrRd| _ dS qd| _dS )zCheck model > 2GB.r   Údata_locationTNz$exceeds maximum protobuf size of 2GBF)r   r$   ÚinitializerÚHasFieldr0   r   ÚTensorProtoÚEXTERNALr   ZSerializeToStringÚsysÚ	getsizeofÚ	Exceptionr   r   )r*   Z	init_sizeÚinitZ
init_bytesÚer-   r-   r.   r   J   s(   €ûþ
zONNXModel.check_is_large_modelc                 C   ó   | j S )z!Check the onnx model is over 2GB.)r   ©r*   r-   r-   r.   Úis_large_modela   ó   zONNXModel.is_large_modelc                 C   r:   )zReturn model path.©r   r;   r-   r-   r.   Ú
model_pathf   r=   zONNXModel.model_pathc                 C   ó
   || _ dS )zSet model path.Nr>   )r*   r   r-   r-   r.   r?   k   ó   
c                 C   s   dS )zReturn framework.Zonnxruntimer-   r;   r-   r-   r.   Ú	frameworkp   s   zONNXModel.frameworkc                 C   r:   )zReturn q_config.©r)   r;   r-   r-   r.   Úq_configt   r=   zONNXModel.q_configc                 C   r@   )zSet q_config.NrC   )r*   rD   r-   r-   r.   rD   y   rA   c                 C   r:   )z8Return huggingface config if model is Transformer-based.)r   r;   r-   r-   r.   Ú	hf_config~   r=   zONNXModel.hf_configc                 C   r:   )zReturn model itself.)r   r;   r-   r-   r.   r+   ƒ   r=   zONNXModel.modelc                 C   sD   || _ i | _|  ¡  i | _i | _|  | j jj¡ |  | j jj¡ dS )zSet model itself.N)	r   r'   r(   r!   r"   r#   r$   r%   r&   )r*   r+   r-   r-   r.   r+   ˆ   s   c                 C   ó   dd„ | j jjD ƒS )zReturn input of model.c                 S   ó   g | ]}|j ‘qS r-   ©Úname©Ú.0Úir-   r-   r.   Ú
<listcomp>•   ó    z#ONNXModel.input.<locals>.<listcomp>)r   r$   Úinputr;   r-   r-   r.   rO   “   ó   zONNXModel.inputc                 C   rF   )zReturn output of model.c                 S   rG   r-   rH   rJ   r-   r-   r.   rM   ™   rN   z$ONNXModel.output.<locals>.<listcomp>)r   r$   Úoutputr;   r-   r-   r.   rQ   —   rP   zONNXModel.outputc                 C   s>   i | _ |  ¡  i | _i | _|  | jjj¡ |  | jjj¡ dS )zUpdate model info.N)	r'   r(   r!   r"   r#   r   r$   r%   r&   r;   r-   r-   r.   Úupdate›   s   zONNXModel.updatec                 C   r:   )zEReturn ORT Graph Info object holding information about backend graph.)r'   r;   r-   r-   r.   Ú
graph_info¤   r=   zONNXModel.graph_infoc                 C   s(   | j jjD ]}| j |j|ji¡ qdS )zUpdate graph info.N)r   r$   r%   rS   rR   rI   Úop_type©r*   r%   r-   r-   r.   r(   ©   s   ÿzONNXModel._get_graph_infoc              	   C   sè   t j |¡d dkrt j t j |¡d ¡stdƒ‚| jrAtj | j	t j | j
¡d ¡ tj| j	|dd| d¡d d dd	d
 nt | j	|¡ | jdurrt| jdƒsUdn| jj}|| jj_t|ƒj d¡ ¡ }| jj|d	d dS dS )zSave ONNX model.r   Ú z!"root" directory does not exists.Tú/éÿÿÿÿÚ_dataé   F©Zsave_as_external_dataÚall_tensors_to_one_fileÚlocationZsize_thresholdZconvert_attributeNÚ
model_typer
   )Zuse_diff)r   r   Úsplitr   Ú
ValueErrorr<   r   r   r   r   r   Ú
save_modelÚsaver   Úhasattrr^   Ú	__class__r   r   r   r    Zto_json_file)r*   Úrootr^   Zoutput_config_filer-   r-   r.   rb   ®   s(   ,ù


üzONNXModel.savec                 C   ó
   | j jjS )zReturn model nodes.)r   r$   r%   r;   r-   r-   r.   ÚnodesÆ   ó   
zONNXModel.nodesc                 C   rf   )zReturn model initializer.)r   r$   r1   r;   r-   r-   r.   r1   Ê   rh   zONNXModel.initializerc                 C   ó   | j jS )zReturn model graph.)r   r$   r;   r-   r-   r.   r$   Î   ó   zONNXModel.graphc                 C   ri   )zReturn model ir_version.)r   Ú
ir_versionr;   r-   r-   r.   rk   Ò   rj   zONNXModel.ir_versionc                 C   ri   )zReturn model opset_import.)r   Úopset_importr;   r-   r-   r.   rl   Ö   rj   zONNXModel.opset_importc                 C   ó&   || j jjv r| j jj |¡ dS dS )zRemove a node from model.N)r   r$   r%   ÚremoverU   r-   r-   r.   Úremove_nodeÚ   ó   ÿzONNXModel.remove_nodec                 C   ó   |D ]}|   |¡ qdS )zRemove nodes from model.N)ro   )r*   Znodes_to_remover%   r-   r-   r.   Úremove_nodesß   ó   ÿzONNXModel.remove_nodesc                 C   s   | j jj |g¡ dS )zAdd a node to model.N©r   r$   r%   ÚextendrU   r-   r-   r.   Úadd_nodeä   s   zONNXModel.add_nodec                 C   s   | j jj |¡ dS )zAdd nodes to model.Nrt   )r*   Znodes_to_addr-   r-   r.   Ú	add_nodesè   rP   zONNXModel.add_nodesc                 C   s0   t |j| jjjƒdu r| jjj |g¡ dS dS )zAdd a initializer to model.N)r   rI   r   r$   r1   ru   ©r*   Útensorr-   r-   r.   Úadd_initializerì   s   ÿzONNXModel.add_initializerc                 C   rq   )zAdd initializers to model.N)rz   )r*   Ztensorsry   r-   r-   r.   Úadd_initializersñ   rs   zONNXModel.add_initializersc                 C   ó&   | j jjD ]}|j|kr|  S qdS )zGet an initializer by name.N)r   r$   r1   rI   )r*   rI   ry   r-   r-   r.   Úget_initializerö   ó
   
ÿzONNXModel.get_initializerc                 C   s:   d}|   |¡du r|S |  ¡ D ]}||jv r|d7 }q|S )z(Get the number of shares of initializer.r   Nr   )r}   rg   rO   )r*   rI   Únumr%   r-   r-   r.   Úget_initializer_share_numý   s   
€z#ONNXModel.get_initializer_share_numc                 C   r|   )zGet a node by name.N)r   r$   r%   rI   )r*   rI   r%   r-   r-   r.   Úget_node  r~   zONNXModel.get_nodec                 C   rm   )z!Remove an initializer from model.N)r   r$   r1   rn   rx   r-   r-   r.   Úremove_initializer  rp   zONNXModel.remove_initializerc                 C   rq   )zRemove initializers from model.N)r‚   )r*   Zinit_to_remover1   r-   r-   r.   Úremove_initializers  rs   zONNXModel.remove_initializersFc                 C   sf   |   |¡}|  |¡ |j}|j}|stj |||| ¡  ¡ ¡ntjj|||| 	¡ |d}|  
|¡ dS )zUpdate initializer.)ÚrawN)r}   r‚   ÚdimsÚ	data_typer   ÚhelperZmake_tensorÚflattenÚtolistZtostringrz   )r*   ry   Úarrayr„   Z
old_tensorr…   r†   Z
new_tensorr-   r-   r.   Úset_initializer  s   

ÿýzONNXModel.set_initializerc                 C   r:   )zReturn input names of nodes.)r"   r;   r-   r-   r.   Úinput_name_to_nodes&  r=   zONNXModel.input_name_to_nodesc                 C   s†   |D ]>}dd„ |j D ƒ}t|ƒdkr|D ]	}|  |jj¡ q|jD ]}t| ¡ ƒdkr?|| jvr7|g| j|< q!| j|  |¡ q!qdS )zGet input names of nodes.c                 S   ó,   g | ]}|j tjjks|j tjjkr|‘qS r-   ©Útyper   ZAttributeProtoZGRAPHZGRAPHS©rK   Úattrr-   r-   r.   rM   .  ó
    þz6ONNXModel._get_input_name_to_nodes.<locals>.<listcomp>r   N)	Ú	attributeÚlenr#   Úgr%   rO   Ústripr"   Úappend)r*   rg   r%   Úattrsr‘   Ú
input_namer-   r-   r.   r#   +  s   þ

€û÷z"ONNXModel._get_input_name_to_nodesc                 C   r:   )zReturn output names of nodes.)r!   r;   r-   r-   r.   Úoutput_name_to_node=  r=   zONNXModel.output_name_to_nodec                 C   sh   |D ]/}dd„ |j D ƒ}t|ƒdkr|D ]	}|  |jj¡ q|jD ]}t| ¡ ƒdkr0|| j|< q!qdS )zGet output names of nodes.c                 S   r   r-   rŽ   r   r-   r-   r.   rM   E  r’   z6ONNXModel._get_output_name_to_node.<locals>.<listcomp>r   N)r“   r”   r&   r•   r%   rQ   r–   r!   )r*   rg   r%   r˜   r‘   Úoutput_namer-   r-   r.   r&   B  s   þ

€þ÷z"ONNXModel._get_output_name_to_nodec                 C   s>   g }|   |¡D ]}|  |¡D ]}|j|jkr| |¡ qq|S )zGet siblings nodes.)Úget_parentsÚget_childrenrI   r—   )r*   r%   Zsiblingsr   Úchildr-   r-   r.   Úget_siblingsQ  s   
€þzONNXModel.get_siblingsNc                 C   sB   |du r| j }g }|jD ]}||v r|| D ]}| |¡ qq|S )zGet children nodes.N)r"   rQ   r—   )r*   r%   rŒ   ÚchildrenrQ   rž   r-   r-   r.   r   Z  s   
€zONNXModel.get_childrenc                 C   s8   |du r| j }g }|jD ]}||v r| || ¡ q|S )zGet parents nodes.N)r!   rO   r—   )r*   r%   rš   ÚparentsrO   r-   r-   r.   rœ   f  s   
€zONNXModel.get_parentsc                 C   s>   |du r| j }t|jƒ|krdS |j| }||vrdS || S )zGet parent node by idx.N)r!   r”   rO   )r*   r%   Úidxrš   rO   r-   r-   r.   Ú
get_parentq  s   
zONNXModel.get_parentc                 C   s"   t |jƒ}| |¡ t||ƒ}|S )zFind out node by name.)Úlistr%   ru   r   )r*   Ú	node_nameZnew_nodes_listr$   Zgraph_nodes_listr%   r-   r-   r.   Úfind_node_by_name  s   


zONNXModel.find_node_by_namec                 C   s4   g }|j D ]}|jD ]}||jkr| |¡ q
q|S )z2Find all nodes with given initializer as an input.)r%   rO   rI   r—   )r*   r$   r1   rg   r%   Z
node_inputr-   r-   r.   Úfind_nodes_by_initializer†  s   



€þz#ONNXModel.find_nodes_by_initializerc                    s¨   |  d¡st d|› d¡ dS ‡ ‡fdd„‰ ˆj| d }|jdkr*||jd	 ks6|jd
kr8||jd kr8dS ˆ |ƒ\}}|sGJ d|› ƒ‚|sPJ d|› ƒ‚||fS )z*Help function to get scale and zero_point.Ú
_quantizedzFind z) in the quantized graph is not quantized.©NNc           	         sú   ˆj |  d }ˆj | d¡}g d¢}|dur-|j|v r-|jd  dd¡ dd¡ dd¡}n%|jdv rD|jd  dd¡ dd¡ dd¡}n|  dd¡ dd¡ dd¡}|d	 }ˆ |¡}|d
 }ˆ |¡}|du sl|du ry|duryˆ |jd ƒ\}}||fS )z/Search scale and zero point tensor recursively.r   N)ÚReshapeÚ	TransposeZSqueezeZ	UnsqueezeZMaxPoolZPadZSplitr¨   rV   Z_QuantizeLinearZ_QuantizeInput)ZGatherZ_scaleZ_zero_point)r"   r!   r   rT   rO   ÚreplacerQ   r}   )	Útensor_namer%   r   Zdirect_int8Zfp32_tensor_nameÚscaleÚscale_tensorZzoÚ	zo_tensor©Ú	_searcherr*   r-   r.   r²   •  s2   ü
üÿ

z+ONNXModel.get_scale_zero.<locals>._searcherr   ZQLinearConvrX   ZQGemméýÿÿÿzmissing scale for tensor zmissing zero point for tensor )Úendswithr   Údebugr"   rT   rO   )r*   ry   r%   r¯   r°   r-   r±   r.   Úget_scale_zero  s   
!zONNXModel.get_scale_zeroc                 C   s4   |rt jj| jdt|ƒjd d t  | j|¡ dS )zBSave model to external data, which is needed for model size > 2GB.Tz.data)r\   r]   N)r   r   Úconvert_model_to_external_datar   r   rI   ra   )r*   Zoutput_pathZuse_external_data_formatr-   r-   r.   Úsave_model_to_fileÃ  s
   ÿzONNXModel.save_model_to_filec                 C   óH   t |tƒr
t |tƒsJ ‚tt| jƒƒD ]}| j| |kr!|| j|< qdS )zReplace input of a node.N)r   r   Úranger”   rO   )r%   Úold_input_nameÚnew_input_nameÚjr-   r-   r.   Úreplace_node_inputË  ó   
€þzONNXModel.replace_node_inputc                 C   ó|   |du rg }|du rg }t |ƒdkr(| jjjD ]}|j|v r%t |||¡ qdS | jjjD ]}|j|vr;t |||¡ q-dS )zReplace inputs of all nodes.Nr   )r”   r+   r$   r%   rT   r   r¾   )r*   r»   r¼   Úwhite_optypeÚblack_optyper%   r-   r-   r.   Úreplace_input_of_all_nodesÓ  ó   
€þ
€þz$ONNXModel.replace_input_of_all_nodesc                 C   r¹   )zReplace output of a node.N)r   r   rº   r”   rQ   )r%   Úold_output_nameÚnew_output_namer½   r-   r-   r.   Úreplace_node_outputâ  r¿   zONNXModel.replace_node_outputc                 C   rÀ   )zReplace outputs of all nodes.Nr   )r”   r+   r$   r%   rT   r   rÇ   )r*   rÅ   rÆ   rÁ   rÂ   r%   r-   r-   r.   Úreplace_output_of_all_nodesê  rÄ   z%ONNXModel.replace_output_of_all_nodesc           
      C   sÂ  g }|   ¡ }|D ]•}|jdkr'|jd | jjjvr'|jd | jvr'| |¡ q|jdkrbt|  |¡ƒdkrb|  |¡d jdkrb|j	d | j
vrb|  |¡d jd | jvrb| |¡ | |  |¡¡ qd}|jD ]}|| jv st||  ¡ v rxd} nqg|j	D ]}|  |¡dur†q||| j
v s‘||  	¡ v r•d} nq||r| |¡ q|  |¡ g }| jjjD ]+}|j| jvrÕ|j| jjjvrÕ| |¡ |  ¡ j	D ]}	|	j|jkrÔ|  ¡ j	 |	¡ qÄqª|  |¡ |  ¡  dS )	zRemove unused nodes.ÚConstantr   ZQuantizeLinearr   ZDequantizeLinearTFN)rg   rT   rQ   r   r$   r"   r—   r”   r   rO   r!   ru   r}   rr   r1   rI   rn   rƒ   rR   )
r*   Zunused_nodesrg   r%   ZunusedrQ   rO   Zununsed_weightsÚwZgraph_inputr-   r-   r.   Úremove_unused_nodesù  sR   



þ
þ
€

€
zONNXModel.remove_unused_nodesc                    sþ  |s@i }i ‰ˆj jjD ]3}|jD ]}t| ¡ ƒdkr+||vr$|g||< q||  |¡ q|jD ]}t| ¡ ƒdkr=|ˆ|< q/qnˆj}ˆj	‰i ‰ t
ƒ ‰t
ƒ }ˆj jjD ]
}ˆ ||j ¡ qSˆj jjD ]}t‡‡fdd„|jD ƒƒrwˆ |¡ qcˆrÊˆ ¡ }t‡ ‡fdd„|jD ƒƒs•||vr”| |¡ qx|ˆ |j< |jD ]}	|	|v r²ˆ ‡ ‡fdd„||	 D ƒ¡ qtˆƒdkrÈt|ƒdkrÈt |¡‰| ¡  ˆszdd„ ˆ  ¡ D ƒ}
ttdd	„ |
D ƒƒƒttdd	„ ˆj jjD ƒƒƒksîJ ‚ˆj j d
¡ ˆj jj |
¡ dS )zTopological sort the model.r   c                 3   s$    | ]}|ˆ vo|ˆ  ¡ vV  qd S ©N)rO   rJ   )rš   r*   r-   r.   Ú	<genexpr>D  s   €" z-ONNXModel.topological_sort.<locals>.<genexpr>c                 3   s&    | ]}|ˆv rˆ| j ˆ v V  qd S rÌ   rH   rJ   )Ú	all_nodesrš   r-   r.   rÍ   I  s   €$ c                    s"   g | ]}|j ˆ vr|ˆvr|‘qS r-   rH   rJ   )rÎ   Úqr-   r.   rM   Q  s   " z.ONNXModel.topological_sort.<locals>.<listcomp>c                 S   s   g | ]}|d  ‘qS )r   r-   rJ   r-   r-   r.   rM   U  s    c                 S   s   h | ]}|j ’qS r-   rH   )rK   Únr-   r-   r.   Ú	<setcomp>V  rN   z-ONNXModel.topological_sort.<locals>.<setcomp>r%   N)r+   r$   r%   rO   r”   r–   r—   rQ   r"   r!   r   ru   rI   ÚallÚpopleftÚcopyÚdeepcopyÚclearÚitemsr¤   Ú
ClearField)r*   Zenable_subgraphrŒ   r%   r™   r›   ÚwaitÚinprÐ   Úoutrg   r-   )rÎ   rš   rÏ   r*   r.   Útopological_sort*  s\   
€
€þù
€


€
ó6zONNXModel.topological_sortc           	      C   sú   |du rg }t ƒ }|D ]}t|tƒr| |¡ qt|tjƒr%| |j¡ qJ dƒ‚g }|D ]}t|tƒr:| |¡ q-t|tjƒrG| |j¡ q-J dƒ‚|r{| ¡ }||v rVqK||vr`| |¡ nqKt|t	| j
jjƒƒ}|  |¡D ]}| |j¡ qp|sM|S )z4Get nodes chain with given start node and stop node.NFzM'get_nodes_chain' function only support list[string]or list[NodeProto] params)r   r   r   r—   r   Z	NodeProtorI   rÓ   r   r¤   r+   r$   r%   rœ   )	r*   ÚstartÚstopZresult_chainÚ
start_noder%   Z	stop_noder¥   r   r-   r-   r.   Úget_nodes_chainZ  s8   

õzONNXModel.get_nodes_chainc                 C   s  g }| j jjD ]ƒ}d\}}|jdkr*|}|  |g d¢g d¢¡|  |g d¢g d¢¡g}|jdkr}|}|  |g d¢g d¢¡|  |g d¢g d	¢¡| j|g d
¢g d¢| jg d|  |g d¢g d¢¡|  |g d¢g d¢¡|  |g d¢g d¢¡|  |g d¢g d¢¡g}|s€qt|ƒs…q| |¡ q|S )z,Find split node for layer wise quantization.r©   ÚSkipLayerNormalization©ÚMatMulrª   r«   rª   rã   ©Nr   r   r   r   ©ÚAddrã   rª   r«   rã   ©r   r   r   r   r   ræ   ©r   Nr   r   r   ©r   Nr   r   r   ©rª   ZGemmrª   rª   r«   rã   ©Nr   r   r   r   r   ©rš   Úreturn_indice©ræ   rã   rª   r«   rª   rã   ©r   Nr   r   r   r   ©r   Nr   r   r   r   )rã   ÚMulrã   rñ   ÚDivræ   )Nr   Nr   Nr   )rã   rñ   rã   ZSimplifiedLayerNormalizationræ   )Nr   Nr   r   )r   r$   r%   rT   Úmatch_parent_pathrš   Úanyr—   )r*   Zstart_nodesr%   rß   Úqkv_nodes_listr-   r-   r.   Ú+find_split_node_for_layer_wise_quantization  sx   

ýýú
ýÿûýýýýÞ(z5ONNXModel.find_split_node_for_layer_wise_quantizationc                 C   s¬  g }| j jjD ]Ì}|jdkr| |jg¡ qd\}}|jdkr7|}|  |g d¢g d¢¡|  |g d¢g d¢¡g}|jdkrv|}|  |g d¢g d	¢¡|  |g d¢g d
¢¡| j|g d¢g d¢| jg d|  |g d¢g d¢¡|  |g d¢g d¢¡g}|syqt|ƒs~qdd„ |D ƒd }g }|j	D ]}|| jvr”qŒ||d j
d kržqŒ| |¡ qŒt|ƒdkr«q|d }	| j}
|
|	 }dd„ |D ƒ}| d¡dkrÓ| dd„ |D ƒ¡ |sÓ |S q|S )z½Find qkv MatMul in Attention.

        Args:
            find_all (bool, optional): find all qkv MatMul. Defaults to False

        Returns:
            qkv (list): qkv MatMul list
        Z	Attentionr©   rá   râ   rä   rå   rç   ræ   rè   ré   rê   rë   rì   rî   rï   rð   c                 S   s   g | ]}|d ur|‘qS rÌ   r-   )rK   Úqkvr-   r-   r.   rM     s    z3ONNXModel.find_qkv_in_attention.<locals>.<listcomp>rX   r   r   c                 S   rG   r-   )rT   ©rK   rž   r-   r-   r.   rM     rN   rã   é   c                 S   s   g | ]
}|j d kr|j‘qS )rã   )rT   rI   rø   r-   r-   r.   rM     s    )r   r$   r%   rT   r—   rI   ró   rš   rô   rO   rQ   r”   rŒ   Úcount)r*   Zfind_allr÷   r%   rß   rõ   Z	qkv_nodesZother_inputsrO   Z
root_inputrŒ   r    Zchildren_typesr-   r-   r.   Úfind_qkv_in_attentionÍ  s   	

ýýú
ýÿûýýè

€ zONNXModel.find_qkv_in_attentionc                 C   s¤   g }t t|ƒƒD ]G}|t|ƒd kr.||d  }|d dkr-| ||d  ||d  g¡ q|| }|| d t|ƒk rO| ||| d  ||| d  g¡ q|S )a  Find MatMul in FFN.

        Args:
            attention_index (list): index of Attention
            attention_matmul_list (list): list of Attention and MatMul nodes
            block_len (int): block length

        Returns:
            list: list of MatMul in FFN
        r   é   r   )rº   r”   r—   )r*   Zattention_indexZattention_matmul_listZ	block_lenZ
ffn_matmulr¢   Úindexr-   r-   r.   Úfind_ffn_matmul"  s   €ÿ€zONNXModel.find_ffn_matmulc                 C   s   ddl m} ddlm} t||ƒr=|| j| jƒ\}}}|  |¡ |  |¡ |  	|¡ |  
¡  |  ¡  |  ¡  |  |¡ dS t d¡ tdƒ dS )zExport Qlinear to QDQ model.r   )ÚONNXQlinear2QDQConfig)Úonnx_qlinear_to_qdqzGUnsupported config for export, only ONNXQlinear2QDQConfig is supported!N)Zneural_compressor.configrÿ   Zneural_compressor.utils.exportr   r   r   r"   rw   rr   r{   rR   rË   rÜ   rb   r   r   Úexit)r*   Ú	save_pathZconfrÿ   r   rw   rr   Zinitsr-   r-   r.   Úexport;  s   




zONNXModel.exportc                 C   sH   g }|D ]}||   ¡ vrtj ¡ }||_| |¡ q| jjj  |¡ dS )zAdd the tensors to the model outputs to gets their values.

        Args:
            tensor_names: The names of tensors to be dumped.
        N)	rQ   r   r‡   ZValueInfoProtorI   r—   r   r$   ru   )r*   Útensor_namesZadded_outputsry   Zadded_tensorr-   r-   r.   Úadd_tensors_to_outputsM  s   

€z ONNXModel.add_tensors_to_outputsc                 C   sV   g }|D ]}||   ¡ v r| | jjj |   ¡  |¡ ¡ q|D ]
}| jjj  |¡ qdS )z€Remove the tensors from the model outputs.

        Args:
            tensor_names: The names of tensors to be removed.
        N)rQ   r—   r   r$   rý   rn   )r*   r  Zremoved_outputsry   rQ   r-   r-   r.   Úremove_tensors_from_outputs[  s   €ÿz%ONNXModel.remove_tensors_from_outputsc                 C   sR   |du rg }t |jƒD ]\}}||v r&|| }|j|kr&||vr&||f  S qdS )a  Find parent node based on constraints on op_type.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).

        Returns:
            parent: The matched parent node. None if not found.
            index: The input index of matched parent node. None if not found.
        Nr©   )Ú	enumeraterO   rT   )r*   r%   Úparent_op_typerš   ÚexcluderL   rO   r   r-   r-   r.   Úmatch_first_parenth  s   €zONNXModel.match_first_parentc           	      C   s®   |dusJ ‚|du s|dksJ ‚|du rg }|du r| j }|du r6|  ||||¡\}}|dur4| |¡ |S |t|jƒkr?dS |  |||¡}|durU|j|krU||vrU|S dS )a|  Find parent node based on constraints on op_type and index.

        Args:
            node (str): current node name.
            parent_op_type (str): constraint of parent node op_type.
            input_index (int or None): only check the parent given input index of current node.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            exclude (list): list of nodes that are excluded (not allowed to match as parent).
            return_indice (list): a list to append the input index when input_index is None.

        Returns:
            parent: The matched parent node.
        Nr   )r!   r
  r—   r”   rO   r£   rT   )	r*   r%   r  Zinput_indexrš   r	  rí   r   rý   r-   r-   r.   Úmatch_parent~  s"   
zONNXModel.match_parentc              	   C   sv   t |ƒt |ƒks
J ‚|du r| j}|}g }t|ƒD ]\}}	| j||	|| |g |d}
|
du r1 dS | |
¡ |
}q|S )aÃ  Find a sequence of input edges based on constraints on parent op_type and index.

        Args:
            node (str): current node name.
            parent_op_types (str): constraint of parent node op_type of each input edge.
            parent_input_index (list): constraint of input index of each input edge.
                                       None means no constraint.
            output_name_to_node (dict): dictionary with output name as key, and node as value.
            return_indice (list): a list to append the input index when there is
                                  no constraint on input index of an edge.

        Returns:
            parents: a list of matched parent node.
        N)r	  rí   )r”   r!   r  r  r—   )r*   r%   Zparent_op_typesZparent_input_indexrš   rí   Zcurrent_nodeZmatched_parentsrL   rT   Zmatched_parentr-   r-   r.   ró   ª  s&   ú
zONNXModel.match_parent_pathc                 C   s$   | j jjD ]
}d|jv r dS qdS )z~Check the model is smooth quantized or not.

        Returns:
            bool: the model is smooth quantized or not.
        Z_smooth_scaleTF)r+   r$   r1   rI   )r*   r8   r-   r-   r.   Úis_smoothquant_modelØ  s
   
ÿzONNXModel.is_smoothquant_modelc                 C   s   |   ¡ }|S )z-Find split nodes for layer-wise quantization.)rö   )r*   Zsplit_nodesr-   r-   r.   Úfind_split_nodesã  s   zONNXModel.find_split_nodesTc              
   C   sî  t  ¡ }| | j¡ |j d¡ t  ¡ }| | j¡ |j d¡ d}d}| jjjD ]#}	|dkr7|jj |	¡ n|dkrB|jj |	¡ |	j|krL|	j	}d}q)t
|ƒdks`J d|› dt
|ƒ› dƒ‚|d }
|rzdd	lm} || jd
tj | j¡d| _W n ty } zt d¡ |‚d}~ww |  |
¡\}}t j |
||¡}t|d
d}t|d
d}| ¡  | ¡  |jjj	 |¡ |jjj |¡ g }g }|jD ].}||jv rø|  |¡\}}t j |||¡}||jjj	vrì| |¡ ||jjjvrø| |¡ qÊ|D ]
}|jjj	 |¡ qû|D ]}|jjj |¡ q| ¡  | ¡  | ¡  | ¡  tj |¡}| |¡ tj  |d¡}||_!| "|¡ | #¡  t $d|› d¡ |rs| |¡ tj  |d¡}||_!| "|¡ | #¡  t $d|› d¡ ||fS ||fS )a[  Split model into two parts at a given node.

        Args:
            split_node_name (str): name of the node where the model is split at>
            path_of_model_to_split (str): path of model to be split.
            shape_infer (bool): do shape inference. Default is True.
            save_both_split_models (bool): whether to save the two split models.
                False means only save the first split model.
                True means save both the two split models.
                Default id True.

        Returns:
            tuple: the first split model, the second split model
        r%   Nr   rü   zJOnly support split at node with 1 output tensor, while current split node z has z output tensorsr   )Úinfer_shapesT)Z
auto_mergeÚbase_dirzËShape infer fails for layer-wise quantization. We would recommend checking the graph optimization level of your model and setting it to 'DISABLE_ALL' or 'ENABLE_BASIC', as this may help avoid this error.)r	   zsplit_model_part_1.onnxzsave split model part 1 to z for layer wise quantizationzsplit_model_part_2.onnxzsave split model part 2 to )%r   Z
ModelProtoZCopyFromr   r$   rØ   r%   r—   rI   rQ   r”   Z'neural_compressor.adaptor.ox_utils.utilr  r   r   r   r   r7   r   ÚerrorÚ%_get_output_type_shape_by_tensor_namer‡   Zmake_tensor_value_infor   Ú_remove_unused_input_outputr+   rO   rš   rŒ   Úremove_unused_initrR   Ú load_model_initializer_by_tensorÚjoinr?   Ú_save_split_modelr   rµ   )r*   Zsplit_node_nameZpath_of_model_to_splitZshape_inferZsave_both_split_modelsZsplit_model_part_1Zsplit_model_part_2Zsplit_node_outputZpart_idxr%   Zsplit_tensor_namer  r9   Zsplit_tensor_typeZsplit_tensor_shapeZsplit_tensorZinsert_output_for_model_1Zinsert_input_for_model_2rQ   Úoutput_typeZoutput_shapeZoutput_tensorrO   Zdir_of_model_to_splitZsplit_model_part_1_pathZsplit_model_part_2_pathr-   r-   r.   Úsplit_model_with_nodeè  s–   
€ÿ ÿ€ù	



€



zONNXModel.split_model_with_nodec              	   C   sJ   t j |d ¡rt  |d ¡ tj| j|dd| d¡d d ddd dS )	z”Save split model as external data for layer wise quantization.

        Args:
            save_path (str): the path to save the split model
        rY   TrW   rX   rZ   Fr[   N)r   r   r   rn   r   ra   r   r_   )r*   r  r-   r-   r.   r  `  s   
ùzONNXModel._save_split_modelc                 C   sX   t jj}d}| jjjD ]}|j|kr'|jjj	}dd„ |jjj
jD ƒ} ||fS q||fS )z°Get output type and shape with a tensor name.

        Args:
            tensor_name (str): name of a tensor

        Returns:
            tuple: output type and shape
        Nc                 S   s    g | ]}|  d ¡r|jnd‘qS )Ú	dim_valuerX   )r2   r  )rK   Údimr-   r-   r.   rM   €  s    ÿzCONNXModel._get_output_type_shape_by_tensor_name.<locals>.<listcomp>)r   r3   ÚFLOATr   r$   Z
value_inforI   r   Ztensor_typeÚ	elem_typeÚshaper  )r*   r­   r  r  rQ   r-   r-   r.   r  r  s   	


ÿúz/ONNXModel._get_output_type_shape_by_tensor_namec                 C   sŒ   g }g }| j jjD ]}|j| jvr| |¡ q	| j jjD ]}|j| jvr)| |¡ q|D ]
}| j jj |¡ q,|D ]
}| j jj |¡ q9dS )z-Remove unused input & output for split model.N)	r   r$   rQ   rI   rš   r—   rO   rŒ   rn   )r*   Zremove_outputsZremove_inputsrQ   rO   r-   r-   r.   r  †  s   
€
€ÿz%ONNXModel._remove_unused_input_outputc                 C   s8   g }| j jjD ]}|j| jvr| |¡ q|  |¡ dS )zRemove unused init.N)r   r$   r1   rI   rŒ   r—   rƒ   )r*   Zremov_initsr8   r-   r-   r.   r  —  s   
€zONNXModel.remove_unused_initc                 C   sP   |du rt j | j¡}| jjjD ]}| d¡r%|jt	j
jkr%t	j ||¡ qdS )z–Load model initializer by tensor.

        Args:
            data_path (str, optional): the directory of saved initializer. Defaults to None.
        Nr0   )r   r   r   r   r   r$   r1   r2   r0   r   r3   r4   r   Zload_external_data_for_tensor)r*   Z	data_pathr8   r-   r-   r.   r  Ÿ  s   €þz*ONNXModel.load_model_initializer_by_tensorúexternal.datac                 C   s|   |r!t j t j t j | j¡|¡¡r!t  t j t j | j¡|¡¡ |  ¡  tj	j
| j|d tj	j| jt j | j¡d dS )a}  Write external data of merged quantized model to new location to save memory.

        Args:
            external_data_location (str, optional): external data location of merged quantized model.
                                                    Defaults to "external.data".
            overwrite (bool, optional): if True, remove existed externa data. Defaults to False.
        )r]   )ÚfilepathN)r   r   r   r  r   r   rn   r  r   r   r·   r   Zwrite_external_data_tensors)r*   Zexternal_data_locationÚ	overwriter-   r-   r.   Ú#write_external_data_to_new_location«  s
   $ z-ONNXModel.write_external_data_to_new_locationc                 C   sô   |  ¡  |  t| ¡ ƒ¡ |  t| ¡ ƒ¡ |  ¡  | ¡ jD ]}|j	|  ¡ vr0| j
jj |¡ qg }| j
jjD ]}|j	| ¡ v rF| |¡ q8|D ]
}| j
jj |¡ qI| ¡ jD ]}|j	|  ¡ vrw|j	|  ¡ vrw|j	| jvrw| j
jj |¡ qYdS )z'Merge two split model into final model.N)r!  rw   r¤   rg   r{   r1   rR   r$   rQ   rI   r   r—   rO   rn   rš   )r*   Zto_merge_modelrQ   Zremove_outputrO   r-   r-   r.   Úmerge_split_modelsº  s,   €
€€úzONNXModel.merge_split_modelsc                 C   sh   i }g }| j jjD ]}|||j< | |¡ q	|D ]
}| j jj |¡ q|D ]}| j jj || ¡ q%dS )z:Re-org output of merged model for layer-wise quantization.N)r   r$   rQ   rI   r—   rn   )r*   Zorigin_outputZoutputsZ
tmp_removerQ   Zout_namer-   r-   r.   Úre_org_output×  s   
ÿzONNXModel.re_org_output)FrÌ   r©   )NNNN)TT)r  F)MÚ__name__Ú
__module__Ú__qualname__Ú__doc__r/   r   Úpropertyr<   r?   ÚsetterrB   rD   rE   r+   rO   rQ   rR   rS   r(   rb   rg   r1   r$   rk   rl   ro   rr   rv   rw   rz   r{   r}   r€   r   r‚   rƒ   r‹   rŒ   r#   rš   r&   rŸ   r   rœ   r£   r¦   r§   r¶   r¸   Ústaticmethodr¾   rÃ   rÇ   rÈ   rË   rÜ   rà   rö   rû   rþ   r  r  r  r
  r  ró   r  r  r  r  r  r  r  r  r!  r"  r#  r-   r-   r-   r.   r   &   s¾    !








	




	

	
4




1
0'
LU

ù1
ú.
ÿx

r   )r'  rÔ   Úloggingr   r5   Úcollectionsr   Úpathlibr   r   Zonnx.external_data_helperÚutilr   r   Ú	getLoggerr   r   r-   r-   r-   r.   Ú<module>   s   
