o
    irL                     @   s~   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ e eZG dd dZG dd dZdS )    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   @   s^   e Zd ZdddZdd Zdefdd	Zd
efddZdddZ	dddZ
dd Zdd ZdS )
Gpt2MetricTorch   c                 C   sj   |dkr|dks
J || _ || _| d| | _|| _d| _d| _d| _d| _d| _d | _	d | _
i | _d S )N   d   z vs r   )baseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer    r   j/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__   s   
zGpt2Metric.__init__c                 C   s  | j | jkr`td td| j d| j  d | jdkrId| j | j }d| j | j }td| j d| j d	|d
d| j d| j d	|d
d td td| jd td| jd n	td| j d | j	rtd d}d}t
| j	 D ]H}t| j	| d }|dkrtd| d|d
d ntdd|  dd|d  d  d|d
d ||t| j	|  7 }|t| j	| 7 }q{td|| d
d d S d S )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r   z]:	zAverage Latency: )r
   r   printr   r   r   r   r   r   r   sortedkeys
statisticsmeanlen)r   Ztop_1_error_rateZtop_k_error_ratetotalcountkeyZaverager   r   r   r   %   s4   
4.zGpt2Metric.printis_empty_pastc                 C   s8   ||    }|rt| j|| _|S t| j|| _|S N)absmaxr   r   )r   Zbaseline_logitsZtreatment_logitsr(   diffr   r   r   diff_logitsC   s   zGpt2Metric.diff_logits
batch_sizec                 C   s>   |  j |7  _ tj|dftjd| _tj|dftjd| _d S )Nr   Zdtype)r   torchzerosboolr   r   )r   r.   r   r   r   start_batchL   s   zGpt2Metric.start_batchTc                 C   sb   |  |j|jd| |  |j|j| j| | |j|j|dk}|r/td| j d|  d S d S )Nr   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r-   logitsr   r   )r   r
   r   past_seq_lenverbosemax_diffr   r   r   
eval_batchQ   s   zGpt2Metric.eval_batchc                 C   s   t t ||sL|dkr&|rtd| j  |  jt || O  _d S |r4td| d| j d |  jt || dj	dddkO  _d S d S )Nr   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)dimr   )
r0   alleqr   r   r   Zlogical_notr   sum	unsqueeze)r   Zbaseline_topkZtreatment_topkr   r9   r   r   r   r4   Y   s    
zGpt2Metric._eval_topkc                 C   s,   |  j | j 7  _ |  j| j 7  _d S r)   )r   r   r?   r   r   r   r   r   r   	end_batchh   s   zGpt2Metric.end_batchc                 C   sF   |dkrt t|d nd}|| jvrg | j|< | j| | d S )Nr   r   )intmathlog2r   append)r   r8   Zlatencyr'   r   r   r   add_latencyl   s   

zGpt2Metric.add_latencyN)r   r   )T)__name__
__module____qualname__r   r   r2   r-   rC   r3   r;   r4   rB   rG   r   r   r   r   r      s    
	

r   c                
   @   s   e Zd Z			dddZdefddZdd	 Zd
d Zdd Ze	dddZ
e	dd Ze	dd Ze	ejddddddddf	ddZdS )
Gpt2TesterFr   c                 C   s   |j d | _|j d | _|| _|| _|| _|| _|d u| _|d u| _g | _	d| j|d|| g}t
|D ]}t||r?tjntj}| j	|| q3d | _d | _d | _|	| _|
| _d S )Nr   r   r   )shaper.   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger0   emptytypefloat16Zfloat32rF   tor7   r5   r6   r   top_k_required_order)r   rO   rP   rQ   Znum_attention_headsZhidden_sizeZ	num_layerdeviceZis_fp16r   rZ   Z
past_shapeZ_iZ
empty_pastr   r   r   r   t   s.   


zGpt2Tester.__init__returnc                 C   s   t | j| j| j| jS r)   )r   rO   rP   rQ   rT   rA   r   r   r   
get_inputs   s   zGpt2Tester.get_inputsc              	      s  ddl m  tj|dt| }tj|r!td| d d S tj|dd  fdd	}g }||| j	d
 | j
rA||| jd | jrK||| jd t| jD ]}||| j| dt|  qPt|D ])\}}	ttj|d| dd}
|
|	  W d    n1 sw   Y  qddd | D }t|D ]B\}} t|| tjr|| n	||    }	ttj|d| dd}
|
|	  W d    n1 sw   Y  qtd|  d S )Nr   numpy_helperZtest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                    s"   |   |   | d S r)   )rF   
from_arrayclonecpunumpy)input_tensorsZtorch_tensorr   r^   r   r   
add_tensor   s   "z-Gpt2Tester.save_test_data.<locals>.add_tensorrO   rP   rQ   Zpast_Zinput_z.pbwbc                 S   s   g | ]}|j qS r   )r   ).0outputr   r   r   
<listcomp>   s    z-Gpt2Tester.save_test_data.<locals>.<listcomp>Zoutput_zTest data saved to directory )onnxr_   ospathjoinstrexistsr   makedirsrO   rR   rP   rS   rQ   rU   rN   rT   	enumerateopenwriteZSerializeToStringget_outputsra   
isinstancerd   ndarrayrb   rc   )r   sessionri   save_test_data_dirZtest_case_idrm   rf   re   itensorfZoutput_names_namer   r^   r   save_test_data   s<   *zGpt2Tester.save_test_datac                 C   sh  t |d tjrt|d n	|d    | _t	
| j| _t	
| j| j| j| _| j  | jdg|| _| jrWt| j| d gd| jd|| _| jrqt| jt| jdg| jgd|| _g | _t |d trt |d | _dS t!| j"D ](}t ||d  tjrt||d  n	||d    }| j#|| qdS )z7
        Update the inputs for next inference.
        r   r   N)$rv   rd   rw   r0   
from_numpyrb   detachrc   r7   rK   predict_next_tokenr5   r   rZ   r6   Zreshaper.   rY   rO   rR   r{   rM   r@   repeatrP   rS   catrQ   ZonesZtype_asrT   tuplelistrU   rN   rF   )r   ri   stepr[   rz   past_ir   r   r   update   s6   0"*zGpt2Tester.updatec                 C   s2  t d | jdur| j|j   }|dkrt d|  t| j|jks/t d| j|j | jrCt| j|jksCt d| j|j | j	rWt| j
|j
ksWt d| j
|j
 t| jt|jkscJ t| jD ].\}}|j|j| jkswJ | dkr||j|    }|dkrt d	| d
|  qhdS )z3
        Compare inputs and logits output.
        zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r   r7   r*   r+   r0   r=   rO   rR   rP   rS   rQ   r$   rT   rr   rL   Znelement)r   r
   Zmax_io_diffrz   r   Zmax_past_diffr   r   r   r,      s<   
zGpt2Tester.diffr   c                 C   sb   | dddf }|dkrt |dd}|S t j|dddddd|f }|s/| \}}|S |S )z4
        Get top k topkens based on logits.
        Nr   T)Z
descending)r0   ZargmaxZargsortsort)r7   r   Zrequired_orderZlastTokenLogitsZgeneratedTokensZtopkZsorted_topk_r   r   r   r     s    zGpt2Tester.predict_next_tokenc                 C   s   g }t |D ]A}t| |d  tjrt| |d  n| |d  }t||d  tjr4t||d  n||d  }||   }|| qt	d|  dS )zO
        Compare the present outputs of two outputs from ONNX Runtime.
        r   zpresent_diff_max=N)
rU   rv   rd   rw   r0   r   r*   r+   rF   r   )onnx_outputonnx_io_outputrN   Zpresent_diff_maxrz   Zonnx_present_iZonnx_io_present_ir:   r   r   r   diff_present+  s   

zGpt2Tester.diff_presentc                 C   s*   ddl m} || }ddlm} |j|kS )z>
        Returns True if the ONNX model is quantized.
        r   )load)__producer__)rk   r   Z!onnxruntime.quantization.quantizer   Zproducer_name)Zonnx_model_pathr   modelZquantize_producerr   r   r   is_quantized_onnx_model@  s   
z"Gpt2Tester.is_quantized_onnx_modelZGpt2LMHeadModelT   r   .c           -      C   s  t d| d| d| dt| d|	 d |jj}|jj}|jj}|jj}d}|tjk}|r:d| 	 d j
v s:J | | tjd	d
d|j|d}tj|||d}d}|tjkr]dnd}t|||}t|||}t|d ||}t|D ]b\}}|	dkr||	kr nT|d dkrt |  |d }|dd}|dd}t|||||||||| 
}t|||||||||| 
} t|||||||d|| 
}!|!j}"||" ||" t  tj|"tjd}#t|D ]}$t|j d }%t|jd  d }&t ! }'t"||!# }(|$|&t ! |'  |!%|(|$| tj&| |# dd\})}*|$|&|*d  |%|)|$| tj|"|&|%|j|d}+t'||+ tj(| | # ||+dddd\},}*|$|&|*d  ||k rr| )| |,|| |d7 }| %|,|$| |
r|*|  t+|)|,| t d  t d!|!j, t d"|j, t d#| j, |j-|!||&|
d$ |j-|!| |&|
d$ |#|!j,|k. B }#t/|#r nqW d   n	1 sw   Y  |0  |0  qw|   |   |   dS )%z
        Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
        It will print top 1 and top k errors on the given test inputs.
        zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   rX             )r.   Zpast_sequence_lengthZsequence_lengthconfigmodel_class)
is_float16r   zQuantized OnnxZOnnxz with IO Binding
   rO   rP   NrQ   Fr/   r      )
total_runsr   )r   T)r   Zreturn_numpyZinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)r9   )1r   r$   r   rN   n_headn_embdeos_token_idr   ZFLOAT16ru   rW   evalrY   r   Zget_output_shapesZget_output_buffersZINT8r   rr   getrK   r.   r3   r0   Zno_gradr1   r2   rU   r   rO   sizerT   timeitZdefault_timerZpytorch_inferencer]   rG   r   Zonnxruntime_inferenceZauto_increase_buffer_sizeZ$onnxruntime_inference_with_binded_ior~   r,   r   r5   r;   anyr=   rB   )-rx   r   r[   Ztest_inputs	precisionr   r   Ztop_k_no_orderZ	max_stepsZ
max_inputsr9   r~   ry   rN   r   r   r   Ztest_data_savedr   Zinit_output_shapesZoutput_buffersr   r   Ztorch_metricZonnx_metricZonnx_io_metricrz   ZinputsrO   rP   rQ   Zonnx_runnerZonnx_io_runnerZtorch_runnerr.   doner   Zseq_lenr8   
start_timeZpytorch_outputr   Zavg_latency_msZoutput_shapesr   r   r   r   test_generationL  s  &









>
zGpt2Tester.test_generationN)Fr   F)r   F)rH   rI   rJ   r   r   r]   r~   r   r,   staticmethodr   r   r   r   ZFLOAT32r   r   r   r   r   rK   s   s4    

+'('

rK   )loggingrD   rl   r"   r   rd   r0   Zbenchmark_helperr   Zgpt2_helperr   r   	getLoggerrH   loggerr   rK   r   r   r   r   <module>   s   
^