o
    iP                     @   s:  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlZd dlZd dlZd dlmZmZ e	G dd dZe	G dd	 d	Z		
	d&ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"e#d%krdZ$e"  dS dS )'    N)	dataclass)datetime)Path)generate_test_dataget_bert_inputsc                   @   sv   e Zd ZU eed< eed< eed< eed< eed< eed< eed< eed< eed	< eed
< eed< eed< eed< dS )TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr r   r   a/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/bert_perf_test.pyr   !   s   
 r   c                   @   sV   e Zd ZU eed< eed< eed< eed< eed< edB ed< edB ed< eed	< dS )
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelNinput_tuning_resultsoutput_tuning_results	mask_type)r   r   r   r   r   r   r   r   r   r   r   2   s   
 r      c                 C   sd  dd l }|| |rd| vrtd |rI|dkr ddg}n,|dkr)ddg}n#|d	kr2g d
}n|dkr;ddg}n|dkrDg d}nddg}ndg}| }	||	_|jj|	_|d u rb|j	j
|	_n5|dkrl|j	j|	_n+|dkrv|j	j|	_n!|dkr|j	j|	_n|dkr|j	j|	_n|dkr|j	j
|	_n||	_|d ur||	_|j| |	|d}
|r|dkrd|
 v sJ nW|dkrd|
 v sJ nJ|d	krd|
 v sJ d|
 v sJ n5|dkrd|
 v sJ n(|dkrd|
 v sJ d|
 v sJ nd|
 v sJ n	d|
 v sJ |d ur0t|}|
t| W d    |
S 1 s+w   Y  |
S )Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.ZdmlZDmlExecutionProviderCPUExecutionProviderZrocmROCMExecutionProviderZmigraphx)MIGraphXExecutionProviderr*   r)   cudaZtensorrt)TensorrtExecutionProviderr(   r)      r'      c   )	providersr+   r-   )onnxruntimeZset_default_logger_severityZget_available_providersprintZSessionOptionsZlog_severity_levelZExecutionModeZORT_SEQUENTIALZexecution_modeZGraphOptimizationLevelZORT_ENABLE_ALLgraph_optimization_levelZORT_DISABLE_ALLZORT_ENABLE_BASICZORT_ENABLE_EXTENDEDZORT_ENABLE_LAYOUTr   ZInferenceSessionZget_providersopenZset_tuning_resultsjsonload)r   r   r   r   r4   r   tuning_results_pathr2   Zexecution_providerssess_optionssessionfr   r   r   create_session>   sx   	










r<   c                 C   s,   t jtjt jtjt jtjt jtji}||  S )N)torchZfloat32npZfloat16Zint32Zint64Zlonglong)Z
torch_typeZtype_mapr   r   r   
numpy_type   s   r?   c                    s4    fdd|   D } fdd|  D }||fS )Nc                    "   i | ]\}}|t | qS r   r=   Z
from_numpyto.0namearraydevicer   r   
<dictcomp>      " z/create_input_output_tensors.<locals>.<dictcomp>c                    r@   r   rA   rC   rG   r   r   rI      rJ   )items)inputsoutputsrH   input_tensorsoutput_tensorsr   rG   r   create_input_output_tensors   s   rP   c              
   C   sx   |   }| D ]\}}|||jjdt|j|j|  q| D ]\}}|	||jjdt|j|j|  q#|S Nr   )

io_bindingrK   Z
bind_inputrH   typer?   ZdtypeshapeZdata_ptrZbind_output)sessrN   rO   rR   rE   Ztensorr   r   r   create_io_binding   s(   rV   c                 C   s   g }g }|j r	dnd}t|D ]I\}}| ||}	||	 i }
tt|D ]
}|	| |
|| < q&t||
|\}}t| ||}| | t	
 }| | t	
 | }|| q||fS )Nr,   cpu)r   	enumeraterunappendrangelenrP   rV   Zrun_with_iobindingtimeitdefault_timer)r:   
all_inputsoutput_namestest_settingresultslatency_listrH   _test_case_idrL   resultrM   irN   rO   rR   
start_timelatencyr   r   r   %onnxruntime_inference_with_io_binding   s"   


ri   c           
      C   st   t |dkr| |t| g }g }t|D ]\}}t }| ||}t | }	|| ||	 q||fS rQ   )r\   rY   randomchoicerX   r]   r^   rZ   )
r:   r_   r`   rb   rc   rd   rL   rg   re   rh   r   r   r   onnxruntime_inference   s   
rl   c                 C   s   |  }dtj|  d}|d|j d|j ddd7 }|d|j d|j d7 }|d	|j	 d
|j
 d7 }|d|j d|j d7 }|d|j d7 }|d|j 7 }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)Zget_session_optionsospathbasenamer4   r   replacer   r	   r
   r   r   r   r   r   )r   r:   ra   r9   optionr   r   r   	to_string   s   rt   c              	   C   s   t | j|j|j|| j|j| jd}dd | D }t| j||}||v r,t	d| d S t	d| g }|j
rMt|jD ]}	t||||\}
}|| q;nt|jD ]}	t|||\}
}|| qRt|d }t|}t|d}t|d}t|d	}t|d
}t|d}|jd|  }|||||||f||< t	dt|dt|d | jrtj| j}tj|r|}|ddd  dt   d}t	d|d|d |  }t!|d}t"#|| W d    n1 sw   Y  t	d| d S d S )N)r   r8   c                 S   s   g | ]}|j qS r   )rE   )rD   outputr   r   r   
<listcomp>  s    z run_one_test.<locals>.<listcomp>zskip duplicated test:zRunning test:  2   K   Z   _   r0   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr.   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)$r<   r   r   r   r#   r   r$   Zget_outputsrt   r3   r   r[   r   ri   extendrl   r>   rF   
statisticsmeanZ
percentiler   formatr%   ro   rp   abspathexistsrsplitr   now	timestampZget_tuning_resultsr5   r6   dump)model_settingra   perf_resultsr_   r   r:   r`   keyZall_latency_listZ_irb   rc   Z
latency_msZaverage_latencyZ
latency_50Z
latency_75Z
latency_90Z
latency_95Z
latency_99Z
throughputZoutput_pathZold_output_pathZtrsr;   r   r   r   run_one_test   sr   	



$r   c                 C   s,   t jt| ||||fd}|  |  d S )N)targetargs)multiprocessingProcessr   startjoin)r   ra   r   r_   r   processr   r   r   launch_test=  s   
r   c           	      C   s   |j d urt| ||||j  d S tjdd}tjdd}t||h}tdtd|D ]}||vr5|| q*|jdd |D ]
}t| |||| q>d S )NF)ZlogicalTr.      )reverse)	r   r   psutil	cpu_countlistr[   minrZ   sort)	r   ra   r   r_   r   Zlogical_coresZcandidate_threadsrf   r   r   r   r   run_perf_testsL  s(   

r   c                 C   s|   t | j| j| j| j\}}}td|j d|j d|j  t	|j|j|j|j
|j||||j|j| jd}t| ||| d S )NzGenerating z samples for batch_size=z sequence_length=)r&   )r   r   r    r!   r"   r3   r
   r   r	   r   r   r   r   r   r&   r   )r   ra   r   Z	input_idsZsegment_idsZ
input_maskr_   r   r   r   run_performanced  s.   
r   c                  C   s  t  } | jddtdd | jdddtddd	 | jd
ddtdd | jddtddd | jdddtddd | jddtg dddd | jddtddd | jdddd d! | jdd" | jd#dtd$g d%d&d' | jd(ddd)d! | jdd* | jd+ddd,d! | jdd- | jd.dtd d/d | jd0d1dtd d2d | jd3dtd d4d | jd5dtd d6d | jd7dtd d8d | jd9d td:d; | jd<d td=d; | jd>d?d@tdAd; | jdBdCdddDd! | jddE | jdFdtd$dGd |  }|S )HNz--modelTzbert onnx model path)requiredrS   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rS   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rS   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r.   r'   r/   r0   r0   zfonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 3 - layout, 99 - enable all.)r   rS   choicesr   r   z--seedr/   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr'   )r   r.   r'   r/      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rS   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rS   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   r   r   r   parse_arguments  s  					r   c                  C   s  t  } | jdkrtdtd| j | _| jdkr| j| _t }|	 }t
| j}t|dkr5t|dks9tdt| j| j| j| j| j| j| j| j}|D ])}t|| j| j| j| j| j| j| j| j| j| j| j| j}t d| t!||| qNt"|# ddd	 d
}t$j%&t'| jj(d)| jrdndd&dd t"|D | jt*+ ,d}t-|dddK}	t.j/|	ddd}
d }|D ]6\}}|0d}|d u rg d}|1dd |D  |
2| dd |D }|1dd |D  |
2| qW d    n1 sw   Y  t d| d S )Nr   r.   rw      z batch_size not in range [1, 128]ztest settingFc                 S   s   | d S )Nr.   r   )xr   r   r   <lambda>S  s    zmain.<locals>.<lambda>)r   r   zperf_results_{}_B{}_S{}_{}.txtZGPUZCPU-c                 S   s   g | ]}t |qS r   )r   rD   r   r   r   r   rv   Y  s    zmain.<locals>.<listcomp>z%Y%m%d-%H%M%Szw+rn   )newline	
)	delimiterlineterminatorrm   )zLatency(ms)ZLatency_P50ZLatency_P75ZLatency_P90ZLatency_P95ZLatency_P99zThroughput(QPS)c                 S      g | ]	}| d d qS )=r   splitr   r   r   r   rv   m      c                 S   s   g | ]}t |d qS )r|   )r   r   r   r   r   rv   p  s    c                 S   r   )r   r.   r   r   r   r   r   rv   q  r   zTest summary is saved to)3r   r   maxr   Zsamplesr   r	   r   Managerdictsetr   r   	Exceptionr   modelr    r!   r"   r#   r$   r%   r&   r   r   r   r   r   r   r   r   r   r3   r   sortedrK   ro   rp   r   r   parentr   r   r   strftimer5   csvwriterr   r   writerow)r   managerr   Zbatch_size_setr   r   ra   Zsorted_resultsZsummary_fileZtsv_fileZ
tsv_writerheadersr   Zperf_resultparamsvaluesr   r   r   main#  s~   




	
	
r   __main__)Nr'   N)%r   r   r6   r   ro   rj   r   r]   dataclassesr   r   pathlibr   numpyr>   r   r=   Zbert_test_datar   r   r   r   r<   r?   rP   rV   ri   rl   rt   r   r   r   r   r   r   r   __spec__r   r   r   r   <module>   sP   
[
E #T
