o
    i$X                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlmZ d dlZeeZG d	d
 d
eZG dd deZG dd dZdejiZ ddddddi fddZ!d8ddZ"d9ddZ#dd Z$dd Z%dd Z&dd  Z'd:d!d"Z(ej)d fd#d$Z*d%d& Z+d;d(d)Z,d*e-e.e/ef  dB fd+d,Z0G d-d. d.eZ1G d/d0 d0e1Z2G d1d2 d2e1Z3d<d4d5Z4d6d7 Z5dS )=    N)ABCabstractmethod)ThreadPoolExecutor)datetime)Enum)sleep)Any)versionc                   @   s$   e Zd ZdZdZdZdZdd ZdS )	PrecisionZfp32Zfp16Zint8Zint4c                 C      | j S Nvalueself r   c/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/benchmark_helper.py__str__&      zPrecision.__str__N)__name__
__module____qualname__ZFLOAT32ZFLOAT16ZINT8ZINT4r   r   r   r   r   r
       s    r
   c                   @   s    e Zd ZdZdZdZdd ZdS )OptimizerInfoZno_optZby_ortZ	by_scriptc                 C   r   r   r   r   r   r   r   r   1   r   zOptimizerInfo.__str__N)r   r   r   ZNOOPTZBYORTZBYSCRIPTr   r   r   r   r   r   *   s
    r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )ConfigModifierc                 C   
   || _ d S r   
num_layers)r   r   r   r   r   __init__6      
zConfigModifier.__init__c                 C   s   | j d u rd S t|dr| j |_td| j   t|dr+| j |_td| j   t|dr?| j |_td| j   d S d S )Nnum_hidden_layersz6Modifying pytorch model's number of hidden layers to: encoder_layersz7Modifying pytorch model's number of encoder layers to: zdecoder_layers z7Modifying pytorch model's number of decoder layers to: )r   hasattrr   loggerinfor    Zdecoder_layers)r   configr   r   r   modify9   s   



zConfigModifier.modifyc                 C   r   r   r   r   r   r   r   get_layer_numF   r   zConfigModifier.get_layer_numN)r   r   r   r   r%   r&   r   r   r   r   r   5   s    r   float32TFc	              	      sX  t  }	|rt jj|	_nt jj|	_|rd|	_|dkr&||	_t	d|	j  |r,d|	_
nd|	_
|t  v r9|g}
n=|rs|dkrDddg}
n2|dkrMd	dg}
n)|d
krVg d}
n |dks^|d u rcddg}
n|dkrlg d}
n
td| dg}
 r fdd|
D }
|r|	dd d }zt j| |	|
d}W |S  ty   td|  d|
  Y |S w )NTr   z%Session option: intra_op_num_threads=   dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProviderZmigraphx)MIGraphXExecutionProviderr.   r,   cudaCUDAExecutionProviderZtensorrt)ZTensorrtExecutionProviderr1   r,   z)The execution provider is not supported: c                    s$   g | ]}| v r| | fn|qS r   r   ).0nameprovider_optionsr   r   
<listcomp>   s   $ z.create_onnxruntime_session.<locals>.<listcomp>z(mlas.enable_gemm_fastmath_arm64_bfloat161)	providerszFailed to create session for z with providers=)onnxruntimeZSessionOptionsZGraphOptimizationLevelZORT_ENABLE_ALLZgraph_optimization_levelZORT_ENABLE_BASICenable_profilingZintra_op_num_threadsr"   debugZlog_severity_levelget_available_providersRuntimeErrorZadd_session_config_entryZInferenceSession	Exception	exception)Zonnx_model_pathuse_gpuproviderZenable_all_optimizationnum_threadsr:   verboseZ(enable_mlas_gemm_fastmath_arm64_bfloat16r5   Zsess_optionsr8   sessionr   r4   r   create_onnxruntime_sessionP   sN   





rE   c                 C   s8   | rt jddd d S t jdd tdtj d S )NDEBUGz8[%(filename)s:%(lineno)s - %(funcName)20s()] %(message)s)levelfmtz%(message)s)rH   transformers)coloredlogsinstalllogging	getLoggersetLevelWARNING)rC   r   r   r   setup_logger   s   
rP   c                 C   s   | rt j| st |  |rt j|st | |r:|dkr+dt v s*J dntt g dr:J dt	dt
j  t	dtj  t	dtj  tt
jtd	ksbJ ttjtd
ksoJ ttjtd	ks|J d S )Nr*   r+   zBPlease install onnxruntime-directml package to test GPU inference.)r1   r.   r/   zWPlease install onnxruntime-gpu package, or install ROCm support, to test GPU inference.zPyTorch Version:zTransformers Version:zOnnxRuntime Version:z1.10.0z4.12.0)ospathexistsmakedirsr9   r<   set
isdisjointr"   r#   torch__version__rI   r	   parse)	cache_dir
output_dirr@   rA   r   r   r   prepare_environment   s(   

r\   c                 C   s   t | tt|  d }tj| tjdd }|d|  }t| |dt| dd dt| dd dt| dd d|d|ddS )Ng     @@)dtypez.2fZ   _   c   )
test_timeslatency_variancelatency_90_percentilelatency_95_percentilelatency_99_percentileaverage_latency_msQPS)sumfloatlennumpyvarZfloat64Z
percentile)latency_list
batch_sizeZ
latency_msrb   Z
throughputr   r   r   get_latency_result   s   ro   c                 C   sv   t |dddd!}g d}tj||d}|  | D ]}|| qW d    n1 s,w   Y  td|  d S )Na asciimodenewlineencoding)enginer	   r8   device	precision	optimizer
io_binding
model_nameinputsthreadsrn   sequence_lengthcustom_layer_numr   ra   rg   rf   rb   rc   rd   re   
fieldnamesz&Detail results are saved to csv file: )opencsv
DictWriterwriteheaderwriterowr"   r#   )resultscsv_filenamecsv_filecolumn_names
csv_writerresultr   r   r   output_details   s   r   c                    s  t |dddd}g d g }|jD ]"}|jdgkr#|d|  q|jD ]}|d| d|  q&qtj| | d}|  |jD ]}d	D ]}	|jD ]}
d
D ]}|j	D ]y}i }| D ]k}|d |kr|d |	kr|d |
kr|d |kr|d |krȇ fdd|
 D }|s|| |t|d n D ]}|| || ksJ q|d }|d }|r|d |d| d| < q]|d |d| < q]|r|| qWqRqNqIqEW d    n1 sw   Y  td|  d S )Nrp   rq   rr   rs   )r|   r}   r   rw   r	   r8   rx   ry   rz   r{   r~   bZ_sr   )         )TFrq   r|   r}   rw   r{   r~   c                    s   i | ]\}}| v r||qS r   r   )r2   kvheader_namesr   r   
<dictcomp>  s    z"output_summary.<locals>.<dictcomp>rn   r   rf   z'Summary results are saved to csv file: )r   Zbatch_sizesZsequence_lengthsappendr   r   r   modelsZenginesrB   itemsupdatedictfromkeysr   r"   r#   )r   r   argsr   Z
data_namesrn   r   r   r|   Zinput_countZengine_namer{   r~   rowr   headersr   r   sr   r   r   output_summary   sZ   






6r   c                 C   s   t |ddddO}ddddgttt|   }tj||d	}|  | D ]'}t	t
 | | d< tj| | d< tj| | d< || | d< || |  q(W d    n1 sZw   Y  td
|  d S )Nrp   rq   rr   rs   Zmodel_filenamer   rI   rW   r   z(Fusion statistics is saved to csv file: )r   listnextitervalueskeysr   r   r   strr   nowrI   rX   rW   r   r"   r#   )Zmodel_fusion_statisticsr   r   r   r   keyr   r   r   output_fusion_statistics*  s&   r   c                    sd   i }t j fddd|d t j fddd|d}|| |ddi |t|| |S )Nc                          d  S r   runr   
ort_inputsort_sessionr   r   <lambda>@      zinference_ort.<locals>.<lambda>r   numberrepeatc                      r   r   r   r   r   r   r   r   A  r   r{   F)timeitr   r   ro   )r   r   result_templaterepeat_timesrn   warm_up_repeatr   rm   r   r   r   inference_ort>  s   
r   c              
      s  i }   |D ]&}t|| |	}tt|| j|
} ||j	j
d||j|  qt|dkr;t|||	 t|D ]\}} ||| j	j
dtj|| j||   q?tj fddd|d tj fddd|d}|| |ddi |t|| |S )	Nr   c                      
     S r   Zrun_with_iobindingr   r{   r   r   r   r   u     
 z/inference_ort_with_io_binding.<locals>.<lambda>r   r   c                      r   r   r   r   r   r   r   r   {  r   r{   T)r{   rW   Z
from_numpytoIO_BINDING_DATA_TYPE_MAPgetr   r]   Z
bind_inputrx   typeshapeZdata_ptrrj   allocateOutputBuffers	enumerateZbind_outputrk   r'   r   r   r   ro   )r   r   r   r   Zort_output_namesZort_outputsoutput_buffersoutput_buffer_max_sizesrn   rx   Z	data_typer   r   r3   Znp_inputZ
input_typeiZort_output_namerm   r   r   r   inference_ort_with_io_bindingH  sL   	

	
r   c                 C   s&   |D ]}|  tj|tj|d qd S )N)r]   rx   )r   rW   emptyr'   )r   r   rx   r   r   r   r   r     s   r   {   c                 C   s<   t |  tj |  t|  tj|  tj|  dS )z5Set random seed manually to get deterministic resultsN)randomseedrk   rW   Zmanual_seedr0   Zmanual_seed_all)r   r   r   r   set_random_seed  s
   

r   returnc               
   C   s   ddl m} m}m}m}m}m}m} z>|  g }| }t|t	s#W d S t
|D ]#}	|||	}
t|
tr8 W d S ||	|||	|
j|
j|
jd q'|  |W S  | yh } ztd| W Y d }~d S d }~ww )Nr   	NVMLErrornvmlDeviceGetCountnvmlDeviceGetHandleByIndexnvmlDeviceGetMemoryInfonvmlDeviceGetNamenvmlInitnvmlShutdown)idr3   totalfreeused-Error fetching GPU information using nvml: %s)py3nvml.py3nvmlr   r   r   r   r   r   r   
isinstanceintranger   r   r   r   r   print)r   r   r   r   r   r   r   r   device_countr   r#   errorr   r   r   get_gpu_info  s4   $



	
r   c                   @   s@   e Zd Zd
ddZdd Zedeeee	f  dB fdd	Z
dS )MemoryMonitorTc                 C   r   r   )keep_measuringr   r   r   r   r   r     r   zMemoryMonitor.__init__c                 C   sB   dd l }d}	 t||t  jd }td | js 	 |S q)Nr   T   {Gzt?)	psutilmaxProcessrQ   getpidZmemory_infoZrssr   r   )r   r   	max_usager   r   r   measure_cpu_usage  s   zMemoryMonitor.measure_cpu_usager   Nc                 C   s   t  r   )NotImplementedErrorr   r   r   r   measure_gpu_usage  s   zMemoryMonitor.measure_gpu_usageT)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s
    
$r   c                       s<   e Zd Zd fdd	Zdeeeef  dB fddZ  Z	S )	CudaMemoryMonitorTc                    s   t  | d S r   )superr   r   	__class__r   r   r     s   zCudaMemoryMonitor.__init__r   Nc           
   
      s>  ddl m}m}mm}mm}m} g g  zo|  | }t|t	s-t
d|  W d S dd t|D fddt|D  	 t|D ]%}||}t|tr`t
d|   W d S t| |jd	 |< qGtd
 | jsunqC|   fddt|D W S  |y }	 zt
d|	 W Y d }	~	d S d }	~	ww )Nr   r   z*nvmlDeviceGetCount result is not integer: c                 S      g | ]}d qS r   r   r2   r   r   r   r   r6         z7CudaMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>c                    s   g | ]} |qS r   r   r   )r   r   r   r   r6     s    Tz%nvmlDeviceGetMemoryInfo returns str: r   r   c                        g | ]}| | | d qS )Z	device_idr3   max_used_MBr   r   gpu_namemax_gpu_usager   r   r6         r   )r   r   r   r   r   r   r   r   r   r   r"   r   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r#   r   r   )r   r  r   r   r   r     s>   $



z#CudaMemoryMonitor.measure_gpu_usager   )
r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r     s    &r   c                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	RocmMemoryMonitorTc                    sl   t  | d}tj|r|tjvrtj| zdd l}|| _| j  W d S  t	y5   d | _Y d S w )Nz/opt/rocm/libexec/rocm_smir   )
r   r   rQ   rR   rS   sysr   rocm_smiZinitializeRsmiImportError)r   r   Zrocm_smi_pathr  r   r   r   r     s   
zRocmMemoryMonitor.__init__c                 C   s(   | j d u rdS | j |dd d d S )Nr(   ZVRAMr   i   )r  Z
getMemInfo)r   devr   r   r   get_used_memory  s   
z!RocmMemoryMonitor.get_used_memoryc                    s   | j d u rd S | j d urt| j  nd}dd t|D dd t|D  	 t|D ]}t| | ||< q,td | jsDnq( fddt|D S )Nr   c                 S   r   r   r   r   r   r   r   r6     r   z7RocmMemoryMonitor.measure_gpu_usage.<locals>.<listcomp>c                 S   s   g | ]}d | qS )ZGPUr   r   r   r   r   r6     s    Tr   c                    r   r   r   r   r   r   r   r6   &  r  )	r  rj   ZlistDevicesr   r   r	  timer   r   )r   r   r   r   r   r   r     s   

z#RocmMemoryMonitor.measure_gpu_usager   )r   r   r   r   r	  r   r  r   r   r   r   r    s    r  r0   c              	   C   sD  d }|dkr	t }nt}|d}| r|d ur|}n| }|d u r"d S |d u r(|S t }| }||j}z||}	|	 }
W d|_| }nd|_| }w |d u r]	 W d    d S td| d|  t	|dkrt	|dkrt	|t	|krd}t
|D ]\}}|d }|| d }|| }t||}q|W  d    S W d    d S W d    d S W d    d S 1 sw   Y  d S |d ur|}n| }|d u r|S t >}| }||j}z||}	|	 }
W d|_| }nd|_| }w td|d	d
|d	d || W  d    S 1 sw   Y  d S )Nr-   FzGPU memory usage: before=z  peak=r   r   r   zCPU memory usage: before=z.1fz
 MB, peak=z MB)r  r   r   r   Zsubmitr   r   r"   r#   rj   r   r   r   )Zis_gpufuncZmonitor_typeZstart_memoryZmemory_monitor_typeZmonitorZmemory_before_testexecutorZ
mem_threadZ	fn_thread_r   Zmax_usedr   Zmemory_beforebeforeafterr   r   r   r   measure_memory0  s   



(






&r  c                  C   sL   g d} d}| D ]}t |}|d u rq|r|d7 }|| d| 7 }q|S )N)ZORT_DISABLE_FUSED_ATTENTIONZ!ORT_ENABLE_FUSED_CAUSAL_ATTENTIONZ!ORT_DISABLE_FUSED_CROSS_ATTENTIONZORT_DISABLE_TRT_FLASH_ATTENTIONZ&ORT_DISABLE_MEMORY_EFFICIENT_ATTENTIONZORT_TRANSFORMER_OPTIONSZORT_CUDA_GEMM_OPTIONSrq   ,=)rQ   getenv)Z	env_namesenvr3   r   r   r   r   get_ort_environment_variablest  s   	
r  r   r   r   )r   )r0   N)6r   rL   rQ   r   r  r
  r   abcr   r   concurrent.futuresr   r   enumr   r   typingr   rJ   rk   rW   rI   	packagingr	   r9   rM   r   r"   r
   r   r   r'   r   rE   rP   r\   ro   r   r   r   r   Zlonglongr   r   r   r   r   r   r   r   r   r  r  r  r   r   r   r   <module>   sf   

	

G
":

=
&2
+D