o
    iX                     @   sD  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZ	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlZe e!Z"de j#fd	d
Z$de j#fddZ%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+dd Z,dd Z-e!dkre-  dS dS )    N)measure_memorysetup_logger)get_library_path)ORTModelForSpeechSeq2Seq)ProfilerActivityprofilerecord_function)trange)AutoModelForSpeechSeq2SeqWhisperConfigWhisperProcessorargsc                    s   j dvr	td fdd fdd j j j j j jd} j dkrq| D ]\}}t	j
|gd	|v r<t	jnt	jd
||< q- jrSt	j
 jgt	jd
|d<  jrbt	j
 jgt	jd
|d<  jrqt	j
 jgt	jd
|d< td j  fdd}t | j | j} jr||d< |S td  j dkrdnd fdd}t || ||} j dkr||d< |S |j jrtjntj jd|d<  j|d< d|d< d|d<  jr j|d< |S )N>   hf-ortorthf-pt-eagerhf-pt-compilez/Unable to auto-detect inputs for provided modelc                     s   t  j} t | } | S N)whisperZ
load_audio
audio_pathZpad_or_trimaudior    k/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/whisper/benchmark.pyload_via_ffmpeg$   s   
z#get_inputs.<locals>.load_via_ffmpegc                     sV   t  jd} tjt|  tjd}t|g}W d    |S 1 s$w   Y  |S )Nrbdtype)openr   npZasarraylistreadZuint8array)fr   r   r   r   load_via_numpy)   s   
z"get_inputs.<locals>.load_via_numpy)
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   Zpenaltyr   decoder_input_idslogits_processortemperaturezLoad audio: c                    s   | r S   S r   r   )Zonnx_e2e)r   r$   r   r   <lambda>D   s    zget_inputs.<locals>.<lambda>audio_streamzFeature extraction: r   ptc                    s    j j| g jdjS )N)Zreturn_tensorssampling_rate)	processorZfeature_extractorr1   input_featuresr   )r   return_typer   r   r.   P   s    
r3   )r   deviceinputsno_repeat_ngram_sizeTZearly_stopping	use_cacheZforced_decoder_ids)benchmark_type	Exceptionr%   r&   r'   r(   r)   r*   itemsr   r"   float32Zint32has_decoder_input_idsr+   has_logits_processorr,   has_temperaturer-   loggerinfor   time_fnhas_audio_streamtouse_fp16torchfloat16target_devicer7   )r   r6   kvZload_audio_fnZ
audio_dataZprocessor_fnr3   r   )r   r   r$   r4   r   
get_inputs    sX   

&





rK   c                 C   s  d\}}d\}}| j dv r<| jr| jn| j}t }tj|| jr#tjntj	dd
| j}t }| j dkr;t|}n2| j dv rft }| j|_|t  | jred|_d|_| jretd td ntd	| j  | j d
krt| jtu r| jd n| j}t| jtu r| jd nd }t }tj| j|||dd}t }| j dkrt d| j!  t }tj"| j!|| jgd}t }t d||  d |S )N)NN   r   r   T)Ztorch_dtyper8   r   >   r   r      r   Cannot recognize r   )providerprovider_optionsZsession_optionsZuse_io_bindingr   zLoading model from )	providerszLoaded model in  s)#r9   Zhf_pt_model_path
model_nametimer
   from_pretrainedrE   rF   rG   r<   rD   rH   compiler   ZSessionOptionsr   Zenable_profilingZregister_custom_ops_libraryr   verboseZlog_verbosity_levelZlog_severity_leveltuneZset_default_logger_severityZset_default_logger_verbosityr:   typeexecution_providertupler   hf_ort_dir_pathr@   rA   ort_model_pathZInferenceSession)r   modelZsess_options
start_timeend_timesourcerO   rP   r   r   r   	get_modelh   sf   







rb   c                 C   sV  t |tu r
|d n|}t |tu r|d n|}t| j}| jdkr(t| jnt| jt	j
dd}| jr=||}t| |D ]}|| q?| jdkrQtj| t }	| jdkr_t| jnt| jt	j
dd}
|
D ]}|| qj| jdkr|tj| t }| jdkrtd d}||	 | j }|| }td	| d
 td| d d S )Nr   rM   r   zWarm up)fileZdesccpuZ	Benchmark z	Latency: rR   zThroughput: z qps)rY   r[   rF   r5   rH   r9   rangeZwarmup_runsr	   sysstdoutrW   r@   rA   cudaZsynchronizerT   Znum_runs)r   fnr6   Zwarmup_inputsZbenchmark_inputsZtorch_deviceZwarmup_rangeoutputs_r_   Zbench_ranger`   Z
batch_sizeZlatencyZ
throughputr   r   r   rB      s@   










rB   c           	      C   s6  | j   d| j d| j d|jdd d| dtj d}d }| j dv rtt	j
t	jgddd }td || W d    n1 sGw   Y  W d    n1 sVw   Y  |jdd	j| j| jd
}tj| j| d}t|d}|| W d    |S 1 sw   Y  |S || | d}|S )N-rl   z%Y-%m-%d_%H:%M:%SrL   T)Z
activitiesZrecord_shapesZprofile_memoryZmodel_inference   )Zgroup_by_stack_n)Zsort_byZ	row_limitz.logw.json)r9   lower	precisionr5   __name__replacedatetimenowr   r   ZCPUCUDAr   Zkey_averagestableZpt_filter_byZpt_num_rowsospathjoin
log_folderr   write)	r   rj   r6   Zinputs_typeprefixfilenameZprofZ	prof_datar#   r   r   r   
profile_fn   s.   B






r   c                    s   t  }t|}|jdd   td|jd d d t  t	j
  t| jdk fdd| jd tj  d S )	Ng?)intervalzCPU usage: %rd   c                      s    S r   r   r   rj   r6   r   r   r.     s    zmeasure_fn.<locals>.<lambda>)Zis_gpufuncmonitor_type)ry   getpidpsutilProcessZcpu_percentr@   rA   gcZcollectrF   ri   Zempty_cacher   r5   r   rg   rh   flush)r   rj   r6   pidprocessr   r   r   
measure_fn   s   

 r   c           
         s  fdd fdd}|} j dkr||  jrt ||d} j dkr|d td  }jj }|d	 }tj	|rWt
d
| d|  t|tj j| jj }|d }tj	|r~t
d
| d|  t|tj j| jj }|d }tj	|rt
d
| d|  t|tj j| d S t
d t || ||\}}	t
dt|d  d t
d|	d   t || d S )Nc                    s    j di | }|S )Nr   )generate)r6   predicted_idsr^   r   r   get_pred_ids  s   z&run_hf_inference.<locals>.get_pred_idsc                    s>   | }g }t  jD ]}| jj|ddd  q||fS )NTZskip_special_tokensr   )rf   r(   appendr2   batch_decode)r6   r   transcriptionrl   )r   r   r   r   gen_and_dec  s
   z%run_hf_inference.<locals>.gen_and_decr   zgen-and-decr   rp   z-encoder.json	Renaming  to z-decoder.jsonz-decoder-with-past.jsonz
Evaluating PyTorch...Generated token length: r    tokensTranscription: )r9   r   r   lenencodersessionend_profilingry   rz   isfiler@   warningrenamer{   r|   decoderZdecoder_with_pastrA   rB   r   )
r   r6   r^   r   generate_fnnew_logname
new_prefixold_lognamer   r   r   )r   r   r^   r   run_hf_inference  s>   


r   c                    sj  d fdd	}fdd}fdd} fdd	} j d
kr!|n|}||} jrNt ||d}	 }
td|
 d|	  t|
tj	 j
|	 d S td |} jrb||dd}||f}t || ||} j d
kru| }|d } jrtd|d d   n$||d d }tdt| d  jj|d ddd }t|  t || d S )NFc                    s   dd   D }t|  }|| }t|r#td|  td|r. jr.| d | d< || }t|rG|D ]}td| d | |= q8 j	d	krs
 }|  D ]
\}}	|||	 qT D ]}
|j|
j j	 jd
 qc|S | S )Nc                 S      h | ]}|j qS r   name.0Zmodel_inputr   r   r   	<setcomp>N      z@run_ort_inference.<locals>.prepare_ort_inputs.<locals>.<setcomp>z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.r%   r&   zRemoving unnecessary input 'z' from user provided inputsrd   )Zdevice_type	device_id)rK   setkeysr   r@   errorr:   rX   rA   r5   
io_bindingr;   Zbind_cpu_inputZget_outputsZbind_outputr   r   )r6   warmupZmodel_inputsZuser_inputsZmissing_inputsZunnecessary_inputsZunnecessary_inputr   rI   rJ   outputr   r^   r   r   prepare_ort_inputsL  s*   

z-run_ort_inference.<locals>.prepare_ort_inputsc                    s     |  | S r   )Zrun_with_iobinding)r   r   r   r   with_io_bindingj  s   
z*run_ort_inference.<locals>.with_io_bindingc                    s     d | }|S r   )run)r6   rk   r   r   r   without_io_bindingo  s   z-run_ort_inference.<locals>.without_io_bindingc                    s6    j | v rt|  j kd d }| d |d  S | S )Nr   rM   )eos_token_idr   where)r   Z	first_endr   r   r   handle_outputt  s   
z(run_ort_inference.<locals>.handle_outputrd   Ze2er   r   z
Evaluating ONNX Runtime...T)r   r   r   r   r   r   )F)r5   r   r   r   r@   r   ry   r   rz   r{   r|   rA   rX   rB   Zcopy_outputs_to_cpurC   r   r2   r   printr   )r   r6   r^   r   r   r   r   r   Z
ort_inputsr   r   Zort_evaluate_inputsZort_warmup_inputsZort_outputsZactual_outputr   r   r   r   run_ort_inferenceK  s:   


r   c                 C   sD   | j dv rt| || d S | j dkrt| || d S td| j  )N>   r   r   r   r   rN   )r9   r   r   r:   )r   r6   r^   r   r   r   run_inference  s
   

r   c               	   C   s  t  } | jddtdg dd | jddtddd	 | jd
dtddg ddd | jdtddd | jdtddd | jdtddd | jddtddd	 | jddttj rYdndg dd  | jd!d"td#d$ | jd%d&td'd$ | jd(d)td*d$ | jd+td,d$ | jd-td.d/d | jd0td1d$ | jd2td#d$ | jd3td4d$ | jd5td4d$ | jd6td7d$ | jd8td7d$ | jd9td:d$ | jd;td<d=d | jd>td4d?d | jd@td7dAd | jdBdCdDdE | jdFtdGdHd | jdItdJdKd | jdLdCdDdE | jdMtt	j
dNdOd | jdPdCdDdQdR |  }tj|j t|j |j|_dS|jv re|j  dT|_|jdUkrK|jdV|jif|_n|jdWkre|j|jd4|jr\d4nd#dXf|_d|_|jdYkrs|jssJ dZ|jdSkr|jsJ d[t|j|_|S )\Nz-btz--benchmark-typeT)r   r   r   r   )rY   requiredchoicesz-mz--model-namez;Hugging Face name of model (e.g. 'openai/whisper-large-v2'))rY   r   helpz-pz--precisionfp32)Zint8fp16r   zePrecision for model. For ONNX models, the model's precision should be set before running this script.)rY   r   defaultr   r   z--hf-pt-model-pathre   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))rY   r   r   z--hf-ort-dir-pathzaPath to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-az--audio-pathz%Path to audio file for E2E evaluationz-dz--deviceri   rd   )rd   ri   Zrocm)rY   r   r   z-idz--device-idr   )rY   r   z-wz--warmup-runsrn   z-nz
--num-runs
   z--seed   z--sampling-ratei>  zSampling rate for audio (in Hz)z--max-lengthi  z--min-lengthz--num-beamsrM   z--num-return-sequencesz--length-penaltyg      ?z--repetition-penaltyz--no-repeat-ngram-size   z--decoder-input-idsz[]zThe forced decoder ids for generation. Format is [start token, timestamp token, language token, task token]. Default is [start token]. See `decoder_input_ids` in https://github.com/microsoft/Olive/tree/main/examples/whisper for details.z--logits-processorzLWhether to use timestamps logits processor or not (0 for false, 1 for true).z--temperaturez!Temperature value for generation.z	--profileF
store_true)r   actionz--pt-filter-byZself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--tunezFOnly used by ROCm EP, enable TunableOp tuning to select fastest kernel)r   r   r   r   ZExecutionProviderZCUDAExecutionProviderr   ZROCMExecutionProvider)r   Ztunable_op_enableZtunable_op_tuning_enabler   z,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path`)argparseArgumentParseradd_argumentstrrF   ri   Zis_availableintfloatry   rz   r{   
parse_argsr   randomseedZmanual_seedr5   r   r9   upperrZ   r   rX   r\   r]   astliteral_evalr+   )parserr   r   r   r   r     s   
r   c                  C   s>  t  } t| j t| j dtjj_	t
| j}t| j}| jdkr*d| j n| j}| jdk}t| d| t| d| t| d| t| dd	 t| d
|j td| j  t| }| jdkrdd | D }d|v | _t| dd|v  t| dd|v  t| dd|v  | jg kr|jg| _t| }t| || d S )NTrd   zcuda:r   r2   rH   rE   rC   Fr   zForced decoder prompt ids: r   c                 S   r   r   r   r   r   r   r   r   T  r   zmain.<locals>.<setcomp>r/   r=   r+   r>   r,   r?   r-   )r   r   rW   r@   rA   __dict__rF   backendsZcudnnZ	benchmarkr   rU   rS   r   r5   r   rr   setattrr   r+   rb   r9   rK   rC   Zdecoder_start_token_idr   )r   configr2   rH   rE   r^   Zort_model_inputsr6   r   r   r   main=  s2   






r   __main__).r   r   ru   r   loggingry   rg   rT   numpyr   r   rF   r   Zbenchmark_helperr   r   Zonnxruntime_extensionsr   Zoptimum.onnxruntimer   Ztorch.profilerr   r   r   Ztqdmr	   Ztransformersr
   r   r   Zonnxruntimer   	getLoggerrs   r@   	NamespacerK   rb   rB   r   r   r   r   r   r   r   r   r   r   r   <module>   sD   
HC1@X	 $
