o
    i                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZd dl	Z	d dl
mZ dddddd	d
ddd	ZdddddZdd Zdd ZdWddZdededefddZdedededefd d!Zd"eded#ed$edef
d%d&Z	'dXd#ed(ed)efd*d+Zd,efd-d.Z	'dXd#ed(efd/d0Z	'dXdeded1ed#eded2ed3ed$ed4ed5ed6ed)efd7d8Z		9	'dYdededed:efd;d<Z	'	'dZd#ed(efd=d>Z	'	'dZdeded1ed#eded2ed3ed$ed4ed5ed:ed)efd?d@Z	'	9d[dAedBed#eded2ed3ed$ed4ed5edCedDedEefdFdGZ	'	9	'd\dAedBeded#eded2ed3ed$ed4ed5edCedDedEed)efdHdIZ 	'	9	'd\dAedBed#eded2ed3ed$ed4ed5edCedDed)efdJdKZ!	'	9	'd\dAedBed#eded2ed3ed$ed4ed5edCedDed)efdLdMZ"	9d]ded#edededed2ed3ed$ed4ed5ed)efdNdOZ#dPdQ Z$d]dRdSZ%dTdU Z&e'dVkrd dl(Z(ze&  W dS  e)y   e(j*e+   Y dS w dS )^    N)Pathmeasure_memoryzrunwayml/stable-diffusion-v1-5zstabilityai/stable-diffusion-2z stabilityai/stable-diffusion-2-1z+stabilityai/stable-diffusion-xl-refiner-1.0z/stabilityai/stable-diffusion-3-medium-diffusersz'stabilityai/stable-diffusion-3.5-mediumz&stabilityai/stable-diffusion-3.5-largez black-forest-labs/FLUX.1-schnellzblack-forest-labs/FLUX.1-dev)	1.5z2.02.1zxl-1.0z3.0Mz3.5Mz3.5LzFlux.1SzFlux.1DCUDAExecutionProviderROCMExecutionProviderZMIGraphXExecutionProviderZTensorrtExecutionProvider)cudarocmZmigraphxtensorrtc                  C   s   g d} d}| |fS )N)
z.a photo of an astronaut riding a horse on marsz@cute grey cat with blue eyes, wearing a bowtie, acrylic paintingzia cute magical flying dog, fantasy art drawn by disney concept artists, highly detailed, digital paintingzdan illustration of a house with large barn with many cute flower pots and beautiful blue sky sceneryzgone apple sitting on a table, still life, reflective, full color photograph, centered, close-up productzWbackground texture of stones, masterpiece, artistic, stunning photo, award winner photozSnew international organic style house, tropical surroundings, architecture, 8k, hdrznbeautiful Renaissance Revival Estate, Hobbit-House, detailed painting, warm colors, 8k, trending on Artstationzcblue owl, big green eyes, portrait, intricate metal design, unreal engine, octane render, realisticzldelicate elvish moonstone necklace on a velvet background, symmetrical intricate motifs, leaves, flowers, 8kz*bad composition, ugly, abnormal, malformed )promptsnegative_promptr   r   t/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/stable_diffusion/benchmark.pyexample_prompts(   s   r   c                   C   s   dS )N)zwarm upbadr   r   r   r   r   warmup_prompts;   s   r   c                 C   s   t d|| |dS )NT)Zis_gpufuncmonitor_typestart_memoryr   )r   r   r   r   r   r   measure_gpu_memory?   s   r   
model_name	directorydisable_safety_checkerc           	      C   s   ddl m}m} dd l}|d ur%tj|sJ | }|j|||d}n	|j| d|dd}|	|j
j|_
|jdd |rDd |_d |_|S )Nr   )DDIMSchedulerOnnxStableDiffusionPipeline)providerZsess_optionsZonnxT)revisionr   Zuse_auth_tokendisable)	diffusersr   r   onnxruntimeospathexistsZSessionOptionsfrom_pretrainedfrom_config	schedulerconfigset_progress_bar_configsafety_checkerfeature_extractor)	r   r   r   r   r   r   r!   Zsession_optionspiper   r   r   get_ort_pipelineC   s,   r-   enable_torch_compileuse_xformersc                 C   s`  d| v r+ddl m} |j| tjdd}|r)|jjtjd tj|jddd	|_|S d
| v rVddl m	} |j| tjdd}|rT|jjtjd tj|jddd	|_|S ddl m
}m} ddlm}	m}
 |j| |
dd}|jj|	d |r}|  |rt|j|_t|j|_t|j|_td ||jj|_|jdd |rd |_d |_|S )NZFLUXr   )FluxPipeline)Ztorch_dtyper	   )Zmemory_formatzmax-autotuneT)modeZ	fullgraphzstable-diffusion-3)StableDiffusion3Pipeline)r   StableDiffusionPipeline)channels_lastfloat16z)Torch compiled unet, vae and text_encoderr   )r    r0   r%   torchZbfloat16toZtransformerr4   compiler2   r   r3   r5   ZunetZ*enable_xformers_memory_efficient_attentionZvaeZtext_encoderprintr&   r'   r(   r)   r*   r+   )r   r   r.   r/   r0   r,   r2   r   r3   r4   r5   r   r   r   get_torch_pipelinea   s>   r:   engine
batch_sizestepsc                 C   s>   | dd dd}|  d| d| d| |rd S d	 S )
N/zstable-diffusion-sd__bZ_s Z_safe)splitreplace)r;   r   r<   r=   r   Zshort_model_namer   r   r   get_image_filename_prefix   s   (rF   Fimage_filename_prefixskip_warmupc                    s8  ddl m} t|sJ t \}} fdd}t|	||}t|	||}|  g }t|D ]K\}}||kr< nBt }|g  |g  dj}t }|| }|| t	d|dd t|D ]\}}|
| d	| d	| d
 qjq2ddlm} d| ||t|t| t|||dS )Nr   )r   c                     s4   rd S t  \} }| g  |g  d d S )Npromptheightwidthnum_inference_stepsr   r   rJ   negativer<   rK   r,   rH   r=   rL   r   r   warmup   s   

z run_ort_pipeline.<locals>.warmuprI   Inference took .3f secondsrA   .jpg__version__r!   r;   versionrK   rL   r=   r<   batch_countnum_promptsaverage_latencymedian_latencyfirst_run_memory_MBsecond_run_memory_MB)r    r   
isinstancer   r   	enumeratetimeimagesappendr9   saver!   rX   sumlen
statisticsmedian)r,   r<   rG   rK   rL   r=   r\   r[   r   memory_monitor_typerH   r   r   r   rR   first_run_memorysecond_run_memorylatency_listirJ   inference_startrd   inference_endlatencykimageort_versionr   rQ   r   run_ort_pipeline   sT   

rv   returnc                 C   sF   |s|rd| ind| g| ini }t j r!t jddd|d< |S )Nr   r	   )Zdevice{   	generator)r6   r	   Zis_available	GeneratorZmanual_seed)r   use_num_images_per_promptis_fluxr<   kwargsr   r   r   get_negative_prompt_kwargs   s   

r~   c                    sV  t  \}}dd l}t|j fdd}t|	||}t|	||}|  td g }t|D ]\\}}||kr? nStj	  t

 }t|d }d|g  d|j}tj	  t

 }|| }|| td|dd t|D ]\}}|| d	| d	| d
 q~q5dtj ||t|t| t|||dS )Nr   c                     sB   rd S t  \} }t|d }d| g  d| d S )NFrJ   rK   rL   rM   r   r   r~   rJ   rP   extra_kwargsr<   rK   r|   r,   rH   r=   rL   r   r   rR     s
   
"z"run_torch_pipeline.<locals>.warmupFr   rS   rT   rU   rA   rV   r6   rY   r   )r   r    ra   r0   r   r6   set_grad_enabledrb   r	   Zsynchronizerc   r~   rd   re   r9   rf   rX   rg   rh   ri   rj   )r,   r<   rG   rK   rL   r=   r\   r[   r   rk   rH   r   r   r    rR   rl   rm   rn   ro   rJ   rp   r   rd   rq   rr   rs   rt   r   r   r   run_torch_pipeline   s\   




r   r   rK   rL   r\   r[   tuningc                 C   s   |}|r|dv r|dddf}t   }t| |||}t   }td||  d td| |||}t||||||||	|
||d}|| ||dd	|d
d |S )N)r   r      )Ztunable_op_enableZtunable_op_tuning_enableModel loading took rU   ZortrH   ExecutionProviderrC   Fr   r   r   r   enable_cuda_graph)rc   r-   r9   rF   rv   updaterE   )r   r   r   r<   r   rK   rL   r=   r\   r[   r   rk   r   rH   Zprovider_and_options
load_startr,   load_endrG   resultr   r   r   run_ort:  s<   
	r   Tuse_io_bindingc                 C   sb   ddl m} |d urtj|r|j|||d}n|j| d||d}|| |r/d |_d |_|S )Nr   )ORTPipelineForText2Image)r   r   T)Zexportr   r   )	Zoptimum.onnxruntimer   r"   r#   r$   r%   Zsave_pretrainedr*   r+   )r   r   r   r   r   r   pipeliner   r   r   get_optimum_ort_pipelinen  s   
r   c                    sx  t dt ddlm} t|t \}} f	dd}t|	||}t|	||}|  t|}g }t|D ]\\}}||krK nSt		 }r`d|d|j
}nd|g d|j
}t		 }|| }|| t d|d	d
 t|D ]\}}|| d| d| d qqAddlm} d| |t|t| t|||dS )NzPipeline typer   )ORTFluxPipelinec                     sd   rd S t  \} }t|}r!d|  d| d S d| g d| d S )NrJ   rK   rL   rM   Znum_images_per_promptr   r   r   r   	r[   r<   rK   r|   r,   rH   r=   r{   rL   r   r   rR     s   

"	z(run_optimum_ort_pipeline.<locals>.warmupr   r   rS   rT   rU   rA   rV   rW   Zoptimum_ortrY   r   )r9   typeZ&optimum.onnxruntime.modeling_diffusionr   ra   r   r   r~   rb   rc   rd   re   rf   r!   rX   rg   rh   ri   rj   )r,   r<   rG   rK   rL   r=   r\   r[   r   rk   r{   rH   r   r   r   rR   rl   rm   r   rn   ro   rJ   rp   rd   rq   rr   rs   rt   ru   r   r   r   run_optimum_ort_pipeline  sl   

	
r   c                 C   s   t   }t| ||||d}t   }td||  d |r&| d t|j n| }td||||}t||||||||	|
||d}|| ||dd|d	d
 |S )Nr   r   rU   rA   optimumr   r   rC   Fr   )	rc   r   r9   r   namerF   r   r   rE   )r   r   r   r<   r   rK   rL   r=   r\   r[   r   rk   r   rH   r   r,   r   Zfull_model_namerG   r   r   r   r   run_optimum_ort  s@   


	r   work_dirrZ   max_batch_sizenvtx_profileuse_cuda_graphc           -         sn  t d ddlm} |   |ksJ ddlm} ||}| }ddlm}m} ddl	m
} |j}|| ||\}}}}}||d|d|||||d		jj|||d
 dddtj d    fdd}t|
||	}t|
||	}|  td| |}g }t \} }!t| D ]Q\}"}#|"|kr nHt }$j|#g  |!g  ddd\}%}&t }'|'|$ }(||( t d|(dd|&  t|%D ]\})}*|*| d|" d|) d qq  ddlm}+ ddlm}, i d| ddd|,dd|+ dd |d!d"d#d$ d%|d&|d't|t | d(t!"|d)|d*|d+|d,|S )-Nzd[I] Initializing ORT TensorRT EP accelerated StableDiffusionXL txt2img pipeline (static input shape)r   init_trt_pluginsPipelineInfo
EngineTypeget_engine_pathsr3   DDIMFr'   
output_dirverboser   r   r   framework_model_direngine_type   T)opt_image_heightopt_image_widthopt_batch_sizestatic_batchZstatic_image_shapeZmax_workspace_sizeZ	device_idc                     s.   t  \} }j| g  |g  d d S N)denoising_stepsr   runrO   r<   rK   r   r=   rL   r   r   rR   \  s   
$z"run_ort_trt_static.<locals>.warmuport_trtg      @rx   r   ZguidanceseedEnd2End took rT    seconds. Inference latency: rA   rV   rW   r   r;   r!   rZ   r   z	tensorrt()r   rK   rL   r=   r<   r[   r\   r]   r^   r_   r`   r   r   )#r9   trt_utilitiesr   diffusion_modelsr   
short_nameengine_builderr   r   pipeline_stable_diffusionr3   ORT_TRTbackendZbuild_enginesr6   r	   Zcurrent_deviceload_resourcesr   rF   r   rb   rc   r   re   rf   teardownr   rX   r!   r   rg   rh   ri   rj   )-r   rZ   r<   r   rK   rL   r=   r\   r[   r   rk   r   r   r   r   r   pipeline_infor   r   r   r3   r   onnx_dir
engine_dirr   r   rA   rR   rl   rm   rG   rn   r   r   ro   rJ   rp   rd   pipeline_timerq   rr   rs   rt   trt_versionru   r   r   r   run_ort_trt_static  s   

	
	

r   c           1         s8  t d ddlm} ddlm} |   |ksJ ddlm} ||}ddlm}m	} ddl
m} |j}|| ||\}}}}}||d|d	||d
|djj|||d d
d
d	|d tj j }||\}}j|    fdd} t|| |
}!t|| |
}"|   td| |}#g }$t \}%}&t|%D ]P\}'}(|'|kr nGt })j|(g  |&g  dd\}*}+t },|,|) }-|$|- t d|-dd|+  t|*D ]\}.}/|/|# d|' d|. d qq  dd l}0d|0jd |	|t|$t |$ t!"|$|!|"|dS )N][I] Initializing TensorRT accelerated StableDiffusionXL txt2img pipeline (static input shape)r   cudartr   r   r   r   r   FT)r'   r   r   r   r   r   r   r   r   r   r   Z
onnx_opsetr   r   r   r   Zstatic_shapeZenable_all_tacticstiming_cachec                     s6   rd S t  \} }j| g  |g  d d S r   r   rO   r<   rK   r   rH   r=   rL   r   r   rR     s   
$z#run_tensorrt_static.<locals>.warmuptrtrx   )r   r   r   rT   r   rA   rV   r   default)r;   rZ   r   rK   rL   r=   r<   r[   r\   r]   r^   r_   r`   r   )#r9   r	   r   r   r   r   r   r   r   r   r   r3   TRTr   load_enginesmaxmax_device_memory
cudaMallocactivate_enginesr   r   rF   r   rb   rc   r   re   rf   r   r   rX   rg   rh   ri   rj   )1r   rZ   r   r<   r   rK   rL   r=   r\   r[   r   rk   r   r   r   rH   r   r   r   r   r   r   r3   r   r   r   r   r   r   r   rA   shared_device_memoryrR   rl   rm   rG   rn   r   r   ro   rJ   rp   rd   r   rq   rr   rs   rt   r   r   r   r   run_tensorrt_static  s   


r   c           *         sN  t d dd l}ddlm} ddlm} d dks$d dkr/td d d|  ks8J dd	lm} dd
l	m
 m  f	dd}ddlm} ||}|||tj j }||\}}j|  dfdd			
fdd}t|
||	}t|
||	}|  | }td||}g }t \}} t|D ]L\}!}"|!|kr nCt }#	|"g | g dd\}$}%t }&|&|# }'||' t d|'dd|%  t|$D ]\}(})|)| d|! d|( d qq  |d|jd||t|t| t !|||dS )Nr   r   r   r      zCImage height and width have to be divisible by 8 but specified as: z and .r   r   c           	         s\    j }||\}}}}}| |d|d||d	}|jj|||dddd|d |S )Nr   Fr   r   Tr   )r   r   r   )	Zpipeline_classr   r   r   r   r   r   r   r   )	r   r<   r   rK   r   r   r   rL   r   r   r   init_pipelineJ  s:   z-run_tensorrt_static_xl.<locals>.init_pipeliner   c              	      s   j | | d|dS Ng      @r   r   rJ   r   r   )image_heightimage_widthr   r=   r   r   run_sd_xl_inferencez     z3run_tensorrt_static_xl.<locals>.run_sd_xl_inferencec                     ,   rd S t  \} }| g  |g   d S NrN   rO   r<   r   rH   r   r   rR        
z&run_tensorrt_static_xl.<locals>.warmupr   rx   r   r   rT   r   rA   .pngr   r   r   r;   rZ   r   rK   rL   r=   r<   r[   r\   r]   r^   r_   r`   r   r   )"r9   r   r	   r   r   r   
ValueErrorr   r   r   r   r   r   r3   r   r   r   r   r   r   r   r   rF   r   rb   rc   re   rf   r   rX   rg   rh   ri   rj   )*r   rZ   r<   r   rK   rL   r=   r\   r[   r   rk   r   r   r   rH   r   r   r   r   r   r3   r   r   rA   r   rR   rl   rm   r   rG   rn   r   r   ro   rJ   rp   rd   r   rq   rr   rs   rt   r   )r   r<   r   rK   r   r   r   r   r   r   rH   r=   r   rL   r   r   run_tensorrt_static_xl#  sx   #


r   c           %         s  ddl m} ddlm} |||j| || d |ksJ   dfdd	 fdd}t|
||	}t|
||	}|  j }t	d	| |}g }t
 \}}t|D ]S\}}||kri nJt }|g  |g  d
d\}}t }|| }|| td|dd|  t|D ]\} }!| d| d|  d}"|!|" td|" qq_  ddlm}# ddlm}$ |d|$d|# d ||t|t| t||||dS )Nr   )initialize_pipeline)r   )rZ   r   r   rK   rL   r   r   r   c              	      s   j | | d|dS r   r   r   )rK   r   r=   rL   r   r   r     r   z+run_ort_trt_xl.<locals>.run_sd_xl_inferencec                     r   r   rN   rO   r   r   r   rR     r   zrun_ort_trt_xl.<locals>.warmupr   rx   r   r   rT   r   rA   r   zImage saved torW   r!   r   r   r   r   )Z
demo_utilsr   r   r   r   r   r   r   r   rF   r   rb   rc   re   r9   rf   r   r   rX   r!   rg   rh   ri   rj   )%r   rZ   r<   r   rK   rL   r=   r\   r[   r   rk   r   r   r   rH   r   r   rR   rl   rm   r   rG   rn   r   r   ro   rJ   rp   rd   r   rq   rr   rs   rt   filenamer   ru   r   )r<   rK   r   r   rH   r=   rL   r   run_ort_trt_xl  sp   




r   c                 C   s   dt jj_dt jj_t d t }t| |||}t }td||  d t	d| |||}|sVt 
  t||||||||	|
||d}W d    n1 sPw   Y  nt||||||||	|
||d}|| d |rmdn|rqdnd	|dd
 |S )NTFr   rU   r6   r   r8   Zxformersr   r   )r6   backendsZcudnnenabledZ	benchmarkr   rc   r:   r9   rF   Zinference_moder   r   )r   r<   r   r.   r/   rK   rL   r=   r\   r[   r   rk   rH   r   r,   r   rG   r   r   r   r   	run_torch!  s^   



	r   c               	   C   s  t  } | jdddtdg ddd | jdd	dtd
tt dd | jddddd | jdddttt ddd | jdddtd dd | jdddtddd | jddddd  | jdd! | jd"ddd#d  | jdd$ | jd%ddd&d  | jdd' | jd(ddd)d  | jdd* | jd+ddd,d  | jdd- | jd.d/t	d0g d1d2d3 | jd4dt	d5d6d | jd7dt	d5d8d | jd9d:dt	d;d<d | jd=d>dt	d?d@d | jdAdBdt	t
d0dCdDdEd | jdFdGdt	t
d0dHdIdJd | jdKdLdddMd  | jddN |  }|S )ONz-ez--engineFr!   )r!   r   r6   r   z-Engines to benchmark. Default is onnxruntime.)requiredr   r   choiceshelpz-rz
--providerr	   z8Provider to benchmark. Default is CUDAExecutionProvider.z-tz--tuning
store_truezsEnable TunableOp and tuning. This will incur longer warmup latency, and is mandatory for some operators of ROCm EP.)actionr   z-vz	--versionr   z>Stable diffusion version like 1.5, 2.0 or 2.1. Default is 1.5.)r   r   r   r   r   z-pz
--pipelinez[Directory of saved onnx pipeline. It could be the output directory of optimize_pipeline.py.)r   r   r   r   z-wz
--work_dirr   z?Root directory to save exported onnx models, built engines etc.z--enable_safety_checkerzEnable safety checker)r   r   r   )enable_safety_checkerz--enable_torch_compilez#Enable compile unet for PyTorch 2.0)r.   z--use_xformerszUse xformers for PyTorch)r/   z--use_io_bindingzUse I/O Binding for Optimum.r   z--skip_warmupz
No warmup.r   z-bz--batch_sizer   )r            r   
          z)Number of images per batch. Default is 1.)r   r   r   r   z--heighti   z$Output image height. Default is 512.z--widthz#Output image width. Default is 512.z-sz--steps2   zNumber of steps. Default is 50.z-nz--num_promptsr  z!Number of prompts. Default is 10.z-cz--batch_count      z(Number of batches to test. Default is 5.z-mz--max_trt_batch_sizer  r  zdMaximum batch size for TensorRT. Change the value may trigger TensorRT engine rebuild. Default is 4.z-gz--enable_cuda_graphz/Enable Cuda Graph. Requires onnxruntime >= 1.16)r   )argparseArgumentParseradd_argumentstrlist	PROVIDERSkeys	SD_MODELSset_defaultsintrange
parse_args)parserargsr   r   r   parse_argumentsf  s.  




					

r  c                    sL   dd l }|t }| D ] | rt fdddD r#t j qd S )Nr   c                 3   s    | ]}| j v V  qd S r   )r#   ).0xlibr   r   	<genexpr>  s    z)print_loaded_libraries.<locals>.<genexpr>)ZlibcuZlibnvr   )psutilProcessr"   getpidZmemory_mapsanyr9   r#   )Zcuda_related_onlyr  pr   r  r   print_loaded_libraries  s   
r!  c                  C   sN  t  } t|  | jdkrU| jdv rdtjd< ddlm} ddlm} |	||	dkr1dtjd	< | j
rU| jdkrC| jd
v rC| jd u sGtd|	||	dk rUtdtjdd | jdkrbdnd}t|d }td| t| j }t| j }| jdkr| jdkrd| jv rtd t| j| j| jd| j| j| j| j| j||| jd| j
| jd}n;td t| j| j| j| j | j| j| j| j| j||| jd| j
| jd}n| jdkr|dkrd| jv rdtjd	< t|| j|| j| j | j| j| j| j| j||| j | jd}n| jdkrC| jrtj!"| jsJ dtd| d| j#  t$|| j|| j| j | j| j| j| j| j||| j#| jd }n| jdkrrd| jv rrtd! t%| j| j| jd| j| j| j| j| j||| jd| j
| jd}nt| jdkrtd" t&d>i d#| jd$| jd%|d&| jd'dd(| jd)| jd*| jd+| jd,| jd-|d.|d/| jd0dd1| j
d2| j}n)td3| j' d4| j( d5 t)|| j| j | j'| j(| j| j| j| j| j||| jd6}t| t*d7d8d9d:}g d;}	t+j,||	d<}
|
-  |
.| W d    n	1 sw   Y  | jd=kr%t/| jd
v  d S d S )?Nr!   )r   1ZORT_DISABLE_TRT_FLASH_ATTENTIONr   )rZ   rW   z1.16.0Z!ORT_ENABLE_FUSED_CAUSAL_ATTENTION)r	   r   z:The stable diffusion pipeline does not support CUDA graph.z1.16z.CUDA graph requires ONNX Runtime 1.16 or laterz%(funcName)20s: %(message)s)fmtr
   r	   z&GPU memory used before loading models:r   ZxlzNTesting Txt2ImgXLPipeline with static input shape. Backend is ORT TensorRT EP.TF)r   rZ   r<   r   rK   rL   r=   r\   r[   r   rk   r   r   r   rH   zLTesting Txt2ImgPipeline with static input shape. Backend is ORT TensorRT EP.r   r   )r   r   r   r<   r   rK   rL   r=   r\   r[   r   rk   r   rH   z?--pipeline should be specified for the directory of ONNX modelsz/Testing diffusers StableDiffusionPipeline with z provider and tuning=)r   r   r   r<   r   rK   rL   r=   r\   r[   r   rk   r   rH   zGTesting Txt2ImgXLPipeline with static input shape. Backend is TensorRT.zETesting Txt2ImgPipeline with static input shape. Backend is TensorRT.r   rZ   r   r<   r   rK   rL   r=   r\   r[   r   rk   r   r   r   rH   zNTesting Txt2ImgPipeline with dynamic input shape. Backend is PyTorch: compile=z, xformers=r   )r   r<   r   r.   r/   rK   rL   r=   r\   r[   r   rk   rH   zbenchmark_result.csvarC   )r1   newline)r   r   r;   rZ   r   r   rK   rL   r=   r<   r[   r\   r]   r^   r_   r`   r   )
fieldnamesr   r   )0r  r9   r;   rZ   r"   environ	packagingr!   rX   parser   r   r   r   coloredlogsinstallr   r  r  r   r   r<   rK   rL   r=   r\   r[   Zmax_trt_batch_sizerH   r   r   r   r   r#   isdirr   r   r   r   r.   r/   r   opencsv
DictWriterwriteheaderwriterowr!  )r  rZ   ru   rk   r   Zsd_modelr   r   Zcsv_fileZcolumn_namesZ
csv_writerr   r   r   main  sh  












	
r2  __main__r   )F)r   TF)FF)FT)FTF)T),r  r.  r"   ri   sysrc   pathlibr   r*  r6   Zbenchmark_helperr   r  r  r   r   r   r  boolr-   r:   r  rF   rv   dictr~   r   r   r   r   r   r   r   r   r   r   r  r!  r2  __name__	traceback	Exceptionprint_exceptionexc_infor   r   r   r   <module>   s"  
-
K
W	

7
&
g	

B	
 	

 	
 $	
u	

E 
1	 
N