o
    i&1                     @   s   d Z ddlZddlZedZdddZdd Zddd	ZdddZdd Z	dd Z
edkrQe Zede ddlmZ eej ejZe
eeZeD ]Zee qJdS dS )zThis profiler result processor print out the kernel time spent on each Node of the model.
Example of importing profile result file from onnxruntime_perf_test:
    python profile_result_processor.py --input profile_2021-10-25_12-02-41.json
    N)ZScanZLoopIfc                 C   s   t  }|jdddtdd |jddtddd	 |jd
dtddd	 |jddddd |jdd |jddddd |jdd || S )Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.)r   r   defaultr   z
--providercudazExecution provider to usez--kernel_time_only
store_truez.Only include the kernel time and no fence time)r   actionr   )kernel_time_onlyz-vz	--verbose)r   r	   )verbose)argparseArgumentParseradd_argumentstrfloatset_defaults
parse_args)argvparser r   k/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/profile_result_processor.pyparse_arguments   s@   
r   c                 C   sT   t d|  d t| }t|}W d    n1 sw   Y  t|ts(J |S )Nzloading profile output z ...)printopenjsonload
isinstancelist)profile_fileZopened_file	sess_timer   r   r   load_profile_json;   s   
r    c                 C   sL  i }i }i }d}d}| D ]i}|d dkr|d dkrd}|sq|d dkrud	|v rud
|v rud|d
 v ru|d }|d
 d }	|	t v rBq|	sJd| d}	||v ra||  |d	 7  < ||  d7  < n|d	 ||< d||< |	||< ||d	 7 }q|s{dgS g }
|
d|d dd |
d |
d t| dd ddD ]0\}}|| }||k rq|| }|t| }|
|dd|d dd|dd|dd| 	 qi }| D ]\}}	|| }|	|v r||	  |7  < q|||	< q|
d |
d |
d  t| d!d ddD ]\}	}|| }|
|dd|d dd|	  q	|
S )"a<  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   FcatSessionnameZsession_initializationTZKerneldurargsop_name()   zNo kernel record found!z%
Top expensive kernels with Time% >= d   .2f:@----------------------------------------------------------------u&   Total(μs)	Time%	Calls	Avg(μs)	Kernelc                 S      | d S Nr)   r   xr   r   r   <lambda>x       z&parse_kernel_results.<locals>.<lambda>keyreverse10d	      Y@5.2f5d8.1fz
Group kernel time by operator:u   Total(μs)	Time%	Operatorc                 S   r.   r/   r   r0   r   r   r   r2      r3   )_NODES_TYPE_CONTAINING_SUBGRAPHappendsorteditemsr   )r   	thresholdZkernel_name_to_op_namekernel_timeZkernel_freqtotalZsession_inititemZkernel_namer&   linesdurationratiocallsavg_timeZop_timer   r   r   parse_kernel_resultsE   sf   
(

4



&rJ   Fc                 C   s  g }i }i }i }d}| D ]}|d dkrd|v rd|v rd|d v r|d  dd	 d
d	 dd	}	d|d v rj|d d dkrCd}
n|d d dkrNd}
n
|d d dkrXd}
|	|vra|
||	< n||	 |
ksiJ n|rmq|d d }|tv rxq|	|v r||	  |d 7  < ||	  d7  < n|d ||	< d||	< ||	 ||d 7 }qg d}d}|D ]?}	||	 }||	 }|t| }|| d }||	d	}||7 }||dd|dd|dd|dd|dd|dd|	  q|d|d dd  |d! |d" t| d#d$ d%d&D ]A\}	}|| }||k rq||	 }|t| }|| d }||	d	}||dd|dd|dd|dd|dd|	  q|S )'a  Parse profile data and output nodes in two sections - nodes in the original order, and top expensive nodes.

    Args:
        sess_time (List[Dict]): profile data
        kernel_time_only (bool, optional): Only include items for kernel time. Defaults to False.
        threshold (int, optional): Minimum ratio of duration among all. Defaults to 0.

    Returns:
        List[str]: lines of string for output.
    r   r!   Noder$   r%   r&   r#   Z_kernel_time Z_fence_beforeZ_fence_afterproviderZCPUExecutionProviderZCPUZCUDAExecutionProviderCUDAZDmlExecutionProviderZDMLr)   )z
Nodes in the original order:r-   u3   Total(μs)	Time%	Acc %	Avg(μs)	Calls	Provider	Nodeg        r9   r7   r8   r:   r<   r;   8sz#
Top expensive nodes with Time% >= r*   r+   r,   r-   u-   Total(μs)	Time%	Avg(μs)	Calls	Provider	Nodec                 S   r.   r/   r   r0   r   r   r   r2      r3   z$parse_node_results.<locals>.<lambda>Tr4   )replacer=   r>   r   getr?   r@   )r   r
   rA   Znode_name_listZ	node_timeZ	node_freqZnode_providerrC   rD   Z	node_nameZdevicer&   rE   Zbefore_percentagerF   rH   rI   
percentagerM   rG   r   r   r   parse_node_results   sr   (

6


:rS   c                 C   s:  i }i }d}i }i }i }i }d}i }	| D ]}
|
d dkrd|
v rd|
v rd|
d v r|
d d }|t v r5qd|
d vr]d|
d	 v r\||v rP||  |
d 7  < n|
d ||< ||
d 7 }q|
d dd
}||	v rr|	|  d7  < nd|	|< | d| }||v r||  |
d 7  < ||  d7  < n
|
d ||< d||< ||v r||  |
d 7  < n|
d ||< ||v r||  |
d 7  < ||  d7  < n
|
d ||< d||< ||
d 7 }qd
dg}|d |d t| dd ddD ]G\}}||d}|| }|| }|||  }|| }|| }||dd|d dd|dd|d dd|dd|dd|dd|  q|d
dg7 }|d |d t| dd ddD ]C\}}|d}|d }|d }|dd
}|| }|| }|||  }||dd|d dd|dd|dd|d d|  qW|S )!zGroup results by operator name.

    Args:
        sess_time (List[Dict]): profile data

    Returns:
        List[str]: lines of string for output.
    r   r!   rK   r$   r%   r&   rM   Zfencer#   rL   r)   r,   zGrouped by operatorr-   uM   Total(μs)	Time%	Kernel(μs)	Kernel%	Calls	AvgKernel(μs)	Fence(μs)	Operatorc                 S   r.   r/   r   r0   r   r   r   r2   0  r3   z$group_node_results.<locals>.<lambda>Tr4   r7   r8   r9   r:   Z11dr;   z14.1fzGrouped by provider + operatoru<   Kernel(μs)	Provider%	Calls	AvgKernel(μs)	Provider	Operatorc                 S   r.   r/   r   r0   r   r   r   r2   >  r3   ZExecutionProviderz9.2frO   )r=   rQ   r>   r?   r@   splitrP   )r   Zop_kernel_timeZop_kernel_recordsZtotal_kernel_timeZprovider_op_kernel_timeZprovider_op_kernel_recordsZprovider_kernel_timeZop_fence_timeZtotal_fence_timeZprovider_counterrD   r&   rM   r5   rE   rB   Z
fence_timeZkernel_time_ratio
total_timeZ
time_ratioZkernel_callsZavg_kernel_timepartsZshort_eprH   Zprovider_time_ratior   r   r   group_node_results   s   	(

F


2rW   c                 C   s8   t | }t||j}|t||j|j7 }|t|7 }|S N)r    rJ   rA   rS   r
   rW   )r   r%   Zprofile_recordsrE   r   r   r   process_resultsM  s
   rY   __main__	Arguments)setup_loggerrX   )r   )Fr   )__doc__r   r   	frozensetr=   r   r    rJ   rS   rW   rY   __name__	argumentsr   Zbenchmark_helperr\   r   inputr   resultsliner   r   r   r   <module>   s*   
*


OWb



