o
    i3                     @   s  d dl Z d dlZd dlZd dlZd dlmZ 	 dddZdd Zdd Zd	d
 Z	dd Z
			dddZdd Zdd Zdd Zedkr}e Zede d dlmZ eej ejsfejsaJ deeZnejZd dlmZ eeeZeD ]Zee qvdS dS )    N)TensorProtoc                 C   s  t  }|jdddtdd |jdddtdd |jd	d
dtddd |jdddtddd |jddtddd |jddtddd |jddtddd |jddtddd |jddtddd |jddtd d d |jd!dtd d"d |jd#dtd d$d |jd%dd&g d'd(d) |jd*d+dd,d-d. |jdd/ |jd0dtd1d2d |jd3dd,d4d. |jdd5 |jd6dd,d7d. |jdd8 |jd9d:dd,d; |jdd< || S )=Nz-iz--inputFz2Set the input file for reading the profile results)requiredtypehelpz-mz--modelzIonnx model path to run profiling. Required when --input is not specified.z-bz--batch_size   zbatch size of input)r   r   defaultr   z-sz--sequence_length    zsequence length of inputz--past_sequence_lengthzpast sequence length for gpt2z--global_lengthz&number of global tokens for longformerz	--samplesi  z\number of samples to test. Set it large enough to reduce the variance of performance result.z--thresholdg{Gz?zfThreshold of run time ratio among all nodes. Nodes with larger ratio will show in top expensive nodes.z--thread_numznumber of threads to usez--input_ids_namez"input name for input IDs, for bertz--segment_ids_namez$input name for segment IDs, for bertz--input_mask_namez'input name for attention mask, for bertz--dummy_inputsr   )bertgpt2
longformerr   zEType of model inputs. The default will create dummy inputs with ones.)r   r   choicesr   z-gz	--use_gpu
store_truezuse GPU)r   actionr   )use_gpuz
--providercudazExecution provider to usez--basic_optimizationz_Enable only basic graph optimizations. By default, all optimizations are enabled in OnnxRuntime)basic_optimizationz--kernel_time_onlyz.Only include the kernel time and no fence time)Zkernel_time_onlyz-vz	--verbose)r   r   )verbose)argparseArgumentParseradd_argumentstrintfloatset_defaults
parse_args)argvparser r   [/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/profiler.pyparse_arguments   s   		
r    c                 C   sD   ddl m} || ||| |dd}|D ]}|d |}	q| }
|
S )Nr   )create_onnxruntime_sessionT)Zenable_all_optimizationnum_threadsZenable_profiling)benchmark_helperr!   runZend_profiling)Zonnx_model_pathr   providerr   
thread_num
all_inputsr!   sessionZinputs_profile_filer   r   r   run_profile   s   	r+   c                 C   s&   t | dtkrt| | dS d S )Nvalue)r   Z
WhichOneofr   getattr)dimr   r   r   get_dim_from_type_proto   s   &r/   c                 C   s   dd | j jjD S )Nc                 S   s   g | ]}t |qS r   )r/   ).0dr   r   r   
<listcomp>   s    z-get_shape_from_type_proto.<locals>.<listcomp>)tensor_typeshaper.   )Z
type_protor   r   r   get_shape_from_type_proto   s   r5   c                    s  i  |   D ]m}t|j}g }t|D ]\}}t|tr!|| qt|dkr+ dS t|dkr7|||d < t|dkrC|||d < |jjj	}	|	t
jt
jt
jfv sTJ |	t
jkr\tjn
|	t
jkrdtjntj}
tj||
d}| |j< q fddt|D }|S )a  Create dummy inputs for ONNX model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        samples (int): number of samples

    Returns:
        List[Dict]: list of inputs
       Nr   r   Zdtypec                       g | ]} qS r   r   r0   r)   dummy_inputsr   r   r2          z'create_dummy_inputs.<locals>.<listcomp>)'get_graph_inputs_excluding_initializersr5   r   	enumerate
isinstancer   appendlenr3   	elem_typer   FLOATINT32INT64numpyfloat32int64int32onesnamerange)
onnx_model
batch_sizesequence_lengthsamplesgraph_inputr4   Zsymbol_dimsir.   rB   	data_typedatar'   r   r:   r   create_dummy_inputs   s0   




rU   c                 C   sB   ddl m}m} || |||\}	}
}||||dd|	|
|dd	}|S )a-  Create dummy inputs for BERT model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        samples (int): number of samples
        input_ids_name (str, optional): Name of graph input for input IDs. Defaults to None.
        segment_ids_name (str, optional): Name of graph input for segment IDs. Defaults to None.
        input_mask_name (str, optional): Name of graph input for attention mask. Defaults to None.

    Returns:
        List[Dict]: list of inputs
    r   )find_bert_inputsgenerate_test_data{   F)Z
test_casesseedr   	input_idssegment_ids
input_maskZrandom_mask_length)Zbert_test_datarV   rW   )rM   rN   rO   rP   input_ids_namesegment_ids_nameinput_mask_namerV   rW   rZ   r[   r\   r'   r   r   r   create_bert_inputs   s   r`   c                    s   ||||| d}i  |   D ]V}t|j}t|D ]\}}	t|	tr4|	|vr.td|	 ||	 ||< q|jjj}
|
t	j
t	jt	jfv sFJ |
t	j
krNtjn
|
t	jkrVtjntj}tj||d}| |j< q fddt|D }|S )a  Create dummy inputs for GPT-2 model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        past_sequence_length (int): past sequence length
        samples (int): number of samples

    Raises:
        RuntimeError: symbolic is not supported. Use the tool convert_to_onnx.py to export ONNX model instead.

    Returns:
        List[Dict]: list of inputs
    )rN   Zseq_lenZpast_seq_lenZtotal_seq_lensymbol is not supported: r7   c                    r8   r   r   r9   r:   r   r   r2   7  r<   z&create_gpt2_inputs.<locals>.<listcomp>)r=   r5   r   r>   r?   r   RuntimeErrorr3   rB   r   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   )rM   rN   rO   past_sequence_lengthrP   symbolsrQ   r4   rR   r.   rB   rS   rT   r'   r   r:   r   create_gpt2_inputs  s0   



re   c                    s  ||d}i  |   D ]m}t|j}t|D ]\}}	t|	tr0|	|vr*td|	 ||	 ||< q|jjj}
|
t	j
t	jt	jfv sBJ |
t	j
krJtjn
|
t	jkrRtjntj}d|jv rltj||d}d|ddd|f< ntj||d}| |j< q fddt|D }|S )	a  Create dummy inputs for Longformer model.

    Args:
        onnx_model (OnnxModel): ONNX model
        batch_size (int): batch size
        sequence_length (int): sequence length
        global_length (int): number of global tokens
        samples (int): number of samples

    Raises:
        RuntimeError: symbolic is not supported. Use the tool convert_longformer_to_onnx.py to export ONNX model instead.

    Returns:
        List[Dict]: list of inputs
    )rN   rO   ra   globalr7   r   Nc                    r8   r   r   r9   r:   r   r   r2   f  r<   z,create_longformer_inputs.<locals>.<listcomp>)r=   r5   r   r>   r?   r   rb   r3   rB   r   rC   rD   rE   rF   rG   rH   rI   rK   ZzerosrJ   rL   )rM   rN   rO   global_lengthrP   rd   rQ   r4   rR   r.   rB   rS   rT   r'   r   r:   r   create_longformer_inputs;  s.   





rh   c                 C   s  | j dkr| j ntjdd}dtjvrt|tjd< ddlm} ddlm	} ||| j
}d }| jdkrEt|| j| j| j| j| j| j}n.| jdkrWt|| j| j| j| j}n| jd	krit|| j| j| j| j}n
t|| j| j| j}t| j
| j| j| j| j |}|S )
Nr   F)ZlogicalZOMP_NUM_THREADS)load)	OnnxModelr
   r   r   )r&   psutil	cpu_countosenvironr   onnxri   rM   rj   modelr;   r`   rN   rO   rP   r]   r^   r_   re   rc   rh   rg   rU   r+   r   r%   r   )argsr"   ri   rj   rM   r'   r*   r   r   r   r$   j  sV   


	
	r$   __main__	Arguments)setup_loggerzMrequires either --model to run profiling or --input to read profiling results)process_results)N)NNN)r   rm   rF   rk   ro   r   r    r+   r/   r5   rU   r`   re   rh   r$   __name__	argumentsprintr#   rt   r   inputrp   r*   Zprofile_result_processorru   resultsliner   r   r   r   <module>   sD    
	 /
)0/6




