o
    iC                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZd dl	Z	d dl
mZmZ eeeee B f ZeeZG dd dZG dd	 d	ZG d
d dZG dd deZG dd dZdS )    N)OrderedDict)Mapping)Any)InferenceSession
RunOptionsc                   @   s   e Zd ZedededefddZededefddZedefd	d
ZedefddZ	ede
jfddZedejfddZededeee
jf fddZdS )
TypeHelperort_sessionnamereturnc                 C   :   t |  D ]\}}|j|kr|j  S qtd| d)Nzinput name 
 not found)	enumerate
get_inputsr	   type
ValueError)r   r	   _iinput r   d/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/io_binding_helper.pyget_input_type   s
   

zTypeHelper.get_input_typec                 C   r   )Nzoutput name r   )r   get_outputsr	   r   r   )r   r	   r   outputr   r   r   get_output_type   s
   

zTypeHelper.get_output_typeort_typec                 C   s:   t jt jt jt jtt jd}| |vrt|  d||  S )N)tensor(int64)tensor(int32)tensor(float)tensor(float16)tensor(bool)tensor(uint8) not found in map)numpylonglongintcfloat32float16booluint8r   )r   Zort_type_to_numpy_type_mapr   r   r   ort_type_to_numpy_type"   s   z!TypeHelper.ort_type_to_numpy_typec                 C   s@   t jt jt jt jt jt jt jd}| |vrt|  d||  S )N)r   r   r   r   ztensor(bfloat16)r   r   r    )	torchint64int32r$   r%   Zbfloat16r&   r'   r   )r   Zort_type_to_torch_type_mapr   r   r   ort_type_to_torch_type1   s   	z!TypeHelper.ort_type_to_torch_type
numpy_typec                 C   sX   t jtjt jtjt jtjt jtjt jtjttjt j	tj	i}| |vr(t
|  d||  S Nr    )r!   r"   r)   r*   r#   r+   r$   r%   r&   r'   r   )r-   Znumpy_type_to_torch_type_mapr   r   r   numpy_type_to_torch_typeA   s   	z#TypeHelper.numpy_type_to_torch_type
torch_typec                 C   sP   t jtjt jtjt jtjt jtjt jtt j	tj	i}| |vr$t
|  d||  S r.   )r)   r*   r!   r"   r+   r#   r$   r%   r&   r'   r   )r0   Ztorch_type_to_numpy_type_mapr   r   r   torch_type_to_numpy_typeQ   s   z#TypeHelper.torch_type_to_numpy_typec                 C   sH   i }|   D ]}t|j||j< q|  D ]}t|j||j< q|S )z:Create a mapping from input/output name to numpy data type)r   r   r(   r   r	   r   )r   Zname_to_numpy_typer   r   r   r   r   get_io_numpy_type_map`   s   z TypeHelper.get_io_numpy_type_mapN)__name__
__module____qualname__staticmethodr   strr   r   r(   r,   r!   dtyper/   r)   r1   dictr2   r   r   r   r   r      s    "r   c                   @   sZ   e Zd ZedefddZe	ddejdejdejdeej fd	d
Z	edddZ
dS )IOBindingHelperr   c                 C   sJ   i }|  D ]\}}t| |}t|}tjt|||d||< q|S )zpReturns a dictionary of output name as key, and 1D tensor as value. The tensor has enough space for given shape.)r8   device)itemsr   r   r,   r)   emptyr!   prod)r   output_shapesr;   output_buffersr	   shaper   r0   r   r   r   get_output_buffersm   s   
z"IOBindingHelper.get_output_buffersN	input_idsposition_idsattention_maskpastc              
   C   s  |du r	t | }|  }| sJ |d|jjd|d t| |	  |dur^t
|D ].\}	}
|
 s9J |
	 }|dkrE|	 }|d|	 |
jjd|d|	  t|
 | q/|dur|| shJ |d|jjd|d t| |	  |dur| sJ |d|jjd|d t| |	  |  D ].}|j}|| }t| d|jj dt|   |||jjd|| || |	  q|S )	z)Returnas IO binding object for a session.NrC   r   Zpast_rE   rD   z device type=z shape=)r   r2   
io_bindingis_contiguous
bind_inputr;   r   listsizedata_ptrr   r   r	   loggerdebugbind_output)r   rC   rD   rE   rF   r@   r?   Zname_to_np_typerG   iZpast_irL   r   output_nameZoutput_bufferr   r   r   prepare_io_bindingw   sv   

	
	
	

&	z"IOBindingHelper.prepare_io_bindingTc           
      C   sn   g }|   D ].}|j}|| }|| }|dt| |  }	|r/||	   q||	 q|S )z3Copy results to cpu. Returns a list of numpy array.r   )	r   r	   r!   r>   Zreshapeclonedetachappendcpu)
r   r@   r?   Zreturn_numpyZort_outputsr   rQ   bufferrA   Zcopy_tensorr   r   r   "get_outputs_from_io_binding_buffer   s    z2IOBindingHelper.get_outputs_from_io_binding_bufferN)T)r3   r4   r5   r6   r   rB   r)   TensorrJ   rR   rX   r   r   r   r   r:   l   s     		Ur:   c                   @   s   e Zd ZdZd"dedejfddZdedefd	d
Z	dd Z
dedejfddZdefddZd#deeejf dedefddZed$dedededeeef fd d!ZdS )%CudaSessionzLInference Session with IO Binding for ONNX Runtime CUDA or TensorRT providerFr   r;   c                 C   sr   || _ dd | j  D | _dd | j  D | _t| j | _| j  | _|| _	t
 | _t
 | _|| _i | _d S )Nc                 S      g | ]}|j qS r   r	   ).0r   r   r   r   
<listcomp>       z(CudaSession.__init__.<locals>.<listcomp>c                 S   r\   r   r]   )r^   r   r   r   r   r_      r`   )r   r   input_namesr   output_namesr   r2   io_name_to_numpy_typerG   enable_cuda_graphr   input_tensorsoutput_tensorsr;   buffer_sharing)selfr   r;   rd   r   r   r   __init__   s   
zCudaSession.__init__
input_namerQ   c                 C   s4   || j v sJ || jv sJ || j|< || j|< d S rY   )ra   rb   rg   )rh   rj   rQ   r   r   r   set_buffer_sharing   s   
zCudaSession.set_buffer_sharingc                 C   s   | ` | `| `d S rY   )re   rf   rG   )rh   r   r   r   __del__   s   zCudaSession.__del__r	   tensorc              	   C   s   |j jd ur
|j jnd}t|jdkrdgnt|j}| j||j j|| j| ||	  || j
v rQ| j| j
| |j j|| j| ||	  || j| j
| < d S d S )Nr      )r;   indexlenrA   rJ   rG   rI   r   rc   rL   rg   rO   rf   )rh   r	   rm   	device_idZtensor_shaper   r   r   bind_input_and_buffer_sharing   s*   
	z)CudaSession.bind_input_and_buffer_sharing
shape_dictc              
   C   sP  | j rH| D ]@\}}|| jv rG|| jv r&t| j| jt|kr"qtd| j| }tj	t|t
|dj| jd}|| j|< | || q| D ]Y\}}|| jv r|| jv rgt| j| jt|krgqL|| jv rmqL| j| }tj	t|t
|dj| jd}|| j|< | j||jj|jjdur|jjnd|t| |  qLdS )z Allocate tensors for I/O Bindingz(Expect static input shape for cuda graph)r8   )r;   Nr   )rd   r<   ra   re   tuplerA   RuntimeErrorrc   r)   r=   r   r/   tor;   rr   rb   rf   rg   rG   rO   r   ro   rJ   rK   rL   )rh   rs   r	   rA   Znumpy_dtyperm   r   r   r   allocate_buffers  sF   




"



zCudaSession.allocate_buffersNT	feed_dictrun_optionssynchronizec                 C   s   |  D ]G\}}t|tjr| sJ || jv rK| jrE| j|  | ks)J | j| j	|j	ks4J |j
jdks<J | j| | q| || q|rc| j  | j| j| | j  | jS | j| j| | jS )z$Bind input tensors and run inferencecuda)r<   
isinstancer)   rZ   rH   ra   rd   re   Znelementr8   r;   r   Zcopy_rr   rG   Zsynchronize_inputsr   Zrun_with_iobindingZsynchronize_outputsrf   )rh   rx   ry   rz   r	   rm   r   r   r   infer<  s"   


zCudaSession.inferr   rq   rd   streamr
   c                 C   s$   | d|d}|dkrt ||d< |S )NZkSameAsRequested)rq   Zarena_extend_strategyrd   r   Zuser_compute_stream)r7   )rq   rd   r~   optionsr   r   r   get_cuda_provider_optionsR  s   z%CudaSession.get_cuda_provider_optionsF)NT)r   )r3   r4   r5   __doc__r   r)   r;   ri   r7   rk   rl   rZ   rr   	ShapeDictrw   r9   r   r&   r}   r6   intr   r   r   r   r   r   r[      s    "**r[   c                       s   e Zd Z				ddedejdeded	ed
ede	e
e
f dB f fddZddedefddZdde	e
ejf def fddZ  ZS )
GpuBindingFr   Nr   r;   rs   enable_gpu_graphgpu_graph_idr~   rg   c           
         sf   t  ||| |r| D ]
\}}	| ||	 q| | || _|r(t|nd | _|| _	d | _
d S rY   )superri   r<   rk   rw   r   copydeepcopyrs   r~   last_run_gpu_graph_id)
rh   r   r;   rs   r   r   r~   rg   rj   rQ   	__class__r   r   ri   b  s   


zGpuBinding.__init__disable_cuda_graph_in_runr
   c                 C   s.   t  }|rdn| j}|dt| || _|S )Nr   r   )r   r   add_run_config_entryr7   r   )rh   r   r   r   r   r   r   get_run_optionsy  s
   zGpuBinding.get_run_optionsrx   c                    s*   |  |}| jr|dd t ||S )NZ'disable_synchronize_execution_providers1)r   r~   r   r   r}   )rh   rx   r   ry   r   r   r   r}     s   
zGpuBinding.infer)Fr   r   Nr   )r3   r4   r5   r   r)   r;   r   r&   r   r9   r7   ri   r   r   rZ   r}   __classcell__r   r   r   r   r   a  s,    *r   c                	   @   sZ   e Zd ZdZddedejdedefdd	Z	
	dde	de
deeef dB defddZdS )GpuBindingManagerzA manager for I/O bindings that support multiple CUDA Graphs.
    One cuda graph is reused for same input shape. Automatically add a new cuda graph for new input shape.
    r   rn   r   r;   r~   max_cuda_graphsc                 C   s(   || _ || _g | _d | _|| _|| _d S rY   )r   r;   graph_bindingsno_graph_bindingr~   r   )rh   r   r;   r~   r   r   r   r   ri     s   
zGpuBindingManager.__init__FNrs   use_cuda_graphrg   r
   c              	   C   s   | j D ]}|j|kr|  S qt| j | jks|s7| jd u r.t| j| j|| j|d| _| jS | j	| | jS t| j| j|dt| j | j|d}| j 
| |S )N)r~   rg   T)r   r   r~   rg   )r   rs   rp   r   r   r   r   r;   r~   rw   rU   )rh   rs   r   rg   Zgpu_graph_bindingr   r   r   get_binding  s.   


	zGpuBindingManager.get_binding)r   rn   )FN)r3   r4   r5   r   r   r)   r;   r   ri   r   r&   r9   r7   r   r   r   r   r   r   r     s    r   )r   loggingcollectionsr   collections.abcr   typingr   r!   r)   Zonnxruntimer   r   r7   rt   rJ   r   r   	getLoggerr3   rM   r   r:   r[   r   r   r   r   r   r   <module>   s     
Zq ,