o
    iN                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZmZ dHd
dZ		dIdJddZ				dKdLddZ					dMdNd"d#ZdOd%d&ZdPdQd'd(ZdRd+d,Z		 	-dSdTd0d1ZdUd3d4ZdVd7d8ZdWd;d<ZdXd?d@ZdYdFdGZdS )Z    )annotationsN)
AutoConfigAutoTokenizer)DynamicCache)InferenceSessionOrtValueattention_masktorch.Tensoruse_past_kvboolc                 C  s@   |   dd }|| dkd |r|d d df d}|S )N   r   )longZcumsumZmasked_fill_	unsqueeze)r   r
   position_ids r   l/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_ids   s
   r   ptFconfigr   devicetorch.device
batch_sizeintseq_lenenginestrreturn_dictc           
      C  s   t jd| j||ft jd}t j||t jd}t|dd}|dkr$| n||}|dkr1| n||}|dkr>| n||}|sJ|||fS |||d}	|	S )Nr   lowhighsizedtyper"   Fr
   ort	input_idsr   r   )torchrandint
vocab_sizeint64onesr   numpyto)
r   r   r   r   r   r   r'   r   r   inputsr   r   r   get_sample_inputs    s   
r0   r   past_seq_lenuse_fp16
world_sizec                   s&  t jd| j|dft jd}t j||d t jd}	t|	dd}
t| ||||d}|dkr/| n| }|dkr<|	 n|	 }	|dkrI|
 n|
 }
|dkrVt	|n fd	d
|D }|snt
|tshJ ||	|
|fS ||	|
d}|dkrt
|tsJ || |S t
|tsJ ||d< |S )Nr   r   r   r#   Tr$   r3   r%   c                   (   g | ]}|d    |d   fqS r   r   r.   .0kvr   r   r   
<listcomp>X      ( z2get_sample_with_past_kv_inputs.<locals>.<listcomp>r&   past_key_values)r(   r)   r*   r+   r,   r   get_past_kv_inputsr-   r.   flatten_past_kv_inputs
isinstancelistdictupdate)r   r   r   r1   r2   r   r   r3   r'   r   r   past_kvr/   r   r;   r   get_sample_with_past_kv_inputsC   s.   
 
rF   max_seq_lenuse_buffer_sharec                   s:  t jd| j||ft jd}t j||| t jd}t||dkd}t| ||||
d}|dkr1| n| }|dkr>| n| }|dkrK| n| }|dkrXt	|n fdd|D }|	spt
|tsjJ ||||fS |||d	}|dkrt
|tsJ || |rt|||}|S t
|tsJ ||d
< |S )Nr   r   r#   r$   r4   r%   c                   r5   r6   r7   r8   r;   r   r   r<      r=   z9get_merged_sample_with_past_kv_inputs.<locals>.<listcomp>r&   r>   )r(   r)   r*   r+   r,   r   r?   r-   r.   r@   rA   rB   rC   rD    enable_past_present_share_buffer)r   r   r   r   r1   rG   r2   rH   r   r   r3   r'   r   r   rE   r/   r   r;   r   %get_merged_sample_with_past_kv_inputsy   s2    
rJ   split_kvc                 C  sn  |rt jnt j}| j| j }	|sTt j||| j|dt jt 	|||fdd |t j|| j
|| j|	|t j|| j
|| j|	|t j|t jdd}
|
S t j||| j|t jt j	|||ft jdddd t jt j|t jdd}
t| j
D ])}|
d| dt j|| j||	|d	| dt j|| j||	|i q|rt|
||}
|
S )
Ng     r   )kr#   )x	attn_maskZk_cacheZv_cachepos)rM   rN   rO   Zk__cacheZv_)npfloat16float32hidden_sizenum_attention_headsrandomrandZastypeZtriur,   num_hidden_layersarrayr+   int32rangerD   rI   )r   r   r1   r   rG   r2   rH   rK   Znp_dtype	head_size
ort_inputsir   r   r   get_msft_sample_inputs   sJ   
""$

r_   c                   sZ   | j | t| dr| jn| j| j |rtjntj fddt| j	D }|S )Nhead_dimc              
     s4   g | ]}t j d t j d fqS )r#   )r(   rW   )r9   _r   r\   	num_headsr1   torch_dtyper   r   r<      s    z&get_past_kv_inputs.<locals>.<listcomp>)
num_key_value_headshasattrr`   rT   rU   r(   rR   rS   r[   rX   )r   r   r1   r2   r3   rE   r   rb   r   r?      s   
r?   r>   'list[tuple[torch.Tensor, torch.Tensor]]c                 C  s   i }t | D ]B\}\}}t| tr,|   |d| < |   |d| < q|   |d| d< |   |d| d< q|S )NZpast_key_values_key_cache_Zpast_key_values_value_cache_past_key_values..key.value)	enumeraterA   r   detachcpur-   )r>   rE   r^   Zpast_kZpast_vr   r   r   r@      s   
r@      	pt_inputsrC   c                 C  sl   i }|   D ]%\}}t|tjr|||< q|dkr!|t| q|   ||< q|r4t	|||}|S )Nr>   )
itemsrA   rQ   ZndarrayrD   r@   rl   rm   r-   rI   )ro   rH   r1   rG   r]   rL   vr   r   r   convert_inputs_for_ort   s   
rr   r]   c           
      C  st   |   D ]3\}}d|v sd|v r7|j\}}}}tj||||f|jd}	||	d |d |d |d |f< |	| |< q| S )Ncacher>   r#   )rp   shaperQ   zerosr"   )
r]   r1   rG   rL   rq   r   rc   ra   r\   new_vr   r   r   rI     s    rI   modelr   c                 C  sh   dd |   D }t| }|| }t|r"td|  td|| }t|r2|D ]}||= q,|S )Nc                 S     h | ]}|j qS r   name)r9   Zmodel_inputr   r   r   	<setcomp>%      z$verify_ort_inputs.<locals>.<setcomp>z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.)
get_inputssetkeyslenprint	Exception)rw   r]   model_inputsZuser_inputsZmissing_inputsZunnecessary_inputsZunnecessary_inputr   r   r   verify_ort_inputs#  s   r   	device_idkv_cache_ortvaluesc                 C  s  |   }dd |  D }| D ]H\}}	||vrq|rKd|v s$d|v rK||vr;tj|	||d}
|||
 |
||< q|| |	 ||||  qtj|	||d}
|||
 q|  D ]*}|j}|rd|v smd|v r|	dd	dd}|
|||  q^|j|||d q^||fS )Nc                 S  rx   r   ry   )r9   r^   r   r   r   r{   A  r|   z/add_io_bindings_as_ortvalues.<locals>.<setcomp>rs   r>   )device_typer   outpresent)
io_bindingr}   rp   r   Zortvalue_from_numpyZbind_ortvalue_inputZupdate_inplaceget_outputsrz   replaceZbind_ortvalue_outputbind_output)rw   r]   r   r   rH   r   r   r   rL   rq   Zv_deviceoutputrz   Z
input_namer   r   r   add_io_bindings_as_ortvalues7  s*   
r   r/   outputsc              	   C  s   t | |}d }tjtjtjtjd}|  }| D ](\}}	|j||	j	j
|	j	j
dkr,dn|	j	j|t|	j t|	j|	 d |	j	}q|  D ]6}
|
j}|rZd|v rZ||dd n|| }	|j||j
|j
dkrjdn|j|rqtjntjt|	j|	 d qG|S )N)ztorch.int32ztorch.int64ztorch.float16ztorch.float32rm   r   )rz   r   r   Zelement_typert   Z
buffer_ptrr   r>   )r   rQ   rZ   r+   rR   rS   r   rp   Z
bind_inputr   typeindexreprr"   tuplert   Zdata_ptrr   rz   r   r   )rw   r/   r   r2   rH   r   Zpt_to_npr   rL   rq   r   rz   r   r   r   add_io_bindings_as_tensorsd  sF   
	r   	tokenizerr   requested_lengthprompt	list[str]c              	   C  s  |j |_|j|dd}|rtjntj}	tj|d |tjd}
tj|d |tjd}t|dd}|
j	d }||krS|
d d d |f }
|d d d |f }t|dd}n9||k r|
d d d	f 
d	j}|d d d	f 
d	j}t|| D ]}t||
f}
t||f}qut|dd}|
j	d }||ksJ |d
kr|
 n|
|d
kr| n||d
kr| n|d}|d
krg |d< |
j	\}}| j}| j}t| dr| jn| j| j }t| jD ]D}tj|||r|nd	|||	d}tj|||r|nd	|||	d}|d
kr|d| d| d| d| i q|d ||f qd }|d
krotj||| j||	d}d| i}|sot| jD ].}tj||||||	d}tj||||||	d}|d| d| d| d| i q@||fS )NT)paddingr'   )r   r"   r   Fr$   r   r   r%   r&   r>   r`   rh   ri   rj   logitszpresent.)Z	eos_tokenZ	pad_tokenZbatch_encode_plusr(   rR   rS   Ztensorr+   r   rt   r   Tr[   Zhstack
contiguousZmax_position_embeddingsre   rf   r`   rT   rU   rX   ru   rD   appendr*   )r   r   r   r   r   r2   rH   r   Zencodings_dictrd   r'   r   r   Ztokenized_lengthZinput_ids_first_colZattention_mask_first_colra   r/   r   Zsequence_lengthZmax_sequence_lengthrc   r\   r^   Zpast_keyZ
past_valuer   r   Zpresent_keyZpresent_valuer   r   r   get_initial_inputs_and_outputs  s   







"r   )r   r	   r
   r   )r   F)r   r   r   r   r   r   r   r   r   r   r   r   )Fr   Fr   )r   r   r   r   r   r   r1   r   r2   r   r   r   r   r   r3   r   )FFr   Fr   )r   r   r   r   r   r   r   r   r1   r   rG   r   r2   r   rH   r   r   r   r   r   r3   r   )r   r   r   r   r1   r   r   r   rG   r   r2   r   rH   r   rK   r   )r   )
r   r   r   r   r1   r   r2   r   r3   r   )r>   rg   )Fr   rn   )ro   rC   rH   r   r1   r   rG   r   )r]   rC   r1   r   rG   r   )rw   r   r]   rC   )rw   r   r]   rC   r   r   r   r   rH   r   r   rC   )
rw   r   r/   rC   r   rC   r2   r   rH   r   )r   r   r   r   r   r   r   r   r   r   r2   r   rH   r   r   r   )
__future__r   r-   rQ   r(   Ztransformersr   r   Ztransformers.cache_utilsr   Zonnxruntimer   r   r   r0   rF   rJ   r_   r?   r@   rr   rI   r   r   r   r   r   r   r   r   <module>   sB   
(=
45




-0