o
    i-=                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZ e e	Z
		d<dedejded	ed
ef
ddZ	d=dedejded	edef
ddZ	d>dedejded
efddZ	d>dedejdeded
ef
ddZdeeejejf  deeejejf  fddZdeeejejejejf  fddZ		d?dedejdedefdd Z	d>dejd!edefd"d#Z	d>dejd$edefd%d&Z	d>dedejded	ed
ef
d'd(Z		d<dedejded	ed
ef
d)d*Z			d@dedejded+ed,ed
edefd-d.Z		dAdedejded/ed	ed
edefd0d1Z		dAdedejded	eded!ed$ed
edefd2d3Zd4ed5efd6d7Z ded8ee! d9ee! fd:d;Z"dS )B    N)WhisperConfig)InferenceSession  Fconfigdevice
batch_sizesequence_lengthuse_fp16c                 C   s*   |rt jnt j}t j|| j|||d}|S Nr   dtype)torchfloat16float32randnZnum_mel_bins)r   r   r   r   r	   torch_dtypeaudio_features r   p/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/whisper/whisper_inputs.pyget_sample_audio_features!   s   r   T	use_int32c                 C   s.   |rt jnt j}t jd| j||f||d}|S )Nr   )lowhighsizer   r   )r   int32int64randintZ
vocab_size)r   r   r   r   r   r   decoder_input_idsr   r   r   get_sample_decoder_input_ids/   s
   r   c                 C   s,   |rt jnt j}t j|| j| j||d}|S r
   )r   r   r   r   max_source_positionsd_model)r   r   r   r	   r   encoder_hidden_statesr   r   r    get_sample_encoder_hidden_states?   s
   r"   past_seq_lenc                    st   | j | j | j|rtjntj fddt| jD } fddt| jD }t||S )Nc                    s8   g | ]}t j d t j d fqS r   r   rand.0_)r   r   	head_size	num_headsr#   r   r   r   
<listcomp>\       z.get_sample_past_key_values.<locals>.<listcomp>c                    s8   g | ]}t j d t j d fqS r$   r%   r'   )r   r   r*   r   r+   r   r   r   r,   c   r-   )	decoder_attention_headsr    r   r   r   r   rangedecoder_layersflatten_past_key_values)r   r   r   r#   r	   Zself_attention_kv_cachesZcross_attention_kv_cachesr   )r   r   r*   r   r+   r#   r   r   get_sample_past_key_valuesO   s   

r2   self_attn_kv_cachescross_attn_kv_cachesc                 C   s>   g }t | |ddD ]\\}}\}}||||f}|| q	|S )NF)strict)zipappend)r3   r4   past_key_valuesself_k_cacheself_v_cachecross_k_cachecross_v_cacheZlayer_kv_cachesr   r   r   r1   o   s   r1   	kv_cachesc                 C   sL   g g }}| D ]\}}}}| | | | | | | | q||fS N)r7   )r=   r3   r4   r9   r:   r;   r<   r   r   r   group_past_key_values~   s   



r?      num_alignment_headsc                 C   s(   |rt jnt j}t j|df||d}|S )N   r   )r   r   r   Zones)r   r   rA   r   r   alignment_headsr   r   r   get_sample_alignment_heads   s   rD   sot_sequence_lengthc                 C   &   |rt jnt j}t j|g| |d}|S r
   r   r   r   Ztensor)r   rE   r   r   Z
sot_lengthr   r   r   get_sample_sot_sequence_length      rH   segment_lengthc                 C   rF   r
   rG   )r   rJ   r   r   Zsegment_sizer   r   r   get_sample_segment_length   rI   rK   c                    s<   j |rtjntj fddtjD }|S )Nc              
      s$   g | ]}t j jd qS r$   )r   r&   r   r'   r   r   r   r+   r   r   r   r   r,      s    z"get_sample_QKs.<locals>.<listcomp>)r.   r   r   r   r/   r0   )r   r   r   r   r	   QKsr   rL   r   get_sample_QKs   s   rN   c                 C   s   t | ||||}d|iS )Nr   )r   )r   r   r   r   r	   r   r   r   r   get_sample_encoder_inputs   s   rO   decoder_sequence_lengthencoder_sequence_lengthc           	      C   s*   t | ||||}t| ||||}||dS )N)r   r   )r   r   )	r   r   r   rP   rQ   r	   r   r   r   r   r   r   &get_sample_encoder_decoder_init_inputs   s   	
rR   past_sequence_lengthc           
      C   s:   t | ||||}t| |||}t| ||||}	|||	dS )N)r   r!   r8   )r   r"   r2   )
r   r   r   rS   r   r	   r   r   r!   r8   r   r   r   get_sample_decoder_inputs   s   	rT   c	                 C   s@   t | |||}	t||}t||}t| ||||}
|	|||
dS )N)rC   rE   rJ   rM   )rD   rH   rK   rN   )r   r   r   r   rA   rE   rJ   r	   r   rC   rM   r   r   r   get_sample_jump_times_inputs   s   

rU   inputsmodelc                 C   sf  d\}}d\}}}}d\}}	d| v r#t | d \}}|d j\}}}}i }
ttdd | }d|v }|D ]}|d	v rI| d
    |
|< q6|dkrZ| d    |
|< q6|dv rk| d    |
|< q6d|v ssd|v r|d   }|rt	j
|||	|f|jd}||d |d |d |d |f< ||
|< q6||
|< q6d|v sd|v r|d   }||
|< q6|dkrt	j|gt	jd|
|< q6|dkrt	j
|||	ft	jd|
|< q6|dkr| d    |
|< q6|dkr| d    |
|< q6|dkr| d    |
|< q6d|v r*| d d   |
|< q6td| |
S )N)NN)r   r   r   r   )   i  r8   r   c                 S   s   | j S r>   )name)ir   r   r   <lambda>  s    z(convert_inputs_for_ort.<locals>.<lambda>Zcache_indirection>   encoder_input_idsr   r   r!   >   	input_idsr   r   past_key_selfpast_value_self)r   past_key_crosspast_value_crossrS   rC   rE   rJ   cross_qkrM   zUnknown name not recognized: )r?   shapelistmapZ
get_inputsdetachcpunumpypopnpZzerosr   arrayr   
ValueError)rV   rW   r3   r4   r   r+   r#   r*   Z	num_beamsZmax_seq_lenZ
ort_inputsZmodel_inputsZuse_buffer_sharingrY   Zorig_kv_cacheZnew_kv_cacher   r   r   convert_inputs_for_ort  sP    





 rm   input_namesoutput_namesc                 C   s<  i }|| D ]}|dv rddi||< q|dv rddd||< q|dkr*ddi||< q|d	v r/q|d
kr;ddd||< q|dkrFddi||< qd|v sNd|v rVddd||< qd|v s^d|v rfddd||< qd|v svd|v svd|v svd|v r}ddi||< qd|v rddd||< qd|v rddd||< qt d| |S )N>   r\   r   r   r   >   r]   r   r   )r   rX   rC   rA   >   rE   rJ   Zlogitsr!   r^   r_   rS   )r   rB   Zpresent_key_selfZpresent_value_selfZtotal_sequence_lengthr`   ra   Zpresent_key_crossZpresent_value_crossrb   Z
jump_times
max_lengthz$Unknown input or output name found: )	Exception)r   rn   ro   Zdynamic_axesrY   r   r   r   get_model_dynamic_axesL  s:   rr   )r   F)T)F)r@   T)r   FT)FT)#loggingrh   rj   r   Ztransformersr   Zonnxruntimer   	getLogger__name__loggerr   intboolr   r   r"   r2   rd   tupleZTensorr1   r?   rD   rH   rK   rN   rO   rR   rT   rU   dictrm   strrr   r   r   r   r   <module>   sj  




 








	

>