o
    i                      @   sx   d dl Z d dlZd dlmZ d dlmZ d dlmZ e eZ	G dd dej
Zdedefd	d
ZdedefddZdS )    N)SAM2Base)compare_tensors_with_tolerance)nnc                	       s   e Zd Zdef fddZe dejdejdejdejfdd	Zdejdejd
ejfddZ	dejdejd
ejfddZ
  ZS )SAM2PromptEncoder	sam_modelc                    s   t    |j| _|| _d S )N)super__init__Zsam_prompt_encoderprompt_encodermodel)selfr   	__class__ m/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/sam2/prompt_encoder.pyr      s   

zSAM2PromptEncoder.__init__point_coordspoint_labelsinput_maskshas_input_masksc                 C   s,   |  ||}| ||}| j }|||fS )aj  Encode prompts.

           Args:
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
        Returns:
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, 64, 64]. embedding for input masks.
            image_pe (torch.Tensor, optional): [1, 256, 64, 64]. image positional encoding.
        )_embed_points_embed_masksr	   Zget_dense_pe)r   r   r   r   r   sparse_embeddingsdense_embeddingsimage_per   r   r   forward   s   

zSAM2PromptEncoder.forwardreturnc                 C   s4  |d }t j|jd ddf|jd}t j|jd df|jd }t j||gdd}t j||gdd}|d d d d df | jj |d d d d df< |d d d d df | jj |d d d d df< | jj	
|}|d|}||dk }|| jjj|dk  }t| jjD ]}|| jj| j||k  }q|S )Ng      ?r         )device)dim)torchzerosshaper   onescatr
   Z
image_sizer	   Zpe_layerZ_pe_encodingZ	unsqueezeZ	expand_asZnot_a_point_embedweightrangeZnum_point_embeddingsZpoint_embeddings)r   r   r   Zpadding_pointZpadding_labelZpoint_embeddingir   r   r   r   3   s   00zSAM2PromptEncoder._embed_pointsc                 C   sV   | j |}| j jjdddd}td|j || d| |  }td|j |S )Nr   r   zno_mask_embedding.shape: %sg      ?zmask_embedding.shape: %s)r	   Zmask_downscalingZno_mask_embedr%   Zreshapeloggerinfor"   )r   r   r   Zmask_embeddingZno_mask_embeddingr   r   r   r   J   s   zSAM2PromptEncoder._embed_masks)__name__
__module____qualname__r   r   r    Zno_gradZTensorr   r   r   __classcell__r   r   r   r   r      s    $r   
sam2_modelonnx_model_pathc                 C   sD  t |  }d}d}tjdd||dftjd}tjdd||ftjd}tj|dddtjd}tjdtjd}|||||\}	}
}t	d	|j
 t	d
|j
 t	d|j
 t	d|j
 t	d|	j
 t	d|
j
 t	d|j
 tjj|||||f|dddg dg dddddddddidddddidd	 td| d S )Nr      r      lowhighsizedtyper      r6   zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %sT   r   r   r   r   )r   r   r   
num_labels
num_points)r   r   znum_points+1)r   r   r   r   r   )Zexport_paramsZopset_versionZdo_constant_foldinginput_namesoutput_namesZdynamic_axesz#prompt encoder onnx model saved to )r   cpur    randintfloatint32r!   r#   r(   r)   r"   ZonnxZexportprint)r.   r/   sam2_prompt_encoderr;   r<   r   r   r   r   r   r   r   r   r   r   export_prompt_encoder_onnxS   sD   

rE   c                    s  t |  }d}d}tjdd||dftjd}tjdd||ftjd}tj|dddtjd}tjdtjd}|||||\}	}
}dd l}|j	|d	gd
}|
   fddtt D }td| | fddttD }td| ||| | | | d}t|D ]\}}td||| j q|\}}}td|	t|ddrtd|
t|ddrtd|t|ddrtd|  d S td|  d S )Nr      r   r1   r   r2   r7   r8   ZCPUExecutionProvider)	providersc                       g | ]} | j qS r   name.0r'   )model_inputsr   r   
<listcomp>       z,test_prompt_encoder_onnx.<locals>.<listcomp>zinput_names: %sc                    rH   r   rI   rK   )model_outputsr   r   rN      rO   zoutput_names: %sr:   zoutput %s shape: %sr   g?)Zmismatch_percentage_tolerancer   r   zonnx model has been verified: z onnx model verification failed: )r   r?   r    r@   rA   rB   Zrandr#   onnxruntimeZInferenceSessionZ
get_inputsr&   lenr(   r)   Zget_outputsrunnumpy	enumerater"   r   ZtensorrC   )r.   r/   rD   r;   r<   r   r   r   r   r   r   r   rQ   Zort_sessionr=   r>   Zoutputsr'   Zoutput_nameZort_sparse_embeddingsZort_dense_embeddingsZort_image_per   )rM   rP   r   test_prompt_encoder_onnx   s\   



rV   )loggingr    Zsam2.modeling.sam2_baser   Z
sam2_utilsr   r   	getLoggerr*   r(   Moduler   strrE   rV   r   r   r   r   <module>   s"   
D
/