o
    i"                  	   @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dlm
Z
 e eZG dd de
jZ			dd
e	dededefddZd
e	dededefddZdS )    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2PromptEncoder)SAM2Base)nnc                       sh   e Zd Z	ddedededdf fddZe d	ejd
ejdejdejdejdejfddZ	  Z
S )SAM2MaskDecoderT	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturnNc                    s0   t    |j| _|j| _|| _|| _|| _d S )N)	super__init__Zsam_mask_decodermask_decoderZsam_prompt_encoderprompt_encodermodelr	   r
   )selfr   r	   r
   	__class__ k/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/sam2/mask_decoder.pyr      s   

zSAM2MaskDecoder.__init__image_features_0image_features_1image_embeddingsimage_pesparse_embeddingsdense_embeddingsc           
      C   s   | j j|||||jd dk||gd\}}}	}	| jr8|ddddddddf }|ddddf }||fS | jrH| j ||\}}||fS |ddddddddf }|ddddf }||fS )a  
        Decode masks from image and prompt embeddings. Only support H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            image_pe (torch.Tensor): [1, 256, H/16, W/16]. image positional encoding.
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, H/16, W/16]. embedding for input masks.

        Returns:
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
        r      )r   r   Zsparse_prompt_embeddingsZdense_prompt_embeddingsZrepeat_imageZhigh_res_featuresN)r   Zpredict_masksshaper	   r
   Z _dynamic_multimask_via_stability)
r   r   r   r   r   r   r   low_res_masksiou_predictions_r   r   r   forward   s(   	  zSAM2MaskDecoder.forward)T)__name__
__module____qualname__r   boolr   torchZno_gradZTensorr!   __classcell__r   r   r   r   r      s2    r   TF
sam2_modelonnx_model_pathr	   r
   c                 C   s  t |  }t }t|  }||\}}	}
td|j td|	j td|
j d}d}tjdd||dftj	d}tjdd	||ftj	d}tj
|d	d
d
tj	d}tjd	tj	d}|||||\}}}td|j td|j td|j t| ||}||	|
|||f}|| \}}td|j td|j t ; |stjdtjjd tjdtd tjj|||dddg dddgdddddiddiddidd	 W d    n1 sw   Y  td| d S )Nzimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %s      r      lowhighsizedtyper      r1   zsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %szlow_res_masks.shape: %sziou_predictions.shape: %signore)categoryT   r   r   r   r   r   r   r   r   
num_labelsznum_points+1)r   r   )r   r   r   r   )Zexport_paramsZopset_versionZdo_constant_foldinginput_namesoutput_namesZdynamic_axesz mask decoder onnx model saved to)r   cpur   r   loggerinfor   r&   randintfloatZzerosonesr   warningscatch_warningsfilterwarningsZjitZTracerWarningUserWarningZonnxZexportprint)r(   r)   r	   r
   verbosesam2_prompt_encoderimagesam2_encoderr   r   r   r8   
num_pointspoint_coordspoint_labelsinput_maskshas_input_masksr   r   r   sam2_mask_decoderinputsr   r   r   r   r   export_mask_decoder_onnxR   sZ   

rQ   c               
      s  t |  }t }t|  }||\}}}	d}
d}tjdd|
|dftjd}tjdd|
|ftjd}tj|
dddtjd}tjdtjd}|||||\}}}t	| ||}|||	|||f}|| \}}dd l
}|j|d	gd
}|   fddtt D }td| | fddttD }td| ||| | |	 | | | d}t|D ]\}}td||| j q|\}}tjj|t|ddd tjj|t|ddd td|  d S )Nr      r   r,   r*   r-   r2   r3   ZCPUExecutionProvider)	providersc                       g | ]} | j qS r   name.0i)model_inputsr   r   
<listcomp>       z*test_mask_decoder_onnx.<locals>.<listcomp>zinput_names: %sc                    rT   r   rU   rW   )model_outputsr   r   r[      r\   zoutput_names: %sr7   zoutput %s shape: %sg{Gzt?g-C6?)ZatolZrtolzonnx model has been verified: )r   r;   r   r   r&   r>   r?   Zrandr@   r   onnxruntimeZInferenceSessionZ
get_inputsrangelenr<   r=   Zget_outputsrunnumpy	enumerater   ZtestingZassert_closeZtensorrE   ) r(   r)   r	   r
   rG   rH   rI   r   r   r   r8   rJ   rK   rL   rM   rN   r   r   r   rO   rP   r   r   r^   Zort_sessionr9   r:   ZoutputsrY   Zoutput_nameZort_low_res_masksZort_iou_predictionsr   )rZ   r]   r   test_mask_decoder_onnx   sP   
rd   )TF)loggingrA   r&   Zimage_encoderr   r   r   r   Zsam2.modeling.sam2_baser   r   	getLoggerr"   r<   Moduler   strr%   rQ   rd   r   r   r   r   <module>   s:   
E
E