o
    iE*                  	   @   s   d dl Z d dlZd dlZd dlm  mZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ e eZG dd	 d	ejZ	
	
ddedededefddZ	
ddedefddZdS )    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2MaskDecoder)SAM2PromptEncoder)SAM2Base)compare_tensors_with_tolerance)nnc                       s   e Zd Z			ddededededed	d
f fddZe 	ddej	dej	dej	dej	dej	dej	dej	dej	defddZ
  ZS )SAM2ImageDecoderTF        	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturn_logitsmask_thresholdreturnNc                    s2   t    t|| _t|||| _|| _|| _d S )N)super__init__r   prompt_encoderr   mask_decoderr   r   )selfr   r   r   r   r   	__class__ l/home/kim/smarthome/.venv/lib/python3.10/site-packages/onnxruntime/transformers/models/sam2/image_decoder.pyr      s
   


zSAM2ImageDecoder.__init__image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_sizeenable_nvtx_profilec
                 C   s  d}
|	rddl m} |g d}
|
dur|
jddd | ||||\}}}|
dur6|
d |
jdd	d | ||||||\}}|
durR|
d |
jd
dd tj||d |d fddd}t	|dd}| j
sp|| jk}|
dur}|
d
 |
  |||fS )a  
        Decode masks from image features and prompts. Batched images are not supported. H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
            original_image_size(torch.Tensor): [2]. original image size H_o, W_o.
            enable_nvtx_profile (bool): enable NVTX profiling.

        Returns:
            masks (torch.Tensor): [1, M, H_o, W_o] where M=3 or 1. Masks of original image size.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
        Nr   )
NvtxHelper)r   r   post_processr   blue)colorr   redr$   green   ZbilinearF)modeZalign_cornersg      @g      @@)nvtx_helperr#   Zstart_profiler   Zstop_profiler   FZinterpolatetorchclampr   r   Zprint_latency)r   r   r   r   r   r   r   r    r!   r"   r+   r#   Zsparse_embeddingsZdense_embeddingsZimage_pelow_res_masksiou_predictionsmasksr   r   r   forward#   s>   $





zSAM2ImageDecoder.forward)TFr
   F)__name__
__module____qualname__r   boolfloatr   r-   Zno_gradZTensorr2   __classcell__r   r   r   r   r	      sL    	
r	   F
sam2_modelonnx_model_pathr   verbosec                 C   s0  d}t |}t|  }||\}}}	td|j td|j td|	j t| |dd }
d}d}tjd	d
||dftj	d}tjd	d||ftj
d}tj|dddtj	d}tjdtj	d}tjddgtj
d}|||	|||||f}td|j td|j td|j td|j td|j |r|
| \}}}td|j td|j td|j g d}g d}ddddddd	did	diddddd	did	did }t ) |stjd!tjjd" tjd!td" tjj|
||dd#d|||d$	 W d    n	1 sw   Y  td%| d S )&Nr)   zimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %sTr   r         r      lowhighsizedtype   rE   i  i  zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szoriginal_image_size.shape: %szmasks.shape: %sziou_predictions.shape: %szlow_res_masks.shape: %s)r   r   r   r   r   r   r    r!   )r1   r0   r/   
num_labels
num_points)r   r)   Zoriginal_image_heightZoriginal_image_width)r   r>   r?   )r   r   r   r    r1   r/   r0   ignore)category   )Zexport_paramsZopset_versionZdo_constant_foldinginput_namesoutput_namesdynamic_axeszdecoder onnx model saved to %s)r   r   cpuloggerinfoshaper	   r-   randintr8   int32zerosZonestensorwarningscatch_warningsfilterwarningsZjitZTracerWarningUserWarningZonnxZexport)r:   r;   r   r<   
batch_sizeimagesam2_encoderr   r   r   Zsam2_decoderrH   rI   r   r   r   r    r!   example_inputsr1   r0   r/   rM   rN   rO   r   r   r   export_decoder_onnxs   s   


r`   c                    s  d}t |}t|  }||\}}}t| |dd }	d}
d}tjdd|
|dftjd}tjdd|
|ftjd}tj|
dd	d	tjd
}tjdtjd
}tj	ddgtjd
}||||||||f |	  \}}}dd l
}|j|dgd}| fddttD }td| | fddttD }td|  fddttD }|||}t|D ]\}}t| d|| j q|\}}}td| t	| rtd|t	|rtd|t	|rtd| d S td| d S )Nr)   Tr=      r   r@   r>   rA   rF   rG   i  ZCPUExecutionProvider)	providersc                       g | ]} | j qS r   name.0i)model_inputsr   r   
<listcomp>       z%test_decoder_onnx.<locals>.<listcomp>zinput_names: %sc                    rc   r   rd   rf   )model_outputsr   r   rj      rk   zoutput_names: %sc                    s    i | ]}| j  |  qS r   )re   numpyrf   )r_   ri   r   r   
<dictcomp>  s     z%test_decoder_onnx.<locals>.<dictcomp>z
.shape: %sr1   r0   r/   zonnx model has been verified:zonnx model verification failed:)r   r   rP   r	   r-   rT   r8   rU   rV   rW   onnxruntimeZInferenceSessionZ
get_inputsrangelenrQ   rR   Zget_outputsrun	enumeraterS   r   print)r:   r;   r   r\   r]   r^   r   r   r   Zsam2_image_decoderrH   rI   r   r   r   r    r!   r1   r0   r/   ro   Zort_sessionrM   rN   ZinputsZoutputsrh   Zoutput_nameZ	ort_masksZort_iou_predictionsZort_low_res_masksr   )r_   ri   rl   r   test_decoder_onnx   sb   
ru   )FFr3   )loggingrX   r-   Ztorch.nn.functionalr   Z
functionalr,   Zimage_encoderr   r   r   r   r   r   Zsam2.modeling.sam2_baser   Z
sam2_utilsr   	getLoggerr4   rQ   Moduler	   strr7   r`   ru   r   r   r   r   <module>   s:   
b
b