o
    i4                     @   s$  d Z ddlZddlmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlmZm Z  e Z!e Z"dd Z#ej$%dee  dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+ej$%dddgdd  Z,d!d" Z-d#d$ Z.d%d& Z/d'd( Z0d)d* Z1ed+edEi d,d-id.ej$%d/d0d1gd2d3 Z2ed+edEi d,d4id.ej$%d/d0d1gd5d6 Z3d7d8 Z4ej$%d9ed:d; Z5d<d= Z6ej$%dee  d>d? Z7ej$%d@dAdBgej$%dddgdCdD Z8dS )FzD
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
    N)Mockpatch)parallel_backend)load_diabetes	load_irismake_classification)IsolationForest)_average_path_length)roc_auc_score)ParameterGridtrain_test_split)check_random_state)assert_allcloseassert_array_almost_equalassert_array_equalignore_warnings)CSC_CONTAINERSCSR_CONTAINERSc                 C   s   t ddgddgg}t ddgddgg}tdgg dddgd}t  |D ]}tdd	| i||| q)W d
   d
S 1 sFw   Y  d
S )z6Check Isolation Forest for various parameter settings.r            )      ?      ?r   TF)n_estimatorsmax_samples	bootstraprandom_stateN )nparrayr   r   r   fitpredict)global_random_seedX_trainX_testgridparamsr   r   ]/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_iforest.pytest_iforest"   s   
"r(   sparse_containerc                 C   s   t | }ttjdd |d\}}tddgddgd}||}||}|D ]+}tdd	| d
||}	|	|}
tdd	| d
||}||}t|
| q&dS )z=Check IForest for various parameter settings on sparse input.N2   r   r   r   TF)r   r   
   )r   r   r   )	r   r   diabetesdatar   r   r    r!   r   )r"   r)   rngr#   r$   r%   ZX_train_sparseZX_test_sparser&   Zsparse_classifierZsparse_resultsZdense_classifierZdense_resultsr   r   r'   test_iforest_sparse2   s0   

r0   c                  C   s(  t j} d}tjt|d tdd|  W d   n1 sw   Y  t  t	dt tdd|  W d   n1 sAw   Y  t  t	dt tt
dd|  W d   n1 sfw   Y  tt t | | ddd	df  W d   dS 1 sw   Y  dS )
z7Test that it gives proper exception on deficient input.3max_samples will be set to n_samples for estimationmatch  r   Nerrorautor   r   )irisr.   pytestwarnsUserWarningr   r    warningscatch_warningssimplefilterr   Zint64Zraises
ValueErrorr!   )Xwarn_msgr   r   r'   test_iforest_errorL   s    

$"rB   c               	   C   sF   t j} t | }|jD ]}|jttt	| j
d ks J qdS )zDCheck max_depth recalculation when max_samples is reset to n_samplesr   N)r8   r.   r   r    estimators_	max_depthintr   ceillog2shape)r@   clfZestr   r   r'   test_recalculate_max_depthb   s
   
&rJ   c                  C   s   t j} t | }|j| jd ksJ tdd}d}tjt|d ||  W d    n1 s1w   Y  |j| jd ks@J tdd| }|jd| jd  ksTJ d S )Nr   i  r5   r1   r2   g?)	r8   r.   r   r    max_samples_rH   r9   r:   r;   )r@   rI   rA   r   r   r'   test_max_samples_attributej   s   
rL   c                 C   s   t | }ttj|d\}}td| d|}|jdd ||}|jdd ||}t|| td| d|}||}t|| dS )zCheck parallel regression.r+   r   )n_jobsr   r   rM   r   N)	r   r   r-   r.   r   r    
set_paramsr!   r   )r"   r/   r#   r$   Zensembley1y2Zy3r   r   r'    test_iforest_parallel_regressiony   s   



rR   c           	      C   s   t | }d|dd }|t|d |d f}|dd }|jdddd	}t|dd |f}td
gd dgd  }td|d|}|	| }t
||dksXJ dS )z#Test Isolation Forest performs wellg333333?iX  r   Nr4   r   )   r   )lowhighsizer   rT   d   )r   r   g\(\?)r   randnZpermutationr   Zvstackuniformr   r   r    decision_functionr
   )	r"   r/   r@   r#   Z
X_outliersr$   y_testrI   Zy_predr   r   r'   test_iforest_performance   s   r]   contamination      ?r7   c              	   C   s   ddgddgddgddgddgddgddgddgg}t || d	}|| || }||}t|dd  t|d d ksDJ t|d
dg ddg   d S )NrS   r   r         	   r   r^      )r   r    r[   r!   r   minmaxr   )r^   r"   r@   rI   decision_funcpredr   r   r'   test_iforest_works   s   4

(rk   c                  C   s&   t j} t | }|j|jksJ d S N)r8   r.   r   r    rK   Z_max_samples)r@   rI   r   r   r'   test_max_samples_consistency   s   rm   c                  C   sV   t d} ttjd d tjd d | d\}}}}tdd}||| || d S )Nr   r*   r+   g?)Zmax_features)r   r   r-   r.   targetr   r    r!   )r/   r#   r$   Zy_trainr\   rI   r   r   r'    test_iforest_subsampled_features   s   
ro   c                  C   s   dt dt j  d } dt dt j  d }ttdgdg ttdgdg ttd	gd
g ttdg| g ttdg|g ttt g ddd
| |g tt d}t|t | d S )N       @g      @g?g     0@g}?r   g        r   r   r        )r   r   rq   rr   )	r   logZeuler_gammar   r	   r   Zaranger   sort)Z
result_oneZ
result_twoZavg_path_lengthr   r   r'    test_iforest_average_path_length   s   
ru   c                  C   s   ddgddgddgg} t dd| }t  | }t|ddgg|ddgg|j  t|ddgg|ddgg|j  t|ddgg|ddgg d S )Nr   r   皙?)r^   rp   )r   r    r   Zscore_samplesr[   Zoffset_)r#   Zclf1Zclf2r   r   r'   test_score_samples   s   rw   c                  C   sv   t d} | dd}tdd| dd}|| |jd }|jdd || t|jdks0J |jd |u s9J dS )	z/Test iterative addition of iTrees to an iForestr      r   r,   T)r   r   r   Z
warm_start)r   N)r   rY   r   r    rC   rO   len)r/   r@   rI   Ztree_1r   r   r'   test_iforest_warm_start   s   


rz   z*sklearn.ensemble._iforest.get_chunk_n_rowsreturn_valuer   )Zside_effectzcontamination, n_predict_calls)r_   r   )r7   r   c                 C      t || | j|ksJ d S rl   rk   Z
call_countZmocked_get_chunkr^   Zn_predict_callsr"   r   r   r'   test_iforest_chunks_works1     
r   r,   c                 C   r|   rl   r}   r~   r   r   r'   test_iforest_chunks_works2  r   r   c                  C   st  t d} t }||  t jd}t|| dksJ t||dddks-J t|| d dks:J t|| d dksGJ t 	|dddd} t }||  t|| dkseJ t||dddkstJ t|t ddksJ |dd} t }||  t|| dksJ t||dddksJ t|t ddksJ dS )z=Test whether iforest predicts inliers when using uniform data)rX   r,   r   r   rX   r,   N)
r   Zonesr   r    randomRandomStateallr!   rY   repeat)r@   iforestr/   r   r   r'   test_iforest_with_uniform_data  s(   



 r   csc_containerc                 C   s2   t dddd\}}| |}tdddd| d	S )
zdCheck that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    iL rX   r   Z	n_samplesZ
n_featuresr   r,      r   )r   r   rM   N)r   r   r    )r   r@   _r   r   r'   *test_iforest_with_n_jobs_does_not_segfault=  s   r   c                  C   s|   t d} tjd}| j|ddgd}tddd}t	  t
dt || W d	   d	S 1 s7w   Y  d	S )
zCheck that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    Zpandasr   rb   a)r.   columnsg?re   r6   N)r9   Zimportorskipr   r   r   Z	DataFramerY   r   r<   r=   r>   r;   r    )pdr/   r@   modelr   r   r'   #test_iforest_preserve_feature_namesH  s   

"r   c                 C   sl   t dddd\}}| |}|  d}td|dd|}||}|dk  |jd  t|ks4J dS )	zCheck that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    r*   rb   r   r   rv   rq   )r   r^   r   N)	r   Zsort_indicesr   r    r[   sumrH   r9   Zapprox)r)   r@   r   r^   r   Z
X_decisionr   r   r'   -test_iforest_sparse_input_float_contaminationZ  s   
(r   rM   r   r   c           	   	   C   s   ddgddgddgddgddgddgddgddgg}t | |d	d
}|| || }||}t|dd	 t|d	d ksEJ t|ddg ddg   t | |dd
}|| td|d ||}W d	   n1 stw   Y  t|| d	S )z5Check that `IsolationForest.predict` is parallelized.r`   rS   r   r   ra   rb   rc   rd   N)r   r^   rM   rf   	threadingrN   )	r   r    r[   r!   r   rg   rh   r   r   )	r"   r^   rM   r@   rI   ri   rj   Zclf_parallelZpred_paralellr   r   r'   test_iforest_predict_paralleln  s"   4

(
r   r   )9__doc__r<   Zunittest.mockr   r   numpyr   r9   Zjoblibr   Zsklearn.datasetsr   r   r   Zsklearn.ensembler   Zsklearn.ensemble._iforestr	   Zsklearn.metricsr
   Zsklearn.model_selectionr   r   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   Zsklearn.utils.fixesr   r   r8   r-   r(   markZparametrizer0   rB   rJ   rL   rR   r]   rk   rm   ro   ru   rw   rz   r   r   r   r   r   r   r   r   r   r   r'   <module>   sh    

"


