o
    iM                     @   sV  d dl Zd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( ej)*dddgej)*dddgej)*dddgdd Z+ej)*dddgej)*dddgdd Z,ej)*dddgej)*dddgd>ddZ-dd  Z.d!d" Z/d#d$ Z0ej)*dd%dgd&d' Z1ej)*dg d(ej)*dddgd)d* Z2ej)*d+d,d-gd.d/ Z3d0d1 Z4d2d3 Z5ej)*d4d5d6gd5d6gfd5d6ged5ed6d7fd5d6gd8d9 fgd:d; Z6d<d= Z7dS )?    N)parallel_backend)assert_allclose)ColumnTransformer)load_diabetes	load_irismake_classificationmake_regression)DummyClassifier)RandomForestClassifierRandomForestRegressor)SimpleImputer)permutation_importance)LinearRegressionLogisticRegression)
get_scorermean_squared_errorr2_score)train_test_split)make_pipeline)KBinsDiscretizerOneHotEncoderStandardScalerscale)_convert_containern_jobs      max_samples      ?      ?sample_weightonesc              
   C   s   t jd}d}tdd\}}||jd|jd d dd	}t ||g}|d
kr/t |n|}t	ddd}	|	
|| t|	|||||| |d}
|
jj|jd	 |fksVJ t |
jd |
jd d ksgJ d S )N*      T)Z
return_X_yMbP?r   r   sizer   r!   
   n_estimatorsrandom_state)r    	n_repeatsr+   r   r   )nprandomRandomStater   normalshapereshapehstackZ	ones_liker   fitr   importancesallimportances_mean)r   r   r    rngr,   Xyy_with_little_noiseweightsclfresult r?   n/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/inspection/tests/test_permutation_importance.py9test_permutation_importance_correlated_feature_regression   s(    &rA   c              	   C   s   t d}tjd}d}t }|j|j}}||jd|j	d d 
dd}|j||jd	}||d
< tddd}	|	|| t|	||||| |d}
|
jj	|j	d |fksWJ t|
jd |
jd d kshJ d S )Npandasr"   r#   r$   r   r%   r'   r   )columnsZcorrelated_featurer(   r)   r,   r+   r   r   )pytestimportorskipr-   r.   r/   r   datatargetr0   r1   r2   	DataFrameZfeature_namesr
   r4   r   r5   r6   r7   )r   r   pdr8   r,   Zdatasetr9   r:   r;   r=   r>   r?   r?   r@   @test_permutation_importance_correlated_feature_regression_pandasC   s*   
 
&rK   r"   c              	      s  t j|}d}d}d}d}d}|| }	t |}
|j|
|d t  fdd|
d | D }|t j}||k s<J t j||	||gdd}|j
||	fksRJ t| d	|d
\}}}}td|d}||| |j}|d | }||d  }| | k sJ t|||||| |d}|jj
|j
d |fksJ |jd | }|j|d  }tt |dksJ | dk sJ | dksJ d S )Nr#     r   r   )r&   c                    s   g | ]
} |k d dqS )r'   r   )r2   ).0cr:   r?   r@   
<listcomp>~   s    zEtest_robustness_to_high_cardinality_noisy_feature.<locals>.<listcomp>)Zaxisr   )Z	test_sizer+   r)   rD   gHz>g?g333333?)r-   r.   r/   arangechoicer3   astypeZfloat32ZconcatenateZrandnr1   r   r
   r4   Zfeature_importances_maxminr   r5   r7   abs)r   r   seedr8   r,   	n_samplesZ	n_classesZn_informative_featuresZn_noise_features
n_featuresclassesr9   ZX_trainZX_testZy_trainZy_testr=   Ztree_importancesZinformative_tree_importancesZnoisy_tree_importancesrZinformative_importancesZnoisy_importancesr?   rO   r@   1test_robustness_to_high_cardinality_noisy_featurek   sL   
 
r\   c                  C   s  t jd} d}t dddt jgg dgj}t g d}tt tdd	}|	|| t
||||| d
}|jj|jd |fksCJ t |jd |jd d ksTJ t jd} t
||||| d
}|jj|jd |fkspJ t |j|jrzJ t |jd |jd d ksJ d S )Nr"      r          @      @)r   r   r   r   r   r   r   r   lbfgsZsolverr,   r+   r   r'   r   )r-   r.   r/   arraynanTr   r   r   r4   r   r5   r1   r6   r7   Zallclose)r8   r,   r9   r:   r=   r>   Zresult2r?   r?   r@   'test_permutation_importance_mixed_types   s   "&rg   c            	      C   s   t d} tjd}d}| dddtjgg dd}tg d	}tt	 t
 }td
|dgfdt dgfg}t|tdd}||| t|||||d}|jj|jd |fks]J t|jd |jd d ksnJ d S )NrB   r"   r#   r   r^   r_   )abrh   ri   )col1col2r`   numrj   catrk   ra   rb   rc   r   r'   )rE   rF   r-   r.   r/   rI   re   rd   r   r   r   r   r   r   r4   r   r5   r1   r6   r7   )	rJ   r8   r,   r9   r:   Znum_preprocess
preprocessr=   r>   r?   r?   r@   .test_permutation_importance_mixed_types_pandas   s   
&ro   c                  C   sf   t dddd\} }t| } t|}t | |}d|jd  }t|| |ddd}t||jd	d
d d S )N  r(   r   rX   rY   r+   r   2   neg_mean_squared_error)r,   scoringg?gư>)ZrtolZatol)r   r   r   r4   Zcoef_r   r   r7   )r9   r:   lrexpected_importancesresultsr?   r?   r@   .test_permutation_importance_linear_regresssion   s   


rx   rp   c           	   	   C   s   t dddd\}}t ||}t|||ddd| d}|d  }|d  }|| d	ks/J t|||ddd
d}t|d |d  td t|||ddd
d}W d    n1 s[w   Y  t|d |d  d S )Nrp   r(   r   rq   r#   r   rD   r5   333333?r   )r,   r+   r   	threading)r   r   r4   r   rU   rT   r   r   )	r   r9   r:   ru   Zimportance_sequentialimp_minimp_maxZimportance_processesZimportance_threadingr?   r?   r@   ;test_permutation_importance_equivalence_sequential_parallel  s,   
r}   )Nr   r   c              	   C   s6  t d}tdddd\}}||}tdddd	}||d
d}t||g}|j	j
dks2J || }t|j}|||< || j	|j	ksLJ tt|t|_tdddd}	|	|| d}
t|	|||
d| |d}|d  }|d  }|| dksJ t|	|||
d| |d}t|d |d  d S )NrB   d   r#   r   rq      ZordinalZaveraged_inverted_cdf)Zn_binsencodeZquantile_methodr'   r   f)r*   	max_depthr+   rD   r5   ry   )rE   rF   r   rI   r   Zfit_transformr2   r-   r3   ZdtypekindZCategoricalZravellenrC   rQ   rS   strindexr   r4   r   rU   rT   r   )r   r   rJ   r9   r:   ZX_dfZbinnerZ
cat_columnZnew_col_idxrfr,   Zimportance_arrayr{   r|   Zimportance_dataframer?   r?   r@   7test_permutation_importance_equivalence_array_dataframe-  sV   


	r   
input_typerd   Z	dataframec           	      C   s~   t dd}}t||dd\}}|jdksJ t|| }tdd||}d}t||||d	d
}t||f}t	||j
 d S )Ng     j@r]   r   rq   g    .AZprior)Zstrategyr#   r   )r,   r   )intr   nbytesr   r	   r4   r   r-   zerosr   r5   )	r   rX   rY   r9   r:   r=   r,   r[   rv   r?   r?   r@   /test_permutation_importance_large_memmaped_datas  s   

r   c               	   C   s  t jd} d}d}|d }| dd||f}t |}d|d |df  |d |df  |d |< ||d df d||d df   ||d < tdd}||| t|||dd	d
d}|jd |jd  }|t	
ddksqJ t |}	t|||dd	d
|	d}|jd |jd  }
|
t	
|dksJ t t d|t d|g}	||||	 t|||dd	d
|	d}|jd |jd  }|| t	
ddksJ d S )Nr   rL   r   g        r$   r   F)Zfit_interceptZneg_mean_absolute_error   r+   rt   r,   g{Gz?r+   rt   r,   r    g    _Br   )r-   r.   r/   r0   r   r   r4   r   r7   rE   Zapproxr!   r3   repeat)r8   rX   rY   Zn_half_samplesxr:   ru   piZx1_x2_imp_ratio_w_nonewZx1_x2_imp_ratio_w_onesZx1_x2_imp_ratio_wr?   r?   r@   )test_permutation_importance_sample_weight  sP   
,,

		r   c               
   C   s   dd } t ddgddgg}t ddg}t ddg}t }||| zt|||d| dd W n ty@   td Y nw tt t|||d| d|d	 W d    d S 1 s]w   Y  d S )
Nc                 S   s   dS )Nr   r?   Z	estimatorr9   r:   r?   r?   r@   	my_scorer  s   zJtest_permutation_importance_no_weights_scoring_function.<locals>.my_scorerr   r   r   r]   r   zpermutation_test raised an error when using a scorer function that does not accept sample_weight even though sample_weight was Noner   )	r-   rd   r   r4   r   	TypeErrorrE   Zfailraises)r   r   r:   r   ru   r?   r?   r@   7test_permutation_importance_no_weights_scoring_function  s$   	"r   z list_single_scorer, multi_scorerr2rs   r   rs   c                 C   s$   t || |t|| | dS )Nr   )r   Zpredictr   r   r?   r?   r@   <lambda>  s   r   c           	   	   C   s   t dddd\}}t ||}t|||d|dd}t| t| ks&J | D ]}|| }t|||d|dd}t|j|j q(d S )Nrp   r(   r   rq   r   r   r   )r   r   r4   r   setkeysr   r5   )	Zlist_single_scorerZmulti_scorerr   r:   ru   Zmulti_importanceZscorerZmulti_resultZsingle_resultr?   r?   r@   (test_permutation_importance_multi_metric  s   r   c                  C   sv   t dgj} t g d}t }|| | d}tjt|d t|| |dd W d   dS 1 s4w   Y  dS )zjCheck that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    )r   r^   r_   g      @r`   z max_samples must be <= n_samples)matchr#   )r   N)	r-   rd   rf   r   r4   rE   r   
ValueErrorr   )r9   r:   r=   err_msgr?   r?   r@   -test_permutation_importance_max_samples_error  s   "r   )r"   )8numpyr-   rE   Zjoblibr   Znumpy.testingr   Zsklearn.composer   Zsklearn.datasetsr   r   r   r   Zsklearn.dummyr	   Zsklearn.ensembler
   r   Zsklearn.imputer   Zsklearn.inspectionr   Zsklearn.linear_modelr   r   Zsklearn.metricsr   r   r   Zsklearn.model_selectionr   Zsklearn.pipeliner   Zsklearn.preprocessingr   r   r   r   Zsklearn.utils._testingr   markZparametrizerA   rK   r\   rg   ro   rx   r}   r   r   r   r   r   r   r?   r?   r?   r@   <module>   sj    #&O
(D
=
