o
    ib                     @   s  d Z ddlZddlmZ ddlZddlZddlmZ ddl	m
Z
mZmZ ddlmZmZmZ ddlmZ ddlmZmZmZ dd	lmZmZmZ dd
lmZ ddlmZmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 G dd deeZ:dd Z;ej<=de9dd Z>dd Z?dd  Z@d!d" ZAej<=de9d#d$ ZBd%d& ZCd'd( ZDd)d* ZEd+d, ZFd-d. ZGd/d0 ZHd1d2 ZId3d4 ZJej<=d5ed6d6gej<=d7ed8fed9fgd:d; ZKej<=d<d=eLfd>eMfd?d@ eMfgej<=dAeegdBdC ZNej<=dDdd8gdEdF ZOdGdH ZPdIdJ ZQej<=g dKdLd9dLeRg dMgdLd8dLeRg dNgdLd9dOeRg dPgdLd8dOeRg dQgdLd9dReRdLd9ggdLd8dReRg dSgdLd9d9eRdLd9ggdLd8d9eRdLd8ggd9d9dOeRd9ggd9d8dLeRd9d8ggd9d8dOeRd9d8gggdTdU ZSej<=dVeegdWdX ZTej<=dVeegdYdZ ZUej<=dVeegej<=d[eeegd\d] ZVd^d_ ZWej<=d`edafedbfgdcdd ZXdedf ZYdgdh ZZdidj Z[dS )kz'
Testing Recursive feature elimination
    N)
attrgetter)parallel_backend)assert_allcloseassert_array_almost_equalassert_array_equal)BaseEstimatorClassifierMixinis_classifier)TransformedTargetRegressor)CCAPLSCanonicalPLSRegression)	load_irismake_classificationmake_friedman1)RandomForestClassifier)RFERFECV)SimpleImputer)LinearRegressionLogisticRegression)
get_scorermake_scorerzero_one_loss)
GroupKFoldcross_val_score)make_pipeline)StandardScaler)SVCSVR	LinearSVR)check_random_state)ignore_warnings)CSR_CONTAINERSc                       sb   e Zd ZdZdddZdd Zdd ZeZeZeZ	dd
dZ
dddZdd Z fddZ  ZS )MockClassifierz@
    Dummy classifier to test recursive feature elimination
    r   c                 C   s
   || _ d S N	foo_param)selfr'    r)   b/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/feature_selection/tests/test_rfe.py__init__$      
zMockClassifier.__init__c                 C   s>   t |t |ks
J tj|jd tjd| _tt|| _| S )N   )Zdtype)	lennponesshapeZfloat64coef_sortedsetZclasses_r(   Xyr)   r)   r*   fit'   s   zMockClassifier.fitc                 C   s   t |jd S )Nr   )r/   r0   r1   )r(   Tr)   r)   r*   predict-   s   zMockClassifier.predictNc                 C      dS )Ng        r)   r5   r)   r)   r*   score4      zMockClassifier.scoreTc                 C   s
   d| j iS )Nr'   r&   )r(   deepr)   r)   r*   
get_params7   r,   zMockClassifier.get_paramsc                 K   s   | S r%   r)   )r(   paramsr)   r)   r*   
set_params:   r=   zMockClassifier.set_paramsc                    s   t   }d|j_|S )NT)super__sklearn_tags__Z
input_tags	allow_nan)r(   tags	__class__r)   r*   rC   =   s   
zMockClassifier.__sklearn_tags__)r   )NN)T)__name__
__module____qualname____doc__r+   r8   r:   Zpredict_probadecision_function	transformr<   r?   rA   rC   __classcell__r)   r)   rF   r*   r$      s    


r$   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}td| dd}t	|ddd	}|
|| t|j|jd
 ks;J tdd}t	|ddd	}|
|| t| |  d S )Nr      size      )n_estimatorsrandom_state	max_depth   皙?	estimatorn_features_to_selectstepr-   linearZkernel)r!   r   r/   c_datanormalr.   targetr   r   r8   ranking_r1   r   r   Zget_support)	generatoririsr6   r7   clfrfeZclf_svcZrfe_svcr)   r)   r*   test_rfe_features_importanceC   s   "
rh   csr_containerc                 C   s6  t d}t }tj|j|jt|jdfdf }| |}|j}tdd}t	|ddd}|
|| ||}|
|| t|j|jd	 ksHJ tdd}	t	|	ddd}
|

|| |
|}|j|jjkshJ t|d d
 |jd d
  t||||j |||||j|jksJ t||  d S )Nr   rO   rP   r]   r^   rW   rX   rY   r-   
   )r!   r   r/   r_   r`   ra   r.   rb   r   r   r8   rM   rc   r1   r   r:   r<   toarray)ri   rd   re   r6   X_sparser7   rf   rg   X_rZ
clf_sparseZ
rfe_sparse
X_r_sparser)   r)   r*   test_rfeX   s(   "



 ro   c                  C   s   G dd dt t} tdd\}}tjtdd t|  d|| W d    n1 s,w   Y  tjtdd t|  dj||d	d
|| W d    n1 sSw   Y  t|  dj||d	d
j||d	d
 d S )Nc                   @   s    e Zd ZdddZdddZdS )z0test_RFE_fit_score_params.<locals>.TestEstimatorNc                 S   s2   |d u rt dtdd||| _| jj| _| S )Nfit: prop cannot be Noner]   r^   )
ValueErrorr   r8   svc_r2   r(   r6   r7   propr)   r)   r*   r8   |   s
   
z4test_RFE_fit_score_params.<locals>.TestEstimator.fitc                 S   s   |d u rt d| j||S )Nscore: prop cannot be None)rq   rr   r<   rs   r)   r)   r*   r<      s   z6test_RFE_fit_score_params.<locals>.TestEstimator.scorer%   )rH   rI   rJ   r8   r<   r)   r)   r)   r*   TestEstimator{   s    
rv   TZ
return_X_yrp   matchrZ   ru   Zfoo)rt   )	r   r   r   pytestraisesrq   r   r8   r<   )rv   r6   r7   r)   r)   r*   test_RFE_fit_score_paramsx   s   "(r}   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}tdd}t	|ddd}|
|| t	|d	dd}|
|| t|j|j t|j|j d S )
Nr   rO   rP   r]   r^   rW   rX   rY   g?)r!   r   r/   r_   r`   ra   r.   rb   r   r   r8   r   rc   support_)rd   re   r6   r7   rf   Zrfe_numZrfe_percr)   r)   r*   test_rfe_percent_n_features   s   "
r   c                  C   s   t d} t }tj|j| jt|jdfdf }|j}t }t	|ddd}|
|| ||}|
|| t|j|jd ksBJ |j|jjksKJ d S )Nr   rO   rP   rW   rX   rY   r-   )r!   r   r/   r_   r`   ra   r.   rb   r$   r   r8   rM   rc   r1   )rd   re   r6   r7   rf   rg   rm   r)   r)   r*   test_rfe_mockclassifier   s   "
r   c                 C   s  t d}t }tj|j|jt|jdfdf }t|j}t	t
dddd}||| |j D ]}t|j| |jd ksAJ q1t|j|jd ksNJ ||}t||j t	t
dddd}| |}	||	| ||	}
t|
 |j ttdd	}t	t
ddd|d
}t|j|| ||}t||j td}t	t
ddd|d
}||| ||}t||j dd }t	t
ddd|d
}||| |jdksJ t	t
dddd}||| |j D ]}t|j| dksJ qt|j|jd ksJ ||}t||j t	t
dddd}| |}	||	| ||	}
t|
 |j t	t
dddd}| |}	||	| ||	}
t|
 |j d S )Nr   rO   rP   r]   r^   r-   rZ   r\   F)Zgreater_is_better)rZ   r\   scoringaccuracyc                 S   r;   )Ng      ?r)   )rZ   r6   r7   r)   r)   r*   test_scorer   r=   ztest_rfecv.<locals>.test_scorerrS   皙?)r!   r   r/   r_   r`   ra   r.   listrb   r   r   r8   cv_results_keysr1   rc   rM   r   rk   r   r   r"   r   n_features_)ri   rd   re   r6   r7   rfecvkeyrm   Zrfecv_sparserl   rn   r   Zscorerr   r)   r)   r*   
test_rfecv   s^   "







r   c                  C   s   t d} t }tj|j| jt|jdfdf }t|j}t	t
 dd}||| |j D ]}t|j| |jd ks?J q/t|j|jd ksLJ d S )Nr   rO   rP   r-   r   )r!   r   r/   r_   r`   ra   r.   r   rb   r   r$   r8   r   r   r1   rc   )rd   re   r6   r7   r   r   r)   r)   r*   test_rfecv_mockclassifier	  s   "
r   c                  C   s   dd l } ddlm} | | _td}t }tj|j|j	t
|jdfdf }t|j}ttddddd}||| | j}|d t
| dksMJ d S )	Nr   )StringIOrO   rP   r]   r^   r-   )rZ   r\   verbose)sysior   stdoutr!   r   r/   r_   r`   ra   r.   r   rb   r   r   r8   seekreadline)r   r   rd   re   r6   r7   r   Zverbose_outputr)   r)   r*   test_rfecv_verbose_output  s   "

r   c           
      C   s   t | }t }tj|j|jt|jdfdf }t|j}ddgddgddgfD ]F\}}t	t
 ||d}||| t|jd | | d }|j D ]}	t|j|	 |ksZJ qMt|j|jd ksgJ |j|ksnJ q(d S )NrO   rP   rS   r-      rZ   r\   min_features_to_select)r!   r   r/   r_   r`   ra   r.   r   rb   r   r$   r8   ceilr1   r   r   rc   r   )
global_random_seedrd   re   r6   r7   r\   r   r   Z	score_lenr   r)   r)   r*   test_rfecv_cv_results_size.  s"   "
r   c                  C   sD   t tdd} t| sJ t }t| |j|j}| dks J d S )Nr]   r^   gffffff?)r   r   r	   r   r   r`   rb   min)rg   re   r<   r)   r)   r*   test_rfe_estimator_tagsG  s
   r   c                 C   s   d}t d|| d\}}|j\}}tdd}t|dd}|||}|j |d ks,J t|d	d}|||}|j |d ksCJ t|d
d}|||}|j |d ksZJ d S )Nrj   2   	n_samples
n_featuresrU   r]   r^   g{Gz?r\   rS   r      )r   r1   r   r   r8   r~   sum)r   r   r6   r7   r   rZ   selectorselr)   r)   r*   test_rfe_min_stepP  s   


r   c                 C   sz  dd }dd }ddg}ddg}ddg}t |||D ]D\}}}t| }	|	jd|fd	}
|	d }ttd
d||d}||
| t	|j
||||ksPJ t	|j
||||ks^J qd}ddg}ddg}t ||D ]L\}}t| }	|	jd|fd	}
|	d }ttd
d|d}||
| |j D ] }t|j| ||||ksJ t|j| ||||ksJ qqnd S )Nc                 S   s   d| | | d |  S Nr-   r)   r   r[   r\   r)   r)   r*   formula1q  s   z4test_number_of_subsets_of_features.<locals>.formula1c                 S   s   dt | | t|  S r   )r/   r   floatr   r)   r)   r*   formula2t  s   z4test_number_of_subsets_of_features.<locals>.formula2   r   rS   d   rP   r]   r^   rY   r-   rj   r   )zipr!   ra   Zrandroundr   r   r8   r/   maxrc   r   r   r   r.   )r   r   r   Zn_features_listZn_features_to_select_listZ	step_listr   r[   r\   rd   r6   r7   rg   r   r   r)   r)   r*   "test_number_of_subsets_of_featuresh  sJ   	
r   c           	      C   s   t | }t }tj|j|jt|jdfdf }|j}tt	ddd}|
|| |j}|j}|jdd |
|| t|j| | |j ksLJ | D ]}|| t|j| ks`J qPd S )NrO   rP   r]   r^   rz   rS   )n_jobs)r!   r   r/   r_   r`   ra   r.   rb   r   r   r8   rc   r   rA   r   r   r{   Zapprox)	r   rd   re   r6   r7   r   Zrfecv_rankingZrfecv_cv_results_r   r)   r)   r*   test_rfe_cv_n_jobs  s   "r   c                  C   s   t d} t }d}ttd|t|j}|j}|jdkt	}t
t| dddtddd}|j|||d	 |jdks>J d S )
Nr   rW   rU   r-   r   rS   )Zn_splits)rZ   r\   r   cv)groups)r!   r   r/   floorZlinspacer.   rb   r`   Zastypeintr   r   r   r8   r   )rd   re   Znumber_groupsr   r6   r7   Z
est_groupsr)   r)   r*   test_rfe_cv_groups  s   r   importance_getterzregressor_.coef_zselector, expected_n_featuresr   rW   c                 C   s\   t dddd\}}tdd}t|tjtjd}||| d}|||}|j |ks,J d S )Nr   rj   r   r   r   Z	regressorfuncZinverse_funcr   )	r   r    r
   r/   logexpr8   r~   r   )r   r   Zexpected_n_featuresr6   r7   rZ   log_estimatorr   r)   r)   r*   test_rfe_wrapped_estimator  s   

r   zimportance_getter, err_typeautorandomc                 C   s   | j S r%   )Z
importance)xr)   r)   r*   <lambda>  s    r   Selectorc                 C   sr   t dddd\}}t }t|tjtjd}t| ||| d}||| W d    d S 1 s2w   Y  d S )Nr   rj   *   r   r   r   )	r   r    r
   r/   r   r   r{   r|   r8   )r   Zerr_typer   r6   r7   rZ   r   modelr)   r)   r*   %test_rfe_importance_getter_validation  s   

"r   r   c                 C   sn   t  }|j}|j}tj|d d< tj|d d< t }| d ur%t|| d}nt|d}|	|| |
| d S )Nr   r-   )rZ   r   rz   )r   r`   rb   r/   naninfr$   r   r   r8   rM   )r   re   r6   r7   rf   rg   r)   r)   r*   test_rfe_allow_nan_inf_in_x  s   
r   c                  C   sR   t t t } tdd\}}t| ddd}||| ||jd dks'J d S )NTrw   rS   $named_steps.logisticregression.coef_)r[   r   r-   )r   r   r   r   r   r8   rM   r1   )Zpipeliner`   r7   Zsfmr)   r)   r*   test_w_pipeline_2d_coef_  s   r   c           	         s   t | }t }tj|j|jt|jdfdf }|j}tt	ddd  
|| dd  j D }t fdd|D }tj|d	d
}tj|d	d
}t jd | t jd | d S )NrO   rP   r]   r^   rz   c                 S   s   g | ]
}t d |r|qS )zsplit\d+_test_score)research.0r   r)   r)   r*   
<listcomp>!  s    
z+test_rfecv_std_and_mean.<locals>.<listcomp>c                    s   g | ]} j | qS r)   )r   r   r   r)   r*   r   &  s    r   ZaxisZmean_test_scoreZstd_test_score)r!   r   r/   r_   r`   ra   r.   rb   r   r   r8   r   r   ZasarraymeanZstdr   )	r   rd   re   r6   r7   Z
split_keysZ	cv_scoresZexpected_meanZexpected_stdr)   r   r*   test_rfecv_std_and_mean  s   "r   )r   r   r\   cv_results_n_featuresr-   )r-   rS   r   rW   )r-   rS   r   rW   r   rS   )r-   rS   rW   )r-   r   r   r   )r-   rS   r   c                    sh   t d||dd\}}ttdd|| d  || t jd | t fdd	 j D s2J d S )
NrR   r   )r   r   Zn_informativeZn_redundantr]   r^   r   r   c                 3   s&    | ]}t |t  jd  kV  qdS )r   N)r.   r   )r   valuer   r)   r*   	<genexpr>N  s
    
z3test_rfecv_cv_results_n_features.<locals>.<genexpr>)r   r   r   r8   r   r   allvalues)r   r   r\   r   r6   r7   r)   r   r*    test_rfecv_cv_results_n_features.  s   
r   ClsRFEc                 C   s@   t jjdd}t jjddd}tdd}| |}||| d S )N)rj   r   rP   rS   )rj   rS   r   )rT   )r/   r   ra   randintr   r8   )r   r6   r7   rf   Zrfe_testr)   r)   r*   test_multioutputT  s
   
r   c                 C   sF   t dd\}}tj|d< tt t t }| |dd}||| dS )z`Check that RFE works with pipeline that accept nans.

    Non-regression test for gh-21743.
    Trw   )r   r   r   )rZ   r   N)r   r/   r   r   r   r   r   r8   )r   r6   r7   pipefsr)   r)   r*   test_pipeline_with_nans]  s   
r   PLSEstimatorc                 C   sH   t dddd\}}|dd}| |dd||}|||dks"J d	S )
zCheck the behaviour of RFE with PLS estimators.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/12410
    r   rj   r   r   r-   )Zn_componentsr   g      ?N)r   r8   r<   )r   r   r6   r7   rZ   r   r)   r)   r*   test_rfe_plss  s   
r   c                  C   s   t  } tt d}d}d}tjt|d}|| j| j	| j W d   n1 s+w   Y  t
|jjts9J |t|jjv sCJ dS )a  Check that we raise the proper AttributeError when the estimator
    does not implement the `decision_function` method, which is decorated with
    `available_if`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/28108
    rz   z/This 'RFE' has no attribute 'decision_function'z>'LinearRegression' object has no attribute 'decision_function'rx   N)r   r   r   r{   r|   AttributeErrorr8   r`   rb   rL   
isinstancer   	__cause__str)re   rg   Z	outer_msgZ	inner_msgZ	exec_infor)   r)   r*   "test_rfe_estimator_attribute_error  s   r   zClsRFE, paramr[   r   c                 C   sn   t ddd\}}tjt| dd | d	dt i|di}||| W d   dS 1 s0w   Y  dS )
zCheck if the correct warning is raised when trying to initialize a RFE
    object with a n_features_to_select attribute larger than the number of
    features present in the X variable that is passed to the fit method
    rR   r   )r   rU   z=21 > n_features=20rx   rZ      Nr)   )r   r{   ZwarnsUserWarningr   r8   )r   paramr6   r7   Zclsrfer)   r)   r*   %test_rfe_n_features_to_select_warning  s
   "r   c                  C   s   t dd\} }| jd }t|}d|d|d < tj| | d|d  gdd}t||d|d  g}tdd}t|dd	}|j| ||d
 t|dd	}||| t|j	|j	 t|dd	}	t|}
|	j| ||
d
 t
|	j	|j	rxJ dS )z4Test that `RFE` works correctly with sample weights.r   r   rS   Nr   r]   r^   rX   r   )sample_weight)r   r1   r/   Z	ones_likeZconcatenater   r   r8   r   rc   Zarray_equal)r6   r7   r   r   ZX2y2rZ   Zrfe_swrg   Zrfe_sw_2Zsample_weight_2r)   r)   r*   test_rfe_with_sample_weight  s    



r   c                 C   sv   t | d\}}t }t|dd}||| |j}td ||| W d    n1 s.w   Y  t||j d S )Nr   rS   )rZ   r   	threading)r   r   r   r8   rc   r   r   )r   r6   r7   rf   rg   Zranking_refr)   r)   r*   &test_rfe_with_joblib_threading_backend  s   
r   c                 C   s   t | d\}}t }t|ddd}||| t|jd t|jd ks'J t|jd t|jd ks7J t|jd	 t|jd
 ksGJ dS )zx
    Test that the results of RFECV are consistent across the different folds
    in terms of length of the arrays.
    r   rS   r   )rZ   r   r   Zsplit1_test_scoreZsplit2_test_scoreZsplit1_supportZsplit2_supportZsplit1_rankingZsplit2_rankingN)r   r   r   r8   r.   r   )r   r6   r7   rf   r   r)   r)   r*   test_results_per_cv_in_rfecv  s"   

r   )\rK   r   operatorr   numpyr/   r{   Zjoblibr   Znumpy.testingr   r   r   Zsklearn.baser   r   r	   Zsklearn.composer
   Zsklearn.cross_decompositionr   r   r   Zsklearn.datasetsr   r   r   Zsklearn.ensembler   Zsklearn.feature_selectionr   r   Zsklearn.imputer   Zsklearn.linear_modelr   r   Zsklearn.metricsr   r   r   Zsklearn.model_selectionr   r   Zsklearn.pipeliner   Zsklearn.preprocessingr   Zsklearn.svmr   r   r    Zsklearn.utilsr!   Zsklearn.utils._testingr"   Zsklearn.utils.fixesr#   r$   rh   markZparametrizero   r}   r   r   r   r   r   r   r   r   r   r   r   r   rq   r   r   r   r   r   arrayr   r   r   r   r   r   r   r   r   r)   r)   r)   r*   <module>   s    $

Q	A





!