o
    i~                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$ dd	l%m&Z& d
d Z'dd Z(ej)*de&dd Z+ej)*dddgdd Z,ej)*de&dd Z-dd Z.dd Z/ej)*de0ddgddgddgdd gge0g d!e0d"d#gdfe0d$dgd%dgddgd&d gge0g d'e0d"d"gdfe0ddgddgddgdd gge0g d!e0ej1d#gdfe0d$dgd%dgddgd&d gge0g d'e0ej1ej1gdfgd(d) Z2ej)*d*e0ddgddgddgdd gge0g d!e0d"d+ge0d,d-gdfe0d$dgd%dgddgd&d gge0g d'e0d"d"ge0d,d,gdfe0ddgddgddgd%d gge0g d.e0e3ej4j5d/ge0d"d0gdfe0d%dgddgddgdd gge0g d.e0e3ej4j5d/ge0d"d0gdfe0ddgddgddgdd gge0g d!e0ej1d+ge0ej1d-gdfe0d$dgd%dgddgd&d gge0g d'e0ej1ej1ge0ej1ej1gdfe0ddgddgddgd%d gge0g d.e0ej6d/ge0d"d0gdfe0d%dgddgddgdd gge0g d.e0ej6d/ge0d"d0gdfgd1d2 Z7d3d4 Z8d5d6 Z9ej)*de&d7d8 Z:d9d: Z;d;d< Z<ej)*d=ej=ej4gd>d? Z>d@dA Z?dBdC Z@dDdE ZAdFdG ZBdHdI ZCdJdK ZDdLdM ZEej)*dNg dOej)*dPg dQdRdS ZFdTdU ZGdVdW ZHdXdY ZIdZd[ ZJd\d] ZKd^d_ ZLd`da ZMdbdc ZNddde ZOdfdg ZPdhdi ZQdjdk ZRdldm ZSej)*dned doedpdqedrd dsedtdpdsgdudv ZTdS )wz0
Todo: cross-check the F-value with stats model
    N)assert_allclose)sparsestats)	load_irismake_classificationmake_regression)GenericUnivariateSelect	SelectFdr	SelectFpr	SelectFweSelectKBestSelectPercentilechi2	f_classiff_onewayf_regressionmutual_info_classifmutual_info_regressionr_regression)	safe_mask)_convert_containerassert_almost_equalassert_array_almost_equalassert_array_equalignore_warnings)CSR_CONTAINERSc                  C   sj   t jd} | dd}d| dd }t||\}}t||\}}t ||s+J t ||s3J d S )Nr   
         )nprandomRandomStaterandnr   r   Zallclose)rngX1X2fpvf2pv2 r*   m/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/feature_selection/tests/test_feature_select.pytest_f_oneway_vs_scipy_stats+   s   r,   c                  C   sf   t jd} | jddd}t d}t||\}}t|t|\}}t||dd t||dd d S )Nr   r   )r   r   size   decimal)	r   r    r!   randintaranger   astypefloatr   )r#   XyZfintZpintr&   pr*   r*   r+   test_f_oneway_ints6   s   
r9   csr_containerc                 C   s   t ddddddddd	d
dd\}}t||\}}t| ||\}}|dk s)J |dk s1J |dk  s9J |d d dk  sEJ |dd  dk sQJ t|| t|| d S N      r      r      r           r   F	n_samples
n_featuresn_informativeZn_redundantZ
n_repeatedZ	n_classesZn_clusters_per_classZflip_yZ	class_sepshufflerandom_state   皙?-C6?)r   r   allr   r:   r6   r7   Fr'   ZF_sparseZ	pv_sparser*   r*   r+   test_f_classifD   s,   

rM   centerTFc           	      C   s   t dddddd\}}t||| d}d|k  sJ |d	k  s"J t|d
}t||| d}t|| t||d d tjf f}tj|dd}|d ddf }t	||dd d S )Ni  r=   rG   Fr   rB   rC   rD   rE   rF   rN   r   r   )Zrowvarr   r0   )
r   r   rJ   r   r   r   ZhstackZnewaxisZcorrcoefr   )	rN   r6   r7   Zcorr_coeffsZsparse_XZsparse_corr_coeffsZZcorrelation_matrixZnp_corr_coeffsr*   r*   r+   test_r_regressiona   s   



rS   c                 C   s   t dddddd\}}t||\}}|dk sJ |dk s"J |dk  s*J |d d dk  s6J |dd  d	k sBJ t||d
d\}}t| ||d
d\}}t|| t|| t||dd\}}t| ||dd\}}t|| t|| d S )Nr<   r=   rG   Fr   rO   r   rH   rI   TrP   )r   r   rJ   r   rK   r*   r*   r+   test_f_regressionw   s"   




rT   c                  C   sf   t jd} | dd}t dt}t||\}}t||t\}}t	||d t	||d d S )Nr   r   r=   rG   )
r   r    r!   randr3   r4   intr   r5   r   )r#   r6   r7   F1Zpv1F2r)   r*   r*   r+   test_f_regression_input_dtype   s   rY   c                  C   s   t dddd} | j}t |}|d d d  d9  < d|d< t| |d	d
\}}t| |dd
\}}t||d  |d  | t|d d d S )N   rQ   r   r>   g      r@   r   TrP   F      ?       @g@9w?)r   r3   Zreshaper.   onesr   r   r   )r6   rB   YrW   _rX   r*   r*   r+   test_f_regression_center   s   
ra   z&X, y, expected_corr_coef, force_finiter>   r   r   r/   )r   r   r   r   r@   gI+?rG   r   r?   )r   r   r   r   c                 C   sT   t   t dt t| ||d}W d   n1 sw   Y  tj|| dS )zCheck the behaviour of `force_finite` for some corner cases with `r_regression`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    errorforce_finiteN)warningscatch_warningssimplefilterRuntimeWarningr   r   testingr   )r6   r7   Zexpected_corr_coefrd   Z	corr_coefr*   r*   r+   test_r_regression_force_finite   s
   
'rj   z;X, y, expected_f_statistic, expected_p_values, force_finiteg
[?r\   gSr.j?)r   r   r>   r   g?gajK?c                 C   sf   t   t dt t| ||d\}}W d   n1 sw   Y  tj|| tj|| dS )zCheck the behaviour of `force_finite` for some corner cases with `f_regression`.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15672
    rb   rc   N)re   rf   rg   rh   r   r   ri   r   )r6   r7   Zexpected_f_statisticZexpected_p_valuesrd   Zf_statisticZp_valuesr*   r*   r+   test_f_regression_corner_case   s   
Mrk   c                  C   s   t ddddddddd	d
dd\} }t| |\}}|dk s J |dk s(J |dk  s0J |d d dk  s<J |dd  dk sHJ d S r;   )r   r   rJ   )r6   r7   rL   r'   r*   r*   r+   test_f_classif_multi_class2  s&   
rl   c                  C   s   t ddddddddd	d
dd\} }ttdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S Nr<   r=   r   r>   r   r?   r   r@   r   FrA      
percentilerp   modeparamrG   )
r   r   r   fit	transformr   r   get_supportr   zerosr6   r7   univariate_filterX_rX_r2supportgtruthr*   r*   r+   test_select_percentile_classifK  0   


r~   c           
      C   s  t ddddddddd	d
dd\}}| |}ttdd}||||}ttddd|||}t| |  | }t	
d}d|d d< t|| ||}t|s[J t||}	|j|jkshJ t|d d |	f  |  |j|jksJ d S rm   )r   r   r   rt   ru   r   r   Ztoarrayrv   r   rw   inverse_transformr   issparser   shapeZnnz)
r:   r6   r7   ry   rz   r{   r|   r}   ZX_r2invZsupport_maskr*   r*   r+   %test_select_percentile_classif_sparsek  s>   




r   c                  C   s   t ddddddddd	d
dd\} }ttdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr<   r=   r   r>   r   r?   r   r@   r   FrA   rG   kk_bestrq   )
r   r   r   rt   ru   r   r   rv   r   rw   rx   r*   r*   r+   test_select_kbest_classif  r   r   c                  C   sf   t ddddd\} }ttdd}|| || }t| | ttddd	| || }t|| d S )
Nr=   r   Fr   rB   rC   rE   rF   rJ   r   r   rq   )r   r   r   rt   ru   r   r   )r6   r7   ry   rz   r{   r*   r*   r+   test_select_kbest_all  s   

r   dtype_inc                 C   s   t ddddd\}}|| }ttdd}||| | }tjdtd}t	|| t
jtdd	 ||}W d    n1 sBw   Y  |jd
ksNJ |j| ksUJ d S )Nr=   r   Fr   r   r   dtypeNo features were selectedmatch)r=   r   )r   r4   r   r   rt   rv   r   rw   boolr   pytestwarnsUserWarningru   r   r   )r   r6   r7   ry   r|   r}   
X_selectedr*   r*   r+   test_select_kbest_zero  s   


r   c                  C   s   t ddddddddd	d
dd\} }ttdd}|| || }td}d|d d< dD ]}tt|dd| || }t|| |	 }t
|| q-d S )Nr<   r=   r   r>   r   r?   r   r@   r   FrA   {Gz?alpharG   fdrZfprfwerq   )r   r   r   rt   ru   r   rw   r   r   rv   r   r6   r7   ry   rz   r}   rr   r{   r|   r*   r*   r+   test_select_heuristics_classif  s4   


r   c                 C   s:   | j }|  }tt|| t||  d   d S N)Zscores_rv   r   r   sortsum)Zscore_filterscoresr|   r*   r*   r+   assert_best_scores_kept  s   ,r   c                  C   s   t dddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}d|d d< t|| |  }d|d d t	|f< t||| t|t||t d S )Nr<   r=   rG   Fr   rO   rn   ro   rp   rq   r   )r   r   r   rt   ru   r   r   r   rv   r   rw   copyZlogical_notr   r4   r   )r6   r7   ry   rz   r{   r|   r}   ZX_2r*   r*   r+   !test_select_percentile_regression	  s*   




r   c                  C   s   t dddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}t|| d S )Nr<   r=   rG   Fr   rO   d   ro   rp   rq   )r   r   r   rt   ru   r   r   r   rv   r   r^   rx   r*   r*   r+   &test_select_percentile_regression_full'  s   



r   c                  C   s   t ddddddd\} }ttdd}|| || }t| ttd	dd
| || }t|| | }t	
d}d|d d< t|| d S )Nr<   r=   rG   Fr   r   rB   rC   rD   rE   rF   noiser   r   rq   r   )r   r   r   rt   ru   r   r   r   rv   r   rw   rx   r*   r*   r+   test_select_kbest_regression<  s(   
	

r   c                  C   s   t ddddddd\} }ttdd	}|| || }td}d
|d d< dD ]6}tt|dd| || }t|| |	 }t|d d tj
dtd t|dd  d
kdk s^J q(d S )Nr<   r=   rG   Fr   r   r   r   r   r   r   rq   rG   r   r   )r   r
   r   rt   ru   r   rw   r   r   rv   r^   r   r   r   r*   r*   r+   !test_select_heuristics_regressionX  s,   
	

 r   c                  C   sp  t ddgddgddgg} t dgdgdgg}t| |\}}t|t ddg t|t dd	g ttd
d}|| | | }t|t ddg ttdd}|| | | }t|t ddg t	tdd}|| | | }	t|	t ddg t
td
d}
|
| | |
 }t|t ddg ttd
d}|| | | }t|t ddg d S )Nr   r=      r   r   g      @ggm?gQaK?gX٬<y?皙?r   TFr   2   ro   )r   arrayr   r   r	   rt   rv   r   r   r   r
   r   )r6   r7   r   ZpvaluesZ
filter_fdrZsupport_fdrZfilter_kbestZsupport_kbestZfilter_percentileZsupport_percentileZ
filter_fprZsupport_fprZ
filter_fweZsupport_fwer*   r*   r+   test_boundary_case_ch2u  s2   r   r   )gMbP?r   r   rD   )r   rG   r   c                    sT   dd t  fddtdD } |ksJ |dkr&| d ks(J d S d S )Nc                 S   s   t dd|d|dd\}}tjdd% tt| d}||||}ttd	| d
|||}W d    n1 s:w   Y  t|| |	 }t
||d  dk}	t
|d | dk}
|	dkrddS |	|
|	  }|S )N   r=   Fr   r   T)recordr   r   rq   r   r   r@   )r   re   rf   r	   r   rt   ru   r   r   rv   r   r   )r   rD   rF   r6   r7   ry   rz   r{   r|   Znum_false_positivesZnum_true_positivesfalse_discovery_rater*   r*   r+   
single_fdr  s4   
	
z.test_select_fdr_regression.<locals>.single_fdrc                    s   g | ]} |qS r*   r*   ).0rF   r   rD   r   r*   r+   
<listcomp>  s    z.test_select_fdr_regression.<locals>.<listcomp>r   r   r   )r   meanrange)r   rD   r   r*   r   r+   test_select_fdr_regression  s   $r   c                  C   s   t dddddd\} }ttdd}|| || }ttd	dd
| || }t|| | }t	d}d|d d< t|d d tj
dtd t|dd  dkdk sYJ d S )Nr<   r=   rG   Fr   rO   r   r   r   rq   r   r   r   r>   )r   r   r   rt   ru   r   r   rv   r   rw   r^   r   r   rx   r*   r*   r+   test_select_fwe_regression  s   



"r   c                  C   s   g dg dg dg dg} dg}dd }| D ]:}t |dd}t|j|g|}|jd dks1J t| t |d	d}t|j|g|}|jd d	ksMJ t| qd S )
Nr   r   r   r   r   r   r   r   r   r   r   r   r   c                 S      | d | d fS Nr   r*   r6   r7   r*   r*   r+   <lambda>      z.test_selectkbest_tiebreaking.<locals>.<lambda>r   r>   )r   r   fit_transformr   r   ZXsr7   Zdummy_scorer6   selr$   r%   r*   r*   r+   test_selectkbest_tiebreaking  s   
r   c                  C   s   g dg dg dg dg} dg}dd }| D ]:}t |dd	}t|j|g|}|jd dks1J t| t |d
d	}t|j|g|}|jd dksMJ t| qd S )Nr   r   r   r   r   c                 S   r   r   r*   r   r*   r*   r+   r     r   z3test_selectpercentile_tiebreaking.<locals>.<lambda>"   ro   C   r>   )r   r   r   r   r   r   r*   r*   r+   !test_selectpercentile_tiebreaking  s   
r   c                  C   s   t g dg dg} ddg}tdD ]8}| d d |f }ttdd||}|jdks/J d	|vs5J ttd
d||}|jdksFJ d	|vsLJ qd S )N)'  '  '  r   r   r   r   r   r   r   r>   r>   r   )r>   r>   r   r   ro   )	r   r   	itertoolspermutationsr   r   r   r   r   )ZX0r7   permr6   Xtr*   r*   r+   test_tied_pvalues  s   r   c                  C   s   t g dg dg dg} ddgddgddgg}ttdd| |}|jdks*J d|vs0J ttd	d
| |}|jdksAJ d|vsGJ d S )N)r   r   r   )r   r   r   )i  c   r   r   r   r>   r   )r   r>   r   ro   )r   r   r   r   r   r   r   )r6   r7   r   r*   r*   r+   test_scorefunc_multilabel  s   r   c                  C   sn   t g dg dg} ddg}dD ]#}tt|d| |}|g dg}t|d t d| d   qd S )	N)r   r   r   r   r   r   )r   r>   r   r   r   r   )r   r   r   r   rt   ru   r   r3   )ZX_trainZy_trainrC   r   ZX_testr*   r*   r+   test_tied_scores%  s    r   c                  C   sl   g dg dg dg} g d}t tddttddfD ]}t|j| | t|jd	d
tddg qd S )Nr   r   r   r   rQ   rQ   r         ?r   r   r   r   r>   r   r   ro   T)indicesr   )	r   r   r   r   rt   r   rv   r   r   )r6   r7   selectr*   r*   r+   	test_nans0  s   

r   c                  C   s   g dg dg dg} g d}d}t jt|d tdd| | W d    n1 s,w   Y  t jt|d td	dd
| | W d    d S 1 sNw   Y  d S )Nr   r   r   r   zDk=4 is greater than n_features=3. All the features will be returned.r   r/   r   r   rq   )r   r   r   r   rt   r   )r6   r7   msgr*   r*   r+   test_invalid_k?  s   "r   c                  C   sZ   t ddd\} }d| d d df< tt t| | W d    d S 1 s&w   Y  d S )Nr   rG   )rB   rC   r]   r   )r   r   r   r   r   r   r*   r*   r+   test_f_classif_constant_featureJ  s
   "r   c               	   C   s   t jd} | dd}| jdddd}tdd||tdd||tdd||t	dd||t
dd	||g}|D ]/}t| t d tjtd
d ||}W d    n1 scw   Y  |jdksoJ q@d S )Nr   (   r   r/   r-   r   r   ro   r   r   r   )r   r   )r   r    r!   rU   r2   r   rt   r	   r
   r   r   r   rv   rw   r   r   r   ru   r   )r#   r6   r7   Zstrict_selectorsselectorr   r*   r*   r+   test_no_feature_selectedS  s    r   c                  C   s   t dddddddddddd	\} }ttdd
}|| || }ttddd| || }t|| | }t	d}d|d d< t|| t
tdd}|| || }ttddd| || }t|| | }t	d}d|d d< t|| d S )Nr   rG   r   r   r>   r@   r   FrA   r   r   rq   r   ro   rp   )r   r   r   rt   ru   r   r   rv   r   rw   r   rx   r*   r*   r+   test_mutual_info_classifh  sF   





r   c                  C   s   t ddddddd\} }ttdd}|| || }t| ttddd	| || }t|| | }t	
d}d
|d d< t|| ttdd}|| || }ttddd	| || }t|| | }t	
d}d
|d d< t|| d S )Nr   r   r>   Fr   r   r   r   rq   r   r=   ro   rp   )r   r   r   rt   ru   r   r   r   rv   r   rw   r   rx   r*   r*   r+   test_mutual_info_regression  s>   






r   c                     s   t d} tddd\}}|tjtjd}| j|d dd|d< |j  fd	d
}t	|ddj
dd}|||}t|jg d |j D ]\}}||j| ksUJ qHdS )zmCheck that the output datafarme dtypes are the same as the input.

    Non-regression test for gh-24860.
    ZpandasT)Z
return_X_yZas_frame)petal length (cm)petal width (cm)r   r   )Zbinspetal_width_binnedc                    s(   dddddd t  fddD S )	Nr   r>   r   r/   rG   )zsepal length (cm)zsepal width (cm)r   r   r   c                    s   g | ]} | qS r*   r*   )r   nameZrankingr*   r+   r     s    zBtest_dataframe_output_dtypes.<locals>.selector.<locals>.<listcomp>)r   Zasarrayr   Zcolumn_orderr   r+   r     s   z.test_dataframe_output_dtypes.<locals>.selectorr   r   )ru   )r   r   r   N)r   Zimportorskipr   r4   r   float32float64cutcolumnsr   Z
set_outputr   r   Zdtypesitems)pdr6   r7   r   ry   outputr   r   r*   r   r+   test_dataframe_output_dtypes  s$   


r   r   r   P   ro   r   rq   rp   c                 C   s   t jd}|dd}d	dd}| j|d | | | |}t||ddddf  | |}t||ddddf  dS )
zeCheck support for unsupervised feature selection for the filter that could
    require only `X`.
    r   r   rG   Nc                 S   s   t g dS )N)r   r   r   r   r   )r   r   r   r*   r*   r+   
score_func  s   z,test_unsupervised_filter.<locals>.score_func)r   r/   r   )	r   r    r!   r"   Z
set_paramsrt   ru   r   r   )r   r#   r6   r   ZX_transr*   r*   r+   test_unsupervised_filter  s   



r   )U__doc__r   re   numpyr   r   Znumpy.testingr   Zscipyr   r   Zsklearn.datasetsr   r   r   Zsklearn.feature_selectionr   r	   r
   r   r   r   r   r   r   r   r   r   r   Zsklearn.utilsr   Zsklearn.utils._testingr   r   r   r   r   Zsklearn.utils.fixesr   r,   r9   markZparametrizerM   rS   rT   rY   ra   r   nanrj   Zfinfor   maxinfrk   rl   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r*   r*   r*   r+   <module>   s   <


  	 	 	
!  
 
 
 
 
 
 

E 
, 
%"1	,(&

	