o
    i&]                  
   @   s  d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z
 d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& dd Z'd	d
 Z(dd Z)ej*+dg e,dfg de,dfe-g e,dfe-g de,dfej..de,dfgdd Z/ej*+di dddgdd Z0dd Z1dd Z2dd Z3dd Z4dd  Z5d!d" Z6d#d$ Z7d%d& Z8d'd( Z9ej*+d)g d*e-g d*e:g d*gd+d, Z;d-d. Z<d/d0 Z=d1d2 Z>d3d4 Z?d5d6 Z@d7d8 ZAd9d: ZBd;d< ZCd=d> ZDej*+d?d@dAgej*+dBg dCdDdE ZEej*+dFdAd@gdGdH ZFdIdJ ZGdKdL ZHdMdN ZIdOdP ZJdQdR ZKdSdT ZLdUdV ZMdS )W    N)defaultdict)partial)make_biclusters
make_blobsmake_checkerboardmake_circlesmake_classificationmake_friedman1make_friedman2make_friedman3make_hastie_10_2make_low_rank_matrix
make_moonsmake_multilabel_classificationmake_regressionmake_s_curvemake_sparse_coded_signalmake_sparse_spd_matrixmake_sparse_uncorrelatedmake_spd_matrixmake_swiss_roll)assert_allcloseassert_allclose_dense_sparseassert_almost_equalassert_array_almost_equalassert_array_equal)assert_all_finitec                  C   sN  ddg} t ddddddddd d | d	d
\}}| ddgksJ |jdks'J d|jdks0J dt|jdks<J dt|d	kdksHJ dt|dkdksTJ dt|dkdks`J dt dddd	d	ddd	d\}}|jdkswJ d|jdksJ dt|d|jfg|jd  |jd |jd jd	 dksJ d!d S )"N皙?      ?d               Fr   )	n_samples
n_featuresn_informativen_redundant
n_repeated	n_classesn_clusters_per_class	hypercubeshiftscaleweightsrandom_stater   r    X shape mismatchr   y shape mismatchr#   Unexpected number of classes
   z(Unexpected number of samples in class #0   z(Unexpected number of samples in class #1   A   z(Unexpected number of samples in class #2     T      ?)r$   r%   r&   r'   r(   r+   r-   r/   )r:   r;   )r:    z Unexpected number of unique rows)r   shapenpuniquesumviewdtypeZreshape)r.   Xy rG   g/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/datasets/tests/test_samples_generator.pytest_make_classification(   sV   

 rI   c                  C   s  d} t t| ddddddd}ddgdfddgd dfdd	gd
 dfddgd dfddd	gdfddgd dfddgdffD ]\}}}t|}|| }|d }dD ]}||||||||dd\}	}
|	j||fkshJ |
j|fkspJ t|	}|jd|jd d	 }tj
|dd\}}t||ksJ dtt}t||
D ]\}}|| | q| D ]}t||ksJ dqt||ksJ dtt|
t|
 | dg| dd tt|D ]E}|	||k jdd}|rtt||  t|ddd qtt tt||  t|ddd W d    n	1 sw   Y  qqOq<tt |ddddd! W d    n	1 s@w   Y  tt |ddddd! W d    d S 1 s_w   Y  d S )"zTest the construction of informative features in make_classification

    Also tests `n_clusters_per_class`, `n_classes`, `hypercube` and
    fully-specified `weights`.
    g    .Ar   r"   F)	class_sepr'   r(   flip_yr,   r-   shuffler8   gUUUUUU?r#   r      r<         ?r6   @   2   )FT)r$   r)   r.   r%   r&   r*   r+   r/   z|S{0})rD   T)Zreturn_inversez6Wrong number of clusters, or not in distinct quadrantsz"Wrong number of clusters per classzWrong number of classesz!Wrong number of samples per classerr_msgZaxisr!   z/Clusters are not centered on hypercube vertices)decimalrR   z5Clusters should not be centered on hypercube verticesN)r%   r&   r)   r*   )r   r   lenr?   r@   signrC   formatstridesZravelrA   r   setzipaddvaluesr   bincountrangemeanabsonespytestraisesAssertionError
ValueError)rJ   maker&   r.   r*   r)   
n_clustersr$   r+   rE   rF   ZsignsZunique_signsZcluster_indexZclusters_by_classZclusterclsZclustersZcentroidrG   rG   rH   -test_make_classification_informative_featuresX   s   

	

<$ri   c                     sP  i ddddddddd	dd
ddddddddddddddddddddd} t d(i | \}}d| d< t d(i |  t dr_t dr_t d r_t d!r_t d"saJ  fd#d$}t| jspJ t| jsyJ  jt jksJ  j| ksJ |d%| d ksJ |d&| d ksJ |d'| d	 ksJ dS ))zy
    Test that make_classification returns a Bunch when return_X_y is False.

    Also that bunch.X is the same as X
    r$   r   r%   r    r&   r!   r'   r"   r(   r)   r#   r*   r8   r.   NrK   {Gz?rJ         ?r+   Tr,           r-   rL   r/   *   Z
return_X_yFDESCR
parametersfeature_inforE   rF   c                    s    j | S )N)rp   count)Zstr_ZbunchrG   rH   rq      s   z2test_make_classification_return_x_y.<locals>.countZinformativeZ	redundantZrepeatedrG   )	r   hasattrr@   Zarray_equalrE   rF   rn   __doc__ro   )kwargsrE   rF   rq   rG   rr   rH   #test_make_classification_return_x_y   sl   	
rv   zweights, err_type, err_msgz:Weights specified but incompatible with number of classes.)r   rN   r   r#   c                 C   s>   t j||d t| d W d    d S 1 sw   Y  d S )Nmatch)r.   )rb   rc   r   )r.   Zerr_typerR   rG   rG   rH   %test_make_classification_weights_type   s   "ry   ru   )r)   r&   c                 C   sV   t dddgdd| \}}t dtddgdd| \}}t|| t|| d S )Nr   g?r   )r.   r/   rG   )r   r@   arrayr   )ru   X1y1X2y2rG   rG   rH   1test_make_classification_weights_array_or_list_ok  s   "
r   c               	   C   s   t ddD ]B\} }tddddd| d\}}|jd	ksJ d
| s-tdd |D dks-J tdd |D |ks:J tdd |D dksGJ qd S )NTFr   r"   r   r    r#   r   Fr$   r%   r)   r/   Zreturn_indicatorallow_unlabeledr0   r1   c                 S      g | ]}t |qS rG   )max.0rF   rG   rG   rH   
<listcomp>!      zHtest_make_multilabel_classification_return_sequences.<locals>.<listcomp>r8   c                 S   r   rG   rU   r   rG   rG   rH   r   "  r   c                 S   r   rG   r   r   rG   rG   rH   r   #  r   )rZ   r   r?   r   minr   
min_lengthrE   YrG   rG   rH   4test_make_multilabel_classification_return_sequences  s   
r   c                  C   s   t ddD ]/\} }tdddd| d\}}|jdksJ d	|jd
ks&J dttj|dd|ks4J qtdddd| dd\}}}}t|| t|| |jdksTJ t| d |jdksbJ t|jdddgd  d S )Nr   r   r7   r    r#   r   )r$   r%   r)   r/   r   r7   r    r1   r7   r#   Y shape mismatchrS   T)r$   r%   r)   r/   r   Zreturn_distributionsr4   r"   )r    r#   )	rZ   r   r?   r@   allrB   r   r   r   )r   r   rE   r   r}   ZY2Zp_cZp_w_crG   rG   rH   4test_make_multilabel_classification_return_indicator&  s2   

	
r   c               	   C   sb   t ddD ])\} }tddddd| d\}}|jd	ksJ d
|jdks'J dt|s.J qd S )Nr   r   r7   r    r#   r   sparser   r   r1   r   r   )rZ   r   r?   spissparser   rG   rG   rH   ;test_make_multilabel_classification_return_indicator_sparseE  s   
r   c                  C   sP   t ddd\} }| jdksJ d|jdksJ dt|jdks&J d	d S )
Nr   r   )r$   r/   r   r6   r1   r2   r3   )r8   r5   )r   r?   r@   rA   rE   rF   rG   rG   rH   test_make_hastie_10_2T  s   r   c              
   C   s   t ddddddd| d\}}}|jd	ksJ d
|jdks!J d|jdks*J dt|dkdks6J dtt|t|| ddd t ddd\}}|jdksUJ d S )N   r6   r#   r!   Trl   rk   )r$   r%   r&   effective_rankcoefZbiasnoiser/   )r   r6   r1   )r   r3   r6   coef shape mismatch)Unexpected number of informative featuresr"   rT   r   )r$   r%   )r   r"   )r   r?   rB   r   r@   stddotglobal_random_seedrE   rF   crG   rG   rH   test_make_regression[  s"    r   c              	   C   s   t dddddd| d\}}}|jdksJ d|jd	ks J d
|jdks)J dtt|dkdd tt|t|| ddd d S )Nr   r6   r#   Trk   )r$   r%   r&   Z	n_targetsr   r   r/   r   r1   )r   r#   r3   )r6   r#   r   rl   r   r"   r   )r   r?   r   rB   r   r@   r   r   r   rG   rG   rH    test_make_regression_multitargett  s   
$r   c                 C   s   t g d}t ddgddgddgg}t| dd||d\}}|jdks)J d|jd	ks2J d
t |jdks>J dtt||D ]\}\}}t|||k |  |dd qEd S )Ng?g?g?rl   rk   rP   r8   )r/   r$   r%   centerscluster_std)rP   r8   r1   )rP   r3   r4   zUnexpected number of blobsr"   Unexpected std)	r@   rz   r   r?   rA   	enumeraterZ   r   r   )r   cluster_stdsZcluster_centersrE   rF   ictrr   rG   rG   rH   test_make_blobs  s   
 r   c                  C   sZ   g d} t | ddd\}}|jt| dfksJ dttj|t| d| ks+J dd S )N)rP      r    r8   r   r$   r%   r/   r1   Z	minlength$Incorrect number of samples per blobr   r?   rB   r   r@   r]   rU   )r$   rE   rF   rG   rG   rH   test_make_blobs_n_samples_list  s   r   c           	      C   s   g d}t ddgddgddgg}t g d}t|||| d\}}|jt|dfks0J dtt j|t|d|ksAJ d	tt	||D ]\}\}}t
|||k |  |d
d qHd S )Nr    r    r    rl   rk   r   )r$   r   r   r/   r8   r1   r   r   r"   r   )r@   rz   r   r?   rB   r   r]   rU   r   rZ   r   r   )	r   r$   r   r   rE   rF   r   r   r   rG   rG   rH   +test_make_blobs_n_samples_list_with_centers  s    
 r   r$   )r!   r#   r   c                 C   sV   d }t | |dd\}}|jt| dfksJ dttj|t| d| ks)J dd S )Nr   )r$   r   r/   r8   r1   r   r   r   )r$   r   rE   rF   rG   rG   rH   &test_make_blobs_n_samples_centers_none  s   r   c                  C   s<   ddg} d}t | |ddd\}}}|jt| |fksJ d S )Nr6   r    r#   Tr   )r$   r%   Zreturn_centersr/   )r   r?   rU   )r$   r%   rE   rF   r   rG   rG   rH   test_make_blobs_return_centers  s   r   c                  C   s>  g d} t ddgddgddgg}t g d}td|  d|d d  }tjt|d t| |d d d	 W d    n1 sDw   Y  td
| d|d d  }tjt|d t| ||d d d W d    n1 suw   Y  dd}tjt|d t| dd	 W d    d S 1 sw   Y  d S )Nr   rl   rk   r   zMLength of `n_samples` not consistent with number of centers. Got n_samples = z and centers = r>   rw   )r   zNLength of `clusters_std` not consistent with number of centers. Got centers = z and cluster_std = )r   r   z8Parameter `centers` must be array-like. Got {!r} insteadr#   )	r@   rz   reescaperb   rc   re   r   rW   )r$   r   r   Zwrong_centers_msgZwrong_std_msgZwrong_type_msgrG   rG   rH   test_make_blobs_error  s:   

"r   c              	   C   s   t ddd| d\}}|jdksJ d|jdksJ dt|dttj|d d d	f  |d d d
f   d|d d df d d   d|d d df   d|d d df    d S )Nr!   r6   rl   )r$   r%   r   r/   r!   r6   r1   r!   r3   r   r"   r    r8   r<   r#   rM   )r	   r?   r   r@   sinpir   rE   rF   rG   rG   rH   test_make_friedman1  s   
.r   c              	   C   s   t dd| d\}}|jdksJ d|jdksJ dt||d d df d	 |d d d
f |d d d	f  d
|d d d
f |d d df    d	  d  d S )Nr!   rl   r$   r   r/   r!   rM   r1   r   r3   r   r8   r"   r#   r<   )r
   r?   r   r   rG   rG   rH   test_make_friedman2  s   `r   c              
   C   s   t dd| d\}}|jdksJ d|jdksJ dt|t|d d df |d d d	f  d|d d df |d d d
f    |d d df   d S )Nr!   rl   r   r   r1   r   r3   r"   r8   r#   r   )r   r?   r   r@   Zarctanr   rG   rG   rH   test_make_friedman3  s   Zr   c                  C   sZ   t dddddd} | jdksJ ddd	lm} || \}}}t|d d
k s+J dd S )NrP   r7   r!   rj   r   )r$   r%   r   Ztail_strengthr/   )rP   r7   r1   )svdr   zX rank is not approximately 5)r   r?   numpy.linalgr   rB   )rE   r   usvrG   rG   rH   test_make_low_rank_matrix  s   r   c                 C   s   t dddd| d\}}}|jdksJ d|jdksJ d	|jd
ks'J d|D ]}tt|dks8J dq)t|||  tt|d jddt|jd  d S )Nr!      r6   r#   )r$   Zn_componentsr%   Zn_nonzero_coefsr/   r   r   )r   r6   zD shape mismatch)r!   r   r1   zNon-zero coefs mismatchr8   r"   rS   r   )	r   r?   rU   r@   Zflatnonzeror   sqrtrB   ra   )r   r   DrE   rowrG   rG   rH   test_make_sparse_coded_signal  s   ,r   c                  C   s:   t dddd\} }| jdksJ d|jdksJ dd S )	Nr!   r6   r   r   r   r1   r   r3   )r   r?   r   rG   rG   rH   test_make_sparse_uncorrelated.  s   r   c                 C   s\   t d| d}|jdksJ dt||j ddlm} ||\}}t|dks,J dd S )Nr!   )n_dimr/   )r!   r!   r1   r   eigX is not positive-definite)r   r?   r   Tr   r   r@   r   )r   rE   r   eigenvalues_rG   rG   rH   test_make_spd_matrix5  s   r   	norm_diagTFsparse_format)NZbsrZcooZcscZcsrZdiaZdokZlilc           	      C   s   d}t || ||d}|j||fksJ d|d u r)t|r J t||j |}nt|r3|j|ks5J t||j | }ddl	m
} ||\}}t|dksVJ d| rdt| t| d S d S )Nr!   )r   r   r   r/   r1   r   r   r   )r   r?   r   r   r   r   rW   r   Ztoarrayr   r   r@   r   r   Zdiagonalra   )	r   r   r   r   rE   ZXarrr   r   r   rG   rG   rH   test_make_sparse_spd_matrixA  s*   r   holec                 C   st   t dd| |d\}}|jdksJ |jdksJ t|d d df |t|  t|d d df |t|  d S )Nr!   rl   )r$   r   r/   r   r!   r#   r   r   r8   )r   r?   r   r@   cosr   )r   r   rE   trG   rG   rH   test_make_swiss_rollc  s   
 $r   c                 C   s   t dd| d\}}|jdksJ d|jdksJ dt|d d df t| t|d d d	f t|t|d
   d S )Nr!   rl   r   r   r1   r   zt shape mismatchr   r8   r"   )r   r?   r   r@   r   rV   r   )r   rE   r   rG   rG   rH   test_make_s_curveo  s
   .r   c                  C   s   t ddddd\} }}| jdksJ d|jdksJ d|jdks&J d	t|  t| t| t ddddd\}}}t| | d S )
Nr   r   rM   Tr   r?   rg   rL   r/   r1   )rM   r   rows shape mismatchcolumns shape mismatch)r   r?   r   r   )rE   rowscolsr}   r   rG   rG   rH   test_make_biclustersx  s   r   c                  C   s   t ddddd\} }}| jdksJ d|jdksJ d|jdks&J dt dd	ddd\} }}t|  t| t| t dd	ddd\}}}t dd	ddd\}}}t|| d S )
Nr   )r    r!   Tr   r   r1   r   r   r8   )r   r?   r   r   )rE   r   r   r{   r   r}   rG   rG   rH   test_make_checkerboard  s*   r   c                 C   s`   t dd| d\}}t||D ]\}}|dkrddgnddg}|| d  }t|dd	d
 qd S )Nr#   F)rL   r/   r   rl   rk   r<   r8   z$Point is not on expected unit circlerQ   )r   rZ   rB   r   )r   rE   rF   xlabelcenterdist_sqrrG   rG   rH   test_make_moons  s   r   c                  C   s   t dd\} }t|dkdkrt|dkdksJ d| jdks&J d	|jd
ks/J dtjtdd t dd W d    d S 1 sGw   Y  d S )N)   r!   r$   r   r   r"   r!   z$Number of samples in a moon is wrong)   r8   r1   )r   r3   z8`n_samples` can be either an int or a two-element tuple.rw   r   )r   r@   rB   r?   rb   rc   re   r   rG   rG   rH   test_make_moons_unbalanced  s   &"r   c                  C   s   d} dD ]s\}}}t |dd | d\}}|j|dfksJ d|j|fks(J dddg}t||D ]'\}}|| d  }	|d	krCd
n| d }
|d	krMd
n| d }
t|	|
dd q1||d	k j|dfkshJ d||dk j|dfkswJ dqd S )Ng333333?))r   r#   rM   )r   rM   rM   F)rL   r   factorr8   r1   r3   rl   r   rk   zPoint is not on expected circlerQ   z1Samples not correctly distributed across circles.r"   )r   r?   rZ   rB   r   )r   r$   Zn_outerZn_innerrE   rF   r   r   r   r   Zdist_exprG   rG   rH   test_make_circles  s4   r   c                  C   s   t dd\} }t|dkdksJ dt|dkdks!J d| jd	ks*J d
|jdks3J dtjtdd t dd W d    d S 1 sKw   Y  d S )N)r8   r   r   r   r8   z*Number of samples in inner circle is wrongr"   r   z*Number of samples in outer circle is wrong)r6   r8   r1   r   r3   z7When a tuple, n_samples must have exactly two elements.rw   )r   r@   rB   r?   rb   rc   re   r   rG   rG   rH   test_make_circles_unbalanced  s   "r   )Nr   collectionsr   	functoolsr   numpyr@   rb   Zscipy.sparser   r   Zsklearn.datasetsr   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   Zsklearn.utils._testingr   r   r   r   r   Zsklearn.utils.validationr   rI   ri   rv   markZparametrizere   rz   randomry   r   r   r   r   r   r   r   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rG   rG   rG   rH   <module>   s    T0c3



 



	
