o
    iU                     @   s  d Z ddlZddlZddlZddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z)m*Z* ej+,dZ-ddgddgddgddgddgddggZ.g dZ/g dZ0ddgddgddggZ1g dZ2g dZ3e4 Z5e-6e5j7j8Z9ee5j:e5j7e-d\e5_:e5_7e; Z<ee<j:e<j7e-d\e<_:e<_7dd Z=dd Z>dd Z?d d! Z@d"d# ZAejBCd$g d%d&d' ZDd(d) ZEd*d+ ZFd,d- ZGd.d/ ZHd0d1 ZId2d3 ZJd4d5 ZKejBCd6eLg e'e(e*e&e)e'd7e(  d8d9 ZMejBCd6eLg e'e(e*e&e)e'd7e(  d:d; ZNd<d= ZOd>d? ZPd@dA ZQdBdC ZRdDdE ZSejBCdFe e5j:e5j7fe e<j:e<j7fgdGdH ZTdIdJ ZUdKdL ZVdMdN ZWdS )Oz6Testing for the boost module (sklearn.ensemble.boost).    N)datasets)BaseEstimatorclone)DummyClassifierDummyRegressor)AdaBoostClassifierAdaBoostRegressor)_samme_proba)LinearRegression)GridSearchCVtrain_test_split)SVCSVR)DecisionTreeClassifierDecisionTreeRegressor)shuffle)NoSampleWeightWrapper)assert_allcloseassert_array_almost_equalassert_array_equal)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERSDOK_CONTAINERSLIL_CONTAINERS      )foor   r   r   r   r   )r   r   r   r   r   r      )r   r   r   )r   r   r   random_statec                     s   t g dg dg dg dg  t  jddd d t jf   G  fddd} |  }t|d	t  }t|j j t 	|
 sHJ tt j|ddg d
 tt j|ddg d d S )N)r   ư>r   )gRQ?g333333?皙?)igRQ?g      ?)r#   r   g&.>r   Zaxisc                       s   e Zd Z fddZdS )z'test_samme_proba.<locals>.MockEstimatorc                    s   t |j j  S N)r   shapeselfXZprobs e/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_weight_boosting.pypredict_probaC   s   z5test_samme_proba.<locals>.MockEstimator.predict_probaN)__name__
__module____qualname__r.   r,   r+   r,   r-   MockEstimatorB   s    r2   r    )r   r   r   r   )r   r   r   r   )nparrayabssumnewaxisr	   	ones_liker   r'   isfiniteallZargminargmax)r2   ZmockZsamme_probar,   r+   r-   test_samme_proba7   s   $r<   c                  C   s>   t tt} t t| }t|tt ttdf d S )Nr   )r3   Zoneslenr*   r   fitr   r.   )Zy_tclfr,   r,   r-   test_oneclass_adaboost_probaT   s   "r@   c                  C   sx   t dd} | tt t| tt tt	t
t| j | tjttdfks-J | tjttfks:J d S )Nr   r!   r   )r   r>   r*   y_classr   predictT	y_t_classr3   uniqueasarrayclasses_r.   r'   r=   decision_functionr?   r,   r,   r-   test_classification_toy]   s   
rJ   c                  C   s*   t dd} | tt t| tt d S Nr   r!   )r   r>   r*   y_regrr   rB   rC   y_t_regrrI   r,   r,   r-   test_regression_toyg   s   
rN   c                  C   s   t tj} t }|tjtj t| |j |	tj}|j
d t| ks(J |tjj
d t| ks7J |tjtj}|dksJJ d|t|jdksSJ ttdd |jD t|jksfJ d S )Nr   g?zFailed with score = c                 s       | ]}|j V  qd S r&   r!   .0Zestr,   r,   r-   	<genexpr>       ztest_iris.<locals>.<genexpr>)r3   rE   iristargetr   r>   datar   rG   r.   r'   r=   rH   scoreestimators_set)classesr?   probarW   r,   r,   r-   	test_irisn   s   *r\   loss)ZlinearZsquareZexponentialc                 C   st   t | dd}|tjtj |tjtj}|dksJ t|jdks%J ttdd |jD t|jks8J d S )Nr   )r]   r"   g?r   c                 s   rO   r&   r!   rP   r,   r,   r-   rR      rS   z test_diabetes.<locals>.<genexpr>)	r   r>   diabetesrV   rU   rW   r=   rX   rY   )r]   regrW   r,   r,   r-   test_diabetes   s   *r`   c            
      C   s  t jd} | jdtjjd}| jdtjjd}tdd}|j	tj
tj|d |tj
}dd |tj
D }|tj
}dd |tj
D }|jtj
tj|d}d	d |jtj
tj|dD }	t|dksjJ t||d
  t|dksyJ t||d
  t|	dksJ t||	d
  tddd}|j	tj
tj|d |tj
}dd |tj
D }|jtj
tj|d}dd |jtj
tj|dD }	t|dksJ t||d
  t|	dksJ t||	d
  d S )Nr   
   sizen_estimatorssample_weightc                 S      g | ]}|qS r,   r,   rQ   pr,   r,   r-   
<listcomp>       z'test_staged_predict.<locals>.<listcomp>c                 S   rh   r,   r,   ri   r,   r,   r-   rk      rl   c                 S   rh   r,   r,   rQ   sr,   r,   r-   rk      s    r   re   r"   c                 S   rh   r,   r,   ri   r,   r,   r-   rk      rl   c                 S   rh   r,   r,   rm   r,   r,   r-   rk      s    )r3   randomRandomStaterandintrT   rU   r'   r^   r   r>   rV   rB   staged_predictr.   staged_predict_probarW   staged_scorer=   r   r   )
rngZiris_weightsZdiabetes_weightsr?   ZpredictionsZstaged_predictionsr[   Zstaged_probasrW   Zstaged_scoresr,   r,   r-   test_staged_predict   sB   

rw   c                  C   sf   t t d} ddd}t| |}|tjtj tt dd} ddd}t| |}|t	jt	j d S )N	estimator)r   r   )re   Zestimator__max_depthr   ry   r"   )
r   r   r   r>   rT   rV   rU   r   r   r^   )boost
parametersr?   r,   r,   r-   test_gridsearch   s   


r}   c                  C   s   dd l } t }|tjtj |tjtj}| |}| |}t	||j
ks*J |tjtj}||ks8J tdd}|tjtj |tjtj}| |}| |}t	||j
ks`J |tjtj}||ksnJ d S rK   )pickler   r>   rT   rV   rU   rW   dumpsloadstype	__class__r   r^   )r~   objrW   rn   obj2Zscore2r,   r,   r-   test_pickle   s"   




r   c               	   C   sp   t jdddddddd\} }t }|| | |j}|jd dks#J |d dtjf |dd  k s6J d S )Ni  ra   r    r   Fr   )	n_samples
n_featuresZn_informativeZn_redundantZ
n_repeatedr   r"   )	r   make_classificationr   r>   feature_importances_r'   r3   r7   r:   )r*   yr?   Zimportancesr,   r,   r-   test_importances   s   

*r   c                  C   s\   t  } td}tjt|d | jttt	
dgd W d    d S 1 s'w   Y  d S )Nz*sample_weight.shape == (1,), expected (6,)matchr   rf   )r   reescapepytestraises
ValueErrorr>   r*   rA   r3   rF   )r?   msgr,   r,   r-   ,test_adaboost_classifier_sample_weight_error  s
   
"r   c                  C   s   ddl m}  t|  }|tt tt }|tt ddl m} t	| dd}|tt t	t
 dd}|tt ddgddgddgddgg}g d}tt }tjtdd ||| W d    d S 1 slw   Y  d S )	Nr   )RandomForestClassifier)RandomForestRegressorr!   r   )r   barr   r   zworse than randomr   )sklearn.ensembler   r   r>   r*   rL   r   rA   r   r   r   r   r   r   )r   r?   r   ZX_failZy_failr,   r,   r-   test_estimator	  s    


"r   c                  C   sT   d} t ddd}tjt| d |tjtj W d    d S 1 s#w   Y  d S )Nz+Sample weights have reached infinite values   g      7@)re   Zlearning_rater   )r   r   warnsUserWarningr>   rT   rV   rU   )r   r?   r,   r,   r-   test_sample_weights_infinite%  s
   "r   z(sparse_container, expected_internal_type   c                    s   G dd dt }tjddddd\}}t|}t||dd	\}}}}| |}	| |}
t|d
ddd|	|}t|d
ddd||}||
}||}t	|| |
|
}|
|}t|| ||
}||}t|| ||
}||}t|| ||
|}|||}t|| ||
}||}t||D ]	\}}t|| q||
}||}t||D ]	\}}t	|| q||
}||}t||D ]	\}}t|| q||
|}|||}t||D ]	\}}t	|| qdd |jD }t fdd|D sJ d S )Nc                       "   e Zd ZdZd fdd	Z  ZS )z-test_sparse_classification.<locals>.CustomSVCz8SVC variant that records the nature of the training set.Nc                        t  j|||d t|| _| S z<Modification on fit caries data type for later verification.rf   superr>   r   
data_type_r)   r*   r   rg   r   r,   r-   r>   ?     
z1test_sparse_classification.<locals>.CustomSVC.fitr&   r/   r0   r1   __doc__r>   __classcell__r,   r,   r   r-   	CustomSVC<      r   r         *   )	n_classesr   r   r"   r   r!   T)Zprobabilityrz   c                 S      g | ]}|j qS r,   r   rQ   ir,   r,   r-   rk         z.test_sparse_classification.<locals>.<listcomp>c                       g | ]}| kqS r,   r,   rQ   texpected_internal_typer,   r-   rk         )r   r   Zmake_multilabel_classificationr3   Zravelr   r   r>   rB   r   rH   r   Zpredict_log_probar.   rW   staged_decision_functionziprs   rt   ru   rX   r:   )sparse_containerr   r   r*   r   X_trainX_testy_trainy_testX_train_sparseX_test_sparseZsparse_classifierZdense_classifierZsparse_clf_resultsZdense_clf_resultsZsparse_clf_resZdense_clf_restypesr,   r   r-   test_sparse_classification,  sj   	




















 r   c                    s   G dd dt }tjddddd\}}t||dd	\}}}}| |}	| |}
t| dd
|	|}t| dd
||}||
}||}t|| ||
}||}t	||D ]	\}}t|| qZdd |j
D }t fdd|D syJ d S )Nc                       r   )z)test_sparse_regression.<locals>.CustomSVRz8SVR variant that records the nature of the training set.Nc                    r   r   r   r   r   r,   r-   r>     r   z-test_sparse_regression.<locals>.CustomSVR.fitr&   r   r,   r,   r   r-   	CustomSVR  r   r   r   2   r   r   )r   r   Z	n_targetsr"   r   r!   rz   c                 S   r   r,   r   r   r,   r,   r-   rk     r   z*test_sparse_regression.<locals>.<listcomp>c                    r   r,   r,   r   r   r,   r-   rk     r   )r   r   Zmake_regressionr   r   r>   rB   r   rs   r   rX   r:   )r   r   r   r*   r   r   r   r   r   r   r   Zsparse_regressorZdense_regressorZsparse_regr_resultsZdense_regr_resultsZsparse_regr_resZdense_regr_resr   r,   r   r-   test_sparse_regression  s,   	





r   c                  C   sF   G dd dt } t|  dd}|tt t|jt|jks!J dS )z
    AdaBoostRegressor should work without sample_weights in the base estimator
    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    c                   @   s   e Zd Zdd Zdd ZdS )z=test_sample_weight_adaboost_regressor.<locals>.DummyEstimatorc                 S   s   d S r&   r,   )r)   r*   r   r,   r,   r-   r>     s   zAtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.fitc                 S   s   t |jd S )Nr   )r3   Zzerosr'   r(   r,   r,   r-   rB     s   zEtest_sample_weight_adaboost_regressor.<locals>.DummyEstimator.predictN)r/   r0   r1   r>   rB   r,   r,   r,   r-   DummyEstimator  s    r   r    rd   N)r   r   r>   r*   rL   r=   Zestimator_weights_Zestimator_errors_)r   r{   r,   r,   r-   %test_sample_weight_adaboost_regressor  s   r   c                  C   s   t jd} | ddd}| ddgd}| d}ttdd}||| || |	| t
t }||| || dS )zX
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    r   3   r    r   Zmost_frequent)ZstrategyN)r3   rp   rq   Zrandnchoicer   r   r>   rB   r.   r   r   )rv   r*   Zycyrr{   r,   r,   r-   test_multidimensional_X  s   



r   c                  C   sp   t jt j} }tt }t|d}d|jj}t	j
t|d || | W d    d S 1 s1w   Y  d S )Nrx   z {} doesn't support sample_weightr   )rT   rV   rU   r   r   r   formatr   r/   r   r   r   r>   )r*   r   ry   r?   err_msgr,   r,   r-   -test_adaboostclassifier_without_sample_weight  s   

"r   c            
      C   sL  t jd} t jdddd}d| d | |jd d  }|d	d
}|d	  d9  < d|d	< tt d
dd}t	|}t	|}|
|| |
|d d	 |d d	  t |}d|d	< |j
|||d ||d d	 |d d	 }||d d	 |d d	 }||d d	 |d d	 }	||k sJ ||	k sJ |t|	ksJ d S )Nr   r   d     )numg?r$   g-C6?r   r   ra   i'  ry   re   r"   rf   )r3   rp   rq   ZlinspaceZrandr'   Zreshaper   r
   r   r>   r8   rW   r   Zapprox)
rv   r*   r   Zregr_no_outlierZregr_with_weightZregr_with_outlierrg   Zscore_with_outlierZscore_no_outlierZscore_with_weightr,   r,   r-   $test_adaboostregressor_sample_weight   s,    
r   c                  C   sX   t tjddddi\} }}}tdd}|| | ttj||dd|	| d S )NT)Z
return_X_yr"   r   r!   r   r%   )
r   r   Zload_digitsr   r>   r   r3   r;   r.   rB   )r   r   r   r   modelr,   r,   r-    test_adaboost_consistent_predict&  s   

r   zmodel, X, yc                 C   sZ   t |}d|d< d}tjt|d | j|||d W d    d S 1 s&w   Y  d S )Nir   z1Negative values in data passed to `sample_weight`r   rf   )r3   r8   r   r   r   r>   )r   r*   r   rg   r   r,   r,   r-   #test_adaboost_negative_weight_error5  s   
"r   c                  C   s~   t jd} | jdd}| jddgdd}t |d }tdd	d
}t|dd	d}|j|||d t 	|j
 dks=J dS )zCheck that we don't create NaN feature importance with numerically
    instable inputs.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    r   )r   ra   rb   r   r   r   gtDS 'T	ra      )	max_depthr"      r   rf   N)r3   rp   rq   normalr   r8   r   r   r>   isnanr   r6   )rv   r*   r   rg   treeZ	ada_modelr,   r,   r-   Ftest_adaboost_numerically_stable_feature_importance_with_small_weightsE  s   r   c                 C   s  d}t j|d| d\}}td| d||}||}t|jddddd tt	|dd	|d  hks7J |
|D ]}t|jddddd tt	|dd	|d  hksZJ q<|jd
d|| ||}t|jddddd |
|D ]}t|jddddd qzdS )zCheck that the decision function respects the symmetric constraint for weak
    learners.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/26520
    r    r   )r   Zn_clusters_per_classr"   ro   r%   r   g:0yE>)Zatolr   r   rd   N)r   r   r   r>   rH   r   r6   rY   r3   rE   r   Z
set_params)Zglobal_random_seedr   r*   r   r?   Zy_scorer,   r,   r-   test_adaboost_decision_functionV  s"   

"$
r   c                  C   sL   t ddd} tjtdd | tt W d    d S 1 sw   Y  d S )Nr   ZSAMME)re   	algorithmz'The parameter 'algorithm' is deprecatedr   )r   r   r   FutureWarningr>   r*   rA   )Zadaboost_clfr,   r,   r-   test_deprecated_algorithm|  s   "r   )Xr   r   numpyr3   r   Zsklearnr   Zsklearn.baser   r   Zsklearn.dummyr   r   r   r   r   Z!sklearn.ensemble._weight_boostingr	   Zsklearn.linear_modelr
   Zsklearn.model_selectionr   r   Zsklearn.svmr   r   Zsklearn.treer   r   Zsklearn.utilsr   Zsklearn.utils._mockingr   Zsklearn.utils._testingr   r   r   Zsklearn.utils.fixesr   r   r   r   r   rp   rq   rv   r*   rA   rL   rC   rD   rM   Z	load_irisrT   ZpermutationrU   rc   permrV   Zload_diabetesr^   r<   r@   rJ   rN   r\   markZparametrizer`   rw   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r,   r,   r,   r-   <module>   s    	(
	

-

Z

/	&
	&