o
    iQ                     @   s	  U d Z ddlZddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ ddlmZ ddlZddlZddlZddlmZ ddlZdd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z> ddl?m@Z@mAZAmBZB ddlCmDZD ddlEmFZF ddlGmHZH ddgddgddgddgddgddggZIg dZJddgddgddggZKg dZLejd d!dddd"dd#\ZMZNeO ZPeHdZQeQRePjSjTZUePjVeU eP_VePjSeU eP_SejWd d!dd$\ZXZYejd%dd&\ZZZ[eZ\ej]ZZej^_ d j`Zae e"d'Zbe!e#d(Zcd)e$iZdee Zfeegef ehd*< efieb efiec efied ebj Zkeegef ehd+< ekiec ejlmd,ebd-d. Znejlmd,ebejlmd/d0d1d2 Zoejlmd,ecejlmd/d3d4d5 Zpd6d7 Zqejlmd/d8d9d: Zrejlmd,ecd;d< Zsejlmd,ebd=d> Ztejlmd?ejuej]fejlmd@eve	ebdAdBge	ecg dCdDdE ZwdFdG Zxejlmd,efdHdI ZyejlmdJebz ejlmdKg dLejlmdMg ejdNdddOdPR g ejdQddRddSdTR ePjVePjSd d dTfg ej{dNdd&dUR gejlmdVdWee,dXdYgdZd[ Z|ejlmd\ecz ejlmdKg dLejlmd]g ejWd d!ddd^d_R g ejWd d!ddd^d`R gejlmdVdWe+gdadb Z}ejlmdcekz ddde Z~ejlmdcekz dfdg ZejlmdJebz dhdi Zejlmd\ecz djdk ZejlmdVdWd"gdldm Zejlmd,ebdndo Zejlmd,ekdpdq Zejlmd,ekdrds Zejlmd,ekdtdu Zejlmd,ebdvdw Zejlmd,ebdxdy Zdzd{ Zd|d} Zd~d ZejlmdeAdd Zdd Zdd Zejlmd,efdd Zejlmd,efdd Zejlmd,efdd Zejlmd,efdd Zejlmd,efejlmde@eA eB dd Zejlmd,ekejlmd?ejuej]fdd Zejlmd,efdd Zejlmd,ebdd Zejlmd,ebdd Zejlmd,ebdd Zejlmd,efdd Zejlmd,efdd Zejlmd,efdd Zejlmd,efdd Zejlmd,ekdd Zejlmd,ekdd ZdddZejlmd,ekdd Zdd Zdd ZG dd deaZede e>dd Zdd Zdd Zejlmd,ekdd Zejlmd,ekdd Zejlmd,ecdd Zejlmd,ebdd ZejlmdeBddĄ Zejlmde"e#gddǄ Zejlmdecddʄ Zdd̄ ZejlmdeBdd΄ Zejlmdddgdd҄ ZejlmdddgejlmddWd"gejlmdekz ddք ZejlmdejWe#feje"fejWe!feje fgddل Zejlmde"e#e!e gddۄ Zejlmdecz dd݄ ZdS )z:
Testing for the forest module (sklearn.ensemble.forest).
    N)defaultdict)partial)combinationsproduct)AnyDict)patch)comb)clonedatasets)make_classificationmake_hastie_10_2)TruncatedSVD)DummyRegressor)ExtraTreesClassifierExtraTreesRegressorRandomForestClassifierRandomForestRegressorRandomTreesEmbedding)_generate_unsampled_indices_get_n_samples_bootstrap)NotFittedError)explained_variance_scoref1_scoremean_poisson_deviancemean_squared_error)GridSearchCVcross_val_scoretrain_test_split)	LinearSVC)SPARSE_SPLITTERS)_convert_containerassert_allcloseassert_almost_equalassert_array_almost_equalassert_array_equalignore_warningsskip_if_no_parallel)COO_CONTAINERSCSC_CONTAINERSCSR_CONTAINERS)type_of_target)Parallel)check_random_state      )r/   r/   r/   r0   r0   r0      )r/   r0   r0     
   F)	n_samples
n_featuresn_informativeZn_redundantZ
n_repeatedshufflerandom_stater5   r6   r9      r5   r9   )r   r   )r   r   r   FOREST_ESTIMATORSFOREST_CLASSIFIERS_REGRESSORSnamec                 C   s   t |  }|ddd}|tt t|tt dt|ks J |dddd}|tt t|tt dt|ks=J |	t}|j
tt|jfksNJ dS )z&Check classification on a toy dataset.r4   r0   n_estimatorsr9   )rA   max_featuresr9   N)FOREST_CLASSIFIERSfitXyr%   predictTtrue_resultlenapplyshaperA   )r?   ForestClassifierclfZleaf_indices rO   \/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/ensemble/tests/test_forest.pytest_classification_toyy   s   
rQ   	criterion)ginilog_lossc                 C   s   t |  }|d|dd}|tjtj |tjtj}|dks'J d||f |d|ddd}|tjtj |tjtj}|dksKJ d||f d S )	Nr4   r0   rA   rR   r9   ?z'Failed with criterion %s and score = %fr1   rA   rR   rB   r9         ?)rC   rD   irisdatatargetscore)r?   rR   rM   rN   r\   rO   rO   rP   test_iris_criterion   s   r]   )squared_errorabsolute_errorfriedman_msec                 C   s   t |  }|d|dd}|tt |tt}|dks#J d||f |d|ddd}|tt |tt}|dksCJ d	||f d S )
N   r0   rU   g(\?z:Failed with max_features=None, criterion %s and score = %f   rW   gq=
ףp?z7Failed with max_features=6, criterion %s and score = %f)FOREST_REGRESSORSrD   X_regy_regr\   )r?   rR   ForestRegressorregr\   rO   rO   rP   test_regression_criterion   s(   
rh   c                  C   sB  t jd} d\}}}tj|| || d}| jdd|dt j|dd }| jt || d	}t	|||| d
\}}}	}
t
ddd| d}t
ddd| d}|||	 |||	 tdd||	}||	df||
dffD ]4\}}}t|||}t|t ||dd}t|||}|dkr||k sJ |d| k sJ qjdS )zTest that random forest with poisson criterion performs better than
    mse for a poisson target.

    There is a similar test for DecisionTreeRegressor.
    *   r3   r3   r4   r:   r.   r1   lowhighsizer   ZaxisZlam	test_sizer9   poissonr4   sqrt)rR   min_samples_leafrB   r9   r^   mean)Zstrategytraintestgư>N皙?)nprandomRandomStater   make_low_rank_matrixuniformmaxrs   expr   r   rD   r   r   rG   Zclip)rngn_trainn_testr6   rE   coefrF   X_trainX_testy_trainy_testZ
forest_poiZ
forest_msedummyZ	data_nameZ
metric_poiZ
metric_mseZmetric_dummyrO   rO   rP   test_poisson_vs_mse   s@   

r   )rs   r^   c           	      C   s   t jd}d\}}}tj|| ||d}|jdd|dt j|dd }|jt || d	}t	| d
d|d}|
|| t ||tt |ksOJ dS )z9 "Test that sum(y_pred)==sum(y_true) on the training set.ri   rj   r:   r.   r1   rk   r   ro   rp   r4   F)rR   rA   	bootstrapr9   N)rz   r{   r|   r   r}   r~   r   rs   r   r   rD   sumrG   pytestapprox)	rR   r   r   r   r6   rE   r   rF   rg   rO   rO   rP   #test_balance_property_random_forest   s   

(r   c                 C   sf   t |  dd}t|drJ t|drJ |g dg dgddg t|dr*J t|dr1J d S )	Nr   r9   classes_
n_classes_r0   r1   r2      ra   rb   r0   r1   )rc   hasattrrD   )r?   rrO   rO   rP   test_regressor_attributes  s   r   c                 C   s   t |  }tjdd> |ddddd}|tjtj ttj|	tjddt
tjjd  t|	tjt|tj W d    d S 1 sKw   Y  d S )Nignoredivider4   r0   )rA   r9   rB   	max_depthro   r   )rC   rz   errstaterD   rY   rZ   r[   r$   r   predict_probaonesrL   r   predict_log_proba)r?   rM   rN   rO   rO   rP   test_probability  s   $"r   dtypezname, criterionrS   rT   )r^   r`   r_   c                 C   s`  d}|t v r|dkrd}tj| dd}tj| dd}t| }|d|dd}||| |j}t|d	k}	|j	d dks>J |	d
ksDJ t
|d d
 d	ksQJ |j}|jdd |j}
t||
 tdddt|}|dd|d}|j|||d |j}t
|dksJ dD ]#}|dd|d}|j|||| d |j}t||  |k sJ qd S )N{Gz?r_   皙?Fcopyr4   r   rU   皙?r2   r1   n_jobsr0   )rA   r9   rR   sample_weight        )rX   d   )rc   X_largeastypey_larger=   rD   feature_importances_rz   r   rL   all
set_paramsr$   r-   randintrJ   absrv   )r   r?   rR   Z	tolerancerE   rF   ForestEstimatorestimportancesZn_importantZimportances_parallelr   scaleZimportances_bisrO   rO   rP   test_importances$  s8   	
r   c            	         s(  dd  dd  fdd} t g dg dg d	g d
g dg dg dg dg dg dg
}t j|d d d df td|d d df }}|jd }t |}t|D ]
}| |||||< qXtddddd||}tdd |j	D |j
 }t|t| t ||  dk sJ d S )Nc                 S   s*   | dk s| |kr
dS t t|t| ddS )Nr   T)exact)r	   int)knrO   rO   rP   binomial[  s   *z-test_importances_asymptotic.<locals>.binomialc                 S   sF   t | }d}t| D ]}d| | }|dkr ||t| 8 }q|S )Nr         ?r   )rJ   rz   bincountlog2)Zsamplesr5   entropycountprO   rO   rP   r   ^  s   z,test_importances_asymptotic.<locals>.entropyc              
      sb  j \}}tt|}||  fddt|D d}t|D ]}d||||   }t||D ]z t fddt|D  D ]i}	tj|td}
t|D ]}|
d d  | f |	| kM }
qP|
d d f ||
 }}t	|dkrg }|  D ]}|d d | f |k}|
||  q~||d |  |tfdd|D   7 }qCq3q!|S )	Nc                    s"   g | ]}t  d d |f qS )N)rz   unique).0i)rE   rO   rP   
<listcomp>n  s   " zGtest_importances_asymptotic.<locals>.mdi_importance.<locals>.<listcomp>r   r   c                    s   g | ]} |  qS rO   rO   )r   j)BvaluesrO   rP   r   y      r   r   c                    s    g | ]} |t |  qS rO   )rJ   )r   c)r   n_samples_brO   rP   r     s    )rL   listrangepopr   r   rz   r   boolrJ   appendr   )ZX_mrE   rF   r5   r6   featuresimpr   r   bZmask_br   ZX_Zy_childrenxiZmask_xir   r   )r   rE   r   r   rP   mdi_importancei  sH   

 "
 z3test_importances_asymptotic.<locals>.mdi_importance)r   r   r0   r   r   r0   r   r0   )r0   r   r0   r0   r0   r   r0   r1   )r0   r   r0   r0   r   r0   r0   r2   )r   r0   r0   r0   r   r0   r   r   )r0   r0   r   r0   r   r0   r0   ra   )r0   r0   r   r0   r0   r0   r0   rb   )r0   r   r0   r   r   r0   r      )r0   r0   r0   r0   r0   r0   r0      )r0   r0   r0   r0   r   r0   r0   	   )r0   r0   r0   r   r0   r0   r0   r   r   r   r0   r3   rT   r   )rA   rB   rR   r9   c                 s   s    | ]
}|j jd dV  qdS )F)	normalizeN)tree_Zcompute_feature_importancesr   treerO   rO   rP   	<genexpr>  s
    
z.test_importances_asymptotic.<locals>.<genexpr>r   )rz   arrayr   rL   zerosr   r   rD   r   estimators_rA   r#   r   rv   )	r   rZ   rE   rF   r6   Ztrue_importancesr   rN   r   rO   r   rP   test_importances_asymptoticV  sD   00

	r   c                 C   sN   d | }tjt|d tt|   d W d    d S 1 s w   Y  d S )NzfThis {} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.matchr   )formatr   raisesr   getattrr=   )r?   err_msgrO   rO   rP   !test_unfitted_feature_importances  s   "r   rM   X_type)r   Z
sparse_csrZ
sparse_csczX, y, lower_bound_accuracy,  )r5   	n_classesr9   rV     rb   )r5   r   r7   r9   g?g
ףp=
?	oob_scoreTmicro)Zaveragec                 C   s@  t ||d}t||ddd\}}}}	| dd|dd}
t|
dr!J t|
d	r(J |
|| t|r;||	|
|}n|
||	}|
j|ksHJ t||
j }|d
ks[J d|dt|
dsbJ t|
driJ t|
d	spJ |j	dkr|j
d tt|f}n|j
d tt|dddf |j
d f}|
jj
|ksJ dS )z5Check that OOB score is close to score on a test set.Zconstructor_namerX   r   rq   (   TrA   r   r   r9   
oob_score_oob_decision_function_g)\(?z	abs_diff=z is greater than 0.11oob_prediction_r0   N)r!   r   r   rD   callablerG   r\   r   r   ndimrL   rJ   setr   )rM   rE   rF   r   Zlower_bound_accuracyr   r   r   r   r   
classifier
test_scoreZabs_diffexpected_shaperO   rO   rP   test_forest_classifier_oob  s:   
*r  rf   zX, y, lower_bound_r2)r5   r6   Z	n_targetsr9   ffffff?g?c                 C   s  t ||d}t||ddd\}}}}	| dd|dd}
t|
dr!J t|
d	r(J |
|| t|r;||	|
|}n|
||	}|
j|ksHJ t||
j d
ksSJ t|
dsZJ t|
d	saJ t|
drhJ |j	dkrt|j
d f}n|j
d |j	f}|
jj
|ksJ dS )z\Check that forest-based regressor provide an OOB score close to the
    score on a test set.r   rX   r   rq   2   Tr   r   r   r   r   r0   N)r!   r   r   rD   r   rG   r\   r   r   r   rL   r   )rf   rE   rF   r   Zlower_bound_r2r   r   r   r   r   Z	regressorr   r  rO   rO   rP   test_forest_regressor_oob  s8   
r  r   c                 C   sT   | ddddd}t jtdd |tjtj W d   dS 1 s#w   Y  dS )zfCheck that a warning is raised when not enough estimator and the OOB
    estimates will be inaccurate.r0   Tr   rA   r   r   r9   z"Some inputs do not have OOB scoresr   N)r   warnsUserWarningrD   rY   rZ   r[   )r   	estimatorrO   rO   rP   test_forest_oob_warningN  s   "r
  c                 C   s\   t j}t j}d}| ddd}tjt|d ||| W d   dS 1 s'w   Y  dS )zaCheck that we raise an error if OOB score is requested without
    activating bootstrapping.
    z6Out of bag estimation only available if bootstrap=TrueTFr   r   r   N)rY   rZ   r[   r   r   
ValueErrorrD   )r   rE   rF   r   r	  rO   rO   rP   (test_forest_oob_score_requires_bootstrap\  s   "r  c                 C   s   t jd}tj}|jddtjjd dfd}t|}|dks!J | ddd}d	}tj	t
|d
 ||| W d   dS 1 sBw   Y  dS )zwCheck that we raise an error with when requesting OOB score with
    multiclass-multioutput classification target.
    ri   r   ra   r1   rk   zmulticlass-multioutputTr  z:The type of target cannot be used to compute OOB estimatesr   N)rz   r{   r|   rY   rZ   r   rL   r+   r   r   r  rD   )rM   r   rE   rF   Zy_typer	  r   rO   rO   rP   6test_classifier_error_oob_score_multiclass_multioutputi  s   "r  c              	   C   s
  t jd}tj}|jddtjjd dfd}| ddddd}||| tt	||j
}|jd d	 }t |dg}t|d
| D ]5\}}	d}
t d}|jD ] }t|jt	||}||v rq|
d7 }
|||	dd 7 }qQ||
 ||< qCt||jd
|  d
S )zCheck that multioutput regression with integral values is not interpreted
    as a multiclass-multioutput target and OOB score can be computed.
    ri   r   r4   r1   rk      Tr  r   Nr0   r/   )rz   r{   r|   rY   rZ   r   rL   rD   r   rJ   max_samplesr   	enumerater   r   r9   rG   reshapeZsqueezer"   r   )rf   r   rE   rF   r	  Zn_samples_bootstrapZn_samples_testZoob_predZ
sample_idxsampleZn_samples_oobZoob_pred_sampler   Zoob_unsampled_indicesrO   rO   rP   2test_forest_multioutput_integral_regression_targety  s.   

r  c                 C   sz   t jtdd t| d W d    n1 sw   Y  t jtdd t tt W d    d S 1 s6w   Y  d S )Nz"got an unexpected keyword argumentr   r   zOOB score not supported)r   r   	TypeErrorr   NotImplementedError_set_oob_score_and_attributesrE   rF   r  rO   rO   rP   +test_random_trees_embedding_raise_error_oob  s   "r  c                 C   s.   t |   }t|ddd}|tjtj d S )Nr0   r1   )rA   r   )rC   r   rD   rY   rZ   r[   )r?   forestrN   rO   rO   rP   test_gridsearch  s   
r  c                 C   s   | t v rtj}tj}n| tv rt}t}t|  }|dddd}||| t	|dks,J |j
dd ||}|j
dd ||}t||d dS )	z-Check parallel computations in classificationr4   r2   r   rA   r   r9   r0   r   r1   N)rC   rY   rZ   r[   rc   rd   re   r=   rD   rJ   r   rG   r$   )r?   rE   rF   r   r  y1y2rO   rO   rP   test_parallel  s   

r   c           	      C   s   | t v rtjd d d }tjd d d }n| tv r'td d d }td d d }t|  }|dd}||| |	||}t
|}t
|}t||jksOJ |	||}||ks[J d S )Nr1   r   r   )rC   rY   rZ   r[   rc   rd   re   r=   rD   r\   pickledumpsloadstype	__class__)	r?   rE   rF   r   objr\   Zpickle_objectobj2Zscore2rO   rO   rP   test_pickle  s   


r(  c           	      C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgg}ddgddgddgddgg}t |  ddd}||||}t|| | tv rtjd	d
G ||}t|dksJ |d j	dksJ |d j	dksJ |
|}t|dksJ |d j	dksJ |d j	dksJ W d    d S 1 sw   Y  d S d S )Nr.   r/   r0   r1   r   r2   Fr9   r   r   r   r   r1   r   r   )r=   rD   rG   r$   rC   rz   r   r   rJ   rL   r   	r?   r   r   r   r   r   Zy_predZprobaZ	log_probarO   rO   rP   test_multioutput  sV   


"r-  c           	      C   s  ddgddgddgddgddgddgddgddgddgddgddgddgg}ddgddgddgddgddgddgddgddgddgdd	gdd	gdd	gg}ddgddgddgddgg}ddgddgddgdd	gg}t |  d
dd}||||}t|| tjddG ||}t|dksJ |d
 jdksJ |d jdksJ |	|}t|dksJ |d
 jdksJ |d jdksJ W d    d S 1 sw   Y  d S )Nr.   r/   r0   r1   redbluegreenpurpleyellowr   Fr)  r   r   r*  r+  )
r=   rD   rG   r%   rz   r   r   rJ   rL   r   r,  rO   rO   rP   test_multioutput_string  sZ   


"r3  c                 C   s   t |  }|ddtt}|jdksJ t|jddg ttt	td fj
}|ddt|}t|jddg t|jddgddgg d S )Nr   r   r1   r/   r0   r.   )rC   rD   rE   rF   r   r%   r   rz   vstackr   rH   )r?   rM   rN   _yrO   rO   rP   test_classes_shape@  s   r6  c                  C   s:   t ddd} tjdd\}}| |}t|tjsJ d S )Nr4   F)rA   sparse_outputrX   factor)r   r   make_circlesfit_transform
isinstancerz   Zndarray)hasherrE   rF   X_transformedrO   rO   rP   test_random_trees_dense_typeS  s   
r?  c                  C   sR   t dddd} t dddd}tjdd\}}| |}||}t| | d S )Nr4   Fr   )rA   r7  r9   TrX   r8  )r   r   r:  r;  r%   toarray)Zhasher_denseZhasher_sparserE   rF   ZX_transformed_denseX_transformed_sparserO   rO   rP   test_random_trees_dense_equal`  s   

rB  c                  C   s   t ddd} tjdd\}}| |}t ddd} t| || |  |jd |jd ks4J t|j	dd| j
 tdd	}||}t }||| |||d
ks[J d S )Nr  r0   r@   rX   r8  r   ro   r1   )Zn_componentsr   )r   r   r:  r;  r%   rD   	transformr@  rL   r   rA   r   r   r\   )r=  rE   rF   r>  ZsvdZ	X_reducedZ
linear_clfrO   rO   rP   test_random_hashers  s   


rD  csc_containerc                 C   sJ   t jdd\}}tddd}||}|| |}t| |  d S )Nr   r   r  r0   r@   )r   make_multilabel_classificationr   r;  r%   r@  )rE  rE   rF   r=  r>  rA  rO   rO   rP   test_random_hasher_sparse_data  s
   
rG  c                     s~   t d} d\}}| ||| dd|fdddD }| ||  fdd|D }t|D ]	\}}t|| q3d S )	N!0  )P   r  r   r1   c                    s"   g | ]}t d |dd qS )r;   i90  r  )r   rD   )r   r   )r   r   rO   rP   r     s    z'test_parallel_train.<locals>.<listcomp>)r0   r1   r2   r          c                    s   g | ]}|  qS rO   )r   )r   rN   )r   rO   rP   r     s    )r-   randnr   	itertoolspairwiser$   )r   r5   r6   ZclfsZprobasZproba1Zproba2rO   )r   r   r   rP   test_parallel_train  s   rO  c                     s  t d} | jdddd}| d}d t dd	||}tt}|jD ]}d
dd t	|j
j|j
jD }||  d7  < q$t fdd| D }t|dksUJ d|d d ks_J d|d d ksiJ d|d d kssJ d|d d ks}J |d d dksJ |d d dksJ td}tjddd|d d df< tjddd|d d df< | d}tddd||}tt}|jD ]}d
dd t	|j
j|j
jD }||  d7  < qdd | D }t|dksJ d S )NrH  r   r   )r   r0   rn   r   r3   ri   r@    c                 s   0    | ]\}}|d krd|t |f ndV  qdS r   z%d,%d/-Nr   r   ftrO   rO   rP   r     
    
z$test_distribution.<locals>.<genexpr>r0   c                    s    g | ]\}}d |   |fqS )r   rO   r   r   r   Zn_treesrO   rP   r     s     z%test_distribution.<locals>.<listcomp>ra   g?r1   r2   333333?z0,1/0,0/--0,2/--)r   r1   )rB   r9   c                 s   rR  rS  rU  rV  rO   rO   rP   r     rY  c                 S   s   g | ]\}}||fqS rO   rO   rZ  rO   rO   rP   r     r   r   )r-   r   randr   rD   r   r   r   joinzipr   feature	thresholdsorteditemsrJ   rz   emptyr{   )r   rE   rF   rg   Zuniquesr   rO   r[  rP   test_distribution  s@   






re  c                 C   sp   t t}}t|  }|ddddd||}|jd  dks J |dddd||}|jd  dks6J d S )Nr0   r   r   )r   Zmax_leaf_nodesrA   r9   )r   rA   r9   )hastie_Xhastie_yr=   rD   r   Z	get_depthr?   rE   rF   r   r   rO   rO   rP   test_max_leaf_nodes_max_depth  s   
ri  c                 C   s   t t}}t|  }|dddd}||| |jd jjdk}|jd jj| }t	|t
|d d ks<J d| |dddd}||| |jd jjdk}|jd jj| }t	|t
|d d ksoJ d| d S )Nr4   r0   r   )Zmin_samples_splitrA   r9   r/   rX   Failed with {0})rf  rg  r=   rD   r   r   Zchildren_leftZn_node_samplesrz   minrJ   r   )r?   rE   rF   r   r   Znode_idxZnode_samplesrO   rO   rP   test_min_samples_split  s   
(,rl  c                 C   s   t t}}t|  }|dddd}||| |jd j|}t|}||dk }t	|dks8J d
| |dddd}||| |jd j|}t|}||dk }t	|t|d d ksmJ d
| d S )Nra   r0   r   )ru   rA   r9   r   rj  g      ?)rf  rg  r=   rD   r   r   rK   rz   r   rk  r   rJ   )r?   rE   rF   r   r   outZnode_countsZ
leaf_countrO   rO   rP   test_min_samples_leaf  s   


,rn  c                 C   s   t t}}t|  }tjd}||jd }t|}t	dddD ]A}||ddd}d| v r3d|_
|j|||d |jd j|}	tj|	|d	}
|
|
dk }t|||j ksdJ d
| |jq#d S )Nr   rX   rb   r0   )min_weight_fraction_leafrA   r9   ZRandomForestFr   )weightsz,Failed with {0} min_weight_fraction_leaf={1})rf  rg  r=   rz   r{   r|   r]  rL   r   Zlinspacer   rD   r   r   rK   r   rk  ro  r   )r?   rE   rF   r   r   rp  Ztotal_weightfracr   rm  Znode_weightsZleaf_weightsrO   rO   rP   test_min_weight_fraction_leaf  s*   

rr  sparse_containerc                 C   s  t jddd\}}t|  }|ddd||}|ddd|||}t|||| | tv s6| tv rHt|||| t|j	|j	 | tv rbt|
||
| t|||| | tv rt|| ||  t|| ||  d S d S )Nr   r  )r9   r5   r1   )r9   r   )r   rF  r=   rD   r$   rK   rC   rc   rG   r   r   r   FOREST_TRANSFORMERSrC  r@  r;  )r?   rs  rE   rF   r   ZdensesparserO   rO   rP   test_sparse_input7  s.   rv  c                 C   s
  t |  ddd}tji ftjddiftjddiftji ffD ]\}}|tjfd|i|}tj}t|||	|| q|j
jtv rbtt t D ]}|tj|d}tj}t|||	|| qItjtjd d d	 |d}tjd d d	 }t|||	|| d S )
Nr   Fr)  orderCFr   r   r2   )r=   rz   ZasarrayZascontiguousarrayrY   rZ   r[   r$   rD   rG   r	  splitterr    r(   r)   r*   )r?   r   r   	containerkwargsrE   rF   rs  rO   rO   rP   test_memory_layoutZ  s"   r}  c              	   C   s  t jd d df }t jd d df d}t j}t i t|  }tt |ddd	|| W d    n1 s:w   Y  |dd}|	|| | t
v sR| tv rttt || W d    n1 sgw   Y  W d    d S W d    d S W d    d S 1 sw   Y  d S )Nr   r/   r0   r0   r@   r   )rY   rZ   r  r[   r&   r=   r   r   r  rD   rC   rc   rG   )r?   rE   ZX_2drF   r   r   rO   rO   rP   test_1d_inputx  s&   
	"r  c           	      C   s  t |  }|dd}|tjtj |ddd}|tjtj t|j|j ttjtjtjfj	}|ddddddddddddgdd}|tj| t|j|j |ddd}|tj| t|j|j t
tjj}|tjdk  d	9  < dd
dd}|dd}|tjtj| ||dd}|tjtj t|j|j |dd}|tjtj|d  ||dd}|tjtj| t|j|j d S )Nr   r   balancedclass_weightr9          @r   r   r0   r1   r0   r   g      Y@r1   )rC   rD   rY   rZ   r[   r#   r   rz   r4  rH   r   rL   )	r?   rM   Zclf1Zclf2Z
iris_multiZclf3Zclf4r   r  rO   rO   rP   test_class_weights  s@   





r  c                 C   s~   t |  }ttttd fj}|ddd}|t| |ddddddgdd}|t| |d	dd}|t| d S )
Nr1   r  r   r  rX   r   r~  )r.   r1   balanced_subsample)rC   rz   r4  rF   r   rH   rD   rE   )r?   rM   r5  rN   rO   rO   rP   5test_class_weight_balanced_and_bootstrap_multi_output  s   r  c                 C   s   t |  }ttttd fj}|dddd}|tt d}tj	t
|d |t| W d    n1 s8w   Y  |dd	d
gdd}tt |t| W d    d S 1 s^w   Y  d S )Nr1   r  Tr   )r  
warm_startr9   JWarm-start fitting without increasing n_estimators does not fit new trees.r   rX   r   r~  r  )rC   rz   r4  rF   r   rH   rD   rE   r   r  r  r   r  )r?   rM   r5  rN   warn_msgrO   rO   rP   test_class_weight_errors  s   "r  c                 C   s   t t}}t|  }d }dD ]"}|d u r||ddd}n|j|d ||| t||ks/J q|dddd}||| tdd	 |D td
d	 |D ksQJ t||||d	| d d S )N)ra   r4   ri   T)rA   r9   r  rA   r4   Fc                 S      g | ]}|j qS rO   r   r   rO   rO   rP   r         z#test_warm_start.<locals>.<listcomp>c                 S   r  rO   r   r   rO   rO   rP   r     r  rj  )r   )
rf  rg  r=   r   rD   rJ   r   r%   rK   r   )r?   rE   rF   r   Zest_wsrA   Z	est_no_wsrO   rO   rP   test_warm_start  s&   


r  c                 C   s~   t t}}t|  }|ddddd}||| |ddddd}||| |jddd ||| t|||| d S )Nra   r0   FrA   r   r  r9   Tr1   )r  r9   )rf  rg  r=   rD   r   r$   rK   )r?   rE   rF   r   r   est_2rO   rO   rP   test_warm_start_clear  s   
r  c                 C   st   t t}}t|  }|dddd}||| |jdd tt ||| W d    d S 1 s3w   Y  d S )Nra   r0   T)rA   r   r  r   r  )rf  rg  r=   rD   r   r   r   r  rh  rO   rO   rP   $test_warm_start_smaller_n_estimators  s   
"r  c                 C   s   t t}}t|  }|ddddd}||| |ddddd}||| |jdd d}tjt|d	 ||| W d    n1 sEw   Y  t|	||	| d S )
Nra   r2   Tr0   r  r1   r   r  r   )
rf  rg  r=   rD   r   r   r  r  r%   rK   )r?   rE   rF   r   r   r  r  rO   rO   rP   "test_warm_start_equal_n_estimators  s   
r  c                 C   s   t t}}t|  }|ddddddd}||| |ddddddd}||| |jdddd ||| t|d	s>J |j|jksFJ |ddddddd}||| t|d	r]J |jdd
 t|j|| |j|jkssJ d S )N   r2   Fr0   T)rA   r   r  r9   r   r   ra   )r  r   rA   r   r  )rf  rg  r=   rD   r   r   r   r&   )r?   rE   rF   r   r   r  Zest_3rO   rO   rP   test_warm_start_oob6  sJ   
r  c              	   C   s   t t}}t|  }|ddddd}tj|d|jd0}||| tjt	dd ||| W d    n1 s9w   Y  |
  W d    d S 1 sMw   Y  d S )Nr4   T)rA   r  r   r   r  )wrapsz%Warm-start fitting without increasingr   )rf  rg  r=   r   objectr  rD   r   r  r  Zassert_called_once)r?   rE   rF   r   r   Z!mock_set_oob_score_and_attributesrO   rO   rP   test_oob_not_computed_twicei  s   

"r  r  c                 C   sX   t ddd}t| }dd dd |  D }||||}t|j| t|| d S )Nr   Fr)  c                 S   s   g | ]}|qS rO   rO   )r   chrO   rO   rP   r     s    z&test_dtype_convert.<locals>.<listcomp>ZABCDEFGHIJKLMNOPQRSTU)r   rz   eyerD   rG   r%   r   )r   r   rE   rF   resultrO   rO   rP   test_dtype_convert~  s   
r  c                    s   t t}}|jd }t|  }|ddddd}||| ||\jd d ks.J jd |ks7J ttdd |j	D  |
|}t|jd D ]  fd	dt|d d  f D }t|tj|d
 qQd S )Nr   ra   r0   Fr  r/   c                 S   s   g | ]}|j jqS rO   )r   
node_count)r   erO   rO   rP   r     s    z&test_decision_path.<locals>.<listcomp>c                    s$   g | ]\}}|  | f qS rO   rO   )r   r   r   Zest_idZ	indicatorZn_nodes_ptrrO   rP   r     s    )rL   )rf  rg  rL   r=   rD   Zdecision_pathr%   rz   diffr   rK   r   r  r$   r   )r?   rE   rF   r5   r   r   leavesZleave_indicatorrO   r  rP   test_decision_path  s$   


r  c                  C   s\   t jddd\} }ttttg}|D ]}|dd}|| | |jD ]	}|jdks*J q!qd S )Nr   r0   r<   r   )min_impurity_decrease)	r   r   r   r   r   r   rD   r   r  )rE   rF   Zall_estimatorsZ	Estimatorr   r   rO   rO   rP   test_min_impurity_decrease  s   

r  c                  C   s   t dd} td}g d}d}tjt|d | || W d    n1 s(w   Y  g d}d}tjt|d | || W d    d S 1 sLw   Y  d S )	Nrs   rR   )r2   r2   )r/   r0   r2   zNSome value\(s\) of y are negative which is not allowed for Poisson regression.r   )r   r   r   zLSum of y is not strictly positive which is necessary for Poisson regression.)r   rz   r   r   r   r  rD   )r   rE   rF   r   rO   rO   rP   test_poisson_y_positive_check  s   

"r  c                       s(   e Zd Z fddZ fddZ  ZS )	MyBackendc                    s   d| _ t j|i | d S )Nr   )r   super__init__)selfargsr|  r%  rO   rP   r    s   zMyBackend.__init__c                    s   |  j d7  _ t  S )Nr0   )r   r  
start_call)r  r  rO   rP   r    s   
zMyBackend.start_call)__name__
__module____qualname__r  r  __classcell__rO   rO   r  rP   r    s    r  testingc                  C   s   t ddd} td\}}| tt W d    n1 sw   Y  |jdks*J td\}}| t W d    n1 sAw   Y  |jdksMJ d S )Nr4   r1   )rA   r   r  r   )r   joblibZparallel_backendrD   rE   rF   r   r   )rN   bar   _rO   rO   rP   test_backend_respected  s   r  c                  C   sH   t ddddd\} }tdddd| |}tjd|j d	d
s"J d S )Nr  r2   r0   )r5   r7   r9   r   ra   ri      )ru   r9   rA   gHz>)Zabs_tol)r   r   rD   mathiscloser   r   )rE   rF   rN   rO   rO   rP   #test_forest_feature_importances_sum  s   
r  c                  C   sB   t d} t d}tdd| |}t|jt jdt jd d S )N)r4   r4   )r4   r4   r  r   )rz   r   r   r   rD   r%   r   float64)rE   rF   ZgbrrO   rO   rP   *test_forest_degenerate_feature_importances  s   

r  c                 C   sT   t |  ddd}d}tjt|d |tt W d    d S 1 s#w   Y  d S )NFrX   r   r  zl`max_sample` cannot be set if `bootstrap=False`. Either switch to `bootstrap=True` or set `max_sample=None`.r   )r>   r   r   r  rD   rE   rF   )r?   r   r   rO   rO   rP   test_max_samples_bootstrap  s   "r  c                 C   sX   t |  dtdd}d}tjt|d |tt W d    d S 1 s%w   Y  d S )NTg    eAr  z=`max_samples` must be <= n_samples=6 but got value 1000000000r   )r>   r   r   r   r  rD   rE   rF   )r?   r   r   rO   rO   rP    test_large_max_samples_exception  s
   "r  c                 C   s   t ttdddd\}}}}t|  dddd}||||}t|  dd dd}||||}t||}	t||}
|	t|
ksDJ d S )Nr  r\  r   )Z
train_sizerr   r9   Tr   r   r  r9   )	r   rd   re   rc   rD   rG   r   r   r   )r?   r   r   r   r   
ms_1_modelZms_1_predictms_None_modelZms_None_predictZms_1_msZ
ms_None_msrO   rO   rP   $test_max_samples_boundary_regressors  s   


r  c           	      C   sr   t ttdtd\}}}}t|  dddd}||||}t|  dd dd}||||}tj|| d S )Nr   )r9   ZstratifyTr   r  )	r   r   r   rC   rD   r   rz   r  r"   )	r?   r   r   r   r  r  Z
ms_1_probar  Zms_None_probarO   rO   rP   %test_max_samples_boundary_classifiers%  s   r  csr_containerc                 C   sb   g dg}| g dg}t  }d}tjt|d ||| W d    d S 1 s*w   Y  d S )Nr   r   z3sparse multilabel-indicator for y is not supported.r   )r   r   r   r  rD   )r  rE   rF   r   msgrO   rO   rP   test_forest_y_sparse8  s   
"r  ForestClassc           	      C   s   t jd}|dd}|ddk}| d|d d}| d|dd}||| ||| |jd j}|jd j}d}|j|jksEJ |d S )Nr0   i'  r1   r   )rA   r9   r  z=Tree without `max_samples` restriction should have more nodes)rz   r{   r|   rL  rD   r   r   r  )	r  r   rE   rF   Zest1Zest2Ztree1Ztree2r  rO   rO   rP   'test_little_tree_with_small_max_samplesB  s&   r  Forestc                 C   sN   ddl m} tdd}|j\}}|||}t|  dd|d}|t| d S )Nr   )MSEr/   r0   r1   )rA   r   rR   )Zsklearn.tree._criterionr  re   r  rL   rc   rD   rd   )r  r  rF   r5   Z	n_outputsZmse_criterionr   rO   rO   rP   -test_mse_criterion_object_segfault_smoke_testa  s   

r  c                  C   sX   t jd} t | dd}tddddd|}| }dd d	D }t|| d
S )z3Check feature names out for Random Trees Embedding.r   r   r   r1   F)rA   r   r7  r9   c                 S   s    g | ]\}}d | d| qS )Zrandomtreesembedding_r  rO   )r   r   leafrO   rO   rP   r   y  s    zAtest_random_trees_embedding_feature_names_out.<locals>.<listcomp>))r   r1   )r   r2   )r   ra   )r   rb   r  )r0   r2   )r0   ra   )r0   rb   N)	rz   r{   r|   r   rL  r   rD   Zget_feature_names_outr%   )r9   rE   r=  namesZexpected_namesrO   rO   rP   -test_random_trees_embedding_feature_names_outq  s   r  c                 C   sf   | tjjdttdd tjjdd}t	dd|d\}}| |dd	}t
d
|d}t|||d
d dS )zRandomForestClassifier must work on readonly sparse data.

    Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/25333
    r,   r   )Z
max_nbytesr   )seedr  r:   Tr   r1   )r   r9   )ZcvN)setattrsklearnZensembleZ_forestr   r,   rz   r{   r|   r   r   r   )r  Zmonkeypatchr   rE   rF   rN   rO   rO   rP   test_read_only_buffer  s   
r  r  r  c                 C   s0   t jdd\}}tdd| dd}||| dS )z^Check low max_samples works and is rounded to one.

    Non-regression test for gh-24037.
    T)Z
return_X_yr4   g-C6?r   )rA   r  r  r9   N)r   Z	load_winer   rD   )r  rE   rF   r  rO   rO   rP   .test_round_samples_to_one_when_samples_too_low  s
   r  r  r   c                 C   sV  t ddd\}}|rd}nd}| d|d||d}||| |j }t||j |j}t|ts3J t|t|ks=J |d j	t
jksGJ tt|D ]2}	|rqt||	 t|d	 ks_J tt
||	 t||	 k spJ qMtt||	 t|ksJ qMd}
||
 }||
 }|| }|| }|jj}t|}||| |jj}t|| dS )
zEstimators_samples_ property should be consistent.

    Tests consistency across fits and whether or not the seed for the random generator
    is set.
    r  r0   r<   rX   Nr4   )rA   r  rB   r9   r   r   r1   )r   rD   Zestimators_samples_r   r%   r   r<  r   rJ   r   rz   Zint32r   r   r   r   valuer
   r"   )r  r   r  rE   rF   r  r   Zestimators_samplesZ
estimatorsr   Zestimator_indexZestimator_samplesr	  r   r   Zorig_tree_valuesZnew_tree_valuesrO   rO   rP   test_estimators_samples  sB   	
$r  zmake_data, Forestc                 C   s   t jd}d\}}| |||d\}}| }t j||jddg|jddgd< t | s0J t	||dd	\}}	}
}||d
d}|
||
 ||	|}t	||dd	\}}}
}||d
d}|
||
 |||}|d| ksrJ dS )zJCheck that forest can deal with missing values and has decent performance.r   )r   r4   r:   FTffffff?r   rn   r   r   r  )r9   rA   ry   N)rz   r{   r|   r   nanchoicerL   isnananyr   rD   r\   )Z	make_datar  r   r5   r6   rE   rF   Z	X_missingZX_missing_trainZX_missing_testr   r   Zforest_with_missingZscore_with_missingr   r   r  Zscore_without_missingrO   rO   rP    test_missing_values_is_resilient  s"    r  c                 C   s  t jd}d}d}|j|dfd}|jdd|d}|jdd	g|d
dgd}|t}||  ||< |j|d}t j||< t 	|
 sFJ | }	||	dddf< t|	||dd\}
}}}}}| dd|
|}| dd||}|||}||ks~J ||||ksJ dS )z_Check that the forest learns when missing values are only present for
    a predictive feature.r   r   g      ?r4   rP  r1   )rm   rn   FTr  r   r  Nra   r   )rz   r{   r|   Zstandard_normalr   r  r   r   r  r  r  r   r   rD   r\   )r  r   r5   Zexpected_scoreZX_non_predictiverF   ZX_random_maskZy_maskZpredictive_featureZX_predictiveZX_predictive_trainZX_predictive_testZX_non_predictive_trainZX_non_predictive_testr   r   Zforest_predictiveZforest_non_predictiveZpredictive_test_scorerO   rO   rP    test_missing_value_is_predictive  s8   

	r  c                 C   sp   t g dt jddgg}ddg}| dd}d}tjt|d	 ||| W d
   d
S 1 s1w   Y  d
S )zDRaise error for unsupported criterion when there are missing values.r  r   r  rX   r   r_   r  z .*does not accept missing valuesr   N)rz   r   r  r   r   r  rD   )r  rE   rF   r  r  rO   rO   rP   =test_non_supported_criterion_raises_error_with_missing_values?  s   
"r  )r  )__doc__rM  r  r!  collectionsr   	functoolsr   r   r   typingr   r   Zunittest.mockr   r  numpyrz   r   Zscipy.specialr	   r  r
   r   Zsklearn.datasetsr   r   Zsklearn.decompositionr   Zsklearn.dummyr   Zsklearn.ensembler   r   r   r   r   Zsklearn.ensemble._forestr   r   Zsklearn.exceptionsr   Zsklearn.metricsr   r   r   r   Zsklearn.model_selectionr   r   r   Zsklearn.svmr   Zsklearn.tree._classesr    Zsklearn.utils._testingr!   r"   r#   r$   r%   r&   r'   Zsklearn.utils.fixesr(   r)   r*   Zsklearn.utils.multiclassr+   Zsklearn.utils.parallelr,   Zsklearn.utils.validationr-   rE   rF   rH   rI   r   r   Z	load_irisrY   r   Zpermutationr[   rn   permrZ   Zmake_regressionrd   re   rf  rg  r   Zfloat32ZparallelZget_active_backendr%  ZDEFAULT_JOBLIB_BACKENDrC   rc   rt  dictr=   str__annotations__updater   r>   markZparametrizerQ   r]   rh   r   r   r   r   r  chainr   r   r   r   rF  r  r  r
  r  r  r  r  r  r   r(  r-  r3  r6  r?  rB  rD  rG  rO  re  ri  rl  rn  rr  rv  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  Zregister_parallel_backendr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rO   rO   rO   rP   <module>   s   $	(





5


*m
	*(








3
7

6





/






2










	



6
	
	)