o
    i                     @   s   d dl Zd dlZd dlmZ d dlmZ d dlmZ	 dd Z
dd Zejjd	d
dgdd dD gddgddd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% ZdS )&    N)assert_array_equal)FeatureHasher)	transformc                  C   st   t dd} d| jksJ dddddd	d
g}t dd|}dd |D }t ddd|}t| |  d S )N   
n_featuresdictbar*   %   )fooZdadaZtzarabazZstring1)r   Zgagac                 s       | ]	}t | V  qd S Niteritems.0d r   n/home/kim/smarthome/.venv/lib/python3.10/site-packages/sklearn/feature_extraction/tests/test_feature_hasher.py	<genexpr>       z,test_feature_hasher_dicts.<locals>.<genexpr>pairr   
input_type)r   r   r   r   toarray)feature_hasherraw_XZX1genZX2r   r   r   test_feature_hasher_dicts	   s   
r!   c                  C   s   dddd dgd dddgg} dD ]H}d| }dd	 | D }t|d
dd}||}|jd t| ks7J |jd |ks@J |d  dksJJ |d  dksTJ |jdks[J qd S )Nr   r	   r   asciiquux)   	      r         c                 s       | ]}|V  qd S r   r   r   xr   r   r   r          z.test_feature_hasher_strings.<locals>.<genexpr>stringF)r   r   alternate_signr               )encoder   r   shapelensumZnnz)r   Zlg_n_featuresr   itr   Xr   r   r   test_feature_hasher_strings   s    
r9   r   	my_stringanother_stringc                 c   r)   r   r   r*   r   r   r   r   2   r,   r   )r:   r;   list	generator)Zidsc                 C   sN   d}t ddd}tjt|d ||  W d   dS 1 s w   Y  dS )zhFeatureHasher raises error when a sample is a single string.

    Non-regression test for gh-13199.
    z"Samples can not be a single string
   r-   r   )matchN)r   pytestraises
ValueErrorr   )r   msgr   r   r   r   !test_feature_hasher_single_string.   s
   "rD   c                  C   s   dddd dgd dddgg} dd | D }t|dtd	\}}}d
d | D }t|dtd	dd\}}}t|| t|| dd | D }t|dtd	dd\}}}tt t|| W d    d S 1 shw   Y  d S )Nr   r	   r   r"   r#   c                 s       | ]
}d d |D V  qdS )c                 s       | ]}|d fV  qdS r/   Nr   r   fr   r   r   r   I       8test_hashing_transform_seed.<locals>.<genexpr>.<genexpr>Nr   r*   r   r   r   r   I       z.test_hashing_transform_seed.<locals>.<genexpr>   Fc                 s   rE   )c                 s   rF   rG   r   rH   r   r   r   r   L   rJ   rK   Nr   r*   r   r   r   r   L   rL   r   )seedc                 s   rE   )c                 s   rF   rG   r   rH   r   r   r   r   Q   rJ   rK   Nr   r*   r   r   r   r   Q   rL   r/   )r3   _hashing_transformstrr   r@   rA   AssertionError)r   Zraw_X_indicesZindptr_Z	indices_0Zindptr_0Z	indices_1r   r   r   test_hashing_transform_seedB   s   

"rT   c                  C   s   dd ddddddd	fD } t d
dd}||  \}}tt||dk }tt||dk }ddg|ks=J g d|ksEJ d S )Nc                 s   r   r   r   r   r   r   r   r   X   
    

z,test_feature_hasher_pairs.<locals>.<genexpr>r/   r(   r   r	   r1   r0   r   r#   r   r   r   r   r   )r/   r1   r0   )r   r   r   sortednpabsr   r   x1Zx2Zx1_nzZx2_nzr   r   r   test_feature_hasher_pairsW   s   r^   c                  C   s  dd ddddddd	fD } t d
dd}||  \}}tt||dk }tt||dk }ddg|ks=J g d|ksEJ dd ddiddifD } ||  \}}t||dk }t||dk }dg|kstJ dg|ks{J t|| d S )Nc                 s   r   r   r   r   r   r   r   r   e   rU   z?test_feature_hasher_pairs_with_string_values.<locals>.<genexpr>r/   arV   abcr0   rW   rX   r   r   r   r   )r/   r/   r0   c                 s   r   r   r   r   r   r   r   r   p   r   Zbax)r   r   r   rY   rZ   r[   r   r\   r   r   r   ,test_feature_hasher_pairs_with_string_valuesd   s    ra   c                  C   sL   d} g dt tdg}t| dd}||}t| tt|| f d S )Nr   r   r   r-   r   )	r   ranger   r   r   r   rZ   Zzerosr5   )r   r   r   r8   r   r   r   test_hash_empty_inputy   s
   
 rc   c                  C   s&   t  ddig} | jjdksJ d S )Nr   r   )r   )r   r   datar4   )r8   r   r   r   test_hasher_zeros   s   re   c                  C   sd   t dg} tddd| }|j dk r|j dksJ tddd| }|j dks0J d S )NThequickbrownfoxjumpedTr-   )r.   r   r   F)r<   r   fit_transformrd   minmaxr8   ZXtr   r   r   test_hasher_alternate_sign   s
   
 rk   c                  C   sn   t dg} tdddd| }t|jd t| d k sJ tdddd| }|jd t| d ks5J d S )Nrf   Tr/   r-   )r.   r   r   r   F)r<   r   rg   r[   rd   r5   rj   r   r   r   test_hash_collisions   s   
rl   c                  C   s   t  } |  }|jrJ dS )z3Test that FeatureHasher has requires_fit=False tag.N)r   Z__sklearn_tags__Zrequires_fit)hashertagsr   r   r   $test_feature_hasher_requires_fit_tag   s   ro   c                  C   s:   t dd} ddddddg}| |}|jdksJ d	S )
z6Test that FeatureHasher can transform without fitting.r>   r   r/   r(   )dogcat   )rp   run)r(   r>   N)r   r   r4   )rm   rd   resultr   r   r   )test_feature_hasher_transform_without_fit   s   

ru   )numpyrZ   r@   Znumpy.testingr   Zsklearn.feature_extractionr   Z(sklearn.feature_extraction._hashing_fastr   rO   r!   r9   markZparametrizerD   rT   r^   ra   rc   re   rk   rl   ro   ru   r   r   r   r   <module>   s0    


