o
    0i'                     @   s  d Z ddlZddlZddlmZmZmZ ddlZddlm	Z	 ddl
mZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ e eeZ!eeZ"ej#d	d
d e"D e$dZ%eee$ ej&dd'e! dee!df Z(eddZ)e)j*dg ddddddddej+ddf	dedededeee% ej&ddf dede(ded ed!ed"dfd#d$Z,e)j*d%d&d'gd			d<d(ee$ej-d)df d*ede(d!ed"df
d+d,Z.e)j*d-g d.dddej+ddfd(ee$ej-d)df d/eee$ ej&d0d1df d2eee$ ej&d3df ded ed!ed"dfd4d5Z/e)j*d6d7d8gdej+dfd6ee$ej-d9df ded!ed"dfd:d;Z0dS )=a  Contains commands to interact with datasets on the Hugging Face Hub.

Usage:
    # list datasets on the Hub
    hf datasets ls

    # list datasets with a search query
    hf datasets ls --search "code"

    # get info about a dataset
    hf datasets info HuggingFaceFW/fineweb
    N)	AnnotatedOptionalget_args)execute_raw_sql_query)CLIErrorRepositoryNotFoundErrorRevisionNotFoundError)DatasetSort_TExpandDatasetProperty_T   )	AuthorOpt	FilterOpt	FormatOptLimitOptOutputFormatQuietOptRevisionOpt	SearchOptTokenOptapi_object_to_dict
get_hf_apimake_expand_properties_parserprint_list_outputtyper_factoryDatasetSortEnumc                 C   s   i | ]}||qS  r   ).0sr   r   V/home/kim/smarthome/.venv/lib/python3.10/site-packages/huggingface_hub/cli/datasets.py
<dictcomp>9   s    r   )typezComma-separated properties to return. When used, only the listed properties (and id) are returned. Example: '--expand=downloads,likes,tags'. Valid: z, .)helpcallbackz"Interact with datasets on the Hub.)r"   z	list | ls)zhf datasets lsz*hf datasets ls --sort downloads --limit 10zhf datasets ls --search "code")Zexamples
   FsearchauthorfiltersortzSort results.limitexpandformatquiettokenreturnc	              	   C   sJ   t |d}	|r
|jnd}
dd |	j||| |
||dD }t|||d dS )zList datasets on the Hub.r-   Nc                 S   s   g | ]}t |qS r   )r   )r   dataset_infor   r   r   
<listcomp>a   s    zdatasets_ls.<locals>.<listcomp>)r'   r&   r%   r(   r)   r*   r+   r,   )r   valueZlist_datasetsr   )r%   r&   r'   r(   r)   r*   r+   r,   r-   apisort_keyresultsr   r   r   datasets_lsH   s   
r7   infoz&hf datasets info HuggingFaceFW/finewebz9hf datasets info my-dataset --expand downloads,likes,tags
dataset_idz+The dataset ID (e.g. `username/repo-name`).revisionc              
   C   s   t |d}z
|j| ||d}W n, ty$ } z	td|  d|d}~w ty; } ztd| d|  d|d}~ww ttjt|d	d
 dS )z>Get info about a dataset on the Hub. Output is in JSON format.r/   )repo_idr:   r*   z	Dataset 'z' not found.Nz
Revision 'z' not found on 'z'.   )indent)	r   r0   r   r   r   printjsondumpsr   )r9   r:   r*   r-   r4   r8   er   r   r   datasets_infoo   s   
rB   Zparquet)z(hf datasets parquet cfahlgren1/hub-statsz8hf datasets parquet cfahlgren1/hub-stats --subset modelsz6hf datasets parquet cfahlgren1/hub-stats --split trainz6hf datasets parquet cfahlgren1/hub-stats --format jsonsubsetz--subsetz(Filter parquet entries by subset/config.splitz Filter parquet entries by split.c           
         sL   t |d}|j| |d} fdd|D }dd |D }	t|	||dd dS )	z/List parquet file URLs available for a dataset.r/   )r;   configc                    s"   g | ]} d u s|j  kr|qS )NrD   r   entryrF   r   r   r1      s   " z$datasets_parquet.<locals>.<listcomp>c                 S   s"   g | ]}|j |j|j|jd qS ))rC   rD   urlsize)rE   rD   rI   rJ   rG   r   r   r   r1      s    rI   )r+   r,   Zid_keyN)r   Zlist_dataset_parquet_filesr   )
r9   rC   rD   r+   r,   r-   r4   entriesfilteredr6   r   rF   r   datasets_parquet   s   
rM   sqlzhf datasets sql "SELECT COUNT(*) AS rows FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet')"zhf datasets sql "SELECT * FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet') LIMIT 5" --format jsonzRaw SQL query to execute.c              
   C   sJ   zt | |d}W n ty } ztt||d}~ww t||dd dS )zAExecute a raw SQL query with DuckDB against dataset parquet URLs.)Z	sql_queryr-   NFr2   )r   ImportErrorr   strr   )rN   r+   r-   resultrA   r   r   r   datasets_sql   s   rR   )NNN)1__doc__enumr?   typingr   r   r   ZtyperZhuggingface_hub._dataset_viewerr   Zhuggingface_hub.errorsr   r   r   Zhuggingface_hub.hf_apir	   r
   Z
_cli_utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   sortedZ_EXPAND_PROPERTIESZ_SORT_OPTIONSEnumrP   r   OptionjoinZ	ExpandOptZdatasets_clicommandtabler7   ZArgumentrB   rM   rR   r   r   r   r   <module>   s   @
		
	
		