o
    miA                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlmZ g dZejeejejeje f Zdedefd	d
ZG dd dZG dd dZG dd deZG dd deZG dd deZ			d(dedeje deje deje dejf
ddZeZ dejeejf dej!e"ej#e gejdej#e f f fddZ$dejeejf d ej#e fd!d"Z%dejeejf d#ej!e&gef de"fd$d%Z'G d&d' d'e(Z)dS ))a  Support for reading delimiter-separated value files.

This module contains unicode aware replacements for :func:`csv.reader`
and :func:`csv.writer`. It was stolen/extracted from the ``csvkit``
project to allow re-use when the whole ``csvkit`` package isn't
required.

The original implementations were largely copied from
`examples in the csv module documentation <http://docs.python.org/library/csv.html#examples>`_.

.. seealso:: http://en.wikipedia.org/wiki/Delimiter-separated_values
    N   )utils)Dialect)	UnicodeWriterUnicodeReaderUnicodeReaderWithLineNumberUnicodeDictReaderNamedTupleReaderiterrowsrewriteadd_rowsfilter_rows_as_dictencodingreturnc                 C   s   t | jS N)codecslookupname)r    r   B/home/kim/smarthome/.venv/lib/python3.10/site-packages/csvw/dsv.pynormalize_encoding&   s   r   c                   @   s   e Zd ZdZ		ddejejeej	f  dejeje
ef  fddZdd Zd	eje fd
dZdd Zdejejedf  fddZdejejeeef  fddZdS )r   u  
    Write Unicode data to a csv file.

    :param f: The target to which to write the data; a local path specified as `str` or     `pathlib.Path` or `None`, in which case the data, formatted as DSV can be retrieved     via :meth:`~UnicodeWriter.read`
    :param dialect: Either a dialect name as recognized by `csv.writer` or a     :class:`~Dialect` instance for dialect customization beyond what can be done with     `csv.writer`.
    :param kw: Keyword arguments passed through to `csv.writer`.

    .. code-block:: python

        >>> from csvw import UnicodeWriter
        >>> with UnicodeWriter('data.tsv', delimiter='	') as writer:
        ...     writer.writerow(['ä', 'ö', 'ü'])
    Nfdialectc                 K   s   || _ |dd| _t|tr|j| _| | _| j| n
|| _|r)|| jd< t	| j| _| j
d| _| jrO| j
dtjkrOt| jd| j fdd}nd	d }|| _d
| _d| _d S )Nr   utf-8r   
escapecharquoting   c                    s    fdd| D S )Nc                    s&   g | ]}t |r| n|qS r   )
isinstancereplace.0s_new_old_typer   r   
<listcomp>U      & zBUnicodeWriter.__init__.<locals>._escapedoubled.<locals>.<listcomp>r   )rowr%   r$   r#   r   r"   r   _escapedoubledQ   s   z.UnicodeWriter.__init__.<locals>._escapedoubledc                 S      | S r   r   )r(   r   r   r   r)   W      Fr   )r   popr   r   r   python_encodingas_python_formatting_parameterskwupdater   getr   csv
QUOTE_NONEstrr)   _close_rows_written)selfr   r   r/   r)   r   r   r   __init__=   s(   



zUnicodeWriter.__init__c                 C   s   t | jttjfr&t | jtjrt| j| _tj| jd| jdd| _d| _n| jd u r2tj	dd| _t
j| jfi | j| _| S )NZwt )r   newlineT)r:   )r   r   r4   pathlibPathioopenr   r5   StringIOr2   writerr/   r7   r   r   r   	__enter__]   s   
zUnicodeWriter.__enter__r   c                 C   s8   t | jdr| jd t | jdr| j dS dS )z
        If the writer has been initialized passing `None` as target, the CSV data as `bytes` can be
        retrieved calling this method.
        seekr   readr   N)hasattrr   rC   rD   encoderA   r   r   r   rD   j   s
   zUnicodeWriter.readc                 C      | j r
| j  d S d S r   r5   r   close)r7   typevalue	tracebackr   r   r   __exit__t      zUnicodeWriter.__exit__r(   c                 C   s$   | j | | |  jd7  _d S )Nr   )r@   writerowr)   r6   r7   r(   r   r   r   rO   x   s   zUnicodeWriter.writerowrowsc                 C   sN   t |D ] \}}t|tr|dkr| js| |  | }| | qdS )aL  
        Writes each row in `rows` formatted as CSV row. This behaves as
        [`csvwriter.writerows`](https://docs.python.org/3/library/csv.html#csv.csvwriter.writerows)
        except when an iterable of `dict` objects is passed. In that case, it is assumed that all
        items in `rows` are `dict`s and all have the same keys in the same order (as what would
        be read by `UnicodeDictReader`). Then, the keys of the first item are written as header row
        and the values of each row are written as subsequent rows.

        :param rows: The data to be written.
        r   N)	enumerater   dictr6   rO   keysvalues)r7   rQ   ir(   r   r   r   	writerows|   s   
zUnicodeWriter.writerows)NN)__name__
__module____qualname____doc__typingOptionalUnionr4   r;   r<   r   r8   rB   bytesrD   rM   IterablerO   tuplelistrS   rW   r   r   r   r   r   *   s    
 
$r   c                   @   s^   e Zd ZdZ	ddedejejee	f  fddZ
dd Zd	d
 Zdd Zdd Zdd ZdS )r   a  
    Read Unicode data from a csv file.

    :param f: The source from which to read the data; a local path specified as `str` or     `pathlib.Path`, a file-like object or a `list` of lines.
    :param dialect: Either a dialect name as recognized by `csv.reader` or a     :class:`~Dialect` instance for dialect customization beyond what can be done with     `csv.writer`.
    :param kw: Keyword arguments passed through to `csv.reader`.

    .. code-block:: python

        >>> with UnicodeReader('tests/fixtures/frictionless-data.csv', delimiter='|') as reader:
        ...     for row in reader:
        ...         print(row)
        ...         break
        ...
        ['FK', 'Year', 'Location name', 'Value', 'binary', 'anyURI', 'email', 'boolean', 'array',
        'geojson']
    Nr   r   c                 K   s   || _ t|dd| _|dd | _t|tr|nd | _| jr1| jj| _|	 | _
| j
| n
|| _
|r;|| j
d< d| _g | _| jdkrId| _| j| _d S )Nr   z	utf-8-siglineterminatorr   Fr   )r   r   r,   r   r:   r   r   r   r-   r.   r/   r0   r5   comments_reader_encoding)r7   r   r   r/   r   r   r   r8      s    



zUnicodeReader.__init__c                 C   s   t | jttjfr)t | jtjrt| j| _tj| jd| j| jp!dd| _d| _	n!t
| jdsJg }| jD ]}|t |trC|| jn| q4|| _tj| jfi | j| _d| _| S )Nrtr9   )moder   r:   TrD   )r   r   r4   r;   r<   r=   r>   r   r:   r5   rE   appendr_   decoder2   readerr/   lineno)r7   linesliner   r   r   rB      s   
"zUnicodeReader.__enter__c                    sF     j d7  _  fddt jD }  j tdd |D 7  _ |S )Nr   c                    s&   g | ]}t |tr|n| jqS r   )r   r4   rj   re   r   rA   r   r   r&      s    z+UnicodeReader._next_row.<locals>.<listcomp>c                 S   s   g | ]	}t |d qS )
)rb   countr   r   r   r   r&      s    )rl   nextrk   sumrP   r   rA   r   	_next_row   s   
zUnicodeReader._next_rowc                    s     } jr|r jjr|d  jjs*|rt|dhkr# jjs* j jjk r~|r9 jjr9|d  jjsB|rW j jjk rW j	 j jj
| jj f    }|rj jjrj|d  jjs*|rst|dhkrw jjs* j jjk s* fdd|D  jjd  }|S )Nr   r9   c                    s   g | ]} j |qS r   )r   Ztrimmerr   rA   r   r   r&      s    z*UnicodeReader.__next__.<locals>.<listcomp>)rs   r   ZcommentPrefix
startswithsetZskipBlankRowsrl   ZskipRowsrd   ri   	delimiterjoinlstripstripZskipColumnsrP   r   rA   r   __next__   s:   	zUnicodeReader.__next__c                 C   rG   r   rH   )r7   exc_typeexc_valexc_tbr   r   r   rM      rN   zUnicodeReader.__exit__c                 C   r*   r   r   rA   r   r   r   __iter__   r+   zUnicodeReader.__iter__r   )rX   rY   rZ   r[   LINES_OR_PATHr\   r]   r^   r   r4   r8   rB   rs   rz   rM   r~   r   r   r   r   r      s    
 r   c                       s    e Zd ZdZ fddZ  ZS )r   z
    A `UnicodeReader` yielding (lineno, row) pairs, where "lineno" is the 1-based number of the
    the **text line** where the (possibly multi-line) row data starts in the DSV file.
    c                    s   t t|  }| jd |fS )zI
        :return: a pair (1-based line number in the input, row)
        r   )superr   rz   rl   rP   	__class__r   r   rz      s   z$UnicodeReaderWithLineNumber.__next__)rX   rY   rZ   r[   rz   __classcell__r   r   r   r   r      s    r   c                       sV   e Zd ZdZd fdd	Ze fddZdejf fdd	Z	dejfd
dZ
  ZS )r   a  
    A `UnicodeReader` yielding one `dict` per row.

    :param f: As for :class:`UnicodeReader`
    :param fieldnames:

    .. code-block:: python

        >>> with UnicodeDictReader(
        ...         'tests/fixtures/frictionless-data.csv',
        ...         dialect=Dialect(delimiter='|', header=False),
        ...         fieldnames=[str(i) for i in range(1, 11)]) as reader:
        ...     for row in reader:
        ...         print(row)
        ...         break
        ...
        OrderedDict([('1', 'FK'), ('2', 'Year'), ('3', 'Location name'), ('4', 'Value'),
        ('5', 'binary'), ('6', 'anyURI'), ('7', 'email'), ('8', 'boolean'), ('9', 'array'),
        ('10', 'geojson')])

    Nc                    s4   || _ || _|| _d| _tt| j|fi | d S Nr   )_fieldnamesrestkeyrestvalline_numr   r   r8   )r7   r   
fieldnamesr   r   r/   r   r   r   r8     s
   zUnicodeDictReader.__init__c                    sj   | j d u rz
tt|  | _ W n	 ty   Y nw | jj| _| j r2tt| j t| j kr2t	
d | j S )NzDuplicate column names!)r   r   r   rz   StopIterationrk   r   lenru   warningswarnrA   r   r   r   r   #  s   


zUnicodeDictReader.fieldnamesr   c                    sP   | j dkr| j tt|  }| jj | _ |g kr#tt|  }|g ks| |S r   )r   r   r   r   rz   rk   itemrP   r   r   r   rz   0  s   


zUnicodeDictReader.__next__c                 C   sv   t dd t| j|D }t| j}t|}||k r&||d  || j< |S ||kr9| j|d  D ]}| j||< q1|S )Nc                 s   s    | ]	\}}||fV  qd S r   r   r    kvr   r   r   	<genexpr>?  s    z)UnicodeDictReader.item.<locals>.<genexpr>)collectionsOrderedDictzipr   r   r   r   )r7   r(   dlflrkeyr   r   r   r   >  s   
zUnicodeDictReader.item)NNN)rX   rY   rZ   r[   r8   propertyr   r   r   rz   r   r   r   r   r   r   r     s    r   c                   @   s0   e Zd ZdZeejZej	dd Z
dd ZdS )r	   a  
    A `UnicodeReader` yielding one `namedtuple` per row.

    .. note::

        This reader has some limitations, notably that fieldnames must be normalized to be
        admissible Python names, but also bad performance (compared with `UnicodeDictReader`).
    c                 C   s   t t| j| j}td|S )NRow)rb   map_normalize_fieldnamer   r   
namedtuple)r7   r   r   r   r   clsV  s   zNamedTupleReader.clsc                    sF   t  |} jD ]}||d  q	 jdi  fdd| D S )Nc                    s&   i | ]\}}| j v r ||qS r   )r   r   r   rA   r   r   
<dictcomp>`  r'   z)NamedTupleReader.item.<locals>.<dictcomp>r   )r   r   r   
setdefaultr   items)r7   r(   r   r   r   rA   r   r   [  s   
zNamedTupleReader.itemN)rX   rY   rZ   r[   staticmethodr   normalize_namer   	functoolscached_propertyr   r   r   r   r   r   r	   J  s    
	
r	   Fr   lines_or_filenamedtuplesdictsc                 k   st    |r	|r	t d|rt}n|rt}nt}|| fd|i|}|D ]}|V  q"W d   dS 1 s3w   Y  dS )a  Convenience factory function for csv reader.

    :param lines_or_file: Content to be read. Either a file handle, a file path or a list    of strings.
    :param namedtuples: Yield namedtuples.
    :param dicts: Yield dicts.
    :param encoding: Encoding of the content.
    :param kw: Keyword parameters are passed through to csv.reader.
    :return: A generator over the rows.
    z:either namedtuples or dicts can be chosen as output formatr   N)
ValueErrorr	   r   r   )r   r   r   r   r/   _readerrr   r   r   r   r
   c  s   "r
   fnamevisitorc           	   	   K   s   t | } |  sJ tjdd}t|j}W d   n1 s"w   Y  t| fi |7}t	|fi |}t
|D ]\}}|||}|durO|| q=W d   n1 sZw   Y  W d   n1 siw   Y  tt|t|  dS )aD  Utility function to rewrite rows in dsv files.

    :param fname: Path of the dsv file to operate on.
    :param visitor: A callable that takes a line-number and a row as input and returns a     (modified) row or None to filter out the row.
    :param kw: Keyword parameters are passed through to csv.reader/csv.writer.
    FdeleteN)r   ensure_pathis_filetempfileNamedTemporaryFiler;   r<   r   r   r   rR   rO   shutilmover4   )	r   r   r/   fptmpreader_r@   rV   r(   r   r   r   r     s"   



r   rQ   c              	   G   s   t jdd}t|j}W d    n1 sw   Y  t| } t|/}|  rHt	| }|D ]}|
| q1W d    n1 sCw   Y  || W d    n1 sWw   Y  tt|t|  d S )NFr   )r   r   r;   r<   r   r   r   r   existsr   rO   rW   r   r   r4   )r   rQ   r   r   r@   r   r(   r   r   r   r     s   


r   filter_c                 K   s    t |}t| |fi | |jS )a  Rewrite a dsv file, filtering the rows.

    :param fname: Path to dsv file
    :param filter_: callable which accepts a `dict` with a row's data as single argument    returning a `Boolean` indicating whether to keep the row (`True`) or to discard it     `False`.
    :param kw: Keyword arguments to be passed `UnicodeReader` and `UnicodeWriter`.
    :return: The number of rows that have been removed.
    )
DictFilterr   removed)r   r   r/   r   r   r   r     s   r   c                   @   s   e Zd Zdd Zdd ZdS )r   c                 C   s   d | _ || _d| _d S r   )headerfilterr   )r7   r   r   r   r   r8     s   
zDictFilter.__init__c                 C   sJ   |dkr	|| _ |S |r#tt| j |}| |r|S |  jd7  _d S d S )Nr   r   )r   rS   r   r   r   )r7   rV   r(   r   r   r   r   __call__  s   
zDictFilter.__call__N)rX   rY   rZ   r8   r   r   r   r   r   r     s    r   )FFr   )*r[   r=   r2   r   r   r\   r;   r   r   r   r   r9   r   Zdsv_dialectsr   __all__r^   r4   r<   IOr`   r   r   r   r   r   r   r	   r]   bool	Generatorr
   rk   CallableintListr   r   rS   r   objectr   r   r   r   r   <module>   sX    fgE
&
$
