o
    mi5A                  
   @  s6  U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ dgZd	Zi Zd
e d< dJddZ!dd Z"dKddZ#ej$fddZ%dLdMddZ&G dd  d Z'G d!d" d"e'Z(G d#d$ d$e'Z)G d%d& d&e)Z*G d'd( d(e)Z+G d)d* d*e)Z,G d+d, d,e'Z-G d-d. d.e)Z.G d/d0 d0e'Z/e' Z0d1d2 Z1dNd3d4Z2d5d6 Z3dLd7d8Z4d9d: Z5dLd;d<Z6dLd=d>Z7d?d@ Z8dNdAdBZ9e1e2e3e4e6e8e9e5e7dC	Z:dDdE Z;G dFd dZ<dGdH Z=e>dIkre=  dS dS )Oz
A commandline tool for semi-automatically converting CSV to RDF.

See also https://github.com/RDFLib/pyTARQL in the RDFlib family of tools

try: `csv2rdf --help`
    )annotationsN)AnyDictListOptionalTupleUnion)quote)RDFRDFS	split_uri)URIRefCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

z*Dict[Any, Tuple[URIRef, Optional[URIRef]]]urislabelstrc                 C  sP   t dd| } t dd| } | d} d| d  gdd | d	d
 D  S )zc
    CamelCase + lowercase initial a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r   c                 S  s   g | ]}|  qS  )
capitalize.0xr   r   N/home/kim/smarthome/.venv/lib/python3.10/site-packages/rdflib/tools/csv2rdf.py
<listcomp>n       ztoProperty.<locals>.<listcomp>   N)resubsplitjoinlowerr   r   r   r   
toProperty`   s   

*r#   c                 C  s0   | dd   s| dd  | dd   S | S )Nr      r   )isupperr!   r"   r   r   r   toPropertyLabelq   s   r&   l_	List[int]iTuple[int, ...]returnc                   s   t  fdd|D S )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    c                   s   g | ]} | qS r   r   r   r'   r   r   r   |   r   zindex.<locals>.<listcomp>)tuple)r'   r)   r   r,   r   indexw   s   r.   c                 k  s,    t j| fd|i|}|D ]}|V  qd S )Ndialect)csvreader)Zcsv_datar/   kwargs
csv_readerrowr   r   r   r3      s
   r3   class_Optional[URIRef]c                 C  sD   |rt |t| ddddd }nt | }||ft| < |S )Nutf8r   _r   safe)rdflibr   r	   encodereplacer   )r   prefixr5   rr   r   r   	prefixuri   s
   &
r@   c                   @  s   e Zd Zdd ZdddZdS )		NodeMakerc                 C     t jjS N)r;   r   Literalselfr   r   r   range      zNodeMaker.ranger   r   c                 C  s
   t |S rC   )r;   rD   rF   r   r   r   r   __call__      
zNodeMaker.__call__N)r   r   )__name__
__module____qualname__rG   rJ   r   r   r   r   rA      s    rA   c                   @  $   e Zd Zdd Zdd Zdd ZdS )NodeUric                 C  s*   d | _ || _|rt|| _ d S d | _ d S rC   )r5   r>   r;   r   )rF   r>   r5   r   r   r   __init__   s
   
zNodeUri.__init__c                 C  s   t || j| jS rC   )r@   r>   r5   rI   r   r   r   rJ         zNodeUri.__call__c                 C  s   | j ptjjS rC   )r5   r;   r
   ResourcerE   r   r   r   rG      s   zNodeUri.rangeNrL   rM   rN   rQ   rJ   rG   r   r   r   r   rP      s    rP   c                   @  s   e Zd ZdddZdS )NodeLiteralNc                 C  s
   || _ d S rC   f)rF   rW   r   r   r   rQ      rK   zNodeLiteral.__init__rC   )rL   rM   rN   rQ   r   r   r   r   rU      s    rU   c                   @     e Zd Zdd Zdd ZdS )	NodeFloatc                 C  :   | j s
tt|S t| j rtt|  |S td)Nz(Function passed to float is not callable)rW   r;   rD   floatcallable	ExceptionrI   r   r   r   rJ      
   
zNodeFloat.__call__c                 C  rB   rC   )r;   XSDdoublerE   r   r   r   rG      rH   zNodeFloat.rangeNrL   rM   rN   rJ   rG   r   r   r   r   rY          rY   c                   @  rX   )NodeIntc                 C  rZ   )Nz&Function passed to int is not callable)rW   r;   rD   intr\   r]   rI   r   r   r   rJ      r^   zNodeInt.__call__c                 C  rB   rC   )r;   r_   rd   rE   r   r   r   rG      rH   zNodeInt.rangeNra   r   r   r   r   rc      rb   rc   c                   @  rX   )NodeBoolc                 C  rZ   )Nz'Function passed to bool is not callable)rW   r;   rD   boolr\   r]   rI   r   r   r   rJ      r^   zNodeBool.__call__c                 C  rB   rC   )r;   r_   rf   rE   r   r   r   rG      rH   zNodeBool.rangeNra   r   r   r   r   re      rb   re   c                   @  rX   )NodeReplacec                 C     || _ || _d S rC   ab)rF   rj   rk   r   r   r   rQ         
zNodeReplace.__init__c                 C  s   | | j| jS rC   )r=   rj   rk   rI   r   r   r   rJ      rR   zNodeReplace.__call__N)rL   rM   rN   rQ   rJ   r   r   r   r   rg      s    rg   c                   @  rX   )NodeDatec                 C  s   t tj|| jS rC   )r;   rD   datetimestrptimerW   rI   r   r   r   rJ      s   zNodeDate.__call__c                 C  rB   rC   )r;   r_   ZdateTimerE   r   r   r   rG      rH   zNodeDate.rangeNra   r   r   r   r   rm      s    rm   c                   @  rO   )	NodeSplitc                 C  rh   rC   seprW   )rF   rr   rW   r   r   r   rQ      rl   zNodeSplit.__init__c                   s:    j stj _ t j std fdd| jD S )Nz)Function passed to split is not callable!c                   s&   g | ]}|  d kr |  qS )r   )striprW   )r   yrE   r   r   r      s   & z&NodeSplit.__call__.<locals>.<listcomp>)rW   r;   rD   r\   r]   r   rr   rI   r   rE   r   rJ      s
   
zNodeSplit.__call__c                 C  s&   | j rt| j tr| j  S t| S rC   )rW   
isinstancerA   rG   rE   r   r   r   rG      s   

zNodeSplit.rangeNrT   r   r   r   r   rp      s    rp   c                  O  s   dS )Nignorer   )argsr2   r   r   r   _config_ignore   s   rx   c                 C  
   t | |S rC   )rP   )r>   r5   r   r   r   _config_uri   rK   rz   c                   C  s   t  S rC   )rU   r   r   r   r   _config_literal   s   r{   c                 C     t | S rC   )rY   rV   r   r   r   _config_float  rH   r}   c                 C  ry   rC   )rg   ri   r   r   r   _config_replace  rK   r~   c                 C  r|   rC   )rc   rV   r   r   r   _config_int  rH   r   c                 C  r|   rC   )re   rV   r   r   r   _config_bool  rH   r   c                 C  r|   rC   )rm   )Zformat_r   r   r   _config_date  rH   r   c                 C  ry   rC   )rp   rq   r   r   r   _config_split  rK   r   )	rv   uriliteralr[   rd   dater   r=   rf   c                 C  s
   t | tS )z$Return a function for column mapping)evalconfig_functions)vr   r   r   column)  s   
r   c                   @  rO   )r   c                 C  sT   d | _ d | _d | _d| _d | _d| _d| _d| _d | _i | _	i | _
tj| _d| _d S )NautoFr   ,)CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPSsysstdoutOUTtriplesrE   r   r   r   rQ   0  s   
zCSV2RDF.__init__c                 C  s4   | j d| | | f  |  jd7  _d S )Nz%s %s %s .
r   )r   writeZn3r   )rF   spor   r   r   tripleB  s   "zCSV2RDF.triplec                   s0  t   } jrtjd jj   jdkr!t jts! jf _ j	s/t
d td _	 js=t
d td _t jD ]}t| qBtt|}tt fdd|D } j D ]\}}|||< t|d	 ||< qa jrӈ  jtjtj tt|D ]M}|| || }	}
|	d
ks|
d
krq j !| j"dkrq |	tjtj#  |	tj$t%t&|
  |	tj' j  |	tj j !|t(  qd}|D ]}
zψ jdkr j	d|  }n j	d)dd t*|
 jD  } j+r |tj$t%d)t*|
 j+  jr |tj j t|
D ]l\}}|, }|d
kr j !| j"dkr9qz, j !|tj%|}t|trZ|D ]} ||| | qLn	 ||| | W q t-y } zt
dd||| ||j.f   W Y d }~qd }~ww q|d	7 }|d dkrtjd| j/t   | f  W q t-y   tjd|   w t0 }t1 D ])\}
}|\}} |tj$t%|
 |rt2|}|3|  |tj| q|D ]} |tjtj q j4  tjd| j/f  tjdt   |   d S )NzOutput to %s
r   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/c                   s   g | ]	} j t| qS r   )r   r#   r   rE   r   r   r   ]  s    z#CSV2RDF.convert.<locals>.<listcomp>r   r   rv   r   z%dr8   c                 S  s&   g | ]}t |d ddddqS )r7   r   r8   r   r9   )r	   r<   r=   r   r   r   r   r   {  s    r   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)5timer   r   stderrr   namer   ru   r-   r   warningswarnr;   	Namespacer   rG   r   nextlistdict	enumerater   itemsr   r   r   r   r
   typer   Classlenr   getr   Propertyr   rD   r&   domaindefault_node_maker    r.   r   rs   r]   messager   setr   r   addclose)rF   Z	csvreaderstartr   Zheader_labelsheaderskr   r)   hr'   rowsr   r   Z_oeclassesucr   rE   r   convertF  s   









zCSV2RDF.convertN)rL   rM   rN   rQ   r   r   r   r   r   r   r   /  s    c                  C  s  t  } ttjdd  dg d\}}t|}d|v sd|v r(tt td d|v rt	 }|
t|d  |dD ]\}}|d	krOt|d
d| _q>|dkrZt|| _q>|dkret|| _q>|dkrpt|| _q>|dkrzt|| _q>|dkrt|| _q>|dkrt|| _q>|dkr|| _q>|dkrt|| _q>|dkrt|| _q>|drt|| j t|dd  < q>|drt|| j!t|dd  < q>d|v rt|d d
d| _d|v rt|d d
d| _d|v rt|d | _d|v rt|d | _d|v r|d | _d|v r|d | _d|v r&t|d | _d |v r2t|d  | _d!|v r?t|d! | _d"|v rLt|d" | _d#|v rXt|d# | _d$|v rdt|d$ | _d%|v rpt|d% | _d&|v r|t|d& | _d'|v rt|d' | _d(|v rt|d( | _d)|v rt|d) | _d*|v rt|d* | _| D ].\}}|d+rt|| j t|d,d  < q|d-rt|| j!t|d.d  < q| jrd/|v sd0|v rd1| _| "t#t$%|| jd2 d S )3Nr   zhc:b:p:i:o:Cf:l:s:d:D:)
zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=defineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8baseZpropbaseclassr   identr   delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r   getoptr   argvr   printHELPexitconfigparserConfigParser	read_fileopenr   codecsr   r;   r   r   r   r   r   rf   r   r   r   r   r   rd   r   r   r   
startswithr   r   r   r3   	fileinputinput)r   optsfilesconfigr   r   r   r   r   main  s   



















r   __main__)r   r   )r'   r(   r)   r*   r+   r*   rC   )r5   r6   )NN)?__doc__
__future__r   r   r   r0   rn   r   r   r   r   r   r   typingr   r   r   r   r   r   urllib.parser	   r;   Zrdflib.namespacer
   r   r   Zrdflib.termr   __all__r   r   __annotations__r#   r&   r.   excelr3   r@   rA   rP   rU   rY   rc   re   rg   rm   rp   r   rx   rz   r{   r}   r~   r   r   r   r   r   r   r   r   rL   r   r   r   r   <module>   sz     ?

	




 
p
