o
    mi42                     @  st  d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZmZmZmZmZmZmZmZmZ er;d dlZnz
d dlZesDJ W n eyR   d dlZY nw d dlmZmZ d dl
mZmZ d dlmZmZmZ zd dlZd	ZW n ey   dZd
ZY nw d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& 		
d&d'ddZ'dZ(d(ddZ)d)ddZ*d*d!d"Z+g d#Z,G d$d% d%eZ-dS )+    )annotationsN)
HTMLParser)StringIO
TextIOBaseTextIOWrapper)	IOTYPE_CHECKINGAnyDictListOptionalTextIOTupleUnion)normpathsep)r   cast)urljoinurlsplit
urlunsplitTF)BytesIOWrapperInputSourcePythonInputSourceStringInputSourceURLInputSourcecreate_input_sourcesourceMOptional[Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]]fragment_idOptional[str]extract_all_scriptsOptional[bool]return#Tuple[Union[Dict, List[Dict]], Any]c                 C  s  t | tr
| jdfS t | tr~d}|  }d}t |tr8tttt	t
f |j}t |tr/|}n	t |t	r8| }trf|durGt|}||fS t |tr[|  }t| }||fS t| }||fS |durst|}||fS t|  }||fS t| dd} z| j}	W n ttfy   d}	Y nw |	duo|	 dv }
|
rt||d}nd}z|  }W n ttfy   d}Y nw z|  }W n ttfy   d}Y nw |du r|du rtdt|  z|du rdn|  }W n ttfy   d}Y nw d}|dur"t |tr"|j}t |tr|}n
t |t	r"| }z|
rf|durf|dur4|}n#|dur>| }ntrH|dusHJ |du rOd}t||d }|| |  |! }}ntrd}|durvt|}n|durt |ts|du r|durt| }notr|dusJ t| }n]d}|durt|W |durz|"  W n
 ty   Y nw |durz|"  W S  ty   Y S w S |dur|}ntr|dusJ |du rd}t||d}t|}||fW |durz|"  W n
 ty   Y nw |dur2z|"  W S  ty1   Y S w S |durIz|"  W n
 tyH   Y nw |dur_z|"  W w  ty^   Y w w w )	a  Extract JSON from a source document.

    The source document can be JSON or HTML with embedded JSON script elements (type attribute = "application/ld+json").
    To process as HTML `source.content_type` must be set to "text/html" or "application/xhtml+xml".

    Args:
        source: the input source document (JSON or HTML)
        fragment_id: if source is an HTML document then extract only the script element with matching id attribute, defaults to None
        extract_all_scripts: if source is an HTML document then extract all script elements (unless fragment_id is provided), defaults to False (extract only the first script element)

    Returns:
        Tuple with the extracted JSON document and value of the HTML base element
    Nzjson-ld)format)z	text/htmlzapplication/xhtml+xml)r   r    zLSource does not have a character stream or a byte stream and cannot be used zutf-8)encoding)#
isinstancer   datar   ZgetByteStreamr   r   r   strr   r   wrappedgetvalue_HAS_ORJSONorjsonloadsZgetCharacterStreamreadjsonloadr   content_typeAttributeErrorLookupErrorlowerHTMLJSONParser
ValueErrortypeZgetEncodingr   r   feedget_jsonget_baseclose)r   r   r    Z	html_baseZb_streamZoriginal_stringZwrapped_innerZ	json_dictZc_streamr1   Zis_htmlZhtml_docparserZ
b_encodingZunderlying_stringZhtml_stringZ
use_stream r<   [/home/kim/smarthome/.venv/lib/python3.10/site-packages/rdflib/plugins/shared/jsonld/util.pysource_to_json+   s  





























r>   )#/:irir(   Tuple[str, Optional[str]]c                 C  sH   t D ]}| |}|dkr| d |d  | |d d  f  S q| d fS )N   )VOCAB_DELIMSrfind)rB   delimatr<   r<   r=   	split_iri   s   
$rJ   baseurlc                 C  s.  d|v r|S t | }t |}|jr|S |jdv rM|jdd}dt|dkr*|d nd }t||j}|jr<d|j nd}|j d|d  | | }n:t t| |}	t|	d	 }
tdkrfd	|
t}
|	d	 
drv|

dsv|
d7 }
t|	dd	 |
f |	d
d  }|
dr|
ds|d7 }|S )a  
    ```python
    >>> norm_url('http://example.org/', '/one')
    'http://example.org/one'
    >>> norm_url('http://example.org/', '/one#')
    'http://example.org/one#'
    >>> norm_url('http://example.org/one', 'two')
    'http://example.org/two'
    >>> norm_url('http://example.org/one/', 'two')
    'http://example.org/one/two'
    >>> norm_url('http://example.org/', 'http://example.net/one')
    'http://example.net/one'
    >>> norm_url('http://example.org/', 'http://example.org//one')
    'http://example.org//one'

    ```
    z://)urnzurn-xr@   rE    r?   rA   r         N)r   schemepathsplitlenr   fragmentr   r   joinendswithr   )rK   rL   Zparsed_base
parsed_urlZbase_path_parts	base_pathZjoined_pathrU   resultpartsrR   r<   r<   r=   norm_url   s,   
"r\   r   c                 C  s   | j dkr?z| j}W n
 ty   Y dS w |D ])}d|v r>|d|d}}|dkr>|dkr>t| j||d |   S qdS dS )aL  
    Please note that JSON-LD documents served with the `application/ld+json` media type
    MUST have all context information, including references to external contexts,
    within the body of the document. Contexts linked via a
    http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be
    ignored for such documents.
    application/ld+jsonNz+ rel="http://www.w3.org/ns/json-ld#context"<>rD   rE   )r1   linksr2   indexr   rL   )r   r`   linkijr<   r<   r=   context_from_urlinputsource  s   

re   )r/   r>   rJ   r\   re   r,   r+   c                      sF   e Zd Z		dd fddZd	d
 Zdd ZdddZdd Z  ZS )r5   NFr   r   r    r!   c                   s8   t    || _g | _d| _d| _d | _|| _d| _d S )NFr   )	super__init__r   r/   contains_jsonfragment_id_does_not_matchrK   r    script_count)selfr   r    	__class__r<   r=   rg   $  s   

zHTMLJSONParser.__init__c                 C  s   d| _ d| _|dkr.|D ]\}}|dkr|dkrd| _ q|dkr+| jr+|| jkr+d| _qd S |dkr@|D ]\}}|dkr?|| _q4d S d S )	NFscriptr7   r]   TidrK   href)rh   ri   r   rK   )rk   tagattrsattrvaluer<   r<   r=   handle_starttag2  s"   zHTMLJSONParser.handle_starttagc                 C  s   | j du rD| jdu rF| js| jdkrd S | dkrd S tr$t|}nt|}t	|t
r5| j| n| j| |  jd7  _d S d S d S )NTFr   rN   rE   )rh   ri   r    rj   stripr+   r,   r-   r/   r&   listextendappend)rk   r'   parsedr<   r<   r=   handle_dataD  s   

zHTMLJSONParser.handle_datar"   
List[Dict]c                 C     | j S N)r/   rk   r<   r<   r=   r9   b     zHTMLJSONParser.get_jsonc                 C  r}   r~   )rK   r   r<   r<   r=   r:   e  r   zHTMLJSONParser.get_baseNF)r   r   r    r!   )r"   r|   )	__name__
__module____qualname__rg   ru   r{   r9   r:   __classcell__r<   r<   rl   r=   r5   #  s    
r5   r   )r   r   r   r   r    r!   r"   r#   )rB   r(   r"   rC   )rK   r(   rL   r(   r"   r(   )r   r   r"   r   ).
__future__r   r/   pathlibhtml.parserr   ior   r   r   typingr   r   r	   r
   r   r   r   r   r   ImportErrorZ
simplejson	posixpathr   r   r   urllib.parser   r   r   r,   r+   Zrdflib.parserr   r   r   r   r   r   r>   rF   rJ   r\   re   __all__r5   r<   r<   r<   r=   <module>   sF   ,
  


0