o
    i/                     @  s
  U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ eedduddZ%eeddvddZ&eeddwddZ'e(dd e) D Z*de+d< dd e*D Z,de+d< eeddxd!d"Z-eeddvd#d$Z.eeddvd%d&Z/eeddvd'd(Z0eeddvd)d*Z1eeddvd+d,Z2eeddvd-d.Z3eeddvd/d0Z4eeddvd1d2Z5eeddvd3d4Z6eeddvd5d6Z7eeddvd7d8Z8eeddvd9d:Z9eeddvd;d<Z:eeddvd=d>Z;ee<eddyd@dAZ=eeddvdBdCZ>	Ddzd{dHdIZ?edJdd|dLdMZ@d}dOdPZAd~dRdSZBdddWdXZCdd\d]ZDdd^d_ZEd`ejFdafddedfZG	dddsdtZHdS )    )annotationsN)bisect_right)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS_LATIN_CJK_HANGUL	_KATAKANA	_HIRAGANA_THAI_ARABIC_ARABIC_ISOLATED_FORM_ACCENT_KEYWORDS_ACCENTUATED)maxsize	characterstrreturnintc                 C  s   zt | }W n
 ty   Y dS w d}d|v r|tO }d|v r$|tO }d|v r,|tO }d|v r4|tO }d|v r<|tO }d|v rD|tO }d|v rT|t	O }d	|v rT|t
O }tD ]}||v rc|tO } |S qV|S )
zRCompute all name-based classification flags with a single unicodedata.name() call.r   ZLATINZCJKZHANGULZKATAKANAZHIRAGANAZTHAIZARABICzISOLATED FORM)unicodedataname
ValueErrorr   r   r   r   r   r   r   r   r   r   )r   Zdescflagskw r&   R/home/kim/smarthome/.venv/lib/python3.10/site-packages/charset_normalizer/utils.py_character_flags&   s:   r(   boolc                 C     t t| t@ S N)r)   r(   r   r   r&   r&   r'   is_accentuatedI      r-   c                 C  s.   t | }|s	| S |d}tt|d dS )N r      )r!   decompositionsplitchrr    )r   Z
decomposedcodesr&   r&   r'   remove_accentN   s
   

r5   c                 c  s"    | ]\}}|j |j|fV  qd S r+   )startstop).0r"   Z	ord_ranger&   r&   r'   	<genexpr>[   s
    
r9   zlist[tuple[int, int, str]]_UNICODE_RANGES_SORTEDc                 C  s   g | ]}|d  qS )r   r&   )r8   er&   r&   r'   
<listcomp>_   s    r<   z	list[int]_UNICODE_RANGE_STARTS
str | Nonec                 C  s<   t | }tt|d }|dkrt| \}}}||k r|S dS )zK
    Retrieve the Unicode range official name from a single character.
    r
   r   N)ordr   r=   r:   )r   Zcharacter_ordidxr6   r7   r"   r&   r&   r'   unicode_rangeb   s   rA   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   is_latins   r.   rB   c                 C  s2   t | }d|v rdS t| }|d u rdS d|v S )NPTFPunctuationr!   categoryrA   r   character_categorycharacter_ranger&   r&   r'   is_punctuationx   s   
rJ   c                 C  sB   t | }d|v sd|v rdS t| }|d u rdS d|v o |dkS )NSNTFZFormsZLorE   rG   r&   r&   r'   	is_symbol   s   
rM   c                 C  s$   t | }|d u r
dS d|v pd|v S )NFZ	EmoticonsZPictographs)rA   )r   rI   r&   r&   r'   is_emoticon   s   rN   c                 C  s.   |   s| dv r
dS t| }d|v p|dv S )N>   <>u   ｜+TZ>   ZPcZPoZPd)isspacer!   rF   )r   rH   r&   r&   r'   is_separator   s   
rT   c                 C  s   |   |  kS r+   )islowerisupperr,   r&   r&   r'   is_case_variable   r.   rW   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   is_cjk   r.   rX   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   is_hiragana   r.   rY   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   is_katakana   r.   rZ   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   	is_hangul   r.   r[   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   is_thai   r.   r\   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   	is_arabic   r.   r]   c                 C  r*   r+   )r)   r(   r   r,   r&   r&   r'   is_arabic_isolated_form   r.   r^   c                 C  s   | t vS r+   )r   r,   r&   r&   r'   is_cjk_uncommon   s   r_   
range_namec                   s   t  fddtD S )Nc                 3  s    | ]}| v V  qd S r+   r&   )r8   keywordr`   r&   r'   r9      s    z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   rb   r&   rb   r'   is_unicode_range_secondary   s   rd   c                 C  s(   |   du o|  du o| dko| dkS )NFu   ﻿)rS   isprintabler,   r&   r&   r'   is_unprintable   s   
rg       sequencebytes | bytearraysearch_zonec                 C  s   t | ttfs	tt| }tt| dt|| jddd}t|dkr&dS |D ]'}|	 
dd}t D ]\}}||krD|    S ||krN|    S q6q(dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancebytes	bytearray	TypeErrorlenr   r   mindecodelowerreplacer   items)ri   rk   Zseq_lenresultsZspecified_encodingencoding_aliasencoding_ianar&   r&   r'   any_specified_encoding   s&   r      r"   c                 C  s    | dv pt td|  jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   Z	utf_8_sig	utf_32_leutf_7utf_16utf_32	utf_16_leutf_8	utf_32_be	utf_16_be
encodings.)
issubclass	importlibimport_moduler   r	   )r"   r&   r&   r'   is_multi_byte_encoding	  s   
r   tuple[str | None, bytes]c                 C  sJ   t D ] }t | }t|tr|g}|D ]}| |r!||f    S qqdS )z9
    Identify and extract SIG/BOM in given sequence.
    )N    )r   rr   rs   
startswith)ri   iana_encodingZmarksmarkr&   r&   r'   identify_sig_or_bom  s   

r   r   c                 C  s   | dvS )N>   r   r   r&   )r   r&   r&   r'   should_strip_sig_or_bom0  s   r   Tcp_namestrictc                 C  sN   |   dd} t D ]\}}| ||fv r|  S q|r%td|  d| S )zIReturns the Python normalized encoding name (Not the IANA official name).rp   rq   zUnable to retrieve IANA for '')ry   rz   r   r{   r#   )r   r   r}   r~   r&   r&   r'   	iana_name4  s   r   iana_name_aiana_name_bfloatc           	      C  s   t | st |r
dS td|  j}td| j}|dd}|dd}d}tdD ]}t|g}||||krA|d7 }q,|d S )Ng        r   rm   rn   r      r
   )r   r   r   r   rangers   rx   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bZcharacter_match_countiZto_be_decodedr&   r&   r'   cp_similarityE  s   


r   c                 C  s   | t v o	|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r&   r&   r'   is_cp_similarY  s   
r   Zcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringNonec                 C  s:   t | }|| t  }|t | || d S r+   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r"   r   r   loggerhandlerr&   r&   r'   set_logging_handlerd  s
   

r   	sequencesr~   offsetsr   
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadrs   is_multi_byte_decoderdecoded_payloadGenerator[str, None, None]c	                 c  s&   |r|du r|D ]}	||	|	|  }
|
s d S |
V  q	d S |D ]p}	|	| }|t | d kr/q | |	|	|  }|rA|du rA|| }|j||rHdndd}
|r|	dkrt|d}|r|
d | |vrt|	|	d d	D ]#}| || }|r{|du r{|| }|j|dd}
|
d | |v r nqi|
V  q d S )
NF   rm   r   rn   r   r0      )rv   rx   rw   r   )r   r~   r   r   r   r   r   r   r   r   chunkZ	chunk_endZcut_sequenceZchunk_partial_size_chkjr&   r&   r'   cut_sequence_chunksq  sD   

r   )r   r   r   r    )r   r   r   r)   )r   r   r   r   )r   r   r   r>   )r`   r   r   r)   )rh   )ri   rj   rk   r    r   r>   )r"   r   r   r)   )ri   rj   r   r   )r   r   r   r)   )T)r   r   r   r)   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r)   )r"   r   r   r    r   r   r   r   r+   )r   rj   r~   r   r   r   r   r    r   r)   r   r)   r   rs   r   r)   r   r>   r   r   )I
__future__r   r   r   r!   bisectr   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   _multibytecodecr	   Zconstantr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   r-   r5   sortedr{   r:   __annotations__r=   rA   rB   rJ   rM   rN   rT   rW   rX   rY   rZ   r[   r\   r]   r^   r_   rv   rd   rg   r   r   r   r   r   r   r   INFOr   r   r&   r&   r&   r'   <module>   s    L"		"



