o
    h/                     @  s  d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d	d
lmZmZmZmZmZmZmZ e
eddiddZe
eddjddZe
eddkddZe
eddiddZe
eddiddZe
eddiddZe
eddiddZe
eddidd Z e
eddid!d"Z!e
eddid#d$Z"e
eddid%d&Z#e
eddid'd(Z$e
eddid)d*Z%e
eddid+d,Z&e
eddid-d.Z'e
eddid/d0Z(e
eddid1d2Z)e
e*eddld4d5Z+e
eddid6d7Z,dmdnd=d>Z-e
d?ddodAdBZ.dpdDdEZ/dqdGdHZ0drdsdLdMZ1dtdQdRZ2dudSdTZ3dUej4dVfdvdZd[Z5	dwdxdgdhZ6dS )y    )annotationsN)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS)maxsize	characterstrreturnboolc                 C  sd   zt | }W n
 ty   Y dS w d|v p1d|v p1d|v p1d|v p1d|v p1d|v p1d|v p1d	|v S )
NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEzWITH MACRONzWITH RING ABOVEunicodedataname
ValueErrorr   description r   `/var/www/html/figdemos/bartoux_crm/venv/lib/python3.10/site-packages/charset_normalizer/utils.pyis_accentuated   s(   r   c                 C  s.   t | }|s	| S |d}tt|d dS )N r      )r   decompositionsplitchrint)r   Z
decomposedcodesr   r   r   remove_accent-   s
   

r&   
str | Nonec                 C  s.   t | }t D ]\}}||v r|  S qdS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   Zcharacter_ord
range_nameZ	ord_ranger   r   r   unicode_range8   s   r+   c                 C  *   z
t | }W d|v S  ty   Y dS w )NFZLATINr   r   r   r   r   is_latinF   s   r-   c                 C  s2   t | }d|v rdS t| }|d u rdS d|v S )NPTFPunctuationr   categoryr+   r   character_categorycharacter_ranger   r   r   is_punctuationO   s   
r5   c                 C  sB   t | }d|v sd|v rdS t| }|d u rdS d|v o |dkS )NSNTFFormsZLor0   r2   r   r   r   	is_symbol^   s   
r9   c                 C  s$   t | }|d u r
dS d|v pd|v S )NF	EmoticonsZPictographs)r+   )r   r4   r   r   r   is_emoticonm   s   r;   c                 C  s.   |   s| dv r
dS t| }d|v p|dv S )N>   +u   ｜<>TZ>   ZPcZPdZPo)isspacer   r1   )r   r3   r   r   r   is_separatorw   s   
rA   c                 C  s   |   |  kS N)islowerisupperr   r   r   r   is_case_variable   s   rF   c                 C  r,   )NFCJKr   r   Zcharacter_namer   r   r   is_cjk      rI   c                 C  r,   )NFZHIRAGANAr   rH   r   r   r   is_hiragana   rJ   rK   c                 C  r,   )NFZKATAKANAr   rH   r   r   r   is_katakana   rJ   rL   c                 C  r,   )NFZHANGULr   rH   r   r   r   	is_hangul   rJ   rM   c                 C  r,   )NFZTHAIr   rH   r   r   r   is_thai   rJ   rN   c                 C  r,   )NFARABICr   rH   r   r   r   	is_arabic   rJ   rP   c                 C  s4   zt | }W n
 ty   Y dS w d|v od|v S )NFrO   zISOLATED FORMr   rH   r   r   r   is_arabic_isolated_form   s   rQ   c                 C  s   | t vS rB   )r   rE   r   r   r   is_cjk_uncommon   s   rR   r*   c                   s   t  fddtD S )Nc                 3  s    | ]}| v V  qd S rB   r   ).0keywordr*   r   r   	<genexpr>   s    z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   rU   r   rU   r   is_unicode_range_secondary   s   rX   c                 C  s(   |   du o|  du o| dko| dkS )NFu   ﻿)r@   isprintablerE   r   r   r   is_unprintable   s   
r[       sequencebytessearch_zoner$   c                 C  s   t | tstt| }tt| dt|| jddd}t|dkr$dS |D ]'}| 	dd}t
 D ]\}}||krB|    S ||krL|    S q4q&dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancer^   	TypeErrorlenr   r   mindecodelowerreplacer   r)   )r]   r_   Zseq_lenresultsspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s&   
rq      r   c                 C  s    | dv pt td|  jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   	utf_16_be	utf_32_leutf_7utf_32utf_16	utf_16_leutf_8	utf_32_be	utf_8_sig
encodings.)
issubclass	importlibimport_moduler   r   )r   r   r   r   is_multi_byte_encoding  s   
r   tuple[str | None, bytes]c                 C  sJ   t D ] }t | }t|tr|g}|D ]}| |r!||f    S qqdS )z9
    Identify and extract SIG/BOM in given sequence.
    )N    )r
   rf   r^   
startswith)r]   iana_encodingZmarksmarkr   r   r   identify_sig_or_bom  s   

r   r   c                 C  s   | dvS )N>   rv   rw   r   )r   r   r   r   should_strip_sig_or_bom(  s   r   Tcp_namestrictc                 C  sN   |   dd} t D ]\}}| ||fv r|  S q|r%td|  d| S )zIReturns the Python normalized encoding name (Not the IANA official name).rd   re   zUnable to retrieve IANA for '')rk   rl   r   r)   r   )r   r   ro   rp   r   r   r   	iana_name,  s   r   iana_name_aiana_name_bfloatc           	      C  s   t | st |r
dS td|  j}td| j}|dd}|dd}d}tdD ]}t|g}||||krA|d7 }q,|d S )	Ng        r|   ra   rb   r      r	      )r   r~   r   r   ranger^   rj   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bcharacter_match_countiZto_be_decodedr   r   r   cp_similarity=  s   


r   c                 C  s   | t v o	|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similarQ  s   
r   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringNonec                 C  s:   t | }|| t  }|t | || d S rB   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlerr   r   r   set_logging_handler\  s
   

r   	sequencesrp   offsetsr   
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadGenerator[str, None, None]c	                 c  s&   |r|du r|D ]}	||	|	|  }
|
s d S |
V  q	d S |D ]p}	|	| }|t | d kr/q | |	|	|  }|rA|du rA|| }|j||rHdndd}
|r|	dkrt|d}|r|
d | |vrt|	|	d d	D ]#}| || }|r{|du r{|| }|j|dd}
|
d | |v r nqi|
V  q d S )
NF   ra   r   rb   r   r       )rh   rj   ri   r   )r   rp   r   r   r   r   r   r   r   r   chunkZ	chunk_endZcut_sequenceZchunk_partial_size_chkjr   r   r   cut_sequence_chunksi  sD   

r   )r   r   r   r   )r   r   r   r   )r   r   r   r'   )r*   r   r   r   )r\   )r]   r^   r_   r$   r   r'   )r   r   r   r   )r]   r^   r   r   )r   r   r   r   )T)r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r$   r   r   r   r   rB   )r   r^   rp   r   r   r   r   r$   r   r   r   r   r   r^   r   r   r   r'   r   r   )7
__future__r   r~   r   r   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   Z_multibytecodecr   constantr
   r   r   r   r   r   r   r   r&   r+   r-   r5   r9   r;   rA   rF   rI   rK   rL   rM   rN   rP   rQ   rR   rh   rX   r[   rq   r   r   r   r   r   r   INFOr   r   r   r   r   r   <module>   s~    $
									
 



