
    (phN                       S SK Jr  S SKJr  S SKJr  SSKJrJrJ	r	  SSK
JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr   " S S5      r " S	 S
\5      r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r" " S S\5      r# " S S\5      r$ " S S\5      r% " S S\5      r&\" SS9      S!S j5       r'\" SS9 S"       S#S jj5       r(g )$    )annotations)	lru_cache)	getLogger   )COMMON_SAFE_ASCII_CHARACTERSTRACEUNICODE_SECONDARY_RANGE_KEYWORD)is_accentuated	is_arabicis_arabic_isolated_formis_case_variableis_cjkis_emoticon	is_hangulis_hiraganais_katakanais_latinis_punctuationis_separator	is_symbolis_thaiis_unprintableremove_accentunicode_rangeis_cjk_uncommonc                  J    \ rS rSrSrS	S jrS
S jrSS jr\SS j5       r	Sr
g)MessDetectorPlugin!   zm
Base abstract class used for mess detection plugins.
All detectors MUST extend and implement given methods.
c                    [         e)z0
Determine if given character should be fed in.
NotImplementedErrorself	characters     H/var/www/html/venv/lib/python3.13/site-packages/charset_normalizer/md.pyeligibleMessDetectorPlugin.eligible'   
     "!    c                    [         e)zq
The main routine to be executed upon character.
Insert the logic in witch the text would be considered chaotic.
r    r"   s     r%   feedMessDetectorPlugin.feed-   s
    
 "!r)   c                    [         e)z2
Permit to reset the plugin to the initial state.
r    r#   s    r%   resetMessDetectorPlugin.reset4   r(   r)   c                    [         e)zm
Compute the chaos ratio based on what your feed() has seen.
Must NOT be lower than 0.; No restriction gt 0.
r    r.   s    r%   ratioMessDetectorPlugin.ratio:   s
     "!r)    Nr$   strreturnboolr$   r6   r7   Noner7   r:   r7   float)__name__
__module____qualname____firstlineno____doc__r&   r+   r/   propertyr2   __static_attributes__r4   r)   r%   r   r   !   s*    
""" " "r)   r   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g) TooManySymbolOrPunctuationPluginC   c                J    SU l         SU l        SU l        S U l        SU l        g )Nr   F)_punctuation_count_symbol_count_character_count_last_printable_char_frenzy_symbol_in_wordr.   s    r%   __init__)TooManySymbolOrPunctuationPlugin.__init__D   s*    '("#%&04!,1#r)   c                "    UR                  5       $ Nisprintabler"   s     r%   r&   )TooManySymbolOrPunctuationPlugin.eligibleL       $$&&r)   c                D   U =R                   S-  sl         XR                  :w  av  U[        ;  al  [        U5      (       a  U =R                  S-  sl        OFUR                  5       SL a3  [        U5      (       a#  [        U5      SL a  U =R                  S-  sl        Xl        g )Nr   F   )	rK   rL   r   r   rI   isdigitr   r   rJ   r"   s     r%   r+   %TooManySymbolOrPunctuationPlugin.feedO   s    " 222!==i((''1,'!!#u,i((	*e3""a'"$-!r)   c                .    SU l         SU l        SU l        g Nr   )rI   rK   rJ   r.   s    r%   r/   &TooManySymbolOrPunctuationPlugin.reseta   s    "# !r)   c                    U R                   S:X  a  gU R                  U R                  -   U R                   -  nUS:  a  U$ S$ )Nr           333333?)rK   rI   rJ   )r#   ratio_of_punctuations     r%   r2   &TooManySymbolOrPunctuationPlugin.ratiof   sO      A% ##d&8&88!!'" (<s'B#KKr)   )rK   rM   rL   rI   rJ   Nr;   r5   r9   r<   r>   r?   r@   rA   rN   r&   r+   r/   rC   r2   rD   r4   r)   r%   rF   rF   C   s,    2'.$
 L Lr)   rF   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g)TooManyAccentuatedPluginr   c                     SU l         SU l        g r[   rK   _accentuated_countr.   s    r%   rN   !TooManyAccentuatedPlugin.__init__s   s    %&'(r)   c                "    UR                  5       $ rQ   )isalphar"   s     r%   r&   !TooManyAccentuatedPlugin.eligiblew   s      ""r)   c                z    U =R                   S-  sl         [        U5      (       a  U =R                  S-  sl        g g Nr   )rK   r
   rh   r"   s     r%   r+   TooManyAccentuatedPlugin.feedz   s4    ")$$##q(# %r)   c                     SU l         SU l        g r[   rg   r.   s    r%   r/   TooManyAccentuatedPlugin.reset   s     !"#r)   c                j    U R                   S:  a  gU R                  U R                   -  nUS:  a  U$ S$ )N   r^   gffffff?rg   )r#   ratio_of_accentuations     r%   r2   TooManyAccentuatedPlugin.ratio   s=      1$'+'>'>AVAV'V(=(E$N3Nr)   )rh   rK   Nr;   r5   r9   r<   rb   r4   r)   r%   rd   rd   r   s,    )#)$ O Or)   rd   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g)UnprintablePlugin   c                     SU l         SU l        g r[   )_unprintable_countrK   r.   s    r%   rN   UnprintablePlugin.__init__   s    '(%&r)   c                    gNTr4   r"   s     r%   r&   UnprintablePlugin.eligible       r)   c                x    [        U5      (       a  U =R                  S-  sl        U =R                  S-  sl        g rn   )r   rz   rK   r"   s     r%   r+   UnprintablePlugin.feed   s/    )$$##q(#"r)   c                    SU l         g r[   )rz   r.   s    r%   r/   UnprintablePlugin.reset   s
    "#r)   c                \    U R                   S:X  a  gU R                  S-  U R                   -  $ )Nr   r^   rs   rK   rz   r.   s    r%   r2   UnprintablePlugin.ratio   s/      A%''!+t/D/DDDr)   r   Nr;   r5   r9   r<   rb   r4   r)   r%   rw   rw      s,    '#
$ E Er)   rw   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g)SuspiciousDuplicateAccentPlugin   c                .    SU l         SU l        S U l        g r[   _successive_countrK   _last_latin_characterr.   s    r%   rN   (SuspiciousDuplicateAccentPlugin.__init__   s    &'%&15"r)   c                F    UR                  5       =(       a    [        U5      $ rQ   )rk   r   r"   s     r%   r&   (SuspiciousDuplicateAccentPlugin.eligible   s      ":x	'::r)   c                   U =R                   S-  sl         U R                  b  [        U5      (       a  [        U R                  5      (       a  UR                  5       (       a4  U R                  R                  5       (       a  U =R                  S-  sl        [        U5      [        U R                  5      :X  a  U =R                  S-  sl        Xl        g rn   )rK   r   r
   isupperr   r   r"   s     r%   r+   $SuspiciousDuplicateAccentPlugin.feed   s    "&&2y))t99::  ""t'A'A'I'I'K'K&&!+&Y'=9S9S+TT&&!+&%."r)   c                .    SU l         SU l        S U l        g r[   r   r.   s    r%   r/   %SuspiciousDuplicateAccentPlugin.reset   s    !" !%)"r)   c                \    U R                   S:X  a  gU R                  S-  U R                   -  $ )Nr   r^   rW   )rK   r   r.   s    r%   r2   %SuspiciousDuplicateAccentPlugin.ratio   s/      A%&&*d.C.CCCr)   )rK   r   r   Nr;   r5   r9   r<   rb   r4   r)   r%   r   r      s,    6;/*
 D Dr)   r   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g)SuspiciousRange   c                .    SU l         SU l        S U l        g r[   )"_suspicious_successive_range_countrK   _last_printable_seenr.   s    r%   rN   SuspiciousRange.__init__   s    78/%&04!r)   c                "    UR                  5       $ rQ   rR   r"   s     r%   r&   SuspiciousRange.eligible   rU   r)   c                Z   U =R                   S-  sl         UR                  5       (       d  [        U5      (       d
  U[        ;   a  S U l        g U R                  c  Xl        g [        U R                  5      n[        U5      n[        X#5      (       a  U =R                  S-  sl        Xl        g rn   )rK   isspacer   r   r   r    is_suspiciously_successive_ranger   )r#   r$   unicode_range_aunicode_range_bs       r%   r+   SuspiciousRange.feed   s    " i((88(,D%$$,(1%&3D4M4M&N&3I&>+OMM33q83$-!r)   c                .    SU l         SU l        S U l        g r[   )rK   r   r   r.   s    r%   r/   SuspiciousRange.reset   s     !23/$(!r)   c                `    U R                   S::  a  gU R                  S-  U R                   -  nU$ )N   r^   rW   )rK   r   )r#   ratio_of_suspicious_range_usages     r%   r2   SuspiciousRange.ratio   s<      B& 33a7!!2"' /.r)   )rK   r   r   Nr;   r5   r9   r<   rb   r4   r)   r%   r   r      s*    5
'..)
 / /r)   r   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g)SuperWeirdWordPlugin   c                    SU l         SU l        SU l        SU l        SU l        SU l        SU l        SU l        SU l        SU l	        g )Nr   F )
_word_count_bad_word_count_foreign_long_count_is_current_word_bad_foreign_long_watchrK   _bad_character_count_buffer_buffer_accent_count_buffer_glyph_countr.   s    r%   rN   SuperWeirdWordPlugin.__init__   sQ     !$%() */!). %&)*!)*!() r)   c                    gr}   r4   r"   s     r%   r&   SuperWeirdWordPlugin.eligible  r   r)   c                   UR                  5       (       Ga  U =R                  U-  sl        [        U5      (       a  U =R                  S-  sl        U R                  SL ak  [        U5      SL d  [        U5      (       aM  [        U5      SL a?  [        U5      SL a1  [        U5      SL a#  [        U5      SL a  [        U5      SL a  SU l        [        U5      (       d@  [        U5      (       d0  [        U5      (       d   [        U5      (       d  [        U5      (       a  U =R                  S-  sl        g U R                  (       d  g UR                  5       (       d!  [        U5      (       d  [        U5      (       Ga-  U R                  (       Ga  U =R                  S-  sl        [!        U R                  5      nU =R"                  U-  sl        US:  a  U R                  U-  S:  a  SU l        O[        U R                  S   5      (       a^  U R                  S   R'                  5       (       a<  [)        S U R                   5       5      SL a  U =R*                  S-  sl        SU l        O,U R                  S:X  a  SU l        U =R*                  S-  sl        US:  a  U R                  (       a  [-        U R                  [/        S	U5      5       VVs/ s H  u  p4UR'                  5       (       d  M  UPM      nnnSnU(       a  [!        U5      U-  S
::  a  SnU(       d  U =R*                  S-  sl        SU l        U R$                  (       aD  U =R0                  S-  sl        U =R2                  [!        U R                  5      -  sl        SU l        SU l        SU l        S	U l        S	U l        g US;  aB  UR5                  5       SL a.  [7        U5      (       a  SU l        U =R                  U-  sl        g g g g s  snnf )Nr   FT         ?c              3  @   #    U  H  oR                  5       v   M     g 7frQ   )r   ).0_s     r%   	<genexpr>,SuperWeirdWordPlugin.feed.<locals>.<genexpr>8  s     >AIIKKs      r   r_   r   >   -<=>r   |~)rk   r   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   lenrK   r   r   allr   zipranger   r   rX   r   )r#   r$   buffer_lengthcicamel_case_dstprobable_camel_caseds          r%   r+   SuperWeirdWordPlugin.feed  s:   LLI%Li(())Q.)((E1i(E1^I5N5N9%.i(E1	*e3	*e3I&%/+/(y!!Y''y))y))9%%((A-(||>)#<#<Y@W@Wlll!!$T\\!2M!!]2!!,,}<C04D- #4<<#344R(0022>>>%G,,1,04D---204D-,,1,"t'?'? !$DLL%=2I J" Jyy{  J  "
 .3$!s>':]'Jc'Q+/(+,,1,04D-(($$)$))S->>),1)',D$DL()D%'(D$@@!!#u,)$$(,D%LLI%L % - A1"s   O/+O/c                t    SU l         SU l        SU l        SU l        SU l        SU l        SU l        SU l        g )Nr   Fr   )r   r   r   r   r   rK   r   r   r.   s    r%   r/   SuperWeirdWordPlugin.reset_  sA    $)!#(   !$%!#$ r)   c                v    U R                   S::  a  U R                  S:X  a  gU R                  U R                  -  $ )N
   r   r^   )r   r   r   rK   r.   s    r%   r2   SuperWeirdWordPlugin.ratioi  s7    r!d&>&>!&C((4+@+@@@r)   )
r   r   r   r   r   rK   r   r   r   r   Nr;   r5   r9   r<   rb   r4   r)   r%   r   r      s.    *O&b% A Ar)   r   c                  T    \ rS rSrSrS
S jrSS jrSS jrS
S jr\	SS j5       r
Srg	)CjkUncommonPluginiq  z4
Detect messy CJK text that probably means nothing.
c                     SU l         SU l        g r[   rK   _uncommon_countr.   s    r%   rN   CjkUncommonPlugin.__init__v  s    %&$%r)   c                    [        U5      $ rQ   )r   r"   s     r%   r&   CjkUncommonPlugin.eligiblez  s    i  r)   c                z    U =R                   S-  sl         [        U5      (       a  U =R                  S-  sl        g g rn   )rK   r   r   r"   s     r%   r+   CjkUncommonPlugin.feed}  s7    "9%%  A%  &r)   c                     SU l         SU l        g r[   r   r.   s    r%   r/   CjkUncommonPlugin.reset  s     ! r)   c                p    U R                   S:  a  gU R                  U R                   -  nUS:  a  US-  $ S$ )Nrs   r^   r   r   r   )r#   uncommon_form_usages     r%   r2   CjkUncommonPlugin.ratio  sD      1$%)%9%9D<Q<Q%Q ,?+D"R'M#Mr)   r   Nr;   r5   r9   r<   )r>   r?   r@   rA   rB   rN   r&   r+   r/   rC   r2   rD   r4   r)   r%   r   r   q  s1    &!! N Nr)   r   c                  P    \ rS rSrS	S jrS
S jrSS jrS	S jr\SS j5       r	Sr
g)ArchaicUpperLowerPlugini  c                f    SU l         SU l        SU l        SU l        SU l        S U l        SU l        g )NFr   T)_buf_character_count_since_last_sep_successive_upper_lower_count#_successive_upper_lower_count_finalrK   _last_alpha_seen_current_ascii_onlyr.   s    r%   rN    ArchaicUpperLowerPlugin.__init__  s9    	45,23*890%&,0)- r)   c                    gr}   r4   r"   s     r%   r&    ArchaicUpperLowerPlugin.eligible  r   r)   c                   UR                  5       =(       a    [        U5      nUSL nU(       a  U R                  S:  a  U R                  S::  aA  UR                  5       SL a.  U R                  SL a  U =R
                  U R                  -  sl        SU l        SU l        S U l        SU l        U =R                  S-  sl	        SU l        g U R                  SL a  UR                  5       SL a  SU l        U R                  b  UR                  5       (       a  U R                  R                  5       (       d4  UR                  5       (       aS  U R                  R                  5       (       a4  U R                  SL a  U =R                  S-  sl        SU l        OSU l        OSU l        U =R                  S-  sl	        U =R                  S-  sl        Xl        g )NFr   @   r   TrW   )rk   r   r   rX   r   r   r   r   r   rK   isasciir   islower)r#   r$   is_concerned	chunk_seps       r%   r+   ArchaicUpperLowerPlugin.feed  s    ((*J/?	/J E)	==A44:%%'50,,588668 23D.34D0$(D!DI!!Q&!'+D$##t+	0A0A0Cu0L',D$  ,!!##(=(=(E(E(G(G!!##(=(=(E(E(G(G99$66!;6 %DI $DI!	",,1, )r)   c                f    SU l         SU l        SU l        SU l        S U l        SU l        SU l        g )Nr   FT)rK   r   r   r   r   r   r   r.   s    r%   r/   ArchaicUpperLowerPlugin.reset  s9     !/0,-.*340 $	#' r)   c                V    U R                   S:X  a  gU R                  U R                   -  $ )Nr   r^   )rK   r   r.   s    r%   r2   ArchaicUpperLowerPlugin.ratio  s*      A%77$:O:OOOr)   )r   rK   r   r   r   r   r   Nr;   r5   r9   r<   rb   r4   r)   r%   r   r     s-    .(*T( P Pr)   r   c                  P    \ rS rSrS	S jrS	S jrS
S jrSS jr\SS j5       r	Sr
g)ArabicIsolatedFormPlugini  c                     SU l         SU l        g r[   rK   _isolated_form_countr.   s    r%   rN   !ArabicIsolatedFormPlugin.__init__  s    %&)*!r)   c                     SU l         SU l        g r[   r  r.   s    r%   r/   ArabicIsolatedFormPlugin.reset  s     !$%!r)   c                    [        U5      $ rQ   )r   r"   s     r%   r&   !ArabicIsolatedFormPlugin.eligible  s    ##r)   c                z    U =R                   S-  sl         [        U5      (       a  U =R                  S-  sl        g g rn   )rK   r   r  r"   s     r%   r+   ArabicIsolatedFormPlugin.feed  s4    ""9--%%*% .r)   c                Z    U R                   S:  a  gU R                  U R                   -  nU$ )Nrs   r^   r  )r#   isolated_form_usages     r%   r2   ArabicIsolatedFormPlugin.ratio  s0      1$%)%>%>AVAV%V""r)   r  Nr;   r5   r9   r<   )r>   r?   r@   rA   rN   r/   r&   r+   rC   r2   rD   r4   r)   r%   r  r    s*    +&$+ # #r)   r     )maxsizec                .   U b  Uc  gX:X  a  gSU ;   a  SU;   a  gSU ;   d  SU;   a  gSU ;   d  SU;   a  SU ;   d  SU;   a  gU R                  S5      UR                  S5      p2U H  nU[        ;   a  M  XC;   d  M    g   U S;   US;   peU(       d  U(       a  SU ;   d  SU;   a  gU(       a  U(       a  gS	U ;   d  S	U;   a  SU ;   d  SU;   a  gU S
:X  d  US
:X  a  gSU ;   d  SU;   d  U S;   a-  US;   a'  SU ;   d  SU;   a  gSU ;   d  SU;   a  gU S
:X  d  US
:X  a  gg)zY
Determine if two Unicode range seen next to each other can be considered as suspicious.
TFLatin	Emoticons	Combining )HiraganaKatakanaCJKHangulzBasic Latin)r  r  PunctuationForms)splitr	   )r   r   keywords_range_akeywords_range_belrange_a_jp_charsrange_b_jp_charss          r%   r   r     sw    /"9)/!g&@o%)G 	?"g&@&+*H 	c"c" '
 00!	  	
	

 	33 ' 	, E_$<,?"h/&AO#u'?m+-/O 	 E_$<3377O+}/Oo%O)Cm+-/Or)   i   c           	     $   [         R                  5        Vs/ s H	  o3" 5       PM     nn[        U 5      S-   nSnUS:  a  SnOUS::  a  SnOSn[        U S-   [	        U5      5       Hh  u  pU H,  n
U
R                  U5      (       d  M  U
R                  U5        M.     U	S	:  a  X-  S	:X  d
  XS-
  :X  d  MO  [        S
 U 5       5      nXa:  d  Mh    O   U(       a  [        S5      nUR                  [        SU SU SU 35        [        U 5      S:  a8  UR                  [        SU SS  35        UR                  [        SU SS  35        U H2  nUR                  [        UR                   SUR                   35        M4     [        US5      $ s  snf )zo
Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
r   r^   i       r  r      
r   c              3  8   #    U  H  oR                   v   M     g 7frQ   )r2   )r   dts     r%   r   mess_ratio.<locals>.<genexpr>e  s     !?Yr((Ys   charset_normalizerzIMess-detector extended-analysis start. intermediary_mean_mess_ratio_calc=z mean_mess_ratio=z maximum_threshold=   zStarting with: NzEnding with: iz:    )r   __subclasses__r   r   r   r&   r+   sumr   logr   	__class__r2   round)decoded_sequencemaximum_thresholddebugmd_class	detectorslengthmean_mess_ratio!intermediary_mean_mess_ratio_calcr$   indexdetectorloggerr*  s                r%   
mess_ratior?  F  s    $6#D#D#F+#Fx
#F  + &'!+F O|13)	4,.),/) 04 7vG	!H  ++i( "
 AI%CqHqj !!?Y!??O3 H /0

11R0SSdetdu v!!2 35	
  2%JJu0@"0E/FGHJJu.>su.E-FGHBJJub
;<  !$$[+s   FN)r   
str | Noner   r@  r7   r8   )g?F)r4  r6   r5  r=   r6  r8   r7   r=   ))
__future__r   	functoolsr   loggingr   constantr   r   r	   utilsr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rF   rd   rw   r   r   r   r   r   r  r   r?  r4   r)   r%   <module>rF     s<   "   
    ," "D,L'9 ,L^O1 O6E* E0"D&8 "DJ./( ./bsA- sAl N*  NFIP0 IPX#1 #8 4FF2<F	F FR 4IN4%4%.34%BF4%
4% 4%r)   