
    (ph                     6   % S r SrSSKJr  SSKJr  SSKrSSKJr  SSKrSSK	J
r
Jr  SSKJr  SS	KJrJrJrJrJrJrJrJrJrJr  SS
KJr  SSKJrJr  SSKrSr\\   \ S'    SSK!r!\!rS\%S\\&   4S jr'Sr(\&\ S'   Sr)\&\ S'   \*" 5       r+\\\\&\4   4   \ S'   \RX                  " \)R[                  S5      \R\                  5      \RX                  " \(R[                  S5      \R\                  5      S.\+\%'   \RX                  " \)\R\                  5      \RX                  " \(\R\                  5      S.\+\&'    " S S\/5      r0\0Rc                  5          " S S5      r2 " S S5      r3g! \" a/     SSK#r#\#r GN
! \" a     SSK$r$\$r  GN! \" a       GN&f = ff = ff = f)a~  Beautiful Soup bonus library: Unicode, Dammit

This library converts a bytestream to Unicode through any means
necessary. It is heavily based on code from Mark Pilgrim's `Universal
Feed Parser <https://pypi.org/project/feedparser/>`_, now maintained
by Kurt McKee. It does not rewrite the body of an XML or HTML document
to reflect a new encoding; that's the job of `TreeBuilder`.

MIT    )codepoint2name)defaultdictN)html5)Logger	getLogger)
ModuleType)
DictIteratorListOptionalPatternSetTupleTypeUnioncast)Literal)	_Encoding
_Encodingschardet_modulesreturnc                 p    [         b  [        U [        5      (       a  g[         nUR                  U 5      S   $ )z?Try as hard as possible to detect the encoding of a bytestring.Nencoding)r   
isinstancestrdetect)r   modules     =/var/www/html/venv/lib/python3.13/site-packages/bs4/dammit.py_chardet_dammitr!   G   s0    As!3!3F==J''    z$^\s*<\?.*encoding=['"](.*?)['"].*\?>xml_encodingz0<\s*meta[^>]+charset\s*=\s*["']?([^>]*?)[ /;'">]	html_metaencoding_resascii)htmlxmlc                      \ rS rSr% Sr\\\4   \S'   \\\4   \S'   \\   \S'   \\   \S'   \	S%S	 j5       r
S
SSSSS.r\\\4   \S'   \R                  " S\R                  5      r\R                  " S5      r\\   \S'   \R                  " S5      r\\   \S'   \	S\R$                  S\4S j5       r\	S\R$                  S\4S j5       r\	S\R$                  S\4S j5       r\	S\R$                  S\4S j5       r\	S\S\4S j5       r\	S&S\S\S\4S jj5       r\	 S&S\S\S\4S jj5       r\	S \S\4S! j5       r\	S \S\4S" j5       r\	S \S\4S# j5       rS$rg)'EntitySubstitutionb   zFThe ability to substitute XML or HTML entities for certain characters.HTML_ENTITY_TO_CHARACTERCHARACTER_TO_HTML_ENTITYCHARACTER_TO_HTML_ENTITY_RE*CHARACTER_TO_HTML_ENTITY_WITH_AMPERSAND_REr   Nc                    0 n0 n[        5       n[        [         5      n[        [        R                  " 5       5       H  u  pVUR                  S5      (       a  USS nOUnXr;  a  XbU'   XqU'   [        U5      S:X  a  [        U5      S:  a  US;  a  MV  [        U5      S:  a  [        S U 5       5      (       a  M~  [        U5      S:X  a  US:w  a  UR                  U5        M  XFS	      R                  U5        M     [        5       nU H^  n	XI   n
U
(       d  UR                  U	5        M!  S
R                  U
 Vs/ s H  oS   PM	     sn5      nUR                  U	< SU< S35        M`     [        UR                  5       5       H  nU H  nUR                  U5        M     M     SSR                  U5      -  nUR                  S5        SSR                  U5      -  n[        [        R                  " 5       5       H  u  nn[        U5      nXqU'   M     Xl        X l        ["        R$                  " U5      U l        ["        R$                  " U5      U l        gs  snf )ue  Initialize variables used by this class to manage the plethora of
HTML5 named entities.

This function sets the following class variables:

CHARACTER_TO_HTML_ENTITY - A mapping of Unicode strings like "⦨" to
entity names like "angmsdaa". When a single Unicode string has
multiple entity names, we try to choose the most commonly-used
name.

HTML_ENTITY_TO_CHARACTER: A mapping of entity names like "angmsdaa" to
Unicode strings like "⦨".

CHARACTER_TO_HTML_ENTITY_RE: A regular expression matching (almost) any
Unicode string that corresponds to an HTML5 named entity.

CHARACTER_TO_HTML_ENTITY_WITH_AMPERSAND_RE: A very similar
regular expression to CHARACTER_TO_HTML_ENTITY_RE, but which
also matches unescaped ampersands. This is used by the 'html'
formatted to provide backwards-compatibility, even though the HTML5
spec allows most ampersands to go unescaped.
;N      z<>c              3   >   #    U  H  n[        U5      S :  v   M     g7f)r4   N)ord).0xs     r    	<genexpr>?EntitySubstitution._populate_class_variables.<locals>.<genexpr>   s     )J	1#a&3,	s   &r    z(?![z])z(%s)|)setr   sortedr   itemsendswithlenr6   alladdjoinlistvaluesr   chrr-   r,   recompiler.   r/   )clsunicode_to_namename_to_unicodeshort_entities long_entities_by_first_charactername_with_semicolon	charactername	particlesshortlong_versionsr8   ignorelong_entitieslong_entityre_definitionre_definition_with_ampersand	codepoints                     r    _populate_class_variables,EntitySubstitution._populate_class_variables   s#   0 +6s+;(.4U[[].C* #++C00*3B/*
 *(1%
 *.I& 9~"s9~';	QU@U
 9~!c)J	)J&J&J  9~"yC'7""9-01>BB9M{ /DB E	#E<CM e$!>1A$!>? eV<= $ ""B"I"I"KLM,k*  - M )!44c'-0C'C$  $N$8$8$:;OItII)-I&  < (7$'6$*,**]*C'9;(:
67 "?s   8I
aposquotampltgt)'"r;   <>CHARACTER_TO_XML_ENTITYz&(#\d+|#x[0-9a-fA-F]+|\w+);z&([<>]|&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;))BARE_AMPERSAND_OR_BRACKETz([<>&])AMPERSAND_OR_BRACKETmatchobjc                 t    UR                  S5      nU R                  R                  U5      nUc  SU-  $ SU-  $ )zhUsed with a regular expression to substitute the
appropriate HTML entity for a special character string.r   &amp;%s;&%s;)groupr-   get)rK   rj   original_entityentitys       r    _substitute_html_entity*EntitySubstitution._substitute_html_entity  sB     #..+--11/B>//r"   c                 H    U R                   UR                  S5         nSU-  $ )zgUsed with a regular expression to substitute the
appropriate XML entity for a special character string.r   rm   )rg   rn   )rK   rj   rq   s      r    _substitute_xml_entity)EntitySubstitution._substitute_xml_entity)  s'     ,,X^^A->?r"   c                 *    SUR                  S5      -  $ )Nrl   r3   )rn   )rK   rj   s     r    _escape_entity_name&EntitySubstitution._escape_entity_name0  s    HNN1---r"   c                 V    UR                  S5      nX R                  ;   a  SU-  $ SU-  $ )Nr3   rm   rl   )rn   r,   )rK   rj   possible_entitys      r     _escape_unrecognized_entity_name3EntitySubstitution._escape_unrecognized_entity_name4  s2    "..+:::O++O++r"   valuec                 Z    SnSU;   a  SU;   a  SnUR                  SU5      nOSnX!-   U-   $ )a#  Make a value into a quoted XML attribute, possibly escaping it.

 Most strings will be quoted using double quotes.

  Bob's Bar -> "Bob's Bar"

 If a string contains double quotes, it will be quoted using
 single quotes.

  Welcome to "my bar" -> 'Welcome to "my bar"'

 If a string contains both single and double quotes, the
 double quotes will be escaped, and the string will be quoted
 using double quotes.

  Welcome to "Bob's Bar" -> Welcome to &quot;Bob's bar&quot;

:param value: The XML attribute value to quote
:return: The quoted value
rd   rc   z&quot;)replace)rK   r~   
quote_withreplace_withs       r    quoted_attribute_value)EntitySubstitution.quoted_attribute_value;  sC    , 
%<e|  (c<8 !
!J..r"   make_quoted_attributec                     U R                   R                  U R                  U5      nU(       a  U R                  U5      nU$ )a  Replace special XML characters with named XML entities.

The less-than sign will become &lt;, the greater-than sign
will become &gt;, and any ampersands will become &amp;. If you
want ampersands that seem to be part of an entity definition
to be left alone, use `substitute_xml_containing_entities`
instead.

:param value: A string to be substituted.

:param make_quoted_attribute: If True, then the string will be
 quoted, as befits an attribute value.

:return: A version of ``value`` with special characters replaced
 with named entities.
)ri   subru   r   rK   r~   r   s      r    substitute_xml!EntitySubstitution.substitute_xmlc  s:    & ((,,S-G-GO ..u5Er"   c                     U R                   R                  U R                  U5      nU(       a  U R                  U5      nU$ )ag  Substitute XML entities for special XML characters.

:param value: A string to be substituted. The less-than sign will
  become &lt;, the greater-than sign will become &gt;, and any
  ampersands that are not part of an entity defition will
  become &amp;.

:param make_quoted_attribute: If True, then the string will be
 quoted, as befits an attribute value.
)rh   r   ru   r   r   s      r    "substitute_xml_containing_entities5EntitySubstitution.substitute_xml_containing_entities|  s:      --11#2L2LeT ..u5Er"   r   c                 N    U R                   R                  U R                  U5      $ )a'  Replace certain Unicode characters with named HTML entities.

This differs from ``data.encode(encoding, 'xmlcharrefreplace')``
in that the goal is to make the result more readable (to those
with ASCII displays) rather than to recover from
errors. There's absolutely nothing wrong with a UTF-8 string
containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that
character with "&eacute;" will make it more readable to some
people.

:param s: The string to be modified.
:return: The string with some Unicode characters replaced with
   HTML entities.
)r/   r   rr   rK   r   s     r    substitute_html"EntitySubstitution.substitute_html  s(    " ==AA''
 	
r"   c                     U R                   R                  U R                  U5      nU R                  R                  U R                  U5      nU$ )a  Replace certain Unicode characters with named HTML entities
using HTML5 rules.

Specifically, this method is much less aggressive about
escaping ampersands than substitute_html. Only ambiguous
ampersands are escaped, per the HTML5 standard:

"An ambiguous ampersand is a U+0026 AMPERSAND character (&)
that is followed by one or more ASCII alphanumerics, followed
by a U+003B SEMICOLON character (;), where these characters do
not match any of the names given in the named character
references section."

Unlike substitute_html5_raw, this method assumes HTML entities
were converted to Unicode characters on the way in, as
Beautiful Soup does. By the time Beautiful Soup does its work,
the only ambiguous ampersands that need to be escaped are the
ones that were escaped in the original markup when mentioning
HTML entities.

:param s: The string to be modified.
:return: The string with some Unicode characters replaced with
   HTML entities.
)ANY_ENTITY_REr   rx   r.   rr   r   s     r    substitute_html5#EntitySubstitution.substitute_html5  sG    6 !!#"9"91= ++//0K0KQOr"   c                     U R                   R                  U R                  U5      nU R                  R                  U R                  U5      nU$ )ae  Replace certain Unicode characters with named HTML entities
using HTML5 rules.

substitute_html5_raw is similar to substitute_html5 but it is
designed for standalone use (whereas substitute_html5 is
designed for use with Beautiful Soup).

:param s: The string to be modified.
:return: The string with some Unicode characters replaced with
   HTML entities.
)r   r   r|   r.   rr   r   s     r    substitute_html5_raw'EntitySubstitution.substitute_html5_raw  sG      !!#"F"FJ ++//0K0KQOr"    )r   N)F)__name__
__module____qualname____firstlineno____doc__r
   r   __annotations__r   classmethodr\   rg   rI   rJ   Ir   rh   ri   Matchrr   ru   rx   r|   r   boolr   r   r   r   r   __static_attributes__r   r"   r    r*   r*   b   sC   P
 #38n, #38n, ")- 18<A
 A
N /T#s(^  JJ>EM /1jj8/ws|  *,I)>'#,>rxx C   bhh 3   .288 . . . , ,S , , %/3 %/3 %/ %/N 3 t PS  0 7<04	 * 
 
 
 
(    B S S  r"   r*   c                   r   \ rS rSr% Sr     SS\S\\   S\\   S\\   S\\   S	\\   4S
 jjr	\\
S'   \\
S'   \\
S'   \\   \
S'   \\
S'   \\   \
S'   \\
S'   \\   \
S'   S\\   S\\   S\4S jr\S\\   4S j5       r\S\S\\\\   4   4S j5       r\  SS\\\4   S\S\S\\   4S jj5       rSrg)EncodingDetectori  af  This class is capable of guessing a number of possible encodings
for a bytestring.

Order of precedence:

1. Encodings you specifically tell EncodingDetector to try first
   (the ``known_definite_encodings`` argument to the constructor).

2. An encoding determined by sniffing the document's byte-order mark.

3. Encodings you specifically tell EncodingDetector to try if
   byte-order mark sniffing fails (the ``user_encodings`` argument to the
   constructor).

4. An encoding declared within the bytestring itself, either in an
   XML declaration (if the bytestring is to be interpreted as an XML
   document), or in a <meta> tag (if the bytestring is to be
   interpreted as an HTML document.)

5. An encoding detected through textual analysis by chardet,
   cchardet, or a similar external library.

6. UTF-8.

7. Windows-1252.

:param markup: Some markup in an unknown encoding.

:param known_definite_encodings: When determining the encoding
    of ``markup``, these encodings will be tried first, in
    order. In HTML terms, this corresponds to the "known
    definite encoding" step defined in `section 13.2.3.1 of the HTML standard <https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding>`_.

:param user_encodings: These encodings will be tried after the
    ``known_definite_encodings`` have been tried and failed, and
    after an attempt to sniff the encoding by looking at a
    byte order mark has failed. In HTML terms, this
    corresponds to the step "user has explicitly instructed
    the user agent to override the document's character
    encoding", defined in `section 13.2.3.2 of the HTML standard <https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding>`_.

:param override_encodings: A **deprecated** alias for
    ``known_definite_encodings``. Any encodings here will be tried
    immediately after the encodings in
    ``known_definite_encodings``.

:param is_html: If True, this markup is considered to be
    HTML. Otherwise it's assumed to be XML.

:param exclude_encodings: These encodings will not be tried,
    even if they otherwise would be.

Nmarkupknown_definite_encodingsis_htmlexclude_encodingsuser_encodingsoverride_encodingsc                    [        U=(       d    / 5      U l        U(       a/  [        R                  " S[        SS9  U =R                  U-  sl        U=(       d    / U l        U=(       d    / n[        U Vs/ s H  owR                  5       PM     sn5      U l        S U l	        Uc  SOUU l
        S U l        U R                  U5      u  U l        U l        g s  snf )NzcThe 'override_encodings' argument was deprecated in 4.10.0. Use 'known_definite_encodings' instead.   )
stacklevelF)rF   r   warningswarnDeprecationWarningr   r>   lowerr   chardet_encodingr   declared_encodingstrip_byte_order_markr   sniffed_encoding)selfr   r   r   r   r   r   r8   s           r    __init__EncodingDetector.__init__  s     )--E-K(L%MMu"
 ))-??),2-3!$9J%K9JAggi9J%K!L $ 'uW04 .2-G-G-O*T* &Ls   4Cr   r   r   r   triedr   c                 z    Uc  gUR                  5       nXR                  ;   a  gX;  a  UR                  U5        gg)zShould we even bother to try this encoding?

:param encoding: Name of an encoding.
:param tried: Encodings that have already been tried. This
    will be modified as a side effect.
FT)r   r   rD   )r   r   r   s      r    _usableEncodingDetector._usable@  s@     >>#--- IIhr"   c              #   L  #    [        5       nU R                   H  nU R                  X!5      (       d  M  Uv   M!     U R                  b/  U R                  U R                  U5      (       a  U R                  v   U R                   H  nU R                  X!5      (       d  M  Uv   M!     U R
                  c+  U R                  U R                  U R                  5      U l        U R
                  b/  U R                  U R
                  U5      (       a  U R
                  v   U R                  c  [        U R                  5      U l	        U R                  b/  U R                  U R                  U5      (       a  U R                  v   S H  nU R                  X!5      (       d  M  Uv   M!     g7f)zYield a number of encodings that might work for this markup.

:yield: A sequence of strings. Each is the name of an encoding
   that *might* work to convert a bytestring into Unicode.
N)utf-8windows-1252)r>   r   r   r   r   r   find_declared_encodingr   r   r   r!   )r   r   es      r    	encodingsEncodingDetector.encodingsQ  sj     !$ ..A||A%% /   ,!!52
 2
 ''' $$A||A%% % !!)%)%@%@T\\&D" !!-$,,""E3
 3
 (((   ($3DKK$@D!  ,!!52
 2
 ''' +A||A%% +s   .F$A'F$C8F$	F$datac                 T   Sn[        U[        5      (       a  X4$ [        U5      S:  a  USS S:X  a  USS S:w  a
  SnUSS nX4$ [        U5      S:  a  USS S:X  a  USS S:w  a
  SnUSS nX4$ USS	 S
:X  a
  SnUS	S nX4$ USS S:X  a
  SnUSS nX4$ USS S:X  a  SnUSS nX4$ )zIf a byte-order mark is present, strip it and return the encoding it implies.

:param data: A bytestring that may or may not begin with a
   byte-order mark.

:return: A 2-tuple (data stripped of byte-order mark, encoding implied by byte-order mark)
N      s   s     zutf-16bes   zutf-16ler   s   ﻿r   s     zutf-32bes     zutf-32le)r   r   rB   )rK   r   r   s      r    r   &EncodingDetector.strip_byte_order_mark  s     dC  >!Y!^bq[(ak)!H8D" ~ Y!^bq[(ak)!H8D ~ "1X(H8D ~ "1X,,!H8D ~ "1X,,!H8D~r"   search_entire_documentc                    U(       a  [        U5      =pEO#Sn[        S[        [        U5      S-  5      5      n[        U[        5      (       a  [
        [           nO[
        [           nUS   nUS   nSn	UR                  XS9n
U
(       d  U(       a  UR                  XS9n
U
b  U
R                  5       S   n	U	(       a7  [        U	[        5      (       a  U	R                  S	S
5      n	U	R                  5       $ g)a  Given a document, tries to find an encoding declared within the
text of the document itself.

An XML encoding is declared at the beginning of the document.

An HTML encoding is declared in a <meta> tag, hopefully near the
beginning of the document.

:param markup: Some markup.
:param is_html: If True, this markup is considered to be HTML. Otherwise
    it's assumed to be XML.
:param search_entire_document: Since an encoding is supposed
    to declared near the beginning of the document, most of
    the time it's only necessary to search a few kilobytes of
    data.  Set this to True to force this method to search the
    entire document.
:return: The declared encoding, if one is found.
i   i   g?r(   r'   N)endposr   r&   r   )rB   maxintr   bytesr%   r   searchgroupsdecoder   )rK   r   r   r   
xml_endposhtml_endposresxml_rehtml_rer   declared_encoding_matchs              r    r   'EncodingDetector.find_declared_encoding  s    2 "'*6{2JJdCFd(:$;<Kfe$$u%Cs#CUf+15"(---"J&7&-nnVn&P#". 7 > > @ C+U33$5$<$<Wi$P!$**,,r"   )r   r   r   r   r   r   r   r   )NFNNN)FF)r   r   r   r   r   r   r   r   r   r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r"   r    r   r     s|   4r :>"'26/337PP #+:"6P $	P
 $J/P !,P %Z0P6 )(!!y))M	**My)) 3 C	N t " 18I. 1 1f # #5@S9S3T # #J  ',	/eSj!/ / !%	/
 
)	/ /r"   r   c                      \ rS rSr% Sr/ SS/ SS4S\S\\   S\\S      S	\	S
\\   S\\   S\\   4S jjr
\\S'   \\   \S'   \	\S'   \\   \S'   \\   \S'   \\\\4      \S'   \\S'   S\R$                  S\4S jrSSS.r\\\4   \S'   / SQr\\S'    GSS\S\S\\   4S jjr GSS\S \S\S\4S! jjr\S\\   4S" j5       rS#\S\\   4S$ jrS#\S\\   4S% jr0 S&S'_S(S)_S*S+_S,S-_S.S/_S0S1_S2S3_S4S5_S6S7_S8S9_S:S;_S<S=_S>S?_S@SA_SBSC_SDSA_SESA_SFSGSHSISJSKSLSMSNSOSPSQSASRSSST.Er\\\\\\\4   4   4   \SU'   0 S&SV_S(S)_S*SW_S,SX_S.SY_S0SZ_S2S[_S4S\_S6S]_S8S^_S:S__S<S`_S>Sa_S@SA_SBSb_SDSA_SESA_0 ScSd_SeSd_SfSg_ShSg_SiSj_SkSl_SmSn_SoSp_SqSr_SsSt_SuSv_SwSx_SySA_SzS{_S|S}_S~S)_SS_E0 SS_SS_SS_SS_SS_SS__SS_SS_SS_SS_SS_SS)_SS_SSl_SS_SS_SS_E0 SS_SSd_SS_SS_SSj_SSW_SS_SS_SS_SS_SS_SS_SSA_SS_SS_SS_SS_E0 SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_E0 SS_SS_SSj_SS_SS_SS_SS_SS_SS}_SS_SS_SS_SS_SS_SS_SS_SS_E0 SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_SS_ESSSSSSSSSS.	Er\\\4   \S'   0 SGS _GSGS_GSGS_GSGS_GSGS_GS	GS
_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS _0 GS!GS"_GS#GS$_GS%GS&_GS'GS(_GS)GS*_GS+GS,_GS-GS._GS/GS0_GS1GS2_GS3GS4_GS5GS6_GS7GS8_GS9GS:_GS;GS<_GS=GS>_GS?GS@_GSAGSB_E0 GSCGSD_GSEGSF_GSGGSH_GSIGSJ_GSKGSL_GSMGSN_GSOGSP_GSQGSR_GSSGST_GSUGSV_GSWGSX_GSYGSZ_GS[GS\_GS]GS^_GS_GS`_GSaGSb_GScGSd_E0 GSeGSf_GSgGSh_GSiGSj_GSkGSl_GSmGSn_GSoGSp_GSqGSr_GSsGSt_GSuGSv_GSwGSx_GSyGSz_GS{GS|_GS}GS~_GSGS_GSGS_GSGS_GSGS_E0 GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_E0 GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_E0 GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_GSGS_EGSGSGSGS.Er \\!\4   \GS'   / GSQr"\\\!\!\!4      \GS'   \"GS   GS   r#\!\GS'   \"GS   GS   r$\!\GS'   \%  GSGS\GS\GS\S\4GS jj5       r&GSr'g(  UnicodeDammiti  a  A class for detecting the encoding of a bytestring containing an
HTML or XML document, and decoding it to Unicode. If the source
encoding is windows-1252, `UnicodeDammit` can also replace
Microsoft smart quotes with their HTML or XML equivalents.

:param markup: HTML or XML markup in an unknown encoding.

:param known_definite_encodings: When determining the encoding
    of ``markup``, these encodings will be tried first, in
    order. In HTML terms, this corresponds to the "known
    definite encoding" step defined in `section 13.2.3.1 of the HTML standard <https://html.spec.whatwg.org/multipage/parsing.html#parsing-with-a-known-character-encoding>`_.

:param user_encodings: These encodings will be tried after the
    ``known_definite_encodings`` have been tried and failed, and
    after an attempt to sniff the encoding by looking at a
    byte order mark has failed. In HTML terms, this
    corresponds to the step "user has explicitly instructed
    the user agent to override the document's character
    encoding", defined in `section 13.2.3.2 of the HTML standard <https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding>`_.

:param override_encodings: A **deprecated** alias for
    ``known_definite_encodings``. Any encodings here will be tried
    immediately after the encodings in
    ``known_definite_encodings``.

:param smart_quotes_to: By default, Microsoft smart quotes will,
   like all other characters, be converted to Unicode
   characters. Setting this to ``ascii`` will convert them to ASCII
   quotes instead.  Setting it to ``xml`` will convert them to XML
   entity references, and setting it to ``html`` will convert them
   to HTML entity references.

:param is_html: If True, ``markup`` is treated as an HTML
   document. Otherwise it's treated as an XML document.

:param exclude_encodings: These encodings will not be considered,
   even if the sniffing code thinks they might make sense.

NFr   r   smart_quotes_to)r&   r(   r'   r   r   r   r   c                    X0l         / U l        SU l        X@l        [	        [
        5      U l        [        UUUUUU5      U l        [        U[        5      (       d  US:X  a  Xl        [        U5      U l        S U l        g U R                  R                  U l        S nU R                  R                   H/  n	U R                  R                  nU R                  U	5      nUc  M/    O   U(       d\  U R                  R                   HB  n	U	S:w  a  U R                  U	S5      nUc  M   U R                  R!                  S5        SU l          O   Uc  S U l        S U l        g Xl        g )NFr"   r&   r   zSSome characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.T)r   tried_encodingscontains_replacement_charactersr   r   r   logr   detectorr   r   r   unicode_markuporiginal_encodingr   _convert_fromwarning)
r   r   r   r   r   r   r   r   ur   s
             r    r   UnicodeDammit.__init__  sI     /!/4,X&($
 fc""fm K"%f+D%)D" mm**//H]]))F""8,A}	 0  !MM33w&**8Y?A=HH$$?
 <@D8 4( 9%)D""&D"#r"   r   r   r   r   r   matchr   c                    UR                  S5      nU R                  S:X  a3  X R                  ;   a  U R                  U   R                  5       nU$ Un U$ X R                  ;   a  U R                  U   n[        U5      [        L aG  U R                  S:X  a  SUS   R                  5       -   S-   nU$ SUS   R                  5       -   S-   n U$ [        [        U5      nUR                  5       n U$ UnU$ )zChanges a MS smart quote character to an XML or HTML
entity, or an ASCII character.

TODO: Since this is only used to convert smart quotes, it
could be simplified, and MS_CHARS_TO_ASCII made much less
parochial.
r3   r&   r(   s   &#x   ;   &r   )	rn   r   MS_CHARS_TO_ASCIIencodeMS_CHARStypetupler   r   )r   r   origr   substitutionss        r    _sub_ms_charUnicodeDammit._sub_ms_charl  s    kk!n7*---,,T299;( 
!   
 }}$ $d 3&%/++u4$}Q'7'>'>'@@4G 
 #]1%5%<%<%>>E 
 %)m$<M'..0C
 
 
r"   z	mac-romanz	shift-jis)	macintoshzx-sjisCHARSET_ALIASES)r   z
iso-8859-1z
iso-8859-2ENCODINGS_WITH_SMART_QUOTESproposederrorsc                    U R                  U5      nUb  X24U R                  ;   a  gUnU R                  R                  X45        U R                  nU R                  bC  XR
                  ;   a4  Sn[        R                  " U5      nUR                  U R                  U5      n U R                  XAU5      nXpl        Xl        U R                  $ ! [         a     gf = f)a  Attempt to convert the markup to the proposed encoding.

:param proposed: The name of a character encoding.
:param errors: An error handling strategy, used when calling `str`.
:return: The converted markup, or `None` if the proposed
   encoding/error handling strategy didn't work.
Ns   ([-]))
find_codecr   appendr   r   r   rI   rJ   r   r   _to_unicoder   r   	Exception)r   r   r   lookup_resultr   smart_quotes_resmart_quotes_compiledr   s           r    r   UnicodeDammit._convert_from  s     1 ]$;t?S?S$S ##X$67   ,<<<.O$&JJ$?!*..t/@/@&IF		   6:A"#%-" """  	 	s   "C 
CCr   r   c                     [        XU5      $ )zGiven a bytestring and its encoding, decodes the string into Unicode.

:param encoding: The name of an encoding.
:param errors: An error handling strategy, used when calling `str`.
)r   )r   r   r   r   s       r    r   UnicodeDammit._to_unicode  s     46**r"   c                 R    U R                   (       d  gU R                  R                  $ )zaIf the markup is an HTML document, returns the encoding, if any,
declared *inside* the document.
N)r   r   r   )r   s    r    declared_html_encoding$UnicodeDammit.declared_html_encoding  s    
 ||}}...r"   charsetc                    U R                  U R                  R                  X5      5      =(       d    U=(       a!    U R                  UR                  SS5      5      =(       dS    U=(       a!    U R                  UR                  SS5      5      =(       d"    U=(       a    UR	                  5       =(       d    UnU(       a  UR	                  5       $ g)zLook up the Python codec corresponding to a given character set.

:param charset: The name of a character set.
:return: The name of a Python codec.
-r<   _N)_codecr   ro   r   r   )r   r
  r~   s      r    r   UnicodeDammit.find_codec  s     KK,,00BC ADKKR(@ABDKKS(AB +GMMO  	 ;;= r"   c                 |    U(       d  U$ S n [         R                  " U5        UnU$ ! [        [        4 a     U$ f = f)N)codecslookupLookupError
ValueError)r   r
  codecs      r    r  UnicodeDammit._codec  sJ    N	MM'"E  Z( 		s   ' ;;   )euro20AC       )sbquo201A   )fnof192   )bdquo201E   )hellip2026   )dagger2020   )Dagger2021   )circ2C6   )permil2030   )Scaron160   )lsaquo2039   )OElig152   ?   )z#x17D17D      )lsquo2018)rsquo2019)ldquo201C)rdquo201D)bull2022)ndash2013)mdash2014)tilde2DC)trade2122)scaron161)rsaquo203A)oelig153)z#x17E17E)Yumlr<   )                                             r   EUR,fz,,z...+z++^%Sre   OEZr]  rc   r^  r_  rd   r`  ra  *rb  r  rc  z--rd  ~re  z(TM)rf  r   rg  rf   rh  oeri  rj  zrk  Y      !   c   GBP   $   YEN   r=         z..   r<      z(th)   z<<         z(R)      o   z+-   2   3      r      P         1      z>>   z1/4   z1/2   z3/4      A                  AE   C   E            r               D   N   O                     U               b   B   a                  ae      r               i               n               /y)	                           r   r4   s   €   s   ‚   s   ƒ   s   „   s   …   s   †   s   ‡   s   ˆ   s   ‰   s   Š   s   ‹   s   Œ   s   Ž   s   ‘   s   ’   s   “   s   ”   s   •   s   –   s   —   s   ˜   s   ™   s   š   s   ›   s   œ   s   ž   s   Ÿ   s       s   ¡   s   ¢   s   £   s   ¤   s   ¥   s   ¦   s   §   s   ¨   s   ©   s   ª   s   «   s   ¬   s   ­   s   ®   s   ¯   s   °   s   ±   s   ²   s   ³   s   ´   s   µ   s   ¶   s   ·   s   ¸   s   ¹   s   º   s   »   s   ¼   s   ½   s   ¾   s   ¿   s   À   s   Á   s   Â   s   Ã   s   Ä   s   Å   s   Æ   s   Ç   s   È   s   É   s   Ê   s   Ë   s   Ì   s   Í   s   Î   s   Ï   s   Ð   s   Ñ   s   Ò   s   Ó   s   Ô   s   Õ   s   Ö   s   ×   s   Ø   s   Ù   s   Ú   s   Û   s   Ü   s   Ý   s   Þ   s   ß   s   à      s   â   s   ã   s   ä   s   å   s   æ   s   ç   s   è   s   é   s   ê   s   ë   s   ì   s   í   s   î   s   ï   s   ð   s   ñ   s   ò   s   ó   s   ô   s   õ   s   ö   s   ÷   s   ø   s   ù   s   ú   s   ûs   üs   ýs   þ)         WINDOWS_1252_TO_UTF8))r0  rM  r   )rN  r]  r   )r^  rb  r   MULTIBYTE_MARKERS_AND_SIZESr   FIRST_MULTIBYTE_MARKERr2   r3   LAST_MULTIBYTE_MARKERin_bytesmain_encodingembedded_encodingc                 ~   UR                  SS5      R                  5       S;  a  [        S5      eUR                  5       S;  a  [        S5      e/ nSnSnU[        U5      :  a  X   nXpR                  :  a8  XpR
                  ::  a)  U R                   H  u  pn
Xx:  d  M  Xy::  d  M  Xj-  n  OV   OSUS:  aH  XpR                  ;   a9  UR                  XU 5        UR                  U R                  U   5        US	-  nUnOUS	-  nU[        U5      :  a  M  US:X  a  U$ UR                  XS
 5        SR                  U5      $ )a  Fix characters from one encoding embedded in some other encoding.

Currently the only situation supported is Windows-1252 (or its
subset ISO-8859-1), embedded in UTF-8.

:param in_bytes: A bytestring that you suspect contains
    characters from multiple encodings. Note that this *must*
    be a bytestring. If you've already converted the document
    to Unicode, you're too late.
:param main_encoding: The primary encoding of ``in_bytes``.
:param embedded_encoding: The encoding that was used to embed characters
    in the main document.
:return: A bytestring similar to ``in_bytes``, in which
  ``embedded_encoding`` characters have been converted to
  their ``main_encoding`` equivalents.
r  r  )r   windows_1252zPWindows-1252 and ISO-8859-1 are the only currently supported embedded encodings.)utf8r   z4UTF-8 is the only currently supported main encoding.r   r4   r3   Nr"   )
r   r   NotImplementedErrorrB   ro  rp  rn  rm  r   rE   )rK   rq  rr  rs  byte_chunkschunk_startposbytestartendsizes              r    	detwingleUnicodeDammit.detwingle;  sc   . $$S#.446 ?
 
 && 
  (99%F  CM!=D111d>W>W6W ),(G(G$E} )H $*B*B"B ""8#<= ""3#;#;D#ABq! q+ CM!, !O x56xx$$r"   )	r   r   r   r   r   r   r   r   r   )strict)rv  r   )(r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rI   r   r   r   r
   r   r   r   r   r  r   r  r   r   r   rm  r   rn  ro  rp  r   r  r   r   r"   r    r   r     sd   &V :<EI24/337F$F$ #+:"6F$ "'*@"AB	F$
 F$ $J/F$ !,F$ %Z0F$V M
 SM! &*)  	** c]" %	3/00	K "((  u  R !-OT#y.) /  2:%#!%#+.%#	#%#P ?G++%.+8;+	+ /(; / /)  "	i 	HSM 	!:!!:!: 	"!: 		!:
 	"!: 	#!: 	#!: 	#!: 	!: 	#!: 	"!: 	#!: 	!!: 	!: 	!!:  	!!:" 	#!:$ #"""!""!""#!!A!:Hd5%U38_ 4556 !VB+B+B+ 	B+ 		B+
 	B+ 	B+ 	B+ 	B+ 	B+ 	B+ 	B+ 	B+ 	B+ 	B+ 	B+  	!B+" 	#B+$ 	%B+& 	'B+( 	)B+* 	+B+, 	-B+. 	/B+0 	1B+2 	3B+4 	5B+6 	7B+8 	9B+: 	;B+< 	=B+> 	?B+@ 	AB+B 	CB+D 	EB+F 	GB+H 	IB+J 	KB+N 	OB+P 	QB+R 	SB+T 	UB+V 	WB+X 	YB+Z 	[B+\ 	]B+^ 	_B+` 	aB+b 	cB+d 	eB+f 	gB+h 	iB+j 	kB+l 	mB+n 	oB+p 	qB+r 	sB+t 	uB+v 	wB+x 	yB+z 	{B+| 	}B+~ 	B+@ 	AB+B 	CB+D 	EB+F 	GB+H 	IB+J 	KB+L 	MB+N 	OB+P 	QB+R 	SB+T 	UB+V 	WB+X 	YB+Z 	[B+\ 	]B+^ 	_B+` 	aB+b 	cB+d 	eB+f 	gB+h 	iB+j 	kB+l 	mB+n 	oB+p 	qB+r 	sB+t 	uB+v 	wB+x 	yB+z 	{B+| 	}B+~ 	B+@ 	AB+B 	CB+D 	EB+F 	GB+H 	IB+J 	KB+L 	MB+N 	OB+P 	QB+R 	SB+T 	UB+V 	WB+X 	YB+Z 	[B+\ 	]B+^ 	_B+` 	aB+b 	cB+d 	eB+f 	gB+h 	iB+j 	kB+l 	mB+n 	oB+p 	qB+r CB+tE3J' BV{.o{.o{. 	k{. 	o	{.
 	o{. 	o{. 	o{. 	k{. 	o{. 	k{. 	o{. 	k{. 	k{. 	o{. 	o{.  	o!{." 	o#{.$ 	o%{.& 	o'{.( 	o){.* 	k+{., 	o-{.. 	k/{.0 	o1{.2 	k3{.4 	k5{.6 	k7{.8 	k9{.: 	k;{.< 	k={.> 	k?{.@ 	kA{.B 	kC{.D 	kE{.F 	kG{.H 	kI{.J 	kK{.L 	kM{.N 	kO{.P 	kQ{.R 	kS{.T 	kU{.V 	kW{.X 	kY{.Z 	k[{.\ 	k]{.^ 	k_{.` 	ka{.b 	kc{.d 	ke{.f 	kg{.h 	ki{.j 	kk{.l 	km{.n 	ko{.p 	kq{.r 	ks{.t 	ku{.v 	kw{.x 	ky{.z 	k{{.| 	k}{.~ 	k{.@ 	kA{.B 	kC{.D 	kE{.F 	kG{.H 	kI{.J 	kK{.L 	kM{.N 	kO{.P 	kQ{.R 	kS{.T 	kU{.V 	kW{.X 	kY{.Z 	k[{.\ 	k]{.^ 	k_{.` 	ka{.b 	kc{.d 	ke{.f 	kg{.h 	ki{.j 	kk{.l 	km{.n 	ko{.p 	kq{.r 	ks{.t 	ku{.v 	kw{.x 	ky{.z 	g{{.| 	k}{.~ 	k{.@ 	kA{.B 	kC{.D 	kE{.F 	kG{.H 	kI{.J 	kK{.L 	kM{.N 	kO{.P 	kQ{.R 	kS{.T 	kU{.V 	kW{.X 	kY{.Z 	k[{.\ 	k]{.^ 	k_{.` 	ka{.b 	kc{.d 	ke{.f 	kg{.h 	ki{.j 	kk{.l 	km{.n 	ko{.p u{.$sEz* {|?eCcM&:!;  #>a"@"CCC "=R!@!C3C $*'5	D%D% !D% %	D%
 
D% D%r"   r   )4r   __license__html.entitiesr   collectionsr   r  r   rI   loggingr   r   typesr	   typingr
   r   r   r   r   r   r   r   r   r   typing_extensionsr   bs4._typingr   r   r   r   r   cchardetImportErrorchardetcharset_normalizerr   r   r!   r#   r$   dictr%   rJ   r   r   objectr*   r\   r   r   r   r"   r    <module>r     s    ( #   	 %    &  (,$ +N&(u (# ( @c ?; 
3 
 04vd4c7l++, 5JJy''0"$$7::l))'2BDD9U 
 JJy"$$'::lBDD)S } }@  , , .v vrb
% b
%S   	  	%/N 		sH   #E# #F*E33F:F FF
FFFFF