
    (phE                    <   S r SSKJr  SrS/rSSKJr  SSKJrJ	r	J
r
JrJrJrJrJrJrJrJr  SSKJrJrJrJrJrJr  SSKJrJr  SS	KJrJrJ r J!r!  SS
K"J#r#  \(       a  SSK$J%r%  SSKJ&r&  SSK'J(r(J)r)J*r*  Sr+\	\\,\,4   \,\,/S4   r- " S S\\5      r. " S S\ 5      r/g)zCUse the HTMLParser library to parse HTML files that aren't too bad.    )annotationsMITHTMLParserTreeBuilder)
HTMLParser)AnyCallablecastDictIterableListOptionalTYPE_CHECKINGTupleTypeUnion)AttributeDictCDataCommentDeclarationDoctypeProcessingInstruction)EntitySubstitutionUnicodeDammit)DetectsXMLParsedAsHTMLHTMLHTMLTreeBuilderSTRICTParserRejectedMarkup)BeautifulSoup)NavigableString)	_Encoding
_Encodings
_RawMarkupzhtml.parserNc                     \ rS rSr% SrS\S'   SrS\S'    \S.       SS jjrS\S
'   S\S'   S	\S'   SS jr      SS jr	 S       S S jjr
SS!S jjrS"S jrS#S jrS#S jrS"S jrS"S jrS"S jrS"S jrSrg)$BeautifulSoupHTMLParser=   replacestrREPLACEignoreIGNOREon_duplicate_attributesoupr    r.   &Union[str, _DuplicateAttributeHandler]c                   Xl         X l        UR                  R                  U l        [        R
                  " U /UQ70 UD6  / U l        U R                  5         g N)r/   r.   builderattribute_dict_classr   __init__already_closed_empty_element_initialize_xml_detector)selfr/   r.   argskwargss        J/var/www/html/venv/lib/python3.13/site-packages/bs4/builder/_htmlparser.pyr5    BeautifulSoupHTMLParser.__init__T   sO     	&<#$(LL$E$E!D24262 -/)%%'    z	List[str]r6   c                    [        U5      er2   r   )r8   messages     r;   errorBeautifulSoupHTMLParser.erroro   s     #7++r=   c                F    U R                  XSS9  U R                  U5        g)zmHandle an incoming empty-element tag.

html.parser only calls this method when the markup looks like
<tag/>.
F)handle_empty_elementN)handle_starttaghandle_endtag)r8   nameattrss      r;   handle_startendtag*BeautifulSoupHTMLParser.handle_startendtag   s%     	TuE4 r=   c           	     x   U R                  5       nU Hc  u  pVUc  SnXT;   aP  U R                  nXpR                  :X  a  M,  USU R                  4;   a  XdU'   MD  [	        [
        U5      nU" XEU5        M_  XdU'   Me     U R                  R                  R                  (       a  U R                  5       u  pOS=pU R                  R                  USSXHU	S9n
U
(       aC  U
R                  (       a2  U(       a+  U R                  USS9  U R                  R                  U5        U R                  c  U R!                  U5        gg)zHandle an opening tag, e.g. '<tag>'

:param handle_empty_element: True if this tag is known to be
    an empty-element tag (i.e. there is not expected to be any
    closing tag).
N )
sourceline	sourceposF)check_already_closed)r4   r.   r,   r*   r	   _DuplicateAttributeHandlerr/   r3   store_line_numbersgetposrD   is_empty_elementrE   r6   append_root_tag_name_root_tag_encountered)r8   rF   rG   rC   	attr_dictkeyvalueon_duperL   rM   tags              r;   rD   'BeautifulSoupHTMLParser.handle_starttag   s+    $(#<#<#>	JC } 55kk)t|| 44%*cN"#=wGGIE2!&#%  , 99//$(KKM!J	%))Jii''$i) ( 
 3'',@ t%@ --44T:&&&t, 'r=   c                    U(       a+  XR                   ;   a  U R                   R                  U5        gU R                  R                  U5        g)zHandle a closing tag, e.g. '</tag>'

:param name: A tag name.
:param check_already_closed: True if this tag is expected to
   be the closing portion of an empty-element tag,
   e.g. '<tag></tag>'.
N)r6   remover/   rE   )r8   rF   rN   s      r;   rE   %BeautifulSoupHTMLParser.handle_endtag   s:      D,M,M$M
 --44T:II##D)r=   c                :    U R                   R                  U5        g)z4Handle some textual data that shows up between tags.N)r/   handle_datar8   datas     r;   r`   #BeautifulSoupHTMLParser.handle_data   s    		d#r=   c                &   UR                  S5      (       a  [        UR                  S5      S5      nO=UR                  S5      (       a  [        UR                  S5      S5      nO[        U5      nSnUS:  aD  U R                  R                  S4 H(  nU(       d  M   [        U/5      R                  U5      nM*     U(       d   [        U5      nU=(       d    SnU R                  U5        g! [         a     Mi  f = f! [        [        4 a     N@f = f)zHandle a numeric character reference by converting it to the
corresponding Unicode character and treating it as textual
data.

:param name: Character number, possibly in hexadecimal.
x   XN   zwindows-1252u   �)
startswithintlstripr/   original_encoding	bytearraydecodeUnicodeDecodeErrorchr
ValueErrorOverflowErrorr`   )r8   rF   	real_namerb   encodings        r;   handle_charref&BeautifulSoupHTMLParser.handle_charref   s     ??3DKK,b1I__S!!DKK,b1ID	Is? "YY88.I$i[188BD	 J 9~ 22 * 
 . s$   C,C= ,
C:9C:=DDc                z    [         R                  R                  U5      nUb  UnOSU-  nU R                  U5        g)zHandle a named entity reference by converting it to the
corresponding Unicode character(s) and treating it as textual
data.

:param name: Name of the entity reference.
Nz&%s)r   HTML_ENTITY_TO_CHARACTERgetr`   )r8   rF   	characterrb   s       r;   handle_entityref(BeautifulSoupHTMLParser.handle_entityref
  s>     '??CCDI	 D 4<Dr=   c                    U R                   R                  5         U R                   R                  U5        U R                   R                  [        5        g)z?Handle an HTML comment.

:param data: The text of the comment.
N)r/   endDatar`   r   ra   s     r;   handle_comment&BeautifulSoupHTMLParser.handle_comment  s8    
 					d#		'"r=   c                    U R                   R                  5         U[        S5      S nU R                   R                  U5        U R                   R                  [        5        g)zIHandle a DOCTYPE declaration.

:param data: The text of the declaration.
zDOCTYPE N)r/   r~   lenr`   r   ra   s     r;   handle_decl#BeautifulSoupHTMLParser.handle_decl&  sI    
 			C
O%&		d#		'"r=   c                "   UR                  5       R                  S5      (       a  [        nU[        S5      S nO[        nU R
                  R                  5         U R
                  R                  U5        U R
                  R                  U5        g)zkHandle a declaration of unknown type -- probably a CDATA block.

:param data: The text of the declaration.
zCDATA[N)upperri   r   r   r   r/   r~   r`   )r8   rb   clss      r;   unknown_decl$BeautifulSoupHTMLParser.unknown_decl0  si     ::<""8,,CH(DC				d#		#r=   c                    U R                   R                  5         U R                   R                  U5        U R                  U5        U R                   R                  [        5        g)zLHandle a processing instruction.

:param data: The text of the instruction.
N)r/   r~   r`   _document_might_be_xmlr   ra   s     r;   	handle_pi!BeautifulSoupHTMLParser.handle_pi?  sG    
 					d###D)		/0r=   )r6   r4   r.   r/   N)r/   r    r9   r   r.   r0   r:   r   )r?   r)   returnNone)rF   r)   rG   List[Tuple[str, Optional[str]]]r   r   )T)rF   r)   rG   r   rC   boolr   r   )rF   r)   rN   r   r   r   )rb   r)   r   r   )rF   r)   r   r   )__name__
__module____qualname____firstlineno__r*   __annotations__r,   r5   r@   rH   rD   rE   r`   ru   r{   r   r   r   r   __static_attributes__ r=   r;   r&   r&   =   s     GS FC$ JQ	(( ( !G	(
 (. CB"++
, !! ?!	!& &*	<-<- /<- #	<-
 
<-|*$$&P&##1r=   r&   c                     ^  \ rS rSr% SrSrS\S'   SrS\S'   \r	S\S	'   \	\
\/rS
\S'   S\S'   SrS\S'     S     SU 4S jjjr   S         SS jjrSS jrSrU =r$ )r   iJ  zA Beautiful soup `bs4.builder.TreeBuilder` that uses the
:py:class:`html.parser.HTMLParser` parser, found in the Python
standard library.

Fr   is_xmlT	picklabler)   NAMEzIterable[str]featuresz$Tuple[Iterable[Any], Dict[str, Any]]parser_argsTRACKS_LINE_NUMBERSc                   > [        5       nS H  nXS;   d  M
  UR                  U5      nXdU'   M!     [        [        U ]  " S0 UD6  U=(       d    / nU=(       d    0 nUR                  U5        SUS'   X4U l        g)aB  Constructor.

:param parser_args: Positional arguments to pass into
    the BeautifulSoupHTMLParser constructor, once it's
    invoked.
:param parser_kwargs: Keyword arguments to pass into
    the BeautifulSoupHTMLParser constructor, once it's
    invoked.
:param kwargs: Keyword arguments for the superclass constructor.
r-   Fconvert_charrefsNr   )dictpopsuperr   r5   updater   )r8   r   parser_kwargsr:   extra_parser_kwargsargrX   	__class__s          r;   r5   HTMLParserTreeBuilder.__init__[  s    $ #f.C}

3+0C( / 	#T3=f=!'R%+01,1()'7r=   c              #  Z  #    [        U[        5      (       a	  USSS4v   g/ nU(       a  UR                  U5        / nU(       a  UR                  U5        [        UUUSUS9nUR                  c  [        S5      eUR                  UR                  UR                  UR                  4v   g7f)a  Run any preliminary steps necessary to make incoming markup
acceptable to the parser.

:param markup: Some markup -- probably a bytestring.
:param user_specified_encoding: The user asked to try this encoding.
:param document_declared_encoding: The markup itself claims to be
    in this encoding.
:param exclude_encodings: The user asked _not_ to try any of
    these encodings.

:yield: A series of 4-tuples: (markup, encoding, declared encoding,
     has undergone character replacement)

    Each 4-tuple represents a strategy for parsing the document.
    This TreeBuilder uses Unicode, Dammit to convert the markup
    into Unicode, so the ``markup`` element of the tuple will
    always be a string.
NFT)known_definite_encodingsuser_encodingsis_htmlexclude_encodingszPCould not convert input to Unicode, and html.parser will not accept bytestrings.)	
isinstancer)   rS   r   unicode_markupr   rl   declared_html_encodingcontains_replacement_characters)r8   markupuser_specified_encodingdocument_declared_encodingr   r   r   dammits           r;   prepare_markup$HTMLParserTreeBuilder.prepare_markupy  s     2 fc""4u-- 57 "
 %++,CD*,% !!"<=%=)/
   ( 'b 
 %%((--66	 s   B)B+c                *   U R                   u  p#[        U[        5      (       d   eU R                  c   e[	        U R                  /UQ70 UD6n UR                  U5        UR                  5         / Ul	        g ! [         a  n[        U5      eS nAff = fr2   )
r   r   r)   r/   r&   feedcloseAssertionErrorr   r6   )r8   r   r9   r:   parseres         r;   r   HTMLParserTreeBuilder.feed  s    '' &#&&&&
 yy$$$(DTDVD	*KKLLN /1+  	* 'q))		*s   !A8 8
BBB)r   )NN)r   zOptional[Iterable[Any]]r   zOptional[Dict[str, Any]]r:   r   )NNN)
r   r$   r   Optional[_Encoding]r   r   r   zOptional[_Encodings]r   zDIterable[Tuple[str, Optional[_Encoding], Optional[_Encoding], bool]])r   r$   r   r   )r   r   r   r   __doc__r   r   r   
HTMLPARSERr   r   r   r   r   r5   r   r   r   __classcell__)r   s   @r;   r   r   J  s     FDItD##T62Hm255 !%$ 04268,8 08 	8 8B 8<:>26FF "5F %8	F
 0F 
NFP1 1r=   )0r   
__future__r   __license____all__html.parserr   typingr   r   r	   r
   r   r   r   r   r   r   r   bs4.elementr   r   r   r   r   r   
bs4.dammitr   r   bs4.builderr   r   r   r   bs4.exceptionsr   bs4r    r!   bs4._typingr"   r#   r$   r   r)   rO   r&   r   r   r=   r;   <module>r      s    I "   #     9  0!+  
%tCH~sC&@$&FG J1j*@ J1ZP1O P1r=   