
    (phE                         S SK r S SKrS SKrS SKrS SKJr  S SKJrJrJr   " S S\	5      r
 " S S\	5      r " S S	\	5      rg)
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc                       \ rS rSrSrS r\R                  R                  S/ SQ5      S 5       r	S r
S rS	 rS
 rS rS rSrg)TestUnicodeDammit   z"Standalone tests of UnicodeDammit.c                 B    Sn[        U5      nUR                  U:X  d   eg )Nu   I'm already Unicode! ☃)r   unicode_markup)selfmarkupdammits      H/var/www/html/venv/lib/python3.13/site-packages/bs4/tests/test_dammit.pytest_unicode_input$TestUnicodeDammit.test_unicode_input   s%    3v&$$...    z smart_quotes_to,expect_converted))Nu   ‘’“”)xmlz &#x2018;&#x2019;&#x201C;&#x201D;)htmlz&lsquo;&rsquo;&ldquo;&rdquo;)asciiz''""c                 b    Sn[        US/US9R                  nUSR                  U5      :X  d   eg)zZVerify the functionality of the smart_quotes_to argument
to the UnicodeDammit constructor.s   <foo></foo>windows-1252)known_definite_encodingssmart_quotes_toz<foo>{}</foo>N)r   r   format)r   r   expect_convertedr   	converteds        r   test_smart_quotes_to&TestUnicodeDammit.test_smart_quotes_to   sE     0!&4%5+
 .	 	
 O223CDDDDr   c                     Sn[        U5      nUR                  R                  5       S:X  d   eUR                  S:X  d   eg )Ns   Sacré bleu! ☃utf-8u   Sacré bleu! ☃r   original_encodinglowerr   )r   utf8r   s      r   test_detect_utf8"TestUnicodeDammit.test_detect_utf8*   sC    1t$''--/7:::$$(DDDDr   c                     Sn[        US/5      nUR                  R                  5       S:X  d   eUR                  S:X  d   eg )N   
iso-8859-8u   םולשr!   )r   hebrewr   s      r   test_convert_hebrew%TestUnicodeDammit.test_convert_hebrew0   sG    $v~6''--/<???$$(BBBBr   c                     Sn[        U5      nUR                  R                  5       S:X  d   eUR                  R	                  S5      U:X  d   eg )Ns   ケータイ Watchr    )r   r"   r#   r   encode)r   utf_8r   s      r   /test_dont_see_smart_quotes_where_there_are_noneATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_none6   sM    Iu%''--/7:::$$++G4===r   c                     SR                  S5      n[        US/5      nUR                  R                  5       S:X  d   eg )N   Räksmörgåsr    r)   r.   r   r"   r#   r   	utf8_datar   s      r    test_ignore_inappropriate_codecs2TestUnicodeDammit.test_ignore_inappropriate_codecs<   s=    #**73	y<.9''--/7:::r   c                     SR                  S5      nS H/  n[        X/5      nUR                  R                  5       S:X  a  M/   e   g )Nr3   r    )z.utf8z...z
utF---16.!r4   )r   r6   bad_encodingr   s       r   test_ignore_invalid_codecs,TestUnicodeDammit.test_ignore_invalid_codecsA   sE    #**73	:L"9n=F++113w>>> ;r   c                     SR                  S5      n[        US/S9nUR                  R                  5       S:X  d   e[        USS/S9nUR                  b   eg )Nr3   r    )exclude_encodingsr   r4   r5   s      r   test_exclude_encodings(TestUnicodeDammit.test_exclude_encodingsG   sd    #**73	 yWIF''--/>AAA yWn<UV''///r    N)__name__
__module____qualname____firstlineno____doc__r   pytestmarkparametrizer   r%   r+   r0   r7   r;   r?   __static_attributes__rA   r   r   r   r      sW    ,/
 [[*	
	E	EEC>;
?0r   r   c                   J    \ rS rSrS rS rS rS rS rS r	S r
S	 rS
 rSrg)TestEncodingDetectorU   c                 T    [        S5      n[        UR                  5      nSU;   d   eg )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodings)r   detectedrP   s      r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_charactereTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterV   s-     $$QR++,	.);;;r   c                 N    S H  n[        USS9nSUR                  :X  a  M   e   g )N)s&   <html><meta charset="euc-jp" /></html>s&   <html><meta charset='euc-jp' /></html>s$   <html><meta charset=euc-jp /></html>s#   <html><meta charset=euc-jp/></html>Tis_htmlzeuc-jp)r   r"   r   datar   s      r    test_detect_html5_style_meta_tag5TestEncodingDetector.test_detect_html5_style_meta_tag]   s0    
D #46Fv77777
r   c                 @   Sn[         R                  R                  n[        R                  " [        R
                  5         S nU[         R                  l        [        U5      nSUR                  L d   eSUR                  ;   d   e[        US5      nUR                  (       d   e [        R                  " [        R                  5        U[         R                  l        g ! [        R                  " [        R                  5        U[         R                  l        f = f)NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c                     g NrA   )strs    r   noopETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noop~   s    r   Tu   �zhtml.parser)bs4r   _chardet_dammitloggingdisableWARNINGr   contains_replacement_charactersr   r   NOTSET)r   docchardetr_   r   soups         r   "test_last_ditch_entity_replacement7TestEncodingDetector.test_last_ditch_entity_replacementg   s     2 **,,(	1 *.CJJ&"3'F6AAAAAv44444 m4D7777OOGNN+)0CJJ& OOGNN+)0CJJ&s   A%C" ";Dc                 f    Sn[        U5      nSUR                  :X  d   eSUR                  :X  d   eg )N   < a >   < / a > u   <a>áé</a>utf-16le)r   r   r"   rW   s      r   test_byte_order_mark_removed1TestEncodingDetector.test_byte_order_mark_removed   s9    Mt$ 5 5555V55555r   c                    Sn[        U5      n[        US/S9nSUR                  :X  d   e[        US/S9nSUR                  :X  d   eS/UR                   Vs/ s H  oUS   PM	     sn:X  d   eSn[        US/S	/S
9nS	UR                  :X  d   eSS	/UR                   Vs/ s H  oUS   PM	     sn:X  d   eg s  snf s  snf )Nrn   zutf-16)r   r    )user_encodingsro   r   r(   r)   )r   rs   )r   r"   tried_encodings)r   rX   r   beforeafterxr*   s          r   )test_known_definite_versus_user_encodings>TestEncodingDetector.test_known_definite_versus_user_encodings   s    
 Nt$ txjI633333
 dG9=U44444|f.D.DE.D!.DEEEE %gY~
 v77777&9O9O*P9OAQ49O*PPPP  F +Qs   B7 B<c                 x   Sn[         R                  " SS9 n[        US/S/S/S9nS S S 5        Wu  nUR                  n[	        U[
        5      (       d   eUR                  [        :X  d   eSWR                  :X  d   e/ SQUR                   Vs/ s H  ofS	   PM	     sn:X  d   eg ! , (       d  f       N= fs  snf )
Nr(   T)record	shift-jisr    r)   )r   override_encodingsrs   )r|   r    r)   r   )
warningscatch_warningsr   message
isinstanceDeprecationWarningfilename__file__r"   rt   )r   r*   wr   warningr   rw   s          r   "test_deprecated_override_encodings7TestEncodingDetector.test_deprecated_override_encodings   s     %$$D1Q"*5$+9 ,~	F 2 	//'#566668+++v77777 4!1121aqT12
 	
 
 21  3s   B&B7&
B4c                 :   SR                  S5      nSR                  S5      nX-   U-   n[        R                  " [        5         UR	                  S5        S S S 5        [
        R                  " U5      nSUR	                  S5      :X  d   eg ! , (       d  f       N<= f)Nu	   ☃☃☃r$   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃)r.   rG   raisesUnicodeDecodeErrordecoder   	detwingle)r   r$   r   rh   fixeds        r   test_detwingle#TestEncodingDetector.test_detwingle   s    !))&1.
&
  	 !D( ]]-.JJv / '',<V@TTTT /.s   B
Bc                     S HI  nUR                  S5      nUR                  S5      (       d   e[        R                  " U5      nX2:X  a  MI   e   g )N)u   œu   ₓu   ðr$      )r.   endswithr   r   )r   tricky_unicode_charinputoutputs       r   +test_detwingle_ignores_multibyte_characters@TestEncodingDetector.test_detwingle_ignores_multibyte_characters   sP    
$

 (..v6E>>'****",,U3F?"?$
r   c                    SnUR                  S5      nSnUR                  S5      n[        R                  nU" USS9b   eSU" USS9:X  d   eSU" USS9:X  d   eSU" U5      :X  d   eSU" U5      :X  d   eS	S
-  nU" Xb-   5      b   eU" Xd-   5      b   eU" Xb-   SSS9S:X  d   eU" USS9S:X  d   eU" S	U-   SS9S:X  d   eU" SU-   SS9b   eg )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>FrU   r    Tz
iso-8859-1    i  )rV   search_entire_document)r      a)r.   r   find_declared_encoding)r   html_unicode
html_bytesxml_unicode	xml_bytesmspacers          r   test_find_declared_encoding0TestEncodingDetector.test_find_declared_encoding   s6    J!((1
D&&w/	33u-555!L$7777!J5555q~---q|+++ $%---#$,,,
 f!4MQXX	
X 48LHHH	!$?<OOO	!$?GGGr   rA   N)rB   rC   rD   rE   rR   rY   rk   rp   rx   r   r   r   r   rJ   rA   r   r   rL   rL   U   s4    <8#1J6Q@
.U0#'Hr   rL   c                      \ rS rSrSrS r\R                  R                  SSS/5      S 5       r	S r
S	 rS
 rS rS rS rS rS rS rS rS rS r\R                  R                  S/ SQ5      S 5       r\R                  R                  S/ SQ5      S 5       r\R                  R                  SS/5      S 5       rSrg)TestEntitySubstitutioni  z1Standalone tests of the EntitySubstitution class.c                     [         U l        g r]   )r   subr   s    r   setup_method#TestEntitySubstitution.setup_method  s	    %r   zoriginal,substituted)u   foo∀☃õbaru   foo&forall;☃&otilde;bar)u   ‘’foo“”z&lsquo;&rsquo;foo&ldquo;&rdquo;c                 F    U R                   R                  U5      U:X  d   eg r]   r   substitute_html)r   originalsubstituteds      r   test_substitute_html+TestEntitySubstitution.test_substitute_html!  s!     xx''1[@@@r   c                 p    S H0  u  pSnX2-  nX1-  nU R                   R                  U5      U:X  a  M0   e   g )N)	)z&models;u   ⊧)z&Nfr;u   𝔑)z&ngeqq;u   ≧̸)z&not;   ¬)z&Not;u   ⫬z||)fjr   )z&gt;>)z&lt;<z3 %s 4r   )r   entityutemplaterawwith_entitiess         r   test_html5_entity(TestEntitySubstitution.test_html5_entity/  sC    
IF(  H,C$-M88++C0MAAA/
r   c                     SnSnU R                   R                  U5      U:X  d   eSnSnU R                   R                  U5      U:X  d   eg )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguinsr   )r   rX   r   s      r   )test_html5_entity_with_variation_selector@TestEntitySubstitution.test_html5_entity_with_variation_selectorI  sR     (*xx''-777-+xx''-777r   c                 L    SnU R                   R                  US5      U:X  d   eg )NWelcome to "my bar"Fr   substitute_xmlr   ss     r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseU  s&    !xx&&q%0A555r   c                     U R                   R                  SS5      S:X  d   eU R                   R                  SS5      S:X  d   eg )NWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar"r   r   s    r   6test_xml_attribute_quoting_normally_uses_double_quotesMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesY  s@    xx&&y$7;FFFxx&&{D9^KKKr   c                 L    SnU R                   R                  US5      S:X  d   eg )Nr   Tz'Welcome to "my bar"'r   r   s     r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes]  s)     "xx&&q$/3LLLLr   c                 L    SnU R                   R                  US5      S:X  d   eg )NWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"r   r   s     r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesc  s)     &xx&&q$/3XXXXr   c                 J    SnU R                   R                  U5      U:X  d   eg )Nr   r   )r   quoteds     r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quotedSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quotedi  s$    *xx&&v.&888r   c                 F    U R                   R                  S5      S:X  d   eg )Nzfoo<bar>zfoo&lt;bar&gt;r   r   s    r   'test_xml_quoting_handles_angle_brackets>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsm  s     xx&&z26FFFFr   c                 F    U R                   R                  S5      S:X  d   eg )NzAT&TzAT&amp;Tr   r   s    r   #test_xml_quoting_handles_ampersands:TestEntitySubstitution.test_xml_quoting_handles_ampersandsp  s    xx&&v.*<<<r   c                 F    U R                   R                  S5      S:X  d   eg )N&Aacute;T&Tz&amp;Aacute;T&amp;Tr   r   s    r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entity\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entitys  s     xx&&}59NNNNr   c                 F    U R                   R                  S5      S:X  d   eg )Nr   z&Aacute;T&amp;T)r   "substitute_xml_containing_entitiesr   s    r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityv  s&    HH77F !	
!r   c                 J    SnU R                   R                  U5      U:X  d   eg)z:There's no need to do this except inside attribute values.zBob's "bar"Nr   )r   texts     r    test_quotes_not_html_substituted7TestEntitySubstitution.test_quotes_not_html_substituted|  s$    xx''-555r   zmarkup, old))z	foo & barzfoo &amp; bar)zfoo&zfoo&amp;)z
foo&&& barzfoo&amp;&amp;&amp; bar)zx=1&y=2zx=1&amp;y=2)z&123z&amp;123)z&abcz&amp;abc)z
foo &0 barzfoo &amp;0 bar)zfoo &lolwat barzfoo &amp;lolwat barc                     U R                   R                  U5      U:X  d   eU R                   R                  U5      U:X  d   eg r]   r   r   substitute_html5_raw)r   r   olds      r   'test_unambiguous_ampersands_not_escaped>TestEntitySubstitution.test_unambiguous_ampersands_not_escaped  s>     xx''/3666xx,,V4>>>r   zmarkup,html,html5,html5raw))&divide;&amp;divide;r   r   )z
&nonesuch;&amp;nonesuch;r   r   )z&#247;
&amp;#247;r   r   )z&#xa1;
&amp;#xa1;r   r   c                     U R                   R                  U5      U:X  d   eU R                   R                  U5      U:X  d   eU R                   R                  U5      U:X  d   eg r]   )r   r   substitute_html5r   )r   r   r   html5html5raws        r   'test_when_entity_ampersands_are_escaped>TestEntitySubstitution.test_when_entity_ampersands_are_escaped  s[    , xx''/4777xx((0E999xx,,V4@@@r   zmarkup,expect)z&nosuchentity;z&amp;nosuchentity;c                     U R                   R                  U5      U:X  d   eU R                   R                  U5      U:X  d   eg r]   r   )r   r   expects      r   !test_ambiguous_ampersands_escaped8TestEntitySubstitution.test_ambiguous_ampersands_escaped  s>     xx''/6999xx,,V4>>>r   )r   N)rB   rC   rD   rE   rF   r   rG   rH   rI   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   rA   r   r   r   r     s	   ;& [[ S C	

A
AB4
86LMY9G=O
6
 [[		
?? [[$	
AA" [[BC??r   r   )rG   rc   r~   ra   r   
bs4.dammitr   r   r   objectr   rL   r   rA   r   r   <module>r     sL       
  D0 D0NCH6 CHLV?V V?r   