o
    á½	iÎG  ã                   @   sÂ   d Z ddlZddlZddlmZ dgZe d¡Ze d¡Ze d¡Z	e d¡Z
e d	¡Ze d
¡Ze d¡Ze d¡Ze d¡Ze d¡Ze dej¡Ze d¡Ze d¡ZG dd„ dejƒZdS )zA parser for HTML and XHTML.é    N)ÚunescapeÚ
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]ú>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   sà   e Zd ZdZdZddœdd„Zdd„ Zd	d
„ Zdd„ ZdZ	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zd7dd„Zdd„ Zdd„ Zdd „ Zd!d"„ Zd#d$„ Zd%d&„ Zd'd(„ Zd)d*„ Zd+d,„ Zd-d.„ Zd/d0„ Zd1d2„ Zd3d4„ Zd5d6„ ZdS )8r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )ÚscriptÚstyleT)Úconvert_charrefsc                C   s   || _ |  ¡  dS )zÆInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)r   Úreset)Úselfr   © r
   ú"/usr/lib/python3.10/html/parser.pyÚ__init__W   s   zHTMLParser.__init__c                 C   s(   d| _ d| _t| _d| _tj | ¡ dS )z1Reset this instance.  Loses all unprocessed data.Ú z???N)ÚrawdataÚlasttagÚinteresting_normalÚinterestingÚ
cdata_elemÚ_markupbaseÚ
ParserBaser   ©r	   r
   r
   r   r   `   s
   zHTMLParser.resetc                 C   s   | j | | _ |  d¡ dS )z‘Feed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   Úgoahead©r	   Údatar
   r
   r   Úfeedh   s   zHTMLParser.feedc                 C   s   |   d¡ dS )zHandle any buffered data.é   N)r   r   r
   r
   r   Úcloseq   s   zHTMLParser.closeNc                 C   s   | j S )z)Return full source of start tag: '<...>'.)Ú_HTMLParser__starttag_textr   r
   r
   r   Úget_starttag_textw   s   zHTMLParser.get_starttag_textc                 C   s$   |  ¡ | _t d| j tj¡| _d S )Nz</\s*%s\s*>)Úlowerr   ÚreÚcompileÚIr   )r	   Úelemr
   r
   r   Úset_cdata_mode{   s   
zHTMLParser.set_cdata_modec                 C   s   t | _d | _d S ©N)r   r   r   r   r
   r
   r   Úclear_cdata_mode   s   
zHTMLParser.clear_cdata_modec                 C   s  | j }d}t|ƒ}||k rU| jr;| js;| d|¡}|dk r:| dt||d ƒ¡}|dkr8t d¡ 	||¡s8n|}n| j
 	||¡}|rI| ¡ }n| jrNn|}||k ro| jrf| jsf|  t|||… ƒ¡ n	|  |||… ¡ |  ||¡}||kr{nÚ|j}|d|ƒrŒt ||¡r|  |¡}	n@|d|ƒr›|  |¡}	n5|d|ƒr¦|  |¡}	n*|d|ƒr±|  |¡}	n|d	|ƒr¼|  |¡}	n|d
 |k sÄ|rÎ|  d¡ |d
 }	nn…|	dk r…|sÙn|t ||¡ràn£|d|ƒr|d |krò|  d¡ n‘t ||¡rùnŠ|  ||d d … ¡ n~|d|ƒr0|}dD ]}
| |
|d ¡r"|t|
ƒ8 } nq|  ||d |… ¡ nS|d|ƒrB|  ||d d … ¡ nA|||d …  ¡ dkr[|  ||d d … ¡ n(|d	|ƒrm|  ||d d … ¡ n|d|ƒr|  ||d d … ¡ ntdƒ‚|}	|  ||	¡}nÅ|d|ƒrÜt ||¡}|r¿|  ¡ dd… }|  !|¡ | "¡ }	|d|	d
 ƒs¸|	d
 }	|  ||	¡}q	d||d … v rÛ|  |||d … ¡ |  ||d ¡}ny|d|ƒrMt# ||¡}|r|  d
¡}|  $|¡ | "¡ }	|d|	d
 ƒs|	d
 }	|  ||	¡}q	t% ||¡}|r7|r6|  ¡ ||d … kr6| "¡ }	|	|kr.|}	|  ||d
 ¡}n|d
 |k rL|  d¡ |  ||d
 ¡}nnJ dƒ‚||k s|r„||k r„| js„| jru| jsu|  t|||… ƒ¡ n	|  |||… ¡ |  ||¡}||d … | _ d S )Nr   ú<ú&é"   z[\s;]ú</ú<!--ú<?ú<!r   é   )z--!z--ú-é   z	<![CDATA[é   é	   ú	<!doctypeúwe should not get here!z&#éÿÿÿÿú;zinteresting.search() lied)&r   Úlenr   r   ÚfindÚrfindÚmaxr   r    Úsearchr   ÚstartÚhandle_datar   Ú	updateposÚ
startswithÚstarttagopenÚmatchÚparse_starttagÚparse_endtagÚparse_commentÚparse_piÚparse_html_declarationÚ
endtagopenÚhandle_commentÚendswithÚunknown_declr   Úhandle_declÚ	handle_piÚAssertionErrorÚcharrefÚgroupÚhandle_charrefÚendÚ	entityrefÚhandle_entityrefÚ
incomplete)r	   rP   r   ÚiÚnÚjÚampposr@   r>   ÚkÚsuffixÚnamer
   r
   r   r   †   sè   
ÿ€







þ




…}zHTMLParser.goaheadc                 C   sº   | j }|||d … dksJ dƒ‚|||d … dkr |  |¡S |||d … dkr/|  |¡S |||d …  ¡ d	krX| d
|d ¡}|dkrIdS |  ||d |… ¡ |d S |  |¡S )Nr-   r,   z+unexpected call to parse_html_declaration()r/   r*   r0   z<![r1   r2   r   r4   r   )r   rC   Úparse_marked_sectionr   r7   rJ   Úparse_bogus_comment)r	   rT   r   Úgtposr
   r
   r   rE     s   


z!HTMLParser.parse_html_declarationr   c                 C   s`   | j }|||d … dv sJ dƒ‚| d|d ¡}|dkrdS |r,|  ||d |… ¡ |d S )Nr-   )r,   r)   z"unexpected call to parse_comment()r   r4   r   )r   r7   rG   )r	   rT   Úreportr   Úposr
   r
   r   r\   '  s   zHTMLParser.parse_bogus_commentc                 C   sd   | j }|||d … dksJ dƒ‚t ||d ¡}|sdS | ¡ }|  ||d |… ¡ | ¡ }|S )Nr-   r+   zunexpected call to parse_pi()r4   )r   Úpicloser:   r;   rK   rP   )r	   rT   r   r@   rV   r
   r
   r   rD   3  s   zHTMLParser.parse_pic                 C   sä  d | _ |  |¡}|dk r|S | j}|||… | _ g }t ||d ¡}|s(J dƒ‚| ¡ }| d¡ ¡  | _}||k r–t	 ||¡}|sCnS| ddd¡\}	}
}|
sRd }n-|d d… d  krd|dd … ksyn |d d… d  krw|dd … krn n|dd… }|r…t
|ƒ}| |	 ¡ |f¡ | ¡ }||k s:|||…  ¡ }|d	vrÓ|  ¡ \}}d
| j v rÁ|| j  d
¡ }t| j ƒ| j  d
¡ }n|t| j ƒ }|  |||… ¡ |S | d¡rà|  ||¡ |S |  ||¡ || jv rð|  |¡ |S )Nr   r   z#unexpected call to parse_starttag()r-   r0   ú'r4   ú")r   ú/>Ú
rc   )r   Úcheck_for_whole_start_tagr   Útagfind_tolerantr@   rP   rN   r   r   Úattrfind_tolerantr   ÚappendÚstripÚgetposÚcountr6   r8   r<   rH   Úhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr#   )r	   rT   Úendposr   Úattrsr@   rX   ÚtagÚmÚattrnameÚrestÚ	attrvaluerP   ÚlinenoÚoffsetr
   r
   r   rA   ?  sX   
&(ó

ÿ
ý

zHTMLParser.parse_starttagc                 C   s²   | j }t ||¡}|rU| ¡ }|||d … }|dkr|d S |dkr?| d|¡r-|d S | d|¡r5dS ||kr;|S |d S |dkrEdS |dv rKdS ||krQ|S |d S td	ƒ‚)
Nr   r   ú/rc   r-   r4   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZr3   )r   Úlocatestarttagend_tolerantr@   rP   r>   rL   )r	   rT   r   rr   rV   Únextr
   r
   r   re   r  s.   z$HTMLParser.check_for_whole_start_tagc                 C   s*  | j }|||d … dksJ dƒ‚t ||d ¡}|sdS | ¡ }t ||¡}|sn| jd ur9|  |||… ¡ |S t ||d ¡}|sV|||d … dkrQ|d S |  	|¡S | 
d¡ ¡ }| d| ¡ ¡}|  |¡ |d S | 
d¡ ¡ }| jd urŠ|| jkrŠ|  |||… ¡ |S |  |¡ |  ¡  |S )	Nr-   r)   zunexpected call to parse_endtagr   r4   r0   z</>r   )r   Ú	endendtagr:   rP   Ú
endtagfindr@   r   r<   rf   r\   rN   r   r7   Úhandle_endtagr%   )r	   rT   r   r@   r]   Ú	namematchÚtagnamer"   r
   r
   r   rB   ”  s8   





zHTMLParser.parse_endtagc                 C   s   |   ||¡ |  |¡ d S r$   )rm   r}   ©r	   rq   rp   r
   r
   r   rl   ¼  s   zHTMLParser.handle_startendtagc                 C   ó   d S r$   r
   r€   r
   r
   r   rm   Á  ó   zHTMLParser.handle_starttagc                 C   r   r$   r
   )r	   rq   r
   r
   r   r}   Å  r‚   zHTMLParser.handle_endtagc                 C   r   r$   r
   ©r	   rZ   r
   r
   r   rO   É  r‚   zHTMLParser.handle_charrefc                 C   r   r$   r
   rƒ   r
   r
   r   rR   Í  r‚   zHTMLParser.handle_entityrefc                 C   r   r$   r
   r   r
   r
   r   r<   Ñ  r‚   zHTMLParser.handle_datac                 C   r   r$   r
   r   r
   r
   r   rG   Õ  r‚   zHTMLParser.handle_commentc                 C   r   r$   r
   )r	   Údeclr
   r
   r   rJ   Ù  r‚   zHTMLParser.handle_declc                 C   r   r$   r
   r   r
   r
   r   rK   Ý  r‚   zHTMLParser.handle_pic                 C   r   r$   r
   r   r
   r
   r   rI   à  r‚   zHTMLParser.unknown_decl)r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__rn   r   r   r   r   r   r   r#   r%   r   rE   r\   rD   rA   re   rB   rl   rm   r}   rO   rR   r<   rG   rJ   rK   rI   r
   r
   r
   r   r   ?   s:    		 
3"()rˆ   r   r   Úhtmlr   Ú__all__r    r   rS   rQ   rM   r?   rF   r`   Úcommentcloserf   rg   ÚVERBOSEry   r{   r|   r   r   r
   r
   r
   r   Ú<module>   s.    









ÿò

