B ܠg!@sddlZddlZddlZddlmZdgZedZedZedZ edZ edZ ed Z ed Z ed Zed Zed ejZed ZedZGdddejZdS)N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#c@seZdZdZddddZddZdd Zd d Zd Zd dZ ddZ ddZ ddZ ddZ d8ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd S)9r)ZscriptZstyleT)convert_charrefscCs||_|dS)N)rreset)selfrr./opt/alt/python37/lib/python3.7/html/parser.py__init__WszHTMLParser.__init__cCs(d|_d|_t|_d|_tj|dS)Nz???)rawdatalasttaginteresting_normal interesting cdata_elem _markupbase ParserBaser)rrrr r`s zHTMLParser.resetcCs|j||_|ddS)Nr)r goahead)rdatarrr feedhs zHTMLParser.feedcCs|ddS)N)r)rrrr closeqszHTMLParser.closeNcCs|jS)N)_HTMLParser__starttag_text)rrrr get_starttag_textwszHTMLParser.get_starttag_textcCs$||_td|jtj|_dS)Nz )lowerrrecompileIr)relemrrr set_cdata_mode{s zHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)rrrr clear_cdata_modeszHTMLParser.clear_cdata_modec Cs@|j}d}t|}x||kr|jrx|jsx|d|}|dkr|dt||d}|dkrrtd ||srP|}n(|j ||}|r| }n |jrP|}||kr|jr|js| t |||n| ||||||}||krP|j}|d|rDt||r ||} n|d|r8||} nl|d|rP||} nT|d|rh||} n<|d |r||} n$|d |kr| d|d } nP| dkr6|sP|d |d } | dkr|d|d } | dkr|d } n| d 7} |jr$|js$| t ||| n| ||| ||| }q|d |rt||}|r|d d} || |} |d| d s| d } ||| }qn:d||dkr| |||d |||d }Pq|d|rt||}|rF|d } || |} |d| d s8| d } ||| }qt||}|r|r|||dkr|} | |kr|} |||d }Pn,|d |kr| d|||d }nPqqW|r.||kr.|js.|jr|js| t |||n| ||||||}||d|_dS)Nr<&"z[\s;]z z/>)rcheck_for_whole_start_tagr tagfind_tolerantr0r9r7rr attrfind_tolerantrappendstripZgetposcountr'r)r-endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr)rr=endposr attrsr0r@tagmZattrnamerestZ attrvaluer9linenooffsetrrr r1-sP   &(         zHTMLParser.parse_starttagcCs|j}t||}|r|}|||d}|dkr>|dS|dkr~|d|rZ|dS|d|rjdS||krv|S|dS|dkrdS|dkrdS||kr|S|dStd dS) Nrr/z/>r$r%r z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r locatestarttagend_tolerantr0r9r.AssertionError)rr=r r\r?nextrrr rO`s.   z$HTMLParser.check_for_whole_start_tagcCs|j}t||d}|sdS|}t||}|s|jdk rV|||||St||d}|s|||ddkr|dS| |S| d }| d|}| ||dS| d }|jdk r||jkr|||||S| |||S)Nrr%r$rCzr)r endendtagr+r9 endtagfindr0rr-rPrFr7rr( handle_endtagr )rr=r r0rGZ namematchZtagnamerrrr r2s6       zHTMLParser.parse_endtagcCs|||||dS)N)rWrf)rr[rZrrr rVs zHTMLParser.handle_startendtagcCsdS)Nr)rr[rZrrr rWszHTMLParser.handle_starttagcCsdS)Nr)rr[rrr rfszHTMLParser.handle_endtagcCsdS)Nr)rrArrr r8szHTMLParser.handle_charrefcCsdS)Nr)rrArrr r;szHTMLParser.handle_entityrefcCsdS)Nr)rrrrr r-szHTMLParser.handle_datacCsdS)Nr)rrrrr rHszHTMLParser.handle_commentcCsdS)Nr)rZdeclrrr rEszHTMLParser.handle_declcCsdS)Nr)rrrrr rKszHTMLParser.handle_picCsdS)Nr)rrrrr unknown_declszHTMLParser.unknown_declcCstjdtddt|S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead.r$) stacklevel)warningswarnDeprecationWarningr)rsrrr rs zHTMLParser.unescape)r)__name__ __module__ __qualname__rXr rrrrrrr rr5rFr4r1rOr2rVrWrfr8r;r-rHrErKrgrrrrr r?s6  z  3"()rrirZhtmlr__all__rrr<r:r6r/rJZ commentcloserPrQVERBOSErardrerrrrrr  s&