PNG  IHDR;IDATxܻn0K )(pA 7LeG{ §㻢|ذaÆ 6lذaÆ 6lذaÆ 6lom$^yذag5bÆ 6lذaÆ 6lذa{ 6lذaÆ `}HFkm,mӪôô! x|'ܢ˟;E:9&ᶒ}{v]n&6 h_tڠ͵-ҫZ;Z$.Pkž)!o>}leQfJTu іچ\X=8Rن4`Vwl>nG^is"ms$ui?wbs[m6K4O.4%/bC%t Mז -lG6mrz2s%9s@-k9=)kB5\+͂Zsٲ Rn~GRC wIcIn7jJhۛNCS|j08yiHKֶۛkɈ+;SzL/F*\Ԕ#"5m2[S=gnaPeғL lذaÆ 6l^ḵaÆ 6lذaÆ 6lذa; _ذaÆ 6lذaÆ 6lذaÆ RIENDB`  oY?E@sdZddlZddlZddlZddlmZdgZejdZejdZ ejdZ ejdZ ejd Z ejd Z ejd Zejd Zejd ZejdejZejd ZejdZGdddejZdS)zA parser for HTML and XHTML.N)unescape HTMLParserz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]>z--\s*>z$([a-zA-Z][^ />]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#c@sWeZdZdZd:ZddddZdd Zd d Zd d ZdZ ddZ ddZ ddZ ddZ ddZdddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9ZdS);raEFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). If convert_charrefs is True the character references are converted automatically to the corresponding Unicode character (and self.handle_data() is no longer split in chunks), otherwise they are passed by calling self.handle_entityref() or self.handle_charref() with the string containing respectively the named or numeric reference as the argument. scriptstyleconvert_charrefsTcCs||_|jdS)zInitialize and reset this instance. If convert_charrefs is True (the default), all character references are automatically converted to the corresponding Unicode characters. N)rreset)selfrr %/opt/python35/lib/python3.5/parser.py__init__Ws zHTMLParser.__init__cCs8d|_d|_t|_d|_tjj|dS)z1Reset this instance. Loses all unprocessed data.z???N)rawdatalasttaginteresting_normal interesting cdata_elem _markupbase ParserBaser)r r r r r`s     zHTMLParser.resetcCs!|j||_|jddS)zFeed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). rN)rgoahead)r datar r r feedhszHTMLParser.feedcCs|jddS)zHandle any buffered data.N)r)r r r r closeqszHTMLParser.closeNcCs|jS)z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_text)r r r r get_starttag_textwszHTMLParser.get_starttag_textcCs2|j|_tjd|jtj|_dS)Nz )lowerrrecompileIr)r elemr r r set_cdata_mode{szHTMLParser.set_cdata_modecCst|_d|_dS)N)rrr)r r r r clear_cdata_modes zHTMLParser.clear_cdata_modec Cs|j}d}t|}x||kr|jr|j r|jd|}|dkr|jdt||d}|dkrtjdj || rP|}n:|j j ||}|r|j }n|jrP|}||kr<|jr%|j r%|j t |||n|j ||||j||}||kr[P|j}|d|rtj||r|j|} n|d|r|j|} n|d|r|j|} nm|d|r|j|} nL|d |r|j|} n+|d |krE|j d|d } nP| dkr|sYP|jd |d } | dkr|jd|d } | dkr|d } n | d 7} |jr|j r|j t ||| n|j ||| |j|| }q|d |rtj||}|r|jd d} |j| |j} |d| d s| d } |j|| }qqd||dkr|j |||d |j||d }Pq|d|rtj||}|rj|jd } |j| |j} |d| d sU| d } |j|| }qtj||}|r|r|j||dkr|j} | |kr|} |j||d }Pq|d |kr|j d|j||d }qPqqW|r||kr|j r|jri|j ri|j t |||n|j ||||j||}||d|_dS)Nr<&"z[\s;]z r(r(r()rrP)rcheck_for_whole_start_tagrtagfind_tolerantr2r;r9rrattrfind_tolerantrappendstripZgetposcountr)r+r/endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr!)r r?endposrattrsr2rBtagmZattrnamerestZ attrvaluer;linenooffsetr r r r3-sP     00    zHTMLParser.parse_starttagcCs|j}tj||}|r|j}|||d}|dkrU|dS|dkr|jd|r{|dS|jd|rd S||kr|S|dS|dkrd S|dkrd S||kr|S|dStddS) Nrr/z/>r&r z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!r(r(r()rlocatestarttagend_tolerantr2r;r0AssertionError)r r?rr_rAnextr r r rR`s.        z$HTMLParser.check_for_whole_start_tagcCss|j}tj||d}|s)dS|j}tj||}|s|jdk rw|j||||Stj||d}|s|||ddkr|dS|j |S|j dj }|j d|j}|j ||dS|j dj }|jdk rR||jkrR|j||||S|j |j |j|S)Nrr&rEzrr()r endendtagr-r; endtagfindr2rr/rSrHr9rr* handle_endtagr")r r?rr2rIZ namematchZtagnamer r r r r4s6     zHTMLParser.parse_endtagcCs!|j|||j|dS)N)rZri)r r^r]r r r rYszHTMLParser.handle_startendtagcCsdS)Nr )r r^r]r r r rZszHTMLParser.handle_starttagcCsdS)Nr )r r^r r r riszHTMLParser.handle_endtagcCsdS)Nr )r rCr r r r:szHTMLParser.handle_charrefcCsdS)Nr )r rCr r r r=szHTMLParser.handle_entityrefcCsdS)Nr )r rr r r r/szHTMLParser.handle_datacCsdS)Nr )r rr r r rJszHTMLParser.handle_commentcCsdS)Nr )r Zdeclr r r rGszHTMLParser.handle_declcCsdS)Nr )r rr r r rMszHTMLParser.handle_picCsdS)Nr )r rr r r unknown_declszHTMLParser.unknown_declcCs tjdtddt|S)NzZThe unescape method is deprecated and will be removed in 3.5, use html.unescape() instead. stacklevelr&)warningswarnDeprecationWarningr)r sr r r rs  zHTMLParser.unescape)rr)__name__ __module__ __qualname____doc__r[r rrrrrr!r"rr7rHr6r3rRr4rYrZrir:r=r/rJrGrMrjrr r r r r?s8       z  3 " (          )rsrrlrZhtmlr__all__rrr>r<r8r1rLZ commentcloserSrTVERBOSErdrgrhrrr r r r s(