a a'@sLdZgdZddlZddlmZmZz0ddlmZmZm Z m Z m Z m Z m Z e e fZWn6eyddlmZmZm Z m Z m Z m Z e ZYn0dddZdd d Zdd d Zd dZedejjZGdddZddZddZzddlmZWn eyddlmZYn0edjZze Wne!y>e"Z Yn0ddZ#dS)z5External interface to the BeautifulSoup HTML parser. ) fromstringparse convert_treeN)etreehtml) BeautifulSoupTagCommentProcessingInstructionNavigableString DeclarationDoctype)rrr r r r cKst|||fi|S)aParse a string of HTML data into an Element tree using the BeautifulSoup parser. Returns the root ```` Element of the tree. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. )_parse)data beautifulsoup makeelementbsargsr:/usr/lib64/python3.9/site-packages/lxml/html/soupparser.pyrs rcKs0t|dst|}t|||fi|}t|S)aYParse a file into an ElemenTree using the BeautifulSoup parser. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. read)hasattropenrrZ ElementTree)filerrrrootrrrr$s rcCs*t||}|}|D]}||q|S)aConvert a BeautifulSoup tree to a list of Element trees. Returns a list instead of a single root Element to support HTML-like soup with more than one root element. You can pass a different Element factory through the `makeelement` keyword. ) _convert_treeZ getchildrenremove)beautiful_soup_treerrZchildrenchildrrrr3s  rcKs|dur t}t|dr&d|vr&d|d<t|dr@d|vr@d|d<||fi|}t||}t|dkr||djdkr||dSd|_|S) NZ HTML_ENTITIESZconvertEntitiesrZDEFAULT_BUILDER_FEATURESZfeaturesz html.parserr)rrrlentag)sourcerrrZtreerrrrrEs   rz`(?:\s|[|\}}| j}|o|dd|_|o|dd|_| S)Nrr)rZ html_parserr enumerate isinstancerr#lower_DECLARATION_OR_DOCTYPEr%indexr"_init_node_convertersreversedZ addpreviousZaddnextZ output_readyAttributeErrorstring_parse_doctype_declarationgroupsZ getroottreedocinfoZ public_idZ system_url)rrZfirst_element_idxZlast_element_idxZ html_rootZ declarationieZpre_rootZ post_rootroots convert_nodeZres_rootprevZ convertedZdoctype_stringmatchZ external_idZsys_urir9rrrrisd   &          rcsigfdd}fdddfdd ddd d |ttfd d }|td d}|tdd}|tfdd}S)Ncsfdd}|S)Ncs D]}||<|q|Sr()append)handlert) convertersordered_node_typestypesrradds z5_init_node_converters..converter..addr)rErFrCrD)rEr convertersz(_init_node_converters..convertercs$D]}t||r|SqdSr()r/)ZnoderBrGrrfind_best_converters z2_init_node_converters..find_best_convertercsNzt|}Wn&ty6|}t|<Yn0|durDdS|||Sr()typeKeyError)bs_nodeparentrA)rCrIrrr=s z+_init_node_converters..convert_nodecSsXt|trBi}|D](\}}t|tr2d|}t|||<qntdd|D}|S)N css|]\}}|t|fVqdSr(unescape).0kvrrr z;_init_node_converters..map_attrs..)r/dictitemslistjoinrP)Zbs_attrsattribsrRrSrrr map_attrss   z(_init_node_converters..map_attrscSs:t|dkr|jpd||_n|djp*d||d_dS)Nrr-)rtexttail)rMr]rrr append_texts z*_init_node_converters..append_textc s|j}|dur2|r|nd}tj||j|d}n|r>|ni}|j|d}|D]F}zt|}WntyzYn0|durT|||qT||qT|S)N)Zattrib)r$rZ SubElementr#rJrK)rLrMr$rZresrrA)r=rCrr[rr convert_tags    z*_init_node_converters..convert_tagcSs t|}|dur|||Sr()rZ HtmlCommentr@rLrMr`rrrconvert_comments  z._init_node_converters..convert_commentcSs>|dr|dd}tj|dd}|dur:|||S)N?r-rNr)endswithrr splitr@rbrrr convert_pi s    z)_init_node_converters..convert_pics|dur|t|dSr(rO)rLrM)r_rr convert_textsz+_init_node_converters..convert_text)N)rr"r r r )rrHrarcrgrhr)r_r=rCrIrr[rDrr3s     r3)name2codepointz&(\w+);cCs|sdSdd}t||S)Nr\cSs6ztt|dWSty0|dYS0dS)Nrr)unichrrigrouprK)mrrrunescape_entity5s z!unescape..unescape_entity)handle_entities)r6rmrrrrP1srP)NN)NN)N)$__doc____all__reZlxmlrrZbs4rrr r r r r r1 ImportErrorrrrrcompile IGNORECASEr?r7r"rr3Z html.entitiesriZhtmlentitydefssubrnrj NameErrorchrrPrrrrs<$        Uc