include "../utils.pxi"

import re


def create_tag(tag: str):
    """
    Given an HTML tag name, e.g. `"div"`, create a single empty node for that
    tag, e.g. `"<div></div>"`.

    Use `LexborHTMLParser().create_node(..)` if you need to create a node
    tied to a specific parser instance.

    Parameters
    ----------
    tag : str
        Tag name without angle brackets. It is interpolated into the markup
        as-is; no validation or escaping is performed here.

    Returns
    -------
    The ``root`` of a fragment-mode ``LexborHTMLParser`` built from the
    generated ``<tag></tag>`` markup.
    """
    # The closing tag must be spelled out: without the "</" the parser would
    # see "<div>div>" and produce an element containing the text "div>".
    return LexborHTMLParser(f"<{tag}></{tag}>", is_fragment=True).root


def parse_fragment(html: str):
    """
    Given HTML, parse it into a list of Nodes, such that the nodes
    correspond to the given HTML.

    For contrast, HTMLParser adds `<html>`, `<head>`, and `<body>` tags
    if they are missing. This function does not add these tags.

    Parameters
    ----------
    html : str
        HTML markup to parse.

    Returns
    -------
    Whatever ``do_parse_fragment`` returns for ``LexborHTMLParser``
    (presumably defined in the included ``utils.pxi`` -- confirm there).
    """
    return do_parse_fragment(html, LexborHTMLParser)


def extract_html_comment(text: str) -> str:
    """Extract the inner content of an HTML comment string.

    Args:
        text: Raw HTML comment, including the ``<!--`` and ``-->`` markers.
            Leading and trailing whitespace around the comment is allowed.

    Returns:
        The comment body with surrounding whitespace stripped.

    Raises:
        ValueError: If the input is not a well-formed HTML comment.

    Examples:
        >>> extract_html_comment("<!-- hello -->")
        'hello'
    """
    # fullmatch anchors both ends, so anything other than whitespace before
    # "<!--" or after "-->" is rejected. DOTALL lets the body span newlines.
    # Group 1 is the comment body that is returned below.
    if match := re.fullmatch(r"\s*<!--(.*?)-->\s*", text, flags=re.DOTALL):
        return match.group(1).strip()
    msg = "Input is not a valid HTML comment"
    raise ValueError(msg)


cdef inline bint is_empty_text_node(lxb_dom_node_t *text_node):
    """
    Check whether a node is a text node made up solely of HTML ASCII whitespace.

    Parameters
    ----------
    text_node : lxb_dom_node_t *
        Pointer to the node that should be inspected.

    Returns
    -------
    bint
        ``True`` if ``text_node`` is a text node whose character data contains
        only space, tab, newline, form feed, or carriage return characters;
        otherwise ``False``.
    """
    if text_node == NULL or text_node.type != LXB_DOM_NODE_TYPE_TEXT:
        return False

    cdef lxb_dom_character_data_t *text_character_data =