Python应用之------转换网页特殊字符

  
　　由于网页有自己独特的转义字符，在对网页内容进行正则匹配的时候会有些麻烦。在官方文档中查到一个用字典逐字符替换的方案，私以为不错，拿来做了一些扩充。其中一部分条目（如 . ^ $ 等正则表达式元字符）是为了保证替换后的文本用于正则匹配时的正确性。
# Maps a single character to the text that replaces it. Characters that do
# not appear as a key are meant to be left unchanged by the caller.
html_escape_table = {
    # HTML special characters -> character entity references.
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
    # Non-ASCII symbols -> numeric character references
    # (middle dot U+00B7 = 183, degree sign U+00B0 = 176).
    u"·": "&#183;",
    u"°": "&#176;",
    # Regular-expression metacharacters -> backslash-escaped form, so the
    # resulting text matches literally when embedded in a pattern.
    ".": r"\.",
    "^": r"\^",
    "$": r"\$",
    "{": r"\{",
    "}": r"\}",
    # "[" would otherwise open a character class; "]" is escaped for symmetry.
    "[": r"\[",
    "]": r"\]",
    "\\": r"\\",
    "|": r"\|",
    "(": r"\(",
    ")": r"\)",
    "+": r"\+",
    "*": r"\*",
    "?": r"\?",
}
 
def html_escape(text):
    """Substitute every character of ``text`` listed in ``html_escape_table``.

    Each character is looked up in the table; a character without an entry
    is kept as-is. The pieces are joined and the result is returned encoded
    as UTF-8.
    """
    pieces = []
    for ch in text:
        # Fall back to the character itself when the table has no entry.
        pieces.append(html_escape_table.get(ch, ch))

    escaped = "".join(pieces)
    return escaped.encode("utf-8")

2022-12-23
2022-12-23
2022-12-23
2022-12-23
2022-12-23
2022-12-23
2021-12-09
2022-12-23