diff options
author | Mark Sapiro <mark@msapiro.net> | 2016-07-14 19:10:24 -0700 |
---|---|---|
committer | Mark Sapiro <mark@msapiro.net> | 2016-07-14 19:10:24 -0700 |
commit | b17234a23a590d9b27f3f609781596eea27b6974 (patch) | |
tree | 6d065e88b6a68a6fbc989a4b8e425769da00d293 /Mailman/Utils.py | |
parent | 6efea059931995de8713f35bccc1116905175cf2 (diff) | |
download | mailman2-b17234a23a590d9b27f3f609781596eea27b6974.tar.gz mailman2-b17234a23a590d9b27f3f609781596eea27b6974.tar.xz mailman2-b17234a23a590d9b27f3f609781596eea27b6974.zip |
Match header_filter_rules as normalized unicodes.
Diffstat (limited to '')
-rw-r--r-- | Mailman/Utils.py | 31 |
1 files changed, 31 insertions, 0 deletions
# From the diff of Mailman/Utils.py (index 2dbaef0b..d2317b10), hunk
# "@@ -1432,3 +1432,34 @@": the definitions below are added at the end of the
# file, after check_eq_domains().

def _invert_xml(mo):
    r"""Return the unicode character named by one matched reference.

    This is the replacement callback used with re.sub in xml_to_unicode to
    convert XML char refs and textual \u escapes to unicodes.

    mo is a match object whose group(1) is either '#' followed by decimal
    digits (XML character reference) or 'u'/'U' followed by hex digits
    (textual \u escape).  Anything else, or a value that can't be converted
    to a character, yields the unicode replace character U+FFFD.
    """
    ref = mo.group(1)
    kind = ref[:1]
    try:
        if kind == '#':
            return unichr(int(ref[1:]))
        elif kind.lower() == 'u':
            return unichr(int(ref[1:], 16))
        else:
            return u'\ufffd'
    except (ValueError, OverflowError):
        # Value is out of range.  unichr raises ValueError for a code point
        # it can't represent and OverflowError when the number doesn't even
        # fit a C int (e.g. '&#99999999999999999999;').  Either way, return
        # the unicode replace character rather than letting untrusted input
        # raise.
        return u'\ufffd'


def xml_to_unicode(s, cset):
    r"""Convert a string s, encoded in cset, to a unicode with translation
    of XML character references and textual \uxxxx escapes.

    It is more or less the inverse of
    unicode.encode(cset, errors='xmlcharrefreplace').  The docstring this
    replaces describes it as similar to canonstr (defined elsewhere in this
    module) except for replacing invalid refs with the unicode replace
    character and recognizing \u escapes.

    Bytes that can't be decoded in cset are replaced (the 'replace' error
    handler).  If s is not a str (e.g. it is already a unicode, or None) it
    is returned unchanged, with no translation applied.
    """
    if not isinstance(s, str):
        return s
    us = s.decode(cset, 'replace')
    # Decimal XML character references, e.g. '&#233;'.
    us = re.sub(u'&(#[0-9]+);', _invert_xml, us)
    # A literal backslash followed by u/U and exactly four hex digits,
    # e.g. the six characters '\u00e9'.
    us = re.sub(u'(?i)\\\\(u[a-f0-9]{4})', _invert_xml, us)
    return us