diff options
author | Yasuhito FUTATSUKI at POEM <futatuki@poem.co.jp> | 2016-07-17 06:39:50 +0900 |
---|---|---|
committer | Yasuhito FUTATSUKI at POEM <futatuki@poem.co.jp> | 2016-07-17 06:39:50 +0900 |
commit | 8a31986e68316d0a06919990abad096ee6c0e041 (patch) | |
tree | 3d198ba9f0e4e17eaa242c3a4587ee6ec1ae852f /Mailman/Utils.py | |
parent | 8cac32e5bac4495139573b07da94c255522e8498 (diff) | |
parent | b17234a23a590d9b27f3f609781596eea27b6974 (diff) | |
download | mailman2-8a31986e68316d0a06919990abad096ee6c0e041.tar.gz mailman2-8a31986e68316d0a06919990abad096ee6c0e041.tar.xz mailman2-8a31986e68316d0a06919990abad096ee6c0e041.zip |
Merge lp:mailman/2.1 up to rev 1664
Diffstat (limited to 'Mailman/Utils.py')
-rw-r--r-- | Mailman/Utils.py | 31 |
1 files changed, 31 insertions, 0 deletions
def _invert_xml(mo):
    # re.sub() callback used by xml_to_unicode() below to convert XML char
    # refs and textual \u escapes to unicodes.  Group 1 of the match is either
    # '#' followed by decimal digits (a char ref) or 'u'/'U' followed by hex
    # digits (a \u escape).  Anything else, or a number that can't be turned
    # into a character, yields the unicode replace character.
    ref = mo.group(1)
    marker = ref[:1]
    try:
        if marker == '#':
            return unichr(int(ref[1:]))
        elif marker.lower() == 'u':
            return unichr(int(ref[1:], 16))
        else:
            return u'\ufffd'
    except (ValueError, OverflowError):
        # unichr() raises ValueError for a code point outside its range and
        # OverflowError when the number is too big for a C integer (e.g.
        # '&#99999999999999999999;').  Both mean the reference is invalid, so
        # return the unicode replace character rather than letting the
        # exception escape from re.sub().
        return u'\ufffd'


def xml_to_unicode(s, cset):
    r"""Convert a string s, encoded in cset, to a unicode with translation
    of XML character references and textual \uxxxx escapes.

    This is more or less the inverse of
    unicode.encode(cset, errors='xmlcharrefreplace').  It is similar to
    canonstr (defined earlier in this module) except for replacing invalid
    refs with the unicode replace character and recognizing \u escapes.

    s is the value to convert.  If it is not a str instance it is returned
    unchanged.  cset is the name of the character set s is encoded in; bytes
    that can't be decoded in that character set are replaced rather than
    raising an error.

    Returns the decoded unicode with every decimal '&#NNN;' reference and
    every four hex digit '\uXXXX' escape (case insensitive) replaced by the
    character it names, or by u'\ufffd' if the number is not a valid code
    point.
    """
    if not isinstance(s, str):
        return s
    us = s.decode(cset, 'replace')
    # Decimal XML character references first, then textual \uXXXX escapes.
    us = re.sub(u'&(#[0-9]+);', _invert_xml, us)
    us = re.sub(u'(?i)\\\\(u[a-f0-9]{4})', _invert_xml, us)
    return us