aboutsummaryrefslogtreecommitdiffstats
path: root/Mailman/Utils.py
diff options
context:
space:
mode:
author Yasuhito FUTATSUKI at POEM <futatuki@poem.co.jp> 2016-07-17 06:39:50 +0900
committer Yasuhito FUTATSUKI at POEM <futatuki@poem.co.jp> 2016-07-17 06:39:50 +0900
commit 8a31986e68316d0a06919990abad096ee6c0e041 (patch)
tree 3d198ba9f0e4e17eaa242c3a4587ee6ec1ae852f /Mailman/Utils.py
parent 8cac32e5bac4495139573b07da94c255522e8498 (diff)
parent b17234a23a590d9b27f3f609781596eea27b6974 (diff)
downloadmailman2-8a31986e68316d0a06919990abad096ee6c0e041.tar.gz
mailman2-8a31986e68316d0a06919990abad096ee6c0e041.tar.xz
mailman2-8a31986e68316d0a06919990abad096ee6c0e041.zip
Merge lp:mailman/2.1 up to rev 1664
Diffstat (limited to 'Mailman/Utils.py')
-rw-r--r--Mailman/Utils.py31
1 files changed, 31 insertions, 0 deletions
diff --git a/Mailman/Utils.py b/Mailman/Utils.py
index 2dbaef0b..d2317b10 100644
--- a/Mailman/Utils.py
+++ b/Mailman/Utils.py
@@ -1432,3 +1432,34 @@ def check_eq_domains(email, domains_list):
return [local + '@' + x for x in domains if x != domain]
return []
+
+def _invert_xml(mo):
+ # This is used with re.sub below to convert XML char refs and textual \u
+ # escapes to unicodes.
+ try:
+ if mo.group(1)[:1] == '#':
+ return unichr(int(mo.group(1)[1:]))
+ elif mo.group(1)[:1].lower() == 'u':
+ return unichr(int(mo.group(1)[1:], 16))
+ else:
+ return(u'\ufffd')
+ except ValueError:
+ # Value is out of range. Return the unicode replace character.
+ return(u'\ufffd')
+
+
def xml_to_unicode(s, cset):
    r"""This converts a string s, encoded in cset to a unicode with translation
    of XML character references and textual \uxxxx escapes.  It is more or less
    the inverse of unicode.encode(cset, errors='xmlcharrefreplace').  It is
    similar to canonstr above except for replacing invalid refs with the
    unicode replace character and recognizing \u escapes.

    If s is not a str, it is returned unchanged.
    """
    if not isinstance(s, str):
        return s
    # Bytes that can't be decoded in cset are replaced rather than raising.
    us = s.decode(cset, 'replace')
    # Decimal character references such as &#233; are converted first ...
    us = re.sub(u'&(#[0-9]+);', _invert_xml, us)
    # ... then textual escapes: a literal backslash, u or U, and exactly four
    # hex digits.
    us = re.sub(u'(?i)\\\\(u[a-f0-9]{4})', _invert_xml, us)
    return us
+