From cb51f78717fbd1f6187b61fe0b2d1e06859a3018 Mon Sep 17 00:00:00 2001 From: Mark Sapiro Date: Tue, 12 Jul 2016 11:29:42 -0700 Subject: Use xmlcharrefreplace when encoding for header_filter_rules. --- Mailman/Handlers/SpamDetect.py | 2 +- NEWS | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py index 1ea295a6..de19adfc 100644 --- a/Mailman/Handlers/SpamDetect.py +++ b/Mailman/Handlers/SpamDetect.py @@ -86,7 +86,7 @@ def getDecodedHeaders(msg, cset='utf-8'): # unicode it as iso-8859-1 which may result in a garbled # mess, but we have to do something. uvalue += unicode(frag, 'iso-8859-1', 'replace') - headers += '%s: %s\n' % (h, uvalue.encode(cset, 'backslashreplace')) + headers += '%s: %s\n' % (h, uvalue.encode(cset, 'xmlcharrefreplace')) return headers diff --git a/NEWS b/NEWS index 65600663..84130847 100644 --- a/NEWS +++ b/NEWS @@ -11,11 +11,11 @@ Here is a history of user visible changes to Mailman. - RFC 2047 encoded headers are now decoded and re-encoded in the charset of the list's preferred language for matching by header_filter_rules using - errors='backslashreplace' instead of the former errors='replace'. This + errors='xmlcharrefreplace' instead of the former errors='replace'. This means that characters that can't be represented in the charset of the - list's preferred language will now be represented as '\uxxxx' escapes - rather than '?' enabling regexps to be constructed to match specific - characters or ranges. (LP: #558155) + list's preferred language will now be represented as '&#nnnn;' XML + character references rather than '?' enabling regexps to be constructed + to match specific characters or ranges. (LP: #558155) - Thanks to Jim Popovitch REMOVE_DKIM_HEADERS can now be set to 3 to preserve the original headers as X-Mailman-Original-... before removing -- cgit v1.2.3