From 75dee062afed5d1432820d897de3bcf3dc2e8238 Mon Sep 17 00:00:00 2001 From: tkikuchi <> Date: Sat, 17 Dec 2005 05:11:44 +0000 Subject: Fixes for email.set_payload() not distinguishing between parsed and virgin payloads. I've tested the example by Mark Sapiro: http://mail.python.org/pipermail/mailman-developers/2005-November/018395.html both with and without 'Content-Transfer-Encoding', but it may need more testing. --- Mailman/Archiver/HyperArch.py | 7 ++++++- Mailman/Archiver/pipermail.py | 7 ++++++- Mailman/Handlers/Decorate.py | 7 ++++++- Mailman/Handlers/Scrubber.py | 45 +++++++++++++++++++++++-------------------- Mailman/Handlers/ToArchive.py | 11 ++++++++++- Mailman/Handlers/ToDigest.py | 13 +++++++++++-- 6 files changed, 63 insertions(+), 27 deletions(-) (limited to 'Mailman') diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py index 6f66db4a..3fc5fadf 100644 --- a/Mailman/Archiver/HyperArch.py +++ b/Mailman/Archiver/HyperArch.py @@ -303,7 +303,12 @@ class Article(pipermail.Article): if charset[0]=="'" and charset[-1]=="'": charset = charset[1:-1] try: - body = message.get_payload(decode=True) + # Check Scrubber-munged payload + if message.get('x-mailman-scrubbed'): + decode = False + else: + decode = True + body = message.get_payload(decode=decode) except binascii.Error: body = None if body and charset != Utils.GetCharSet(self._lang): diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py index fac7e5ed..f27e1101 100644 --- a/Mailman/Archiver/pipermail.py +++ b/Mailman/Archiver/pipermail.py @@ -217,7 +217,12 @@ class Article: self.headers[i] = message[i] # Read the message body - s = StringIO(message.get_payload(decode=1)\ + # Check Scrubber-munged payload + if message.get('x-mailman-scrubbed'): + decode = False + else: + decode = True + s = StringIO(message.get_payload(decode=decode)\ or message.as_string().split('\n\n',1)[1]) self.body = s.readlines() diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py index 
afb0a1c9..03266fae 100644 --- a/Mailman/Handlers/Decorate.py +++ b/Mailman/Handlers/Decorate.py @@ -97,7 +97,12 @@ def process(mlist, msg, msgdata): uheader = unicode(header, lcset) ufooter = unicode(footer, lcset) try: - oldpayload = unicode(msg.get_payload(decode=1), mcset) + # First, check if the message was Scrubber-munged + if msg.get('x-mailman-scrubbed'): + decode = False + else: + decode = True + oldpayload = unicode(msg.get_payload(decode=decode), mcset) frontsep = endsep = u'' if header and not header.endswith('\n'): frontsep = u'\n' diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py index 05aeb20a..edaa47d2 100644 --- a/Mailman/Handlers/Scrubber.py +++ b/Mailman/Handlers/Scrubber.py @@ -161,6 +161,16 @@ def calculate_attachments_dir(mlist, msg, msgdata): return os.path.join('attachments', datedir, digest[:4] + digest[-4:]) +def replace_payload_by_text(msg, text, charset): + # TK: This is a common function in replacing the attachment and + # the main message by a text (scrubbing). Plus a flag indicating + # it has been scrubbed. + del msg['content-type'] + del msg['content-transfer-encoding'] + msg.set_payload(text, charset) + msg['X-Mailman-Scrubbed'] = 'Yes' + + def process(mlist, msg, msgdata=None): sanitize = mm_cfg.ARCHIVE_HTML_SANITIZER @@ -197,9 +207,7 @@ def process(mlist, msg, msgdata=None): os.umask(omask) filename = part.get_filename(_('not available')) filename = Utils.oneline(filename, lcset) - del part['content-type'] - del part['content-transfer-encoding'] - part.set_payload(_("""\ + replace_payload_by_text(part, _("""\ An embedded and charset-unspecified text was scrubbed... 
Name: %(filename)s Url: %(url)s @@ -208,9 +216,8 @@ Url: %(url)s if sanitize == 0: if outer: raise DiscardMessage - del part['content-type'] - del part['content-transfer-encoding'] - part.set_payload(_('HTML attachment scrubbed and removed'), + replace_payload_by_text(part, + _('HTML attachment scrubbed and removed'), # Adding charset arg and removing content-tpe # sets content-type to text/plain lcset) @@ -226,9 +233,7 @@ Url: %(url)s url = save_attachment(mlist, part, dir, filter_html=False) finally: os.umask(omask) - del part['content-type'] - del part['content-transfer-encoding'] - part.set_payload(_("""\ + replace_payload_by_text(part, _("""\ An HTML attachment was scrubbed... URL: %(url)s """), lcset) @@ -253,8 +258,7 @@ URL: %(url)s url = save_attachment(mlist, part, dir, filter_html=False) finally: os.umask(omask) - del part['content-type'] - part.set_payload(_("""\ + replace_payload_by_text(part, _("""\ An HTML attachment was scrubbed... URL: %(url)s """), lcset) @@ -270,8 +274,7 @@ URL: %(url)s date = submsg.get('date', _('no date')) who = submsg.get('from', _('unknown sender')) size = len(str(submsg)) - del part['content-type'] - part.set_payload(_("""\ + replace_payload_by_text(part, _("""\ An embedded message was scrubbed... From: %(who)s Subject: %(subject)s @@ -302,9 +305,7 @@ Url: %(url)s desc = part.get('content-description', _('not available')) filename = part.get_filename(_('not available')) filename = Utils.oneline(filename, lcset) - del part['content-type'] - del part['content-transfer-encoding'] - part.set_payload(_("""\ + replace_payload_by_text(part, _("""\ A non-text attachment was scrubbed... Name: %(filename)s Type: %(ctype)s @@ -342,7 +343,12 @@ Url : %(url)s text.append(_('Skipped content of type %(partctype)s\n')) continue try: - t = part.get_payload(decode=True) + # Check if the part is replaced. 
+ if part.get('x-mailman-scrubbed'): + decode = False + else: + decode = True + t = part.get_payload(decode=decode) except binascii.Error: t = part.get_payload() # TK: get_content_charset() returns 'iso-2022-jp' for internally @@ -375,10 +381,7 @@ Url : %(url)s text.append(t) # Now join the text and set the payload sep = _('-------------- next part --------------\n') - del msg['content-type'] - msg.set_payload(sep.join(text), charset) - del msg['content-transfer-encoding'] - msg.add_header('Content-Transfer-Encoding', '8bit') + replace_payload_by_text(msg, sep.join(text), charset) return msg diff --git a/Mailman/Handlers/ToArchive.py b/Mailman/Handlers/ToArchive.py index cdee793f..59bf680f 100644 --- a/Mailman/Handlers/ToArchive.py +++ b/Mailman/Handlers/ToArchive.py @@ -19,7 +19,10 @@ import time from cStringIO import StringIO +from email import message_from_string + from Mailman import mm_cfg +from Mailman import Message from Mailman.Queue.sbcache import get_switchboard @@ -36,4 +39,10 @@ def process(mlist, msg, msgdata): # Send the message to the archiver queue archq = get_switchboard(mm_cfg.ARCHQUEUE_DIR) # Send the message to the queue - archq.enqueue(msg, msgdata) + if msg.get('x-mailman-scrubbed'): + # Clean Scrubber-munged message. 
+ archmsg = message_from_string(msg.as_string(), Message.Message) + del archmsg['x-mailman-scrubbed'] + archq.enqueue(archmsg, msgdata) + else: + archq.enqueue(msg, msgdata) diff --git a/Mailman/Handlers/ToDigest.py b/Mailman/Handlers/ToDigest.py index cd96b5a6..b0f948ac 100644 --- a/Mailman/Handlers/ToDigest.py +++ b/Mailman/Handlers/ToDigest.py @@ -32,6 +32,7 @@ import time from types import ListType from cStringIO import StringIO +from email import message_from_string from email.Parser import Parser from email.Generator import Generator from email.MIMEBase import MIMEBase @@ -334,8 +335,14 @@ def send_i18n_digests(mlist, mboxfp): uh = '\n\t'.join(uh.split('\n')) print >> plainmsg, uh print >> plainmsg - payload = msg.get_payload(decode=True)\ - or msg.as_string().split('\n\n',1)[1] + if msg.get('x-mailman-scrubbed'): + # It has successfully been scrubbed, so this should be a string. + payload = msg.get_payload() + else: + # If decoded payload is empty, this may be a multipart message. + # -- just stringify it. + payload = msg.get_payload(decode=True)\ + or msg.as_string().split('\n\n',1)[1] mcset = msg.get_content_charset('') if mcset and mcset <> lcset and mcset <> lcset_out: try: @@ -407,6 +414,8 @@ def send_i18n_digests(mlist, mboxfp): isdigest=True) # RFC 1153 rfc1153msg.set_payload(plainmsg.getvalue(), lcset) + # Re-generate it because set_payload() doesn't encode. :-( + rfc1153msg = message_from_string(rfc1153msg.as_string(), Message.Message) virginq.enqueue(rfc1153msg, recips=plainrecips, listname=mlist.internal_name(), -- cgit v1.2.3