aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortkikuchi <>2005-12-17 05:11:44 +0000
committertkikuchi <>2005-12-17 05:11:44 +0000
commit75dee062afed5d1432820d897de3bcf3dc2e8238 (patch)
treec0cb18cab8ab2e89526fbefc04a5737d94a4bcf4
parent4fc195111db5d68eb97d259ed8bc67beae95302d (diff)
downloadmailman2-75dee062afed5d1432820d897de3bcf3dc2e8238.tar.gz
mailman2-75dee062afed5d1432820d897de3bcf3dc2e8238.tar.xz
mailman2-75dee062afed5d1432820d897de3bcf3dc2e8238.zip
Fixes for email.set_payload() not distinguishing between a parsed and a virgin payload.
I've tested the example by Mark Sapiro: http://mail.python.org/pipermail/mailman-developers/2005-November/018395.html both with and without 'Content-Transfer-Encoding', but it may need more testing.
-rw-r--r--Mailman/Archiver/HyperArch.py7
-rw-r--r--Mailman/Archiver/pipermail.py7
-rw-r--r--Mailman/Handlers/Decorate.py7
-rw-r--r--Mailman/Handlers/Scrubber.py45
-rw-r--r--Mailman/Handlers/ToArchive.py11
-rw-r--r--Mailman/Handlers/ToDigest.py13
6 files changed, 63 insertions, 27 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 6f66db4a..3fc5fadf 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -303,7 +303,12 @@ class Article(pipermail.Article):
if charset[0]=="'" and charset[-1]=="'":
charset = charset[1:-1]
try:
- body = message.get_payload(decode=True)
+ # Check Scrubber-munged payload
+ if message.get('x-mailman-scrubbed'):
+ decode = False
+ else:
+ decode = True
+ body = message.get_payload(decode=decode)
except binascii.Error:
body = None
if body and charset != Utils.GetCharSet(self._lang):
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
index fac7e5ed..f27e1101 100644
--- a/Mailman/Archiver/pipermail.py
+++ b/Mailman/Archiver/pipermail.py
@@ -217,7 +217,12 @@ class Article:
self.headers[i] = message[i]
# Read the message body
- s = StringIO(message.get_payload(decode=1)\
+ # Check Scrubber-munged payload
+ if message.get('x-mailman-scrubbed'):
+ decode = False
+ else:
+ decode = True
+ s = StringIO(message.get_payload(decode=decode)\
or message.as_string().split('\n\n',1)[1])
self.body = s.readlines()
diff --git a/Mailman/Handlers/Decorate.py b/Mailman/Handlers/Decorate.py
index afb0a1c9..03266fae 100644
--- a/Mailman/Handlers/Decorate.py
+++ b/Mailman/Handlers/Decorate.py
@@ -97,7 +97,12 @@ def process(mlist, msg, msgdata):
uheader = unicode(header, lcset)
ufooter = unicode(footer, lcset)
try:
- oldpayload = unicode(msg.get_payload(decode=1), mcset)
+ # First, check if the message was Scrubber-munged
+ if msg.get('x-mailman-scrubbed'):
+ decode = False
+ else:
+ decode = True
+ oldpayload = unicode(msg.get_payload(decode=decode), mcset)
frontsep = endsep = u''
if header and not header.endswith('\n'):
frontsep = u'\n'
diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py
index 05aeb20a..edaa47d2 100644
--- a/Mailman/Handlers/Scrubber.py
+++ b/Mailman/Handlers/Scrubber.py
@@ -161,6 +161,16 @@ def calculate_attachments_dir(mlist, msg, msgdata):
return os.path.join('attachments', datedir, digest[:4] + digest[-4:])
+def replace_payload_by_text(msg, text, charset):
+ # TK: This is a common function for replacing the attachment and
+ # the main message with text (scrubbing). It also sets a header flag
+ # (X-Mailman-Scrubbed) indicating that the payload has been scrubbed.
+ del msg['content-type']
+ del msg['content-transfer-encoding']
+ msg.set_payload(text, charset)
+ msg['X-Mailman-Scrubbed'] = 'Yes'
+
+
def process(mlist, msg, msgdata=None):
sanitize = mm_cfg.ARCHIVE_HTML_SANITIZER
@@ -197,9 +207,7 @@ def process(mlist, msg, msgdata=None):
os.umask(omask)
filename = part.get_filename(_('not available'))
filename = Utils.oneline(filename, lcset)
- del part['content-type']
- del part['content-transfer-encoding']
- part.set_payload(_("""\
+ replace_payload_by_text(part, _("""\
An embedded and charset-unspecified text was scrubbed...
Name: %(filename)s
Url: %(url)s
@@ -208,9 +216,8 @@ Url: %(url)s
if sanitize == 0:
if outer:
raise DiscardMessage
- del part['content-type']
- del part['content-transfer-encoding']
- part.set_payload(_('HTML attachment scrubbed and removed'),
+ replace_payload_by_text(part,
+ _('HTML attachment scrubbed and removed'),
# Adding charset arg and removing content-type
# sets content-type to text/plain
lcset)
@@ -226,9 +233,7 @@ Url: %(url)s
url = save_attachment(mlist, part, dir, filter_html=False)
finally:
os.umask(omask)
- del part['content-type']
- del part['content-transfer-encoding']
- part.set_payload(_("""\
+ replace_payload_by_text(part, _("""\
An HTML attachment was scrubbed...
URL: %(url)s
"""), lcset)
@@ -253,8 +258,7 @@ URL: %(url)s
url = save_attachment(mlist, part, dir, filter_html=False)
finally:
os.umask(omask)
- del part['content-type']
- part.set_payload(_("""\
+ replace_payload_by_text(part, _("""\
An HTML attachment was scrubbed...
URL: %(url)s
"""), lcset)
@@ -270,8 +274,7 @@ URL: %(url)s
date = submsg.get('date', _('no date'))
who = submsg.get('from', _('unknown sender'))
size = len(str(submsg))
- del part['content-type']
- part.set_payload(_("""\
+ replace_payload_by_text(part, _("""\
An embedded message was scrubbed...
From: %(who)s
Subject: %(subject)s
@@ -302,9 +305,7 @@ Url: %(url)s
desc = part.get('content-description', _('not available'))
filename = part.get_filename(_('not available'))
filename = Utils.oneline(filename, lcset)
- del part['content-type']
- del part['content-transfer-encoding']
- part.set_payload(_("""\
+ replace_payload_by_text(part, _("""\
A non-text attachment was scrubbed...
Name: %(filename)s
Type: %(ctype)s
@@ -342,7 +343,12 @@ Url : %(url)s
text.append(_('Skipped content of type %(partctype)s\n'))
continue
try:
- t = part.get_payload(decode=True)
+ # Check if the part is replaced.
+ if part.get('x-mailman-scrubbed'):
+ decode = False
+ else:
+ decode = True
+ t = part.get_payload(decode=decode)
except binascii.Error:
t = part.get_payload()
# TK: get_content_charset() returns 'iso-2022-jp' for internally
@@ -375,10 +381,7 @@ Url : %(url)s
text.append(t)
# Now join the text and set the payload
sep = _('-------------- next part --------------\n')
- del msg['content-type']
- msg.set_payload(sep.join(text), charset)
- del msg['content-transfer-encoding']
- msg.add_header('Content-Transfer-Encoding', '8bit')
+ replace_payload_by_text(msg, sep.join(text), charset)
return msg
diff --git a/Mailman/Handlers/ToArchive.py b/Mailman/Handlers/ToArchive.py
index cdee793f..59bf680f 100644
--- a/Mailman/Handlers/ToArchive.py
+++ b/Mailman/Handlers/ToArchive.py
@@ -19,7 +19,10 @@
import time
from cStringIO import StringIO
+from email import message_from_string
+
from Mailman import mm_cfg
+from Mailman import Message
from Mailman.Queue.sbcache import get_switchboard
@@ -36,4 +39,10 @@ def process(mlist, msg, msgdata):
# Send the message to the archiver queue
archq = get_switchboard(mm_cfg.ARCHQUEUE_DIR)
# Send the message to the queue
- archq.enqueue(msg, msgdata)
+ if msg.get('x-mailman-scrubbed'):
+ # Clean Scrubber-munged message.
+ archmsg = message_from_string(msg.as_string(), Message.Message)
+ del archmsg['x-mailman-scrubbed']
+ archq.enqueue(archmsg, msgdata)
+ else:
+ archq.enqueue(msg, msgdata)
diff --git a/Mailman/Handlers/ToDigest.py b/Mailman/Handlers/ToDigest.py
index cd96b5a6..b0f948ac 100644
--- a/Mailman/Handlers/ToDigest.py
+++ b/Mailman/Handlers/ToDigest.py
@@ -32,6 +32,7 @@ import time
from types import ListType
from cStringIO import StringIO
+from email import message_from_string
from email.Parser import Parser
from email.Generator import Generator
from email.MIMEBase import MIMEBase
@@ -334,8 +335,14 @@ def send_i18n_digests(mlist, mboxfp):
uh = '\n\t'.join(uh.split('\n'))
print >> plainmsg, uh
print >> plainmsg
- payload = msg.get_payload(decode=True)\
- or msg.as_string().split('\n\n',1)[1]
+ if msg.get('x-mailman-scrubbed'):
+ # It has successfully been scrubbed, so this should be string.
+ payload = msg.get_payload()
+ else:
+ # If the decoded payload is empty, this may be a multipart message
+ # -- just stringify it.
+ payload = msg.get_payload(decode=True)\
+ or msg.as_string().split('\n\n',1)[1]
mcset = msg.get_content_charset('')
if mcset and mcset <> lcset and mcset <> lcset_out:
try:
@@ -407,6 +414,8 @@ def send_i18n_digests(mlist, mboxfp):
isdigest=True)
# RFC 1153
rfc1153msg.set_payload(plainmsg.getvalue(), lcset)
+ # Re-generate it because set_payload() doesn't encode. :-(
+ rfc1153msg = message_from_string(rfc1153msg.as_string(), Message.Message)
virginq.enqueue(rfc1153msg,
recips=plainrecips,
listname=mlist.internal_name(),