From 925200da11d52ae4d7fc664bff898f8050bef687 Mon Sep 17 00:00:00 2001 From: bwarsaw <> Date: Sat, 8 Feb 2003 07:14:13 +0000 Subject: Backporting from the trunk. --- Mailman/Handlers/CookHeaders.py | 19 +++++-- Mailman/Handlers/SMTPDirect.py | 11 +++- Mailman/Handlers/Scrubber.py | 101 ++++++++++++++++++++++++----------- Mailman/Handlers/ToDigest.py | 113 +++++++++++++++++++++++++--------------- 4 files changed, 166 insertions(+), 78 deletions(-) (limited to 'Mailman/Handlers') diff --git a/Mailman/Handlers/CookHeaders.py b/Mailman/Handlers/CookHeaders.py index 40eddd66..c4ad06ab 100644 --- a/Mailman/Handlers/CookHeaders.py +++ b/Mailman/Handlers/CookHeaders.py @@ -1,4 +1,4 @@ -# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# Copyright (C) 1998-2003 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -39,7 +39,7 @@ MAXLINELEN = 78 def _isunicode(s): return isinstance(s, UnicodeType) -def uheader(mlist, s, header_name=None): +def uheader(mlist, s, header_name=None, continuation_ws='\t'): # Get the charset to encode the string in. If this is us-ascii, we'll use # iso-8859-1 instead, just to get a little extra coverage, and because the # Header class tries us-ascii first anyway. @@ -54,7 +54,8 @@ def uheader(mlist, s, header_name=None): codec = charset.input_codec or 'ascii' s = unicode(s, codec, 'replace') # We purposefully leave no space b/w prefix and subject! - return Header(s, charset, header_name=header_name) + return Header(s, charset, header_name=header_name, + continuation_ws=continuation_ws) @@ -218,7 +219,15 @@ def prefix_subject(mlist, msg, msgdata): # tracked (e.g. internally crafted, delivered to a single user such as the # list admin). 
prefix = mlist.subject_prefix - subject = msg['subject'] + subject = msg.get('subject', '') + # Try to figure out what the continuation_ws is for the header + if isinstance(subject, Header): + lines = str(subject).splitlines() + else: + lines = subject.splitlines() + ws = '\t' + if len(lines) > 1 and lines[1] and lines[1][0] in ' \t': + ws = lines[1][0] msgdata['origsubj'] = subject # The header may be multilingual; decode it from base64/quopri and search # each chunk for the prefix. BAW: Note that if the prefix contains spaces @@ -235,7 +244,7 @@ def prefix_subject(mlist, msg, msgdata): if not subject: subject = _('(no subject)') # Get the header as a Header instance, with proper unicode conversion - h = uheader(mlist, prefix, 'Subject') + h = uheader(mlist, prefix, 'Subject', continuation_ws=ws) for s, c in headerbits: # Once again, convert the string to unicode. if c is None: diff --git a/Mailman/Handlers/SMTPDirect.py b/Mailman/Handlers/SMTPDirect.py index fd64f6f1..4724c3a1 100644 --- a/Mailman/Handlers/SMTPDirect.py +++ b/Mailman/Handlers/SMTPDirect.py @@ -25,6 +25,7 @@ Note: This file only handles single threaded delivery. See SMTPThreaded.py for a threaded implementation. """ +import copy import time import socket import smtplib @@ -268,12 +269,20 @@ def verpdeliver(mlist, msg, msgdata, envsender, failures, conn): # they missed due to bouncing. Neat idea. msgdata['recips'] = [recip] # Make a copy of the message and decorate + delivery that - msgcopy = email.message_from_string(msg.as_string()) + msgcopy = copy.deepcopy(msg) Decorate.process(mlist, msgcopy, msgdata) # Calculate the envelope sender, which we may be VERPing if msgdata.get('verp'): bmailbox, bdomain = Utils.ParseEmail(envsender) rmailbox, rdomain = Utils.ParseEmail(recip) + if rdomain is None: + # The recipient address is not fully-qualified. We can't + # deliver it to this person, nor can we craft a valid verp + # header. I don't think there's much we can do except ignore + # this recipient. 
+ syslog('smtp', 'Skipping VERP delivery to unqual recip: %s', + recip) + continue d = {'bounces': bmailbox, 'mailbox': rmailbox, 'host' : DOT.join(rdomain), diff --git a/Mailman/Handlers/Scrubber.py b/Mailman/Handlers/Scrubber.py index 024832a4..b5be73df 100644 --- a/Mailman/Handlers/Scrubber.py +++ b/Mailman/Handlers/Scrubber.py @@ -17,6 +17,8 @@ """Cleanse a message for archiving. """ +from __future__ import nested_scopes + import os import re import sha @@ -24,7 +26,6 @@ import time import errno import binascii import tempfile -import mimetypes from cStringIO import StringIO from types import IntType @@ -51,6 +52,35 @@ dre = re.compile(r'^\.*') BR = '
\n' SPACE = ' ' +try: + from mimetypes import guess_all_extensions +except ImportError: + import mimetypes + def guess_all_extensions(ctype, strict=1): + # BAW: sigh, guess_all_extensions() is new in Python 2.3 + all = [] + def check(map): + for e, t in map.items(): + if t == ctype: + all.append(e) + check(mimetypes.types_map) + # Python 2.1 doesn't have common_types. Sigh, sigh. + if not strict and hasattr(mimetypes, 'common_types'): + check(mimetypes.common_types) + return all + + + +def guess_extension(ctype, ext): + # mimetypes maps multiple extensions to the same type, e.g. .doc, .dot, + # and .wiz are all mapped to application/msword. This sucks for finding + # the best reverse mapping. If the extension is one of the given + # mappings, we'll trust that, otherwise we'll just guess. :/ + all = guess_all_extensions(ctype, strict=0) + if ext in all: + return ext + return all and all[0] + # We're using a subclass of the standard Generator because we want to suppress @@ -131,6 +161,7 @@ def process(mlist, msg, msgdata=None): msgdata = {} dir = calculate_attachments_dir(mlist, msg, msgdata) charset = None + lcset = Utils.GetCharSet(mlist.preferred_language) # Now walk over all subparts of this message and scrub out various types for part in msg.walk(): ctype = part.get_type(part.get_default_type()) @@ -140,13 +171,16 @@ def process(mlist, msg, msgdata=None): # arbitrarily pick the charset of the first text/plain part in the # message. 
if charset is None: - charset = part.get_content_charset(charset) + charset = part.get_content_charset(lcset) elif ctype == 'text/html' and isinstance(sanitize, IntType): if sanitize == 0: if outer: raise DiscardMessage - part.set_payload(_('HTML attachment scrubbed and removed')) - part.set_type('text/plain') + del part['content-type'] + part.set_payload(_('HTML attachment scrubbed and removed'), + # Adding charset arg and removing content-type + # sets content-type to text/plain + lcset) elif sanitize == 2: # By leaving it alone, Pipermail will automatically escape it pass @@ -159,11 +193,11 @@ def process(mlist, msg, msgdata=None): url = save_attachment(mlist, part, dir, filter_html=0) finally: os.umask(omask) + del part['content-type'] part.set_payload(_("""\ An HTML attachment was scrubbed... URL: %(url)s -""")) - part.set_type('text/plain') +"""), lcset) else: # HTML-escape it and store it as an attachment, but make it # look a /little/ bit prettier. :( @@ -185,11 +219,11 @@ URL: %(url)s url = save_attachment(mlist, part, dir, filter_html=0) finally: os.umask(omask) + del part['content-type'] part.set_payload(_("""\ An HTML attachment was scrubbed... URL: %(url)s -""")) - part.set_type('text/plain') +"""), lcset) elif ctype == 'message/rfc822': # This part contains a submessage, so it too needs scrubbing submsg = part.get_payload(0) @@ -202,6 +236,7 @@ URL: %(url)s date = submsg.get('date', _('no date')) who = submsg.get('from', _('unknown sender')) size = len(str(submsg)) + del part['content-type'] part.set_payload(_("""\ An embedded message was scrubbed... From: %(who)s @@ -209,13 +244,12 @@ Subject: %(subject)s Date: %(date)s Size: %(size)s Url: %(url)s -""")) - part.set_type('text/plain') +"""), lcset) # If the message isn't a multipart, then we'll strip it out as an # attachment that would have to be separately downloaded. Pipermail # will transform the url into a hyperlink. 
elif not part.is_multipart(): - payload = part.get_payload() + payload = part.get_payload(decode=1) ctype = part.get_type() size = len(payload) omask = os.umask(002) @@ -225,6 +259,8 @@ Url: %(url)s os.umask(omask) desc = part.get('content-description', _('not available')) filename = part.get_filename(_('not available')) + del part['content-type'] + del part['content-transfer-encoding'] part.set_payload(_("""\ A non-text attachment was scrubbed... Name: %(filename)s @@ -232,8 +268,7 @@ Type: %(ctype)s Size: %(size)d bytes Desc: %(desc)s Url : %(url)s -""")) - part.set_type('text/plain') +"""), lcset) outer = 0 # We still have to sanitize multipart messages to flat text because # Pipermail can't handle messages with list payloads. This is a kludge; @@ -242,8 +277,8 @@ Url : %(url)s # By default we take the charset of the first text/plain part in the # message, but if there was none, we'll use the list's preferred # language's charset. - if charset is None: - charset = Utils.GetCharSet(mlist.preferred_language) + if charset is None or charset == 'us-ascii': + charset = lcset # We now want to concatenate all the parts which have been scrubbed to # text/plain, into a single text/plain payload. 
We need to make sure # all the characters in the concatenated string are in the same @@ -261,20 +296,26 @@ Url : %(url)s t = part.get_payload(decode=1) except binascii.Error: t = part.get_payload() - partcharset = part.get_charset() + partcharset = part.get_content_charset() if partcharset and partcharset <> charset: try: t = unicode(t, partcharset, 'replace') - # Should use HTML-Escape, or try generalizing to UTF-8 - t = t.encode(charset, 'replace') - except UnicodeError: + except (UnicodeError, LookupError): # Replace funny characters t = unicode(t, 'ascii', 'replace').encode('ascii') + try: + # Should use HTML-Escape, or try generalizing to UTF-8 + t = t.encode(charset, 'replace') + except (UnicodeError, LookupError): + t = t.encode(lcset, 'replace') + # Separation is useful + if not t.endswith('\n'): + t += '\n' text.append(t) # Now join the text and set the payload sep = _('-------------- next part --------------\n') + del msg['content-type'] msg.set_payload(sep.join(text), charset) - msg.set_type('text/plain') del msg['content-transfer-encoding'] msg.add_header('Content-Transfer-Encoding', '8bit') return msg @@ -285,13 +326,13 @@ def makedirs(dir): # Create all the directories to store this attachment in try: os.makedirs(dir, 02775) + # Unfortunately, FreeBSD seems to be broken in that it doesn't honor + # the mode arg of mkdir(). + def twiddle(arg, dirname, names): + os.chmod(dirname, 02775) + os.path.walk(dir, twiddle, None) except OSError, e: if e.errno <> errno.EEXIST: raise - # Unfortunately, FreeBSD seems to be broken in that it doesn't honor the - # mode arg of mkdir(). - def twiddle(arg, dirname, names): - os.chmod(dirname, 02775) - os.path.walk(dir, twiddle, None) @@ -303,13 +344,15 @@ def save_attachment(mlist, msg, dir, filter_html=1): # BAW: mimetypes ought to handle non-standard, but commonly found types, # e.g. image/jpg (should be image/jpeg). For now we just store such # things as application/octet-streams since that seems the safest. 
- ext = mimetypes.guess_extension(msg.get_type()) + ctype = msg.get_content_type() + fnext = os.path.splitext(msg.get_filename(''))[1] + ext = guess_extension(ctype, fnext) if not ext: # We don't know what it is, so assume it's just a shapeless # application/octet-stream, unless the Content-Type: is # message/rfc822, in which case we know we'll coerce the type to # text/plain below. - if msg.get_type() == 'message/rfc822': + if ctype == 'message/rfc822': ext = '.txt' else: ext = '.bin' @@ -361,7 +404,7 @@ def save_attachment(mlist, msg, dir, filter_html=1): # ARCHIVE_HTML_SANITIZER is a string (which it must be or we wouldn't be # here), then send the attachment through the filter program for # sanitization - if filter_html and msg.get_type() == 'text/html': + if filter_html and ctype == 'text/html': base, ext = os.path.splitext(path) tmppath = base + '-tmp' + ext fp = open(tmppath, 'w') @@ -384,7 +427,7 @@ def save_attachment(mlist, msg, dir, filter_html=1): ext = '.txt' path = base + '.txt' # Is it a message/rfc822 attachment? - elif msg.get_type() == 'message/rfc822': + elif ctype == 'message/rfc822': submsg = msg.get_payload() # BAW: I'm sure we can eventually do better than this. :( decodedpayload = Utils.websafe(str(submsg)) diff --git a/Mailman/Handlers/ToDigest.py b/Mailman/Handlers/ToDigest.py index d735cd69..79090051 100644 --- a/Mailman/Handlers/ToDigest.py +++ b/Mailman/Handlers/ToDigest.py @@ -1,4 +1,4 @@ -# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# Copyright (C) 1998-2003 by the Free Software Foundation, Inc. 
# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -37,6 +37,7 @@ from email.MIMEBase import MIMEBase from email.MIMEText import MIMEText from email.MIMEMessage import MIMEMessage from email.Utils import getaddresses +from email.Header import decode_header, make_header, Header from Mailman import mm_cfg from Mailman import Utils @@ -46,19 +47,13 @@ from Mailman.MemberAdaptor import ENABLED from Mailman.Handlers.Decorate import decorate from Mailman.Queue.sbcache import get_switchboard from Mailman.Mailbox import Mailbox +from Mailman.Handlers.Scrubber import process as scrubber +from Mailman.Logging.Syslog import syslog _ = i18n._ - -# rfc1153 says we should keep only these headers, and present them in this -# exact order. -KEEP = ['Date', 'From', 'To', 'Cc', 'Subject', 'Message-ID', 'Keywords', - # I believe we should also keep these headers though. - 'In-Reply-To', 'References', 'Content-Type', 'MIME-Version', - 'Content-Transfer-Encoding', 'Precedence', 'Reply-To', - # Mailman 2.0 adds these headers, but they don't need to be kept from - # the original message: Message - ] +UEMPTYSTRING = u'' +EMPTYSTRING = '' @@ -73,7 +68,7 @@ def process(mlist, msg, msgdata): finally: os.umask(omask) g = Generator(mboxfp) - g(msg, unixfrom=1) + g.flatten(msg, unixfrom=1) # Calculate the current size of the accumulation file. 
This will not tell # us exactly how big the MIME, rfc1153, or any other generated digest # message will be, but it's the most easily available metric to decide @@ -135,24 +130,26 @@ def send_i18n_digests(mlist, mboxfp): mbox = Mailbox(mboxfp) # Prepare common information lang = mlist.preferred_language + lcset = Utils.GetCharSet(lang) realname = mlist.real_name volume = mlist.volume issue = mlist.next_digest_number digestid = _('%(realname)s Digest, Vol %(volume)d, Issue %(issue)d') + digestsubj = Header(digestid, lcset, header_name='Subject') # Set things up for the MIME digest. Only headers not added by # CookHeaders need be added here. mimemsg = Message.Message() mimemsg['Content-Type'] = 'multipart/mixed' mimemsg['MIME-Version'] = '1.0' mimemsg['From'] = mlist.GetRequestEmail() - mimemsg['Subject'] = digestid + mimemsg['Subject'] = digestsubj mimemsg['To'] = mlist.GetListEmail() mimemsg['Reply-To'] = mlist.GetListEmail() # Set things up for the rfc1153 digest plainmsg = StringIO() rfc1153msg = Message.Message() rfc1153msg['From'] = mlist.GetRequestEmail() - rfc1153msg['Subject'] = digestid + rfc1153msg['Subject'] = digestsubj rfc1153msg['To'] = mlist.GetListEmail() rfc1153msg['Reply-To'] = mlist.GetListEmail() separator70 = '-' * 70 @@ -170,20 +167,20 @@ def send_i18n_digests(mlist, mboxfp): 'got_owner_email': mlist.GetOwnerEmail(), }, mlist=mlist) # MIME - masthead = MIMEText(mastheadtxt, _charset=Utils.GetCharSet(lang)) + masthead = MIMEText(mastheadtxt, _charset=lcset) masthead['Content-Description'] = digestid mimemsg.attach(masthead) - # rfc1153 + # RFC 1153 print >> plainmsg, mastheadtxt print >> plainmsg # Now add the optional digest header if mlist.digest_header: headertxt = decorate(mlist, mlist.digest_header, _('digest header')) # MIME - header = MIMEText(headertxt) + header = MIMEText(headertxt, _charset=lcset) header['Content-Description'] = _('Digest Header') mimemsg.attach(header) - # rfc1153 + # RFC 1153 print >> plainmsg, headertxt print >> 
plainmsg # Now we have to cruise through all the messages accumulated in the @@ -196,7 +193,7 @@ def send_i18n_digests(mlist, mboxfp): toc = StringIO() print >> toc, _("Today's Topics:\n") # Now cruise through all the messages in the mailbox of digest messages, - # building the MIME payload and core of the rfc1153 digest. We'll also + # building the MIME payload and core of the RFC 1153 digest. We'll also # accumulate Subject: headers and authors for the table-of-contents. messages = [] msgcount = 0 @@ -208,23 +205,26 @@ def send_i18n_digests(mlist, mboxfp): msgcount += 1 messages.append(msg) # Get the Subject header - subject = msg.get('subject', _('(no subject)')) + msgsubj = msg.get('subject', _('(no subject)')) + subject = oneline(msgsubj, lcset) # Don't include the redundant subject prefix in the toc mo = re.match('(re:? *)?(%s)' % re.escape(mlist.subject_prefix), subject, re.IGNORECASE) if mo: subject = subject[:mo.start(2)] + subject[mo.end(2):] - addresses = getaddresses([msg.get('From', '')]) username = '' + addresses = getaddresses([oneline(msg.get('from', ''), lcset)]) # Take only the first author we find - if type(addresses) is ListType and len(addresses) > 0: + if isinstance(addresses, ListType) and addresses: username = addresses[0][0] + if not username: + username = addresses[0][1] if username: username = ' (%s)' % username - # Wrap the toc subject line - wrapped = Utils.wrap('%2d. %s' % (msgcount, subject)) - # Split by lines and see if the username can fit on the last line + # Put count and Wrap the toc subject line + wrapped = Utils.wrap('%2d. 
%s' % (msgcount, subject), 65) slines = wrapped.split('\n') + # See if the user's name can fit on the last line if len(slines[-1]) + len(username) > 70: slines.append(username) else: @@ -236,20 +236,26 @@ def send_i18n_digests(mlist, mboxfp): print >> toc, ' ', line first = 0 else: - print >> toc, ' ', line + print >> toc, ' ', line.lstrip() # We do not want all the headers of the original message to leak - # through in the digest messages. For simplicity, we'll leave the - # same set of headers in both digests, i.e. those required in rfc1153 + # through in the digest messages. For this phase, we'll leave the + # same set of headers in both digests, i.e. those required in RFC 1153 # plus a couple of other useful ones. We also need to reorder the - # headers according to rfc1153. + # headers according to RFC 1153. Later, we'll strip out headers + # for the specific MIME or plain digests. keeper = {} - for keep in KEEP: + all_keepers = {} + for header in (mm_cfg.MIME_DIGEST_KEEP_HEADERS + + mm_cfg.PLAIN_DIGEST_KEEP_HEADERS): + all_keepers[header] = 1 + all_keepers = all_keepers.keys() + for keep in all_keepers: keeper[keep] = msg.get_all(keep, []) # Now remove all unkempt headers :) for header in msg.keys(): del msg[header] - # And add back the kept header in the rfc1153 designated order - for keep in KEEP: + # And add back the kept header in the RFC 1153 designated order + for keep in all_keepers: for field in keeper[keep]: msg[keep] = field # And a bit of extra stuff @@ -263,13 +269,13 @@ def send_i18n_digests(mlist, mboxfp): return toctext = toc.getvalue() # MIME - tocpart = MIMEText(toctext) + tocpart = MIMEText(toctext, _charset=lcset) tocpart['Content-Description']= _("Today's Topics (%(msgcount)d messages)") mimemsg.attach(tocpart) - # rfc1153 + # RFC 1153 print >> plainmsg, toctext print >> plainmsg - # For rfc1153 digests, we now need the standard separator + # For RFC 1153 digests, we now need the standard separator print >> plainmsg, separator70 print 
>> plainmsg # Now go through and add each message @@ -285,20 +291,28 @@ def send_i18n_digests(mlist, mboxfp): else: print >> plainmsg, separator30 print >> plainmsg - g = Generator(plainmsg) - g(msg, unixfrom=0) + # Use Mailman.Handlers.Scrubber.process() to get plain text + msg = scrubber(mlist, msg) + # Honor the default setting + for h in mm_cfg.PLAIN_DIGEST_KEEP_HEADERS: + if msg[h]: + uh = Utils.wrap('%s: %s' % (h, oneline(msg[h], lcset))) + uh = '\n\t'.join(uh.split('\n')) + print >> plainmsg, uh + print >> plainmsg + print >> plainmsg, msg.get_payload(decode=1) # Now add the footer if mlist.digest_footer: footertxt = decorate(mlist, mlist.digest_footer, _('digest footer')) # MIME - footer = MIMEText(footertxt) + footer = MIMEText(footertxt, _charset=lcset) footer['Content-Description'] = _('Digest Footer') mimemsg.attach(footer) - # rfc1153 - # BAW: This is not strictly conformant rfc1153. The trailer is only + # RFC 1153 + # BAW: This is not strictly conformant RFC 1153. The trailer is only # supposed to contain two lines, i.e. the "End of ... Digest" line and # the row of asterisks. If this screws up MUAs, the solution is to - # add the footer as the last message in the rfc1153 digest. I just + # add the footer as the last message in the RFC 1153 digest. I just # hate the way that VM does that and I think it's confusing to users, # so don't do it unless there's a clamor. 
print >> plainmsg, separator30 @@ -343,9 +357,22 @@ def send_i18n_digests(mlist, mboxfp): recips=mimerecips, listname=mlist.internal_name(), isdigest=1) - # rfc1153 - rfc1153msg.set_payload(plainmsg.getvalue()) + # RFC 1153 + rfc1153msg.set_payload(plainmsg.getvalue(), lcset) virginq.enqueue(rfc1153msg, recips=plainrecips, listname=mlist.internal_name(), isdigest=1) + + + +def oneline(s, cset): + # Decode header string in one line and convert into specified charset + try: + h = make_header(decode_header(s)) + ustr = h.__unicode__() + oneline = UEMPTYSTRING.join(ustr.splitlines()) + return oneline.encode(cset, 'replace') + except (LookupError, UnicodeError): + # possibly charset problem. return with undecoded string in one line. + return EMPTYSTRING.join(s.splitlines()) -- cgit v1.2.3