diff options
Diffstat (limited to '')
-rwxr-xr-x | Mailman/Handlers/CookHeaders.py | 227 |
1 files changed, 191 insertions, 36 deletions
diff --git a/Mailman/Handlers/CookHeaders.py b/Mailman/Handlers/CookHeaders.py index a2096172..3e2806f0 100755 --- a/Mailman/Handlers/CookHeaders.py +++ b/Mailman/Handlers/CookHeaders.py @@ -1,4 +1,4 @@ -# Copyright (C) 1998-2011 by the Free Software Foundation, Inc. +# Copyright (C) 1998-2017 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -15,7 +15,10 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # USA. -"""Cook a message's Subject header.""" +"""Cook a message's Subject header. +Also do other manipulations of From:, Reply-To: and Cc: depending on +list configuration. +""" from __future__ import nested_scopes import re @@ -26,12 +29,13 @@ from email.Header import Header, decode_header, make_header from email.Utils import parseaddr, formataddr, getaddresses from email.Errors import HeaderParseError +from Mailman import i18n from Mailman import mm_cfg from Mailman import Utils from Mailman.i18n import _ from Mailman.Logging.Syslog import syslog -CONTINUATION = ',\n\t' +CONTINUATION = ',\n ' COMMASPACE = ', ' MAXLINELEN = 78 @@ -49,7 +53,7 @@ def _isunicode(s): nonascii = re.compile('[^\s!-~]') -def uheader(mlist, s, header_name=None, continuation_ws='\t', maxlinelen=None): +def uheader(mlist, s, header_name=None, continuation_ws=' ', maxlinelen=None): # Get the charset to encode the string in. Then search if there is any # non-ascii character is in the string. If there is and the charset is # us-ascii then we use iso-8859-1 instead. If the string is ascii only @@ -62,20 +66,42 @@ def uheader(mlist, s, header_name=None, continuation_ws='\t', maxlinelen=None): else: # there is no nonascii so ... charset = 'us-ascii' - return Header(s, charset, maxlinelen, header_name, continuation_ws) + try: + return Header(s, charset, maxlinelen, header_name, continuation_ws) + except UnicodeError: + syslog('error', 'list: %s: can\'t decode "%s" as %s', + mlist.internal_name(), s, charset) + return Header('', charset, maxlinelen, header_name, continuation_ws) + +def change_header(name, value, mlist, msg, msgdata, delete=True, repl=True): + if ((msgdata.get('from_is_list') == 2 or + (msgdata.get('from_is_list') == 0 and mlist.from_is_list == 2)) and + not msgdata.get('_fasttrack') + ) or name.lower() in ('from', 'reply-to', 'cc'): + # The or name.lower() in ... above is because when we are munging + # the From:, we want to defer the resultant changes to From:, + # Reply-To:, and/or Cc: until after the message passes through + # ToDigest, ToArchive and ToUsenet. Thus, we put them in + # msgdata[add_header] here and apply them in WrapMessage. + msgdata.setdefault('add_header', {})[name] = value + elif repl or not msg.has_key(name): + if delete: + del msg[name] + msg[name] = value def process(mlist, msg, msgdata): # Set the "X-Ack: no" header if noack flag is set. if msgdata.get('noack'): - del msg['x-ack'] - msg['X-Ack'] = 'no' + change_header('X-Ack', 'no', mlist, msg, msgdata) # Because we're going to modify various important headers in the email # message, we want to save some of the information in the msgdata # dictionary for later. Specifically, the sender header will get waxed, # but we need it for the Acknowledge module later. - msgdata['original_sender'] = msg.get_sender() + # We may have already saved it; if so, don't clobber it here. + if 'original_sender' not in msgdata: + msgdata['original_sender'] = msg.get_sender() # VirginRunner sets _fasttrack for internally crafted messages. fasttrack = msgdata.get('_fasttrack') if not msgdata.get('isdigest') and not fasttrack: @@ -87,7 +113,8 @@ def process(mlist, msg, msgdata): pass # Mark message so we know we've been here, but leave any existing # X-BeenThere's intact. - msg['X-BeenThere'] = mlist.GetListEmail() + change_header('X-BeenThere', mlist.GetListEmail(), + mlist, msg, msgdata, delete=False) # Add Precedence: and other useful headers. None of these are standard # and finding information on some of them are fairly difficult. Some are # just common practice, and we'll add more here as they become necessary. @@ -101,12 +128,68 @@ def process(mlist, msg, msgdata): # known exploits in a particular version of Mailman and we know a site is # using such an old version, they may be vulnerable. It's too easy to # edit the code to add a configuration variable to handle this. - if not msg.has_key('x-mailman-version'): - msg['X-Mailman-Version'] = mm_cfg.VERSION + change_header('X-Mailman-Version', mm_cfg.VERSION, + mlist, msg, msgdata, repl=False) # We set "Precedence: list" because this is the recommendation from the # sendmail docs, the most authoritative source of this header's semantics. - if not msg.has_key('precedence'): - msg['Precedence'] = 'list' + change_header('Precedence', 'list', + mlist, msg, msgdata, repl=False) + # Do we change the from so the list takes ownership of the email + if (msgdata.get('from_is_list') or mlist.from_is_list) and not fasttrack: + # Be as robust as possible here. + faddrs = getaddresses(msg.get_all('from', [])) + # Strip the nulls and bad emails. + faddrs = [x for x in faddrs if x[1].find('@') > 0] + if len(faddrs) == 1: + realname, email = o_from = faddrs[0] + else: + # No From: or multiple addresses. Just punt and take + # the get_sender result. + realname = '' + email = msgdata['original_sender'] + o_from = (realname, email) + if not realname: + if mlist.isMember(email): + realname = mlist.getMemberName(email) or email + else: + realname = email + # Remove domain from realname if it looks like an email address + realname = re.sub(r'@([^ .]+\.)+[^ .]+$', '---', realname) + # Make a display name and RFC 2047 encode it if necessary. This is + # difficult and kludgy. If the realname came from From: it should be + # ascii or RFC 2047 encoded. If it came from the list, it should be + # in the charset of the list's preferred language or possibly unicode. + # if it's from the email address, it should be ascii. In any case, + # make it a unicode. + if isinstance(realname, unicode): + urn = realname + else: + rn, cs = ch_oneline(realname) + urn = unicode(rn, cs, errors='replace') + # likewise, the list's real_name which should be ascii, but use the + # charset of the list's preferred_language which should be a superset. + lcs = Utils.GetCharSet(mlist.preferred_language) + ulrn = unicode(mlist.real_name, lcs, errors='replace') + # get translated 'via' with dummy replacements + realname = '%(realname)s' + lrn = '%(lrn)s' + # We want the i18n context to be the list's preferred_language. It + # could be the poster's. + otrans = i18n.get_translation() + i18n.set_language(mlist.preferred_language) + via = _('%(realname)s via %(lrn)s') + i18n.set_translation(otrans) + uvia = unicode(via, lcs, errors='replace') + # Replace the dummy replacements. + uvia = re.sub(u'%\(lrn\)s', ulrn, re.sub(u'%\(realname\)s', urn, uvia)) + # And get an RFC 2047 encoded header string. + dn = str(Header(uvia, lcs)) + change_header('From', + formataddr((dn, mlist.GetListEmail())), + mlist, msg, msgdata) + else: + # Use this as a flag + o_from = None # Reply-To: munging. Do not do this if the message is "fast tracked", # meaning it is internally crafted and delivered to a specific user. BAW: # Yuck, I really hate this feature but I've caved under the sheer pressure @@ -115,6 +198,23 @@ def process(mlist, msg, msgdata): # augment it. RFC 2822 allows max one Reply-To: header so collapse them # if we're adding a value, otherwise don't touch it. (Should we collapse # in all cases?) + # MAS: We need to do some things with the original From: if we've munged + # it for DMARC mitigation. We have goals for this process which are + # not completely compatible, so we do the best we can. Our goals are: + # 1) as long as the list is not anonymous, the original From: address + # should be obviously exposed, i.e. not just in a header that MUAs + # don't display. + # 2) the original From: address should not be in a comment or display + # name in the new From: because it is claimed that multiple domains + # in any fields in From: are indicative of spamminess. This means + # it should be in Reply-To: or Cc:. + # 3) the behavior of an MUA doing a 'reply' or 'reply all' should be + # consistent regardless of whether or not the From: is munged. + # Goal 3) implies sometimes the original From: should be in Reply-To: + # and sometimes in Cc:, and even so, this goal won't be achieved in + # all cases with all MUAs. In cases of conflict, the above ordering of + # goals is priority order. + if not fasttrack: # A convenience function, requires nested scopes. pair is (name, addr) new = [] @@ -132,22 +232,43 @@ def process(mlist, msg, msgdata): # the original Reply-To:'s to the list we're building up. In both # cases we'll zap the existing field because RFC 2822 says max one is # allowed. + o_rt = False if not mlist.first_strip_reply_to: orig = msg.get_all('reply-to', []) for pair in getaddresses(orig): + # There's an original Reply-To: and we're not removing it. add(pair) + o_rt = True + # We also need to put the old From: in Reply-To: in all cases where + # it is not going in Cc:. This is when reply_goes_to_list == 0 and + # either there was no original Reply-To: or we stripped it. + # However, if there was an original Reply-To:, unstripped, and it + # contained the original From: address we need to flag that it's + # there so we don't add the original From: to Cc: + if o_from and mlist.reply_goes_to_list == 0: + if o_rt: + if d.has_key(o_from[1].lower()): + # Original From: address is in original Reply-To:. + # Pretend we added it. + o_from = None + else: + add(o_from) + # Flag that we added it. + o_from = None # Set Reply-To: header to point back to this list. Add this last # because some folks think that some MUAs make it easier to delete # addresses from the right than from the left. if mlist.reply_goes_to_list == 1: i18ndesc = uheader(mlist, mlist.description, 'Reply-To') add((str(i18ndesc), mlist.GetListEmail())) - del msg['reply-to'] # Don't put Reply-To: back if there's nothing to add! if new: # Preserve order - msg['Reply-To'] = COMMASPACE.join( - [formataddr(pair) for pair in new]) + change_header('Reply-To', + COMMASPACE.join([formataddr(pair) for pair in new]), + mlist, msg, msgdata) + else: + del msg['reply-to'] # The To field normally contains the list posting address. However # when messages are fully personalized, that header will get # overwritten with the address of the recipient. We need to get the @@ -158,18 +279,38 @@ def process(mlist, msg, msgdata): # above code? # Also skip Cc if this is an anonymous list as list posting address # is already in From and Reply-To in this case. - if mlist.personalize == 2 and mlist.reply_goes_to_list <> 1 \ - and not mlist.anonymous_list: + # We do add the Cc in cases where From: header munging is being done + # because even though the list address is in From:, the Reply-To: + # poster will override it. Brain dead MUAs may then address the list + # twice on a 'reply all', but reasonable MUAs should do the right + # thing. We also add the original From: to Cc: if it wasn't added + # to Reply-To: + add_list = (mlist.personalize == 2 and + mlist.reply_goes_to_list <> 1 and + not mlist.anonymous_list) + if add_list or o_from: # Watch out for existing Cc headers, merge, and remove dups. Note # that RFC 2822 says only zero or one Cc header is allowed. new = [] d = {} - for pair in getaddresses(msg.get_all('cc', [])): - add(pair) - i18ndesc = uheader(mlist, mlist.description, 'Cc') - add((str(i18ndesc), mlist.GetListEmail())) - del msg['Cc'] - msg['Cc'] = COMMASPACE.join([formataddr(pair) for pair in new]) + # If we're adding the original From:, add it first. + if o_from: + add(o_from) + # AvoidDuplicates may have set a new Cc: in msgdata.add_header, + # so check that. + if (msgdata.has_key('add_header') and + msgdata['add_header'].has_key('Cc')): + for pair in getaddresses([msgdata['add_header']['Cc']]): + add(pair) + else: + for pair in getaddresses(msg.get_all('cc', [])): + add(pair) + if add_list: + i18ndesc = uheader(mlist, mlist.description, 'Cc') + add((str(i18ndesc), mlist.GetListEmail())) + change_header('Cc', + COMMASPACE.join([formataddr(pair) for pair in new]), + mlist, msg, msgdata) # Add list-specific headers as defined in RFC 2369 and RFC 2919, but only # if the message is being crafted for a specific list (e.g. not for the # password reminders). @@ -191,8 +332,7 @@ def process(mlist, msg, msgdata): # without desc we need to ensure the MUST brackets listid_h = '<%s>' % listid # We always add a List-ID: header. - del msg['list-id'] - msg['List-Id'] = listid_h + change_header('List-Id', listid_h, mlist, msg, msgdata) # For internally crafted messages, we also add a (nonstandard), # "X-List-Administrivia: yes" header. For all others (i.e. those coming # from list posts), we add a bunch of other RFC 2369 headers. @@ -219,13 +359,12 @@ def process(mlist, msg, msgdata): # First we delete any pre-existing headers because the RFC permits only # one copy of each, and we want to be sure it's ours. for h, v in headers.items(): - del msg[h] # Wrap these lines if they are too long. 78 character width probably # shouldn't be hardcoded, but is at least text-MUA friendly. The # adding of 2 is for the colon-space separator. if len(h) + 2 + len(v) > 78: v = CONTINUATION.join(v.split(', ')) - msg[h] = v + change_header(h, v, mlist, msg, msgdata) @@ -242,7 +381,7 @@ def prefix_subject(mlist, msg, msgdata): lines = str(subject).splitlines() else: lines = subject.splitlines() - ws = '\t' + ws = ' ' if len(lines) > 1 and lines[1] and lines[1][0] in ' \t': ws = lines[1][0] msgdata['origsubj'] = subject @@ -272,16 +411,29 @@ def prefix_subject(mlist, msg, msgdata): else: old_style = mm_cfg.OLD_STYLE_PREFIXING subject = re.sub(prefix_pattern, '', subject) - rematch = re.match('((RE|AW|SV|VS)\s*(\[\d+\])?\s*:\s*)+', subject, re.I) + # Previously the following re didn't have the first \s*. It would fail + # if the incoming Subject: was like '[prefix] Re: Re: Re:' because of the + # leading space after stripping the prefix. It is not known what MUA would + # create such a Subject:, but the issue was reported. + rematch = re.match( + '(\s*(RE|AW|SV|VS)\s*(\[\d+\])?\s*:\s*)+', + subject, re.I) if rematch: subject = subject[rematch.end():] recolon = 'Re:' else: recolon = '' + # Strip leading and trailing whitespace from subject. + subject = subject.strip() # At this point, subject may become null if someone post mail with - # subject: [subject prefix] - if subject.strip() == '': + # Subject: [subject prefix] + if subject == '': + # We want the i18n context to be the list's preferred_language. It + # could be the poster's. + otrans = i18n.get_translation() + i18n.set_language(mlist.preferred_language) subject = _('(no subject)') + i18n.set_translation(otrans) cset = Utils.GetCharSet(mlist.preferred_language) subject = unicode(subject, cset) # and substitute %d in prefix with post_id @@ -302,8 +454,7 @@ def prefix_subject(mlist, msg, msgdata): h = u' '.join([prefix, subject]) h = h.encode('us-ascii') h = uheader(mlist, h, 'Subject', continuation_ws=ws) - del msg['subject'] - msg['Subject'] = h + change_header('Subject', h, mlist, msg, msgdata) ss = u' '.join([recolon, subject]) ss = ss.encode('us-ascii') ss = uheader(mlist, ss, 'Subject', continuation_ws=ws) @@ -312,6 +463,11 @@ def prefix_subject(mlist, msg, msgdata): except UnicodeError: pass # Get the header as a Header instance, with proper unicode conversion + # Because of rfc2047 encoding, spaces between encoded words can be + # insignificant, so we need to append spaces to our encoded stuff. + prefix += ' ' + if recolon: + recolon += ' ' if old_style: h = uheader(mlist, recolon, 'Subject', continuation_ws=ws) h.append(prefix) @@ -321,8 +477,7 @@ def prefix_subject(mlist, msg, msgdata): # TK: Subject is concatenated and unicode string. subject = subject.encode(cset, 'replace') h.append(subject, cset) - del msg['subject'] - msg['Subject'] = h + change_header('Subject', h, mlist, msg, msgdata) ss = uheader(mlist, recolon, 'Subject', continuation_ws=ws) ss.append(subject, cset) msgdata['stripped_subject'] = ss |