Mailman/Handlers/CookHeaders.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

# Copyright (C) 1998-2003 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

"""Cook a message's Subject header.
"""

from __future__ import nested_scopes
import re
from types import UnicodeType

from email.Charset import Charset
from email.Header import Header, decode_header
from email.Utils import parseaddr, formataddr, getaddresses

from Mailman import mm_cfg
from Mailman import Utils
from Mailman.i18n import _
from Mailman.Logging.Syslog import syslog

CONTINUATION = ',\n\t'
COMMASPACE = ', '
MAXLINELEN = 78


def _isunicode(s):
    return isinstance(s, UnicodeType)

def uheader(mlist, s, header_name=None, continuation_ws='\t'):
    # Get the charset to encode the string in.  If this is us-ascii, we'll use
    # iso-8859-1 instead, just to get a little extra coverage, and because the
    # Header class tries us-ascii first anyway.
    charset = Utils.GetCharSet(mlist.preferred_language)
    if charset == 'us-ascii':
        charset = 'iso-8859-1'
    charset = Charset(charset)
    # Convert the string to unicode so Header will do the 3-charset encoding.
    # If s is a byte string and there are funky characters in it that don't
    # match the charset, we might as well replace them now.
    if not _isunicode(s):
        codec = charset.input_codec or 'ascii'
        s = unicode(s, codec, 'replace')
    # We purposefully leave no space b/w prefix and subject!
    return Header(s, charset, header_name=header_name,
                  continuation_ws=continuation_ws)


def process(mlist, msg, msgdata):
    # Set the "X-Ack: no" header if noack flag is set.
    if msgdata.get('noack'):
        del msg['x-ack']
        msg['X-Ack'] = 'no'
    # Because we're going to modify various important headers in the email
    # message, we want to save some of the information in the msgdata
    # dictionary for later.  Specifically, the sender header will get waxed,
    # but we need it for the Acknowledge module later.
    msgdata['original_sender'] = msg.get_sender()
    # VirginRunner sets _fasttrack for internally crafted messages.
    fasttrack = msgdata.get('_fasttrack')
    if not msgdata.get('isdigest') and not fasttrack:
        prefix_subject(mlist, msg, msgdata)
    # Mark message so we know we've been here, but leave any existing
    # X-BeenThere's intact.
    msg['X-BeenThere'] = mlist.GetListEmail()
    # Add Precedence: and other useful headers.  None of these are standard
    # and finding information on some of them are fairly difficult.  Some are
    # just common practice, and we'll add more here as they become necessary.
    # Good places to look are:
    #
    # http://www.dsv.su.se/~jpalme/ietf/jp-ietf-home.html
    # http://www.faqs.org/rfcs/rfc2076.html
    #
    # None of these headers are added if they already exist.  BAW: some
    # consider the advertising of this a security breach.  I.e. if there are
    # known exploits in a particular version of Mailman and we know a site is
    # using such an old version, they may be vulnerable.  It's too easy to
    # edit the code to add a configuration variable to handle this.
    if not msg.has_key('x-mailman-version'):
        msg['X-Mailman-Version'] = mm_cfg.VERSION
    # We set "Precedence: list" because this is the recommendation from the
    # sendmail docs, the most authoritative source of this header's semantics.
    if not msg.has_key('precedence'):
        msg['Precedence'] = 'list'
    # Reply-To: munging.  Do not do this if the message is "fast tracked",
    # meaning it is internally crafted and delivered to a specific user.  BAW:
    # Yuck, I really hate this feature but I've caved under the sheer pressure
    # of the (very vocal) folks want it.  OTOH, RFC 2822 allows Reply-To: to
    # be a list of addresses, so instead of replacing the original, simply
    # augment it.  RFC 2822 allows max one Reply-To: header so collapse them
    # if we're adding a value, otherwise don't touch it.  (Should we collapse
    # in all cases?)
    if not fasttrack:
        # A convenience function, requires nested scopes.  pair is (name, addr)
        new = []
        d = {}
        def add(pair):
            lcaddr = pair[1].lower()
            if d.has_key(lcaddr):
                return
            d[lcaddr] = pair
            new.append(pair)
        # List admin wants an explicit Reply-To: added
        if mlist.reply_goes_to_list == 2:
            add(parseaddr(mlist.reply_to_address))
        # If we're not first stripping existing Reply-To: then we need to add
        # the original Reply-To:'s to the list we're building up.  In both
        # cases we'll zap the existing field because RFC 2822 says max one is
        # allowed.
        if not mlist.first_strip_reply_to:
            orig = msg.get_all('reply-to', [])
            for pair in getaddresses(orig):
                add(pair)
        # Set Reply-To: header to point back to this list.  Add this last
        # because some folks think that some MUAs make it easier to delete
        # addresses from the right than from the left.
        if mlist.reply_goes_to_list == 1:
            i18ndesc = uheader(mlist, mlist.description)
            add((str(i18ndesc), mlist.GetListEmail()))
        del msg['reply-to']
        # Don't put Reply-To: back if there's nothing to add!
        if new:
            # Preserve order
            msg['Reply-To'] = COMMASPACE.join(
                [formataddr(pair) for pair in new])
        # The To field normally contains the list posting address.  However
        # when messages are fully personalized, that header will get
        # overwritten with the address of the recipient.  We need to get the
        # posting address in one of the recipient headers or they won't be
        # able to reply back to the list.  It's possible the posting address
        # was munged into the Reply-To header, but if not, we'll add it to a
        # Cc header.  BAW: should we force it into a Reply-To header in the
        # above code?
        if mlist.personalize == 2 and mlist.reply_goes_to_list <> 1:
            # Watch out for existing Cc headers, merge, and remove dups.  Note
            # that RFC 2822 says only zero or one Cc header is allowed.
            new = []
            d = {}
            for pair in getaddresses(msg.get_all('cc', [])):
                add(pair)
            i18ndesc = uheader(mlist, mlist.description)
            add((str(i18ndesc), mlist.GetListEmail()))
            del msg['Cc']
            msg['Cc'] = COMMASPACE.join([formataddr(pair) for pair in new])
    # Add list-specific headers as defined in RFC 2369 and RFC 2919, but only
    # if the message is being crafted for a specific list (e.g. not for the
    # password reminders).
    #
    # BAW: Some people really hate the List-* headers.  It seems that the free
    # version of Eudora (possibly on for some platforms) does not hide these
    # headers by default, pissing off their users.  Too bad.  Fix the MUAs.
    if msgdata.get('_nolist') or not mlist.include_rfc2369_headers:
        return
    # Pre-calculate
    listid = '<%s.%s>' % (mlist.internal_name(), mlist.host_name)
    if mlist.description:
        # Make sure description is properly i18n'd
        listid_h = uheader(mlist, mlist.description, 'List-Id')
        listid_h.append(listid, 'us-ascii')
    else:
        # For wrapping
        listid_h = Header(listid, 'us-ascii', header_name='List-Id')
    # We always add a List-ID: header.
    del msg['list-id']
    msg['List-Id'] = listid_h
    # For internally crafted messages, we
    # also add a (nonstandard), "X-List-Administrivia: yes" header.  For all
    # others (i.e. those coming from list posts), we adda a bunch of other RFC
    # 2369 headers.
    requestaddr = mlist.GetRequestEmail()
    subfieldfmt = '<%s>, <mailto:%s?subject=%ssubscribe>'
    listinfo = mlist.GetScriptURL('listinfo', absolute=1)
    headers = {}
    if msgdata.get('reduced_list_headers'):
        headers['X-List-Administrivia'] = 'yes'
    else:
        headers.update({
            'List-Help'       : '<mailto:%s?subject=help>' % requestaddr,
            'List-Unsubscribe': subfieldfmt % (listinfo, requestaddr, 'un'),
            'List-Subscribe'  : subfieldfmt % (listinfo, requestaddr, ''),
            })
        # List-Post: is controlled by a separate attribute
        if mlist.include_list_post_header:
            headers['List-Post'] = '<mailto:%s>' % mlist.GetListEmail()
        # Add this header if we're archiving
        if mlist.archive:
            archiveurl = mlist.GetBaseArchiveURL()
            if archiveurl.endswith('/'):
                archiveurl = archiveurl[:-1]
            headers['List-Archive'] = '<%s>' % archiveurl
    # First we delete any pre-existing headers because the RFC permits only
    # one copy of each, and we want to be sure it's ours.
    for h, v in headers.items():
        del msg[h]
        # Wrap these lines if they are too long.  78 character width probably
        # shouldn't be hardcoded, but is at least text-MUA friendly.  The
        # adding of 2 is for the colon-space separator.
        if len(h) + 2 + len(v) > 78:
            v = CONTINUATION.join(v.split(', '))
        msg[h] = v


def prefix_subject(mlist, msg, msgdata):
    # Add the subject prefix unless the message is a digest or is being fast
    # tracked (e.g. internally crafted, delivered to a single user such as the
    # list admin).
    prefix = mlist.subject_prefix
    subject = msg.get('subject', '')
    # Try to figure out what the continuation_ws is for the header
    if isinstance(subject, Header):
        lines = str(subject).splitlines()
    else:
        lines = subject.splitlines()
    ws = '\t'
    if len(lines) > 1 and lines[1] and lines[1][0] in ' \t':
        ws = lines[1][0]
    msgdata['origsubj'] = subject
    if not subject:
        subject = _('(no subject)')
    # The header may be multilingual; decode it from base64/quopri and search
    # each chunk for the prefix.  BAW: Note that if the prefix contains spaces
    # and each word of the prefix is encoded in a different chunk in the
    # header, we won't find it.  I think in practice that's unlikely though.
    headerbits = decode_header(subject)
    if prefix and subject:
        pattern = re.escape(prefix.strip())
        for decodedsubj, charset in headerbits:
            if re.search(pattern, decodedsubj, re.IGNORECASE):
                # The subject's already got the prefix, so don't change it
                return
    del msg['subject']
    # Get the header as a Header instance, with proper unicode conversion
    h = uheader(mlist, prefix, 'Subject', continuation_ws=ws)
    for s, c in headerbits:
        # Once again, convert the string to unicode.
        if c is None:
            c = Charset('iso-8859-1')
        if not isinstance(c, Charset):
            c = Charset(c)
        if not _isunicode(s):
            codec = c.input_codec or 'ascii'
            try:
                s = unicode(s, codec, 'replace')
            except LookupError:
                # Unknown codec, is this default reasonable?
                s = unicode(s, Utils.GetCharSet(mlist.preferred_language),
                            'replace')
        h.append(s, c)
    msg['Subject'] = h