Mailman/Handlers/SpamDetect.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160

# Copyright (C) 1998-2013 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Do more detailed spam detection.

This module hard codes site wide spam detection.  By hacking the
KNOWN_SPAMMERS variable, you can set up more regular expression matches
against message headers.  If spam is detected the message is discarded
immediately.

TBD: This needs to be made more configurable and robust.
"""

import re

from email.Errors import HeaderParseError
from email.Header import decode_header
from email.Utils import parseaddr

from Mailman import mm_cfg
from Mailman import Errors
from Mailman import i18n
from Mailman import Utils
from Mailman.Handlers.Hold import hold_for_approval

try:
    True, False
except NameError:
    True = 1
    False = 0

# First, play footsie with _ so that the following are marked as translated,
# but aren't actually translated until we need the text later on.
def _(s):
    return s


class SpamDetected(Errors.DiscardMessage):
    """The message contains known spam"""

class HeaderMatchHold(Errors.HoldMessage):
    reason = _('The message headers matched a filter rule')


# And reset the translator
_ = i18n._


def getDecodedHeaders(msg, cset='utf-8'):
    """Returns a string containing all the headers of msg, unfolded and
    RFC 2047 decoded and encoded in cset.
    """

    headers = ''
    for h, v in msg.items():
        uvalue = u''
        try:
            v = decode_header(re.sub('\n\s', ' ', v))
        except HeaderParseError:
            v = [(v, 'us-ascii')]
        for frag, cs in v:
            if not cs:
                cs = 'us-ascii'
            uvalue += unicode(frag, cs, 'replace')
        headers += '%s: %s\n' % (h, uvalue.encode(cset, 'replace'))
    return headers


def process(mlist, msg, msgdata):
    # Before anything else, check DMARC if necessary.  We do this as early
    # as possible so reject/discard actions trump other holds/approvals and
    # wrap/munge actions get flagged even for approved messages.
    msgdata['from_is_list'] = 0
    dn, addr = parseaddr(msg.get('from'))
    if addr and mlist.dmarc_moderation_action > 0:
        if Utils.IsDMARCProhibited(mlist, addr):
            # Note that for dmarc_moderation_action, 0 = Accept, 
            #    1 = Munge, 2 = Wrap, 3 = Reject, 4 = Discard
            if mlist.dmarc_moderation_action == 1:
                msgdata['from_is_list'] = 1
            elif mlist.dmarc_moderation_action == 2:
                msgdata['from_is_list'] = 2
            elif mlist.dmarc_moderation_action == 3:
                # Reject
                text = mlist.dmarc_moderation_notice
                if text:
                    text = Utils.wrap(text)
                else:
                    text = Utils.wrap(_(
"""You are not allowed to post to this mailing list From: a domain which
publishes a DMARC policy of reject or quarantine, and your message has been
automatically rejected.  If you think that your messages are being rejected in
error, contact the mailing list owner at %(listowner)s."""))
                raise Errors.RejectMessage, text
            elif mlist.dmarc_moderation_action == 4:
                raise Errors.DiscardMessage
    if msgdata.get('approved'):
        return
    # First do site hard coded header spam checks
    for header, regex in mm_cfg.KNOWN_SPAMMERS:
        cre = re.compile(regex, re.IGNORECASE)
        for value in msg.get_all(header, []):
            mo = cre.search(value)
            if mo:
                # we've detected spam, so throw the message away
                raise SpamDetected
    # Now do header_filter_rules
    # TK: Collect headers in sub-parts because attachment filename
    # extension may be a clue to possible virus/spam.
    headers = ''
    # Get the character set of the lists preferred language for headers
    lcset = Utils.GetCharSet(mlist.preferred_language)
    for p in msg.walk():
        headers += getDecodedHeaders(p, lcset)
    for patterns, action, empty in mlist.header_filter_rules:
        if action == mm_cfg.DEFER:
            continue
        for pattern in patterns.splitlines():
            if pattern.startswith('#'):
                continue
            # ignore 'empty' patterns
            if not pattern.strip():
                continue
            if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE):
                if action == mm_cfg.DISCARD:
                    raise Errors.DiscardMessage
                if action == mm_cfg.REJECT:
                    if msgdata.get('toowner'):
                        # Don't send rejection notice if addressed to '-owner'
                        # because it may trigger a loop of notices if the
                        # sender address is forged.  We just discard it here.
                        raise Errors.DiscardMessage
                    raise Errors.RejectMessage(
                        _('Message rejected by filter rule match'))
                if action == mm_cfg.HOLD:
                    if msgdata.get('toowner'):
                        # Don't hold '-owner' addressed message.  We just
                        # pass it here but list-owner can set this to be
                        # discarded on the GUI if he wants.
                        return
                    hold_for_approval(mlist, msg, msgdata, HeaderMatchHold)
                if action == mm_cfg.ACCEPT:
                    return