aboutsummaryrefslogtreecommitdiffstats
path: root/Mailman/Handlers/SpamDetect.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Mailman/Handlers/SpamDetect.py93
1 files changed, 82 insertions, 11 deletions
diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py
index 8d26da03..aaddff5f 100644
--- a/Mailman/Handlers/SpamDetect.py
+++ b/Mailman/Handlers/SpamDetect.py
@@ -1,4 +1,4 @@
-# Copyright (C) 1998-2012 by the Free Software Foundation, Inc.
+# Copyright (C) 1998-2016 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
@@ -27,13 +27,17 @@ TBD: This needs to be made more configurable and robust.
import re
+from unicodedata import normalize
+from email.Errors import HeaderParseError
from email.Header import decode_header
+from email.Utils import parseaddr
from Mailman import mm_cfg
from Mailman import Errors
from Mailman import i18n
-from Mailman.Utils import GetCharSet
+from Mailman import Utils
from Mailman.Handlers.Hold import hold_for_approval
+from Mailman.Logging.Syslog import syslog
try:
True, False
@@ -61,24 +65,81 @@ _ = i18n._
def getDecodedHeaders(msg, cset='utf-8'):
- """Returns a string containing all the headers of msg, unfolded and
- RFC 2047 decoded and encoded in cset.
+ """Returns a unicode containing all the headers of msg, unfolded and
+ RFC 2047 decoded, normalized and separated by new lines.
"""
- headers = ''
+ headers = u''
for h, v in msg.items():
uvalue = u''
- v = decode_header(re.sub('\n\s', ' ', v))
+ try:
+ v = decode_header(re.sub('\n\s', ' ', v))
+ except HeaderParseError:
+ v = [(v, 'us-ascii')]
for frag, cs in v:
if not cs:
cs = 'us-ascii'
- uvalue += unicode(frag, cs, 'replace')
- headers += '%s: %s\n' % (h, uvalue.encode(cset, 'replace'))
+ try:
+ uvalue += unicode(frag, cs, 'replace')
+ except LookupError:
+ # The encoding charset is unknown. At this point, frag
+ # has been QP or base64 decoded into a byte string whose
+ # charset we don't know how to handle. We will try to
+ # unicode it as iso-8859-1 which may result in a garbled
+ # mess, but we have to do something.
+ uvalue += unicode(frag, 'iso-8859-1', 'replace')
+ uhdr = h.decode('us-ascii', 'replace')
+ headers += u'%s: %s\n' % (h, normalize(mm_cfg.NORMALIZE_FORM, uvalue))
return headers
def process(mlist, msg, msgdata):
+ # Before anything else, check DMARC if necessary. We do this as early
+ # as possible so reject/discard actions trump other holds/approvals and
+ # wrap/munge actions get flagged even for approved messages.
+ # But not for owner mail which should not be subject to DMARC reject or
+ # discard actions.
+ if not msgdata.get('toowner'):
+ msgdata['from_is_list'] = 0
+ dn, addr = parseaddr(msg.get('from'))
+ if addr and mlist.dmarc_moderation_action > 0:
+ if Utils.IsDMARCProhibited(mlist, addr):
+ # Note that for dmarc_moderation_action, 0 = Accept,
+ # 1 = Munge, 2 = Wrap, 3 = Reject, 4 = Discard
+ if mlist.dmarc_moderation_action == 1:
+ msgdata['from_is_list'] = 1
+ elif mlist.dmarc_moderation_action == 2:
+ msgdata['from_is_list'] = 2
+ elif mlist.dmarc_moderation_action == 3:
+ # Reject
+ text = mlist.dmarc_moderation_notice
+ if text:
+ text = Utils.wrap(text)
+ else:
+ text = Utils.wrap(_(
+"""You are not allowed to post to this mailing list From: a domain which
+publishes a DMARC policy of reject or quarantine, and your message has been
+automatically rejected. If you think that your messages are being rejected in
+error, contact the mailing list owner at %(listowner)s."""))
+ raise Errors.RejectMessage, text
+ elif mlist.dmarc_moderation_action == 4:
+ raise Errors.DiscardMessage
+
+ # Get member address if any.
+ for sender in msg.get_senders():
+ if mlist.isMember(sender):
+ break
+ else:
+ sender = msg.get_sender()
+ if (mlist.member_verbosity_threshold > 0 and
+ Utils.IsVerboseMember(mlist, sender)
+ ):
+ mlist.setMemberOption(sender, mm_cfg.Moderate, 1)
+ syslog('vette',
+ '%s: Automatically Moderated %s for verbose postings.',
+ mlist.real_name, sender)
+
if msgdata.get('approved'):
return
# First do site hard coded header spam checks
@@ -92,9 +153,9 @@ def process(mlist, msg, msgdata):
# Now do header_filter_rules
# TK: Collect headers in sub-parts because attachment filename
# extension may be a clue to possible virus/spam.
- headers = ''
+ headers = u''
# Get the character set of the lists preferred language for headers
- lcset = GetCharSet(mlist.preferred_language)
+ lcset = Utils.GetCharSet(mlist.preferred_language)
for p in msg.walk():
headers += getDecodedHeaders(p, lcset)
for patterns, action, empty in mlist.header_filter_rules:
@@ -106,7 +167,17 @@ def process(mlist, msg, msgdata):
# ignore 'empty' patterns
if not pattern.strip():
continue
- if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE):
+ pattern = Utils.xml_to_unicode(pattern, lcset)
+ pattern = normalize(mm_cfg.NORMALIZE_FORM, pattern)
+ try:
+ mo = re.search(pattern,
+ headers,
+ re.IGNORECASE|re.MULTILINE|re.UNICODE)
+ except (re.error, TypeError):
+ syslog('error',
+ 'ignoring header_filter_rules invalid pattern: %s',
+ pattern)
+ if mo:
if action == mm_cfg.DISCARD:
raise Errors.DiscardMessage
if action == mm_cfg.REJECT: