diff options
Diffstat (limited to '')
-rw-r--r-- | Mailman/Handlers/SpamDetect.py | 93 |
1 files changed, 82 insertions, 11 deletions
diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py index 8d26da03..aaddff5f 100644 --- a/Mailman/Handlers/SpamDetect.py +++ b/Mailman/Handlers/SpamDetect.py @@ -1,4 +1,4 @@ -# Copyright (C) 1998-2012 by the Free Software Foundation, Inc. +# Copyright (C) 1998-2016 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -27,13 +27,17 @@ TBD: This needs to be made more configurable and robust. import re +from unicodedata import normalize +from email.Errors import HeaderParseError from email.Header import decode_header +from email.Utils import parseaddr from Mailman import mm_cfg from Mailman import Errors from Mailman import i18n -from Mailman.Utils import GetCharSet +from Mailman import Utils from Mailman.Handlers.Hold import hold_for_approval +from Mailman.Logging.Syslog import syslog try: True, False @@ -61,24 +65,81 @@ _ = i18n._ def getDecodedHeaders(msg, cset='utf-8'): - """Returns a string containing all the headers of msg, unfolded and - RFC 2047 decoded and encoded in cset. + """Returns a unicode containing all the headers of msg, unfolded and + RFC 2047 decoded, normalized and separated by new lines. """ - headers = '' + headers = u'' for h, v in msg.items(): uvalue = u'' - v = decode_header(re.sub('\n\s', ' ', v)) + try: + v = decode_header(re.sub('\n\s', ' ', v)) + except HeaderParseError: + v = [(v, 'us-ascii')] for frag, cs in v: if not cs: cs = 'us-ascii' - uvalue += unicode(frag, cs, 'replace') - headers += '%s: %s\n' % (h, uvalue.encode(cset, 'replace')) + try: + uvalue += unicode(frag, cs, 'replace') + except LookupError: + # The encoding charset is unknown. At this point, frag + # has been QP or base64 decoded into a byte string whose + # charset we don't know how to handle. We will try to + # unicode it as iso-8859-1 which may result in a garbled + # mess, but we have to do something. + uvalue += unicode(frag, 'iso-8859-1', 'replace') + uhdr = h.decode('us-ascii', 'replace') + headers += u'%s: %s\n' % (h, normalize(mm_cfg.NORMALIZE_FORM, uvalue)) return headers def process(mlist, msg, msgdata): + # Before anything else, check DMARC if necessary. We do this as early + # as possible so reject/discard actions trump other holds/approvals and + # wrap/munge actions get flagged even for approved messages. + # But not for owner mail which should not be subject to DMARC reject or + # discard actions. + if not msgdata.get('toowner'): + msgdata['from_is_list'] = 0 + dn, addr = parseaddr(msg.get('from')) + if addr and mlist.dmarc_moderation_action > 0: + if Utils.IsDMARCProhibited(mlist, addr): + # Note that for dmarc_moderation_action, 0 = Accept, + # 1 = Munge, 2 = Wrap, 3 = Reject, 4 = Discard + if mlist.dmarc_moderation_action == 1: + msgdata['from_is_list'] = 1 + elif mlist.dmarc_moderation_action == 2: + msgdata['from_is_list'] = 2 + elif mlist.dmarc_moderation_action == 3: + # Reject + text = mlist.dmarc_moderation_notice + if text: + text = Utils.wrap(text) + else: + text = Utils.wrap(_( +"""You are not allowed to post to this mailing list From: a domain which +publishes a DMARC policy of reject or quarantine, and your message has been +automatically rejected. If you think that your messages are being rejected in +error, contact the mailing list owner at %(listowner)s.""")) + raise Errors.RejectMessage, text + elif mlist.dmarc_moderation_action == 4: + raise Errors.DiscardMessage + + # Get member address if any. + for sender in msg.get_senders(): + if mlist.isMember(sender): + break + else: + sender = msg.get_sender() + if (mlist.member_verbosity_threshold > 0 and + Utils.IsVerboseMember(mlist, sender) + ): + mlist.setMemberOption(sender, mm_cfg.Moderate, 1) + syslog('vette', + '%s: Automatically Moderated %s for verbose postings.', + mlist.real_name, sender) + if msgdata.get('approved'): return # First do site hard coded header spam checks @@ -92,9 +153,9 @@ def process(mlist, msg, msgdata): # Now do header_filter_rules # TK: Collect headers in sub-parts because attachment filename # extension may be a clue to possible virus/spam. - headers = '' + headers = u'' # Get the character set of the lists preferred language for headers - lcset = GetCharSet(mlist.preferred_language) + lcset = Utils.GetCharSet(mlist.preferred_language) for p in msg.walk(): headers += getDecodedHeaders(p, lcset) for patterns, action, empty in mlist.header_filter_rules: @@ -106,7 +167,17 @@ def process(mlist, msg, msgdata): # ignore 'empty' patterns if not pattern.strip(): continue - if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE): + pattern = Utils.xml_to_unicode(pattern, lcset) + pattern = normalize(mm_cfg.NORMALIZE_FORM, pattern) + try: + mo = re.search(pattern, + headers, + re.IGNORECASE|re.MULTILINE|re.UNICODE) + except (re.error, TypeError): + syslog('error', + 'ignoring header_filter_rules invalid pattern: %s', + pattern) + if mo: if action == mm_cfg.DISCARD: raise Errors.DiscardMessage if action == mm_cfg.REJECT: |