1 files changed, 82 insertions, 11 deletions
diff --git a/Mailman/Handlers/SpamDetect.py b/Mailman/Handlers/SpamDetect.py
index 8d26da03..aaddff5f 100644
--- a/Mailman/Handlers/SpamDetect.py
+++ b/Mailman/Handlers/SpamDetect.py
@@ -1,4 +1,4 @@
-# Copyright (C) 1998-2012 by the Free Software Foundation, Inc.
+# Copyright (C) 1998-2016 by the Free Software Foundation, Inc.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -27,13 +27,17 @@ TBD: This needs to be made more configurable and robust.
 
 import re
 
+from unicodedata import normalize
+from email.Errors import HeaderParseError
 from email.Header import decode_header
+from email.Utils import parseaddr
 
 from Mailman import mm_cfg
 from Mailman import Errors
 from Mailman import i18n
-from Mailman.Utils import GetCharSet
+from Mailman import Utils
 from Mailman.Handlers.Hold import hold_for_approval
+from Mailman.Logging.Syslog import syslog
 
 try:
     True, False
@@ -61,24 +65,81 @@ _ = i18n._
 
 
 def getDecodedHeaders(msg, cset='utf-8'):
-    """Returns a string containing all the headers of msg, unfolded and
-    RFC 2047 decoded and encoded in cset.
+    """Returns a unicode containing all the headers of msg, unfolded and
+    RFC 2047 decoded, normalized and separated by new lines.
     """
 
-    headers = ''
+    headers = u''
     for h, v in msg.items():
         uvalue = u''
-        v = decode_header(re.sub('\n\s', ' ', v))
+        try:
+            v = decode_header(re.sub('\n\s', ' ', v))
+        except HeaderParseError:
+            v = [(v, 'us-ascii')]
         for frag, cs in v:
             if not cs:
                 cs = 'us-ascii'
-            uvalue += unicode(frag, cs, 'replace')
-        headers += '%s: %s\n' % (h, uvalue.encode(cset, 'replace'))
+            try:
+                uvalue += unicode(frag, cs, 'replace')
+            except LookupError:
+                # The encoding charset is unknown.  At this point, frag
+                # has been QP or base64 decoded into a byte string whose
+                # charset we don't know how to handle.  We will try to
+                # unicode it as iso-8859-1 which may result in a garbled
+                # mess, but we have to do something.
+                uvalue += unicode(frag, 'iso-8859-1', 'replace')
+        uhdr = h.decode('us-ascii', 'replace')
+        headers += u'%s: %s\n' % (h, normalize(mm_cfg.NORMALIZE_FORM, uvalue))
     return headers
 
 
 
 def process(mlist, msg, msgdata):
+    # Before anything else, check DMARC if necessary.  We do this as early
+    # as possible so reject/discard actions trump other holds/approvals and
+    # wrap/munge actions get flagged even for approved messages.
+    # But not for owner mail which should not be subject to DMARC reject or
+    # discard actions.
+    if not msgdata.get('toowner'):
+        msgdata['from_is_list'] = 0
+        dn, addr = parseaddr(msg.get('from'))
+        if addr and mlist.dmarc_moderation_action > 0:
+            if Utils.IsDMARCProhibited(mlist, addr):
+                # Note that for dmarc_moderation_action, 0 = Accept, 
+                #    1 = Munge, 2 = Wrap, 3 = Reject, 4 = Discard
+                if mlist.dmarc_moderation_action == 1:
+                    msgdata['from_is_list'] = 1
+                elif mlist.dmarc_moderation_action == 2:
+                    msgdata['from_is_list'] = 2
+                elif mlist.dmarc_moderation_action == 3:
+                    # Reject
+                    text = mlist.dmarc_moderation_notice
+                    if text:
+                        text = Utils.wrap(text)
+                    else:
+                        text = Utils.wrap(_(
+"""You are not allowed to post to this mailing list From: a domain which
+publishes a DMARC policy of reject or quarantine, and your message has been
+automatically rejected.  If you think that your messages are being rejected in
+error, contact the mailing list owner at %(listowner)s."""))
+                    raise Errors.RejectMessage, text
+                elif mlist.dmarc_moderation_action == 4:
+                    raise Errors.DiscardMessage
+
+        # Get member address if any.
+        for sender in msg.get_senders():
+            if mlist.isMember(sender):
+                break
+        else:
+            sender = msg.get_sender()
+        if (mlist.member_verbosity_threshold > 0 and
+            Utils.IsVerboseMember(mlist, sender)
+           ):
+             mlist.setMemberOption(sender, mm_cfg.Moderate, 1)
+             syslog('vette',
+                    '%s: Automatically Moderated %s for verbose postings.',
+                     mlist.real_name, sender) 
+
     if msgdata.get('approved'):
         return
     # First do site hard coded header spam checks
@@ -92,9 +153,9 @@ def process(mlist, msg, msgdata):
     # Now do header_filter_rules
     # TK: Collect headers in sub-parts because attachment filename
     # extension may be a clue to possible virus/spam.
-    headers = ''
+    headers = u''
     # Get the character set of the lists preferred language for headers
-    lcset = GetCharSet(mlist.preferred_language)
+    lcset = Utils.GetCharSet(mlist.preferred_language)
     for p in msg.walk():
         headers += getDecodedHeaders(p, lcset)
     for patterns, action, empty in mlist.header_filter_rules:
@@ -106,7 +167,17 @@ def process(mlist, msg, msgdata):
             # ignore 'empty' patterns
             if not pattern.strip():
                 continue
-            if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE):
+            pattern = Utils.xml_to_unicode(pattern, lcset)
+            pattern = normalize(mm_cfg.NORMALIZE_FORM, pattern)
+            try:
+                mo = re.search(pattern,
+                               headers,
+                               re.IGNORECASE|re.MULTILINE|re.UNICODE)
+            except (re.error, TypeError):
+                syslog('error',
+                       'ignoring header_filter_rules invalid pattern: %s',
+                       pattern)
+            if mo:
                 if action == mm_cfg.DISCARD:
                     raise Errors.DiscardMessage
                 if action == mm_cfg.REJECT: