From da9d83cf1291701261bec4a9faafbe01b60a72a4 Mon Sep 17 00:00:00 2001 From: Mark Sapiro Date: Wed, 24 Feb 2016 23:24:26 -0800 Subject: If DMARC lookup fails to find a policy, also try the Organizational Domain. --- Mailman/Utils.py | 29 +++++++++++++++++++++++++---- NEWS | 3 +++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/Mailman/Utils.py b/Mailman/Utils.py index 1aa49954..0344ef58 100644 --- a/Mailman/Utils.py +++ b/Mailman/Utils.py @@ -1170,7 +1170,28 @@ def IsDMARCProhibited(mlist, email): at_sign = email.find('@') if at_sign < 1: return False - dmarc_domain = '_dmarc.' + email[at_sign+1:] + dparts = email[at_sign+1:].split('.') + # The following is a way of testing the "Organizational Domain" for DMARC + # policy if the From: domain doesn't publish a policy. What we're doing + # is clearly wrong. I.e., if the From: domain is a.b.c.example.com, we + # should lookup _dmarc.a.b.c.example.com and if no DMARC policy there, + # we should look up only _dmarc.example.com. The problem is not all + # Organizational Domains are two "words" and determining any particular + # Organizational Domain requires applying a non-trivial algorithm to a + # large, somewhat dynamic data set. What we do is look up all the + # intermediate domains on the theory that if _dmarc.a.b.c.example.com has + # no valid DMARC policy then the intermediates won't either. We will also + # err with a domain like x.y.x.co.uk. Here we will go to far and also look + # up _dmarc.co.uk which is also wrong but hopefully won't return a policy. + # This is clearly a flawed approach, but hopefully good enough. + while len(dparts) > 1: + x = _DMARCProhibited(mlist, email, '_dmarc.' + '.'.join(dparts)) + if x != 'continue': + return x + dparts = dparts[1:] + return False + +def _DMARCProhibited(mlist, email, dmarc_domain): try: resolver = dns.resolver.Resolver() @@ -1178,12 +1199,12 @@ def IsDMARCProhibited(mlist, email): resolver.lifetime = float(mm_cfg.DMARC_RESOLVER_LIFETIME) txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT) except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): - return False + return 'continue' except DNSException, e: syslog('error', 'DNSException: Unable to query DMARC policy for %s (%s). %s', email, dmarc_domain, e.__class__) - return False + return 'continue' else: # people are already being dumb, don't trust them to provide honest DNS # where the answer section only contains what was asked for, nor to include @@ -1223,7 +1244,7 @@ def IsDMARCProhibited(mlist, email): dmarcs = filter(lambda n: n.startswith('v=DMARC1;'), results_by_name[name]) if len(dmarcs) == 0: - return False + return 'continue' if len(dmarcs) > 1: syslog('error', """RRset of TXT records for %s has %d v=DMARC1 entries; diff --git a/NEWS b/NEWS index 86608210..8d9f452b 100644 --- a/NEWS +++ b/NEWS @@ -51,6 +51,9 @@ Here is a history of user visible changes to Mailman. Bug fixes and other patches + - If DMARC lookup fails to find a policy, also try the Organizational + Domain. (LP: #1549420) + - Modified contrib/mmdsr to correctly report No such list names that contain ". -- cgit v1.2.3 From 1736634b57aa1ba6864e255e088e64eeca9ca16a Mon Sep 17 00:00:00 2001 From: Mark Sapiro Date: Thu, 25 Feb 2016 19:08:37 -0800 Subject: Refactored OrganizationalDomain fix. --- Mailman/Utils.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/Mailman/Utils.py b/Mailman/Utils.py index 0344ef58..4a5dddef 100644 --- a/Mailman/Utils.py +++ b/Mailman/Utils.py @@ -34,6 +34,7 @@ import time import errno import base64 import random +import urllib2 import urlparse import htmlentitydefs import email.Header @@ -1156,6 +1157,79 @@ def suspiciousHTML(html): return False +# The next functions read data from +# https://publicsuffix.org/list/public_suffix_list.dat and implement the +# algorithm at https://publicsuffix.org/list/ to find the "Organizational +# Domain corresponding to a From: domain. + +URL = 'https://publicsuffix.org/list/public_suffix_list.dat' +s_dict = {} + +def get_suffixes(url): + """This loads the data from the url argument into s_dict for use by +get_org_dom.""" + global s_dict + if s_dict: + return + try: + d = urllib2.urlopen(url) + except urllib2.URLError, e: + syslog('error', + 'Unable to retrieve data from %s: %s', + url, e) + return + for line in d.readlines(): + if not line or line.startswith(' ') or line.startswith('//'): + continue + line = re.sub(' .*', '', line.strip()) + if not line: + continue + parts = line.split('.') + if parts[0].startswith('!'): + exc = True + parts = [parts[0][1:]] + parts[1:] + else: + exc = False + parts.reverse() + k = '.'.join(parts) + s_dict[k] = exc + +def _get_dom(d, l): + """A helper to get a domain name consisting of the first l labels in d.""" + dom = d[:min(l+1, len(d))] + dom.reverse() + return '.'.join(dom) + +def get_org_dom(domain): + """Given a domain name, this returns the corresponding Organizational +Domain which may be the same as the input.""" + global s_dict + if not s_dict: + get_suffixes(URL) + hits = [] + d = domain.split('.') + d.reverse() + for k in s_dict.keys(): + ks = k.split('.') + if len(d) >= len(ks): + for i in range(len(ks)-1): + if d[i] != ks[i] and ks[i] != '*': + break + else: + if d[len(ks)-1] == ks[-1] or ks[-1] == '*': + hits.append(k) + if not hits: + return _get_dom(d, 1) + l = 0 + for k in hits: + if s_dict[k]: + # It's an exception + return _get_dom(d, len(k.split('.'))-1) + if len(k.split('.')) > l: + l = len(k.split('.')) + return _get_dom(d, l) + + # This takes an email address, and returns True if DMARC policy is p=reject # or possibly quarantine. def IsDMARCProhibited(mlist, email): @@ -1170,25 +1244,15 @@ def IsDMARCProhibited(mlist, email): at_sign = email.find('@') if at_sign < 1: return False - dparts = email[at_sign+1:].split('.') - # The following is a way of testing the "Organizational Domain" for DMARC - # policy if the From: domain doesn't publish a policy. What we're doing - # is clearly wrong. I.e., if the From: domain is a.b.c.example.com, we - # should lookup _dmarc.a.b.c.example.com and if no DMARC policy there, - # we should look up only _dmarc.example.com. The problem is not all - # Organizational Domains are two "words" and determining any particular - # Organizational Domain requires applying a non-trivial algorithm to a - # large, somewhat dynamic data set. What we do is look up all the - # intermediate domains on the theory that if _dmarc.a.b.c.example.com has - # no valid DMARC policy then the intermediates won't either. We will also - # err with a domain like x.y.x.co.uk. Here we will go to far and also look - # up _dmarc.co.uk which is also wrong but hopefully won't return a policy. - # This is clearly a flawed approach, but hopefully good enough. - while len(dparts) > 1: - x = _DMARCProhibited(mlist, email, '_dmarc.' + '.'.join(dparts)) + f_dom = email[at_sign+1:] + x = _DMARCProhibited(mlist, email, '_dmarc.' + f_dom) + if x != 'continue': + return x + o_dom = get_org_dom(f_dom) + if o_dom != f_dom: + x = _DMARCProhibited(mlist, email, '_dmarc.' + o_dom) if x != 'continue': return x - dparts = dparts[1:] return False def _DMARCProhibited(mlist, email, dmarc_domain): -- cgit v1.2.3