diff options
Diffstat (limited to 'Mailman/Utils.py')
-rw-r--r-- | Mailman/Utils.py | 98 |
1 files changed, 81 insertions, 17 deletions
diff --git a/Mailman/Utils.py b/Mailman/Utils.py index 0344ef58..4a5dddef 100644 --- a/Mailman/Utils.py +++ b/Mailman/Utils.py @@ -34,6 +34,7 @@ import time import errno import base64 import random +import urllib2 import urlparse import htmlentitydefs import email.Header @@ -1156,6 +1157,79 @@ def suspiciousHTML(html): return False +# The next functions read data from +# https://publicsuffix.org/list/public_suffix_list.dat and implement the +# algorithm at https://publicsuffix.org/list/ to find the "Organizational +# Domain corresponding to a From: domain. + +URL = 'https://publicsuffix.org/list/public_suffix_list.dat' +s_dict = {} + +def get_suffixes(url): + """This loads the data from the url argument into s_dict for use by +get_org_dom.""" + global s_dict + if s_dict: + return + try: + d = urllib2.urlopen(url) + except urllib2.URLError, e: + syslog('error', + 'Unable to retrieve data from %s: %s', + url, e) + return + for line in d.readlines(): + if not line or line.startswith(' ') or line.startswith('//'): + continue + line = re.sub(' .*', '', line.strip()) + if not line: + continue + parts = line.split('.') + if parts[0].startswith('!'): + exc = True + parts = [parts[0][1:]] + parts[1:] + else: + exc = False + parts.reverse() + k = '.'.join(parts) + s_dict[k] = exc + +def _get_dom(d, l): + """A helper to get a domain name consisting of the first l labels in d.""" + dom = d[:min(l+1, len(d))] + dom.reverse() + return '.'.join(dom) + +def get_org_dom(domain): + """Given a domain name, this returns the corresponding Organizational +Domain which may be the same as the input.""" + global s_dict + if not s_dict: + get_suffixes(URL) + hits = [] + d = domain.split('.') + d.reverse() + for k in s_dict.keys(): + ks = k.split('.') + if len(d) >= len(ks): + for i in range(len(ks)-1): + if d[i] != ks[i] and ks[i] != '*': + break + else: + if d[len(ks)-1] == ks[-1] or ks[-1] == '*': + hits.append(k) + if not hits: + return _get_dom(d, 1) + l = 0 + for k in hits: + if s_dict[k]: + # It's an exception + return _get_dom(d, len(k.split('.'))-1) + if len(k.split('.')) > l: + l = len(k.split('.')) + return _get_dom(d, l) + + # This takes an email address, and returns True if DMARC policy is p=reject # or possibly quarantine. def IsDMARCProhibited(mlist, email): @@ -1170,25 +1244,15 @@ def IsDMARCProhibited(mlist, email): at_sign = email.find('@') if at_sign < 1: return False - dparts = email[at_sign+1:].split('.') - # The following is a way of testing the "Organizational Domain" for DMARC - # policy if the From: domain doesn't publish a policy. What we're doing - # is clearly wrong. I.e., if the From: domain is a.b.c.example.com, we - # should lookup _dmarc.a.b.c.example.com and if no DMARC policy there, - # we should look up only _dmarc.example.com. The problem is not all - # Organizational Domains are two "words" and determining any particular - # Organizational Domain requires applying a non-trivial algorithm to a - # large, somewhat dynamic data set. What we do is look up all the - # intermediate domains on the theory that if _dmarc.a.b.c.example.com has - # no valid DMARC policy then the intermediates won't either. We will also - # err with a domain like x.y.x.co.uk. Here we will go to far and also look - # up _dmarc.co.uk which is also wrong but hopefully won't return a policy. - # This is clearly a flawed approach, but hopefully good enough. - while len(dparts) > 1: - x = _DMARCProhibited(mlist, email, '_dmarc.' + '.'.join(dparts)) + f_dom = email[at_sign+1:] + x = _DMARCProhibited(mlist, email, '_dmarc.' + f_dom) + if x != 'continue': + return x + o_dom = get_org_dom(f_dom) + if o_dom != f_dom: + x = _DMARCProhibited(mlist, email, '_dmarc.' + o_dom) if x != 'continue': return x - dparts = dparts[1:] return False def _DMARCProhibited(mlist, email, dmarc_domain): |