path: root/Mailman/Utils.py
Diffstat (limited to 'Mailman/Utils.py')
-rw-r--r--    Mailman/Utils.py    98
1 file changed, 81 insertions, 17 deletions
diff --git a/Mailman/Utils.py b/Mailman/Utils.py
index 0344ef58..4a5dddef 100644
--- a/Mailman/Utils.py
+++ b/Mailman/Utils.py
@@ -34,6 +34,7 @@ import time
import errno
import base64
import random
+import urllib2
import urlparse
import htmlentitydefs
import email.Header
@@ -1156,6 +1157,79 @@ def suspiciousHTML(html):
return False
+# The next functions read data from
+# https://publicsuffix.org/list/public_suffix_list.dat and implement the
+# algorithm at https://publicsuffix.org/list/ to find the "Organizational
+# Domain" corresponding to a From: domain.
+
+URL = 'https://publicsuffix.org/list/public_suffix_list.dat'
+s_dict = {}
+
+def get_suffixes(url):
+ """This loads the data from the url argument into s_dict for use by
+get_org_dom."""
+ global s_dict
+ if s_dict:
+ return
+ try:
+ d = urllib2.urlopen(url)
+ except urllib2.URLError, e:
+ syslog('error',
+ 'Unable to retrieve data from %s: %s',
+ url, e)
+ return
+ for line in d.readlines():
+ if not line or line.startswith(' ') or line.startswith('//'):
+ continue
+ line = re.sub(' .*', '', line.strip())
+ if not line:
+ continue
+ parts = line.split('.')
+ if parts[0].startswith('!'):
+ exc = True
+ parts = [parts[0][1:]] + parts[1:]
+ else:
+ exc = False
+ parts.reverse()
+ k = '.'.join(parts)
+ s_dict[k] = exc
+
+def _get_dom(d, l):
+ """A helper to get a domain name consisting of the first l labels in d."""
+ dom = d[:min(l+1, len(d))]
+ dom.reverse()
+ return '.'.join(dom)
+
+def get_org_dom(domain):
+ """Given a domain name, this returns the corresponding Organizational
+Domain which may be the same as the input."""
+ global s_dict
+ if not s_dict:
+ get_suffixes(URL)
+ hits = []
+ d = domain.split('.')
+ d.reverse()
+ for k in s_dict.keys():
+ ks = k.split('.')
+ if len(d) >= len(ks):
+ for i in range(len(ks)-1):
+ if d[i] != ks[i] and ks[i] != '*':
+ break
+ else:
+ if d[len(ks)-1] == ks[-1] or ks[-1] == '*':
+ hits.append(k)
+ if not hits:
+ return _get_dom(d, 1)
+ l = 0
+ for k in hits:
+ if s_dict[k]:
+ # It's an exception
+ return _get_dom(d, len(k.split('.'))-1)
+ if len(k.split('.')) > l:
+ l = len(k.split('.'))
+ return _get_dom(d, l)
+
+
# This takes an email address, and returns True if DMARC policy is p=reject
# or possibly quarantine.
def IsDMARCProhibited(mlist, email):
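
The helpers added in the hunk above can be exercised roughly as follows. This is a
sketch rather than part of the patch: it assumes the patched Mailman/Utils.py is
importable and that the host can reach publicsuffix.org, since get_suffixes() fetches
the list over HTTPS on first use, and the exact results depend on the list contents
at the time of the lookup.

from Mailman import Utils

# 'com' is a one-label rule, so the Organizational Domain keeps one
# extra label below the public suffix.
print(Utils.get_org_dom('a.b.c.example.com'))    # example.com

# 'co.uk' is a two-label rule, so one more label is kept.
print(Utils.get_org_dom('x.y.z.co.uk'))          # z.co.uk

# Wildcard ('*.ck') and exception ('!www.ck') rules are honored as well.
print(Utils.get_org_dom('a.foo.bar.ck'))         # foo.bar.ck
print(Utils.get_org_dom('www.ck'))               # www.ck
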
@@ -1170,25 +1244,15 @@ def IsDMARCProhibited(mlist, email):
at_sign = email.find('@')
if at_sign < 1:
return False
- dparts = email[at_sign+1:].split('.')
- # The following is a way of testing the "Organizational Domain" for DMARC
- # policy if the From: domain doesn't publish a policy. What we're doing
- # is clearly wrong. I.e., if the From: domain is a.b.c.example.com, we
- # should lookup _dmarc.a.b.c.example.com and if no DMARC policy there,
- # we should look up only _dmarc.example.com. The problem is not all
- # Organizational Domains are two "words" and determining any particular
- # Organizational Domain requires applying a non-trivial algorithm to a
- # large, somewhat dynamic data set. What we do is look up all the
- # intermediate domains on the theory that if _dmarc.a.b.c.example.com has
- # no valid DMARC policy then the intermediates won't either. We will also
- # err with a domain like x.y.x.co.uk. Here we will go to far and also look
- # up _dmarc.co.uk which is also wrong but hopefully won't return a policy.
- # This is clearly a flawed approach, but hopefully good enough.
- while len(dparts) > 1:
- x = _DMARCProhibited(mlist, email, '_dmarc.' + '.'.join(dparts))
+ f_dom = email[at_sign+1:]
+ x = _DMARCProhibited(mlist, email, '_dmarc.' + f_dom)
+ if x != 'continue':
+ return x
+ o_dom = get_org_dom(f_dom)
+ if o_dom != f_dom:
+ x = _DMARCProhibited(mlist, email, '_dmarc.' + o_dom)
if x != 'continue':
return x
- dparts = dparts[1:]
return False
def _DMARCProhibited(mlist, email, dmarc_domain):
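
For illustration, the lookup order of the rewritten IsDMARCProhibited() can be
sketched on its own. This is not part of the patch: dmarc_lookup_order() below is a
hypothetical helper that only mirrors the new control flow (query the From: domain
first, then the Organizational Domain only when it differs); the actual DNS TXT query
and policy parsing remain in _DMARCProhibited().

from Mailman.Utils import get_org_dom

def dmarc_lookup_order(from_address):
    """Return the _dmarc names the new code would query, in order."""
    from_domain = from_address.split('@', 1)[1]
    names = ['_dmarc.' + from_domain]
    org_domain = get_org_dom(from_domain)
    # Mirrors the "if o_dom != f_dom" test: the Organizational Domain is
    # queried only when it adds a second, different name.
    if org_domain != from_domain:
        names.append('_dmarc.' + org_domain)
    return names

print(dmarc_lookup_order('user@lists.example.co.uk'))
# ['_dmarc.lists.example.co.uk', '_dmarc.example.co.uk']
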