diff options
Diffstat (limited to '')
-rw-r--r-- | Mailman/Utils.py | 230 |
1 files changed, 226 insertions, 4 deletions
diff --git a/Mailman/Utils.py b/Mailman/Utils.py index 93e1fba1..f22e45b4 100644 --- a/Mailman/Utils.py +++ b/Mailman/Utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 1998-2011 by the Free Software Foundation, Inc. +# Copyright (C) 1998-2015 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -71,8 +71,16 @@ except NameError: True = 1 False = 0 +try: + import dns.resolver + from dns.exception import DNSException + dns_resolver = True +except ImportError: + dns_resolver = False + EMPTYSTRING = '' UEMPTYSTRING = u'' +CR = '\r' NL = '\n' DOT = '.' IDENTCHARS = ascii_letters + digits + '_' @@ -92,6 +100,12 @@ def list_exists(listname): # # The former two are for 2.1alpha3 and beyond, while the latter two are # for all earlier versions. + # + # But first ensure the list name doesn't contain a path traversal + # attack. + if len(re.sub(mm_cfg.ACCEPTABLE_LISTNAME_CHARACTERS, '', listname)) > 0: + syslog('mischief', 'Hostile listname: %s', listname) + return False basepath = Site.get_listpath(listname) for ext in ('.pck', '.pck.last', '.db', '.db.last'): dbfile = os.path.join(basepath, 'config' + ext) @@ -220,10 +234,18 @@ _valid_domain = re.compile('[-a-z0-9]', re.IGNORECASE) def ValidateEmail(s): """Verify that an email address isn't grossly evil.""" + # If a user submits a form or URL with post data or query fragments + # with multiple occurrences of the same variable, we can get a list + # here. Be as careful as possible. + if isinstance(s, list) or isinstance(s, tuple): + if len(s) == 0: + s = '' + else: + s = s[-1] # Pretty minimal, cheesy check. We could do better... 
if not s or s.count(' ') > 0: raise Errors.MMBadEmailError - if _badchars.search(s) or s[0] == '-': + if _badchars.search(s): raise Errors.MMHostileAddress, s user, domain_parts = ParseEmail(s) # This means local, unqualified addresses, are not allowed @@ -232,8 +254,9 @@ def ValidateEmail(s): if len(domain_parts) < 2: raise Errors.MMBadEmailError, s # domain parts may only contain ascii letters, digits and hyphen + # and must not begin with hyphen. for p in domain_parts: - if len(_valid_domain.sub('', p)) > 0: + if len(p) == 0 or p[0] == '-' or len(_valid_domain.sub('', p)) > 0: raise Errors.MMHostileAddress, s @@ -247,12 +270,24 @@ def GetPathPieces(envar='PATH_INFO'): if path: if CRNLpat.search(path): path = CRNLpat.split(path)[0] - syslog('error', 'Warning: Possible malformed path attack.') + remote = os.environ.get('HTTP_FORWARDED_FOR', + os.environ.get('HTTP_X_FORWARDED_FOR', + os.environ.get('REMOTE_ADDR', + 'unidentified origin'))) + syslog('error', + 'Warning: Possible malformed path attack domain=%s remote=%s', + get_domain(), + remote) return [p for p in path.split('/') if p] return None +def GetRequestMethod(): + return os.environ.get('REQUEST_METHOD') + + + def ScriptURL(target, web_page_url=None, absolute=False): """target - scriptname only, nothing extra web_page_url - the list's configvar of the same name @@ -427,6 +462,14 @@ def check_global_password(response, siteadmin=True): _ampre = re.compile('&((?:#[0-9]+|[a-z]+);)', re.IGNORECASE) def websafe(s): + # If a user submits a form or URL with post data or query fragments + # with multiple occurrences of the same variable, we can get a list + # here. Be as careful as possible. + if isinstance(s, list) or isinstance(s, tuple): + if len(s) == 0: + s = '' + else: + s = s[-1] if mm_cfg.BROKEN_BROWSER_WORKAROUND: # Archiver can pass unicode here. Just skip them as the # archiver escapes non-ascii anyway. 
@@ -905,6 +948,61 @@ def oneline(s, cset):
         return EMPTYSTRING.join(s.splitlines())
 
 
+def strip_verbose_pattern(pattern):
+    # Remove white space and comments from a verbose pattern and return a
+    # non-verbose, equivalent pattern. Replace CR and NL in the result
+    # with '\\r' and '\\n' respectively to avoid multi-line results.
+    if not isinstance(pattern, str):
+        return pattern
+    newpattern = ''
+    i = 0
+    inclass = False
+    skiptoeol = False
+    copynext = False
+    while i < len(pattern):
+        c = pattern[i]
+        if copynext:
+            if c == NL:
+                newpattern += '\\n'
+            elif c == CR:
+                newpattern += '\\r'
+            else:
+                newpattern += c
+            copynext = False
+        elif skiptoeol:
+            if c == NL:
+                skiptoeol = False
+        elif c == '#' and not inclass:
+            skiptoeol = True
+        elif c == '[' and not inclass:
+            inclass = True
+            newpattern += c
+            copynext = True
+        elif c == ']' and inclass:
+            inclass = False
+            newpattern += c
+        elif re.search('\s', c):
+            if inclass:
+                if c == NL:
+                    newpattern += '\\n'
+                elif c == CR:
+                    newpattern += '\\r'
+                else:
+                    newpattern += c
+        elif c == '\\' and not inclass:
+            newpattern += c
+            copynext = True
+        else:
+            if c == NL:
+                newpattern += '\\n'
+            elif c == CR:
+                newpattern += '\\r'
+            else:
+                newpattern += c
+        i += 1
+    return newpattern
+
+
 # Patterns and functions to flag possible XSS attacks in HTML.
 # This list is compiled from information at http://ha.ckers.org/xss.html,
 # http://www.quirksmode.org/js/events_compinfo.html,
@@ -1057,3 +1155,127 @@ def suspiciousHTML(html):
     else:
         return False
 
+
+# Return True if the DMARC policy for the email address's domain is p=reject,
+# or p=quarantine when mlist.dmarc_quarantine_moderation_action is set.
+def IsDMARCProhibited(mlist, email):
+    if not dns_resolver:
+        # This is a problem; log it.
+        syslog('error',
+               'DNS lookup for dmarc_moderation_action for list %s not available',
+               mlist.real_name)
+        return False
+
+    email = email.lower()
+    at_sign = email.find('@')
+    if at_sign < 1:
+        return False
+    dmarc_domain = '_dmarc.' + email[at_sign+1:]
+
+    try:
+        resolver = dns.resolver.Resolver()
+        resolver.timeout = float(mm_cfg.DMARC_RESOLVER_TIMEOUT)
+        resolver.lifetime = float(mm_cfg.DMARC_RESOLVER_LIFETIME)
+        txt_recs = resolver.query(dmarc_domain, dns.rdatatype.TXT)
+    except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
+        return False
+    except DNSException, e:
+        syslog('error',
+               'DNSException: Unable to query DMARC policy for %s (%s). %s',
+               email, dmarc_domain, e.__class__)
+        return False
+    else:
+# people are already being dumb, don't trust them to provide honest DNS
+# where the answer section only contains what was asked for, nor to include
+# CNAMEs before the values they point to.
+        full_record = ""
+        results_by_name = {}
+        cnames = {}
+        want_names = set([dmarc_domain + '.'])
+        for txt_rec in txt_recs.response.answer:
+            if txt_rec.rdtype == dns.rdatatype.CNAME:
+                cnames[txt_rec.name.to_text()] = (
+                    txt_rec.items[0].target.to_text())
+            if txt_rec.rdtype != dns.rdatatype.TXT:
+                continue
+            results_by_name.setdefault(txt_rec.name.to_text(), []).append(
+                "".join(txt_rec.items[0].strings))
+        expands = list(want_names)
+        seen = set(expands)
+        while expands:
+            item = expands.pop(0)
+            if item in cnames:
+                if cnames[item] in seen:
+                    continue # cname loop
+                expands.append(cnames[item])
+                seen.add(cnames[item])
+                want_names.add(cnames[item])
+                want_names.discard(item)
+
+        if len(want_names) != 1:
+            syslog('error',
+                   """multiple DMARC entries in results for %s,
+                   processing each to be strict""",
+                   dmarc_domain)
+        for name in want_names:
+            if name not in results_by_name:
+                continue
+            dmarcs = filter(lambda n: n.startswith('v=DMARC1;'),
+                            results_by_name[name])
+            if len(dmarcs) == 0:
+                return False
+            if len(dmarcs) > 1:
+                syslog('error',
+                       """RRset of TXT records for %s has %d v=DMARC1 entries;
+                       testing them all""",
+                       dmarc_domain, len(dmarcs))
+            for entry in dmarcs:
+                if re.search(r'\bp=reject\b', entry, re.IGNORECASE):
+                    syslog('vette',
+                           '%s: DMARC lookup for %s (%s) found p=reject in %s = %s',
+                           mlist.real_name, email, dmarc_domain, name, entry)
+                    return True
+
+                if (mlist.dmarc_quarantine_moderation_action and
+                        re.search(r'\bp=quarantine\b', entry, re.IGNORECASE)):
+                    syslog('vette',
+                           '%s: DMARC lookup for %s (%s) found p=quarantine in %s = %s',
+                           mlist.real_name, email, dmarc_domain, name, entry)
+                    return True
+
+    return False
+
+
+def check_eq_domains(email, domains_list):
+    """The arguments are an email address and a string representing a
+    list of lists in a form like 'a,b,c;1,2' representing [['a', 'b',
+    'c'],['1', '2']]. The inner lists are domains which are
+    equivalent in some sense. The return is an empty list or a list
+    of email addresses equivalent to the first argument.
+    For example, given
+
+    email = 'user@me.com'
+    domains_list = '''domain1, domain2; mac.com, me.com, icloud.com;
+                   domaina, domainb
+                   '''
+
+    check_eq_domains(email, domains_list) will return
+    ['user@mac.com', 'user@icloud.com']
+    """
+    if not domains_list:
+        return []
+    try:
+        local, domain = email.rsplit('@', 1)
+    except ValueError:
+        return []
+    domain = domain.lower()
+    domains_list = re.sub('\s', '', domains_list).lower()
+    domains = domains_list.split(';')
+    domains_list = []
+    for d in domains:
+        domains_list.append(d.split(','))
+    for domains in domains_list:
+        if domain in domains:
+            return [local + '@' + x for x in domains if x != domain]
+    return []
+
|