Implement Ralf Jung's captcha feature for the subscribe form.

author: Mark Sapiro <mark@msapiro.net> 2019-06-19 16:56:49 -0700
committer: Mark Sapiro <mark@msapiro.net> 2019-06-19 16:56:49 -0700
commit: 1799a87556e18776e64df28ff2ac4fee190f2dc1 (patch)
tree: 670eea11f001d2273da50af94c9f949b85ded058 /Mailman
parent: 56188e427f80ed350b6608ce47124402c90b9d40 (diff)
parent: 91203be694e4ca836b862b7921e119b2f55a8307 (diff)
download: mailman2-1799a87556e18776e64df28ff2ac4fee190f2dc1.tar.gz
mailman2-1799a87556e18776e64df28ff2ac4fee190f2dc1.tar.xz
mailman2-1799a87556e18776e64df28ff2ac4fee190f2dc1.zip
4 files changed, 82 insertions, 6 deletions
diff --git a/Mailman/Cgi/listinfo.py b/Mailman/Cgi/listinfo.py
index f1b455da..81ff7f48 100644
--- a/Mailman/Cgi/listinfo.py
+++ b/Mailman/Cgi/listinfo.py
@@ -216,10 +216,28 @@ def list_listinfo(mlist, lang):
             #        drop one : resulting in an invalid format, but it's only
             #        for our hash so it doesn't matter.
             remote = remote.rsplit(':', 1)[0]
+        # render CAPTCHA, if configured
+        if isinstance(mm_cfg.CAPTCHAS, dict) and 'en' in mm_cfg.CAPTCHAS:
+            (captcha_question, captcha_box, captcha_idx) = \
+                Utils.captcha_display(mlist, lang, mm_cfg.CAPTCHAS)
+            pre_question = _(
+                    """Please answer the following question to prove that
+                    you are not a bot:"""
+                )
+            replacements['<mm-captcha-ui>'] = (
+                """<tr><td BGCOLOR="#dddddd">%s<br>%s</td><td>%s</td></tr>"""
+                % (pre_question, captcha_question, captcha_box))
+        else:
+            # just to have something to include in the hash below
+            captcha_idx = ''
+        # fill form
         replacements['<mm-subscribe-form-start>'] += (
-                '<input type="hidden" name="sub_form_token" value="%s:%s">\n'
-                % (now, Utils.sha_new(mm_cfg.SUBSCRIBE_FORM_SECRET + ":" +
+                '<input type="hidden" name="sub_form_token"'
+                ' value="%s:%s:%s">\n'
+                % (now, captcha_idx,
+                          Utils.sha_new(mm_cfg.SUBSCRIBE_FORM_SECRET + ":" +
                           now + ":" +
+                          captcha_idx + ":" +
                           mlist.internal_name() + ":" +
                           remote
                           ).hexdigest()
diff --git a/Mailman/Cgi/subscribe.py b/Mailman/Cgi/subscribe.py
index b6527a2a..ce7940f9 100644
--- a/Mailman/Cgi/subscribe.py
+++ b/Mailman/Cgi/subscribe.py
@@ -168,13 +168,15 @@ def process_form(mlist, doc, cgidata, lang):
             #        for our hash so it doesn't matter.
             remote1 = remote.rsplit(':', 1)[0]
         try:
-            ftime, fhash = cgidata.getfirst('sub_form_token', '').split(':')
+            ftime, fcaptcha_idx, fhash = cgidata.getfirst(
+                    'sub_form_token', '').split(':')
             then = int(ftime)
         except ValueError:
-            ftime = fhash = ''
+            ftime = fcaptcha_idx = fhash = ''
             then = 0
         token = Utils.sha_new(mm_cfg.SUBSCRIBE_FORM_SECRET + ":" +
                               ftime + ":" +
+                              fcaptcha_idx + ":" +
                               mlist.internal_name() + ":" +
                               remote1).hexdigest()
         if ftime and now - then > mm_cfg.FORM_LIFETIME:
@@ -189,6 +191,13 @@ def process_form(mlist, doc, cgidata, lang):
             results.append(
     _('There was no hidden token in your submission or it was corrupted.'))
             results.append(_('You must GET the form before submitting it.'))
+        # Check captcha
+        if isinstance(mm_cfg.CAPTCHAS, dict):
+            captcha_answer = cgidata.getvalue('captcha_answer', '')
+            if not Utils.captcha_verify(
+                    fcaptcha_idx, captcha_answer, mm_cfg.CAPTCHAS):
+                results.append(_(
+                    'This was not the right answer to the CAPTCHA question.'))
     # Was an attempt made to subscribe the list to itself?
     if email == mlist.GetListEmail():
         syslog('mischief', 'Attempt to self subscribe %s: %s', email, remote)
diff --git a/Mailman/Defaults.py.in b/Mailman/Defaults.py.in
index 3350f278..594674ca 100755
--- a/Mailman/Defaults.py.in
+++ b/Mailman/Defaults.py.in
@@ -131,6 +131,25 @@ SUBSCRIBE_FORM_SECRET = None
 # test.
 SUBSCRIBE_FORM_MIN_TIME = seconds(5)
 
+# Use a custom question-answer CAPTCHA to protect against subscription spam.
+# Has no effect unless SUBSCRIBE_FORM_SECRET is set.
+# Should be set to a dict mapping language keys to a list of pairs
+# of questions and regexes for the answers, e.g.
+# CAPTCHAS = {
+#   'en': [
+#     ('What is two times six?', '(12|twelve)'),
+#     ('What is this mailing list software called?', '[Mm]ailman'),
+#   ],
+#   'de': [
+#     ('Was ist 3 mal 6?', '(18|achtzehn)'),
+#   ],
+# }
+# The regular expression must match the full string, i.e., it is implicitly
+# acting as if it had "^" in the beginning and "$" at the end.
+# An 'en' key must be present and is used as fall-back if there are no
+# questions for the currently set language.
+CAPTCHAS = None
+
 # Use Google reCAPTCHA to protect the subscription form from spam bots.  The
 # following must be set to a pair of keys issued by the reCAPTCHA service at
 # https://www.google.com/recaptcha/admin
@@ -1188,7 +1207,7 @@ DEFAULT_DMARC_MODERATION_ACTION = 0
 # with a stronger DMARC policy if such a policy would result in message
 # modification because dmarc_moderation_action is 1 or 2.  Thus, there is
 # a list setting to apply dmarc_moderaction_action of 1 or 2 to messages
-# From: domains with DMARC p=none.  Setting this to Yes is only effective if 
+# From: domains with DMARC p=none.  Setting this to Yes is only effective if
 # dmarc_quarantine_moderaction_action is also Yes.  The following is the
 # default for this setting for new lists.
 DEFAULT_DMARC_NONE_MODERATION_ACTION = No
@@ -1224,7 +1243,7 @@ DMARC_ORGANIZATIONAL_DOMAIN_DATA_URL = \
 #                                     (0 to disable).
 DEFAULT_MEMBER_VERBOSITY_INTERVAL = 300
 DEFAULT_MEMBER_VERBOSITY_THRESHOLD = 0
- 
+
 # This controls how often to clean old post time entries from the dictionary
 # used to implement the member verbosity feature. This is a compromise between
 # using resources for cleaning and allowing the dictionary to grow very large.
diff --git a/Mailman/Utils.py b/Mailman/Utils.py
index 10629fc4..36fbd1f9 100644
--- a/Mailman/Utils.py
+++ b/Mailman/Utils.py
@@ -1576,3 +1576,33 @@ def banned_domain(email):
         if not re.search(r'127\.0\.1\.255$', text, re.MULTILINE):
             return True
     return False
+
+
+def captcha_display(mlist, lang, captchas):
+    """Pick a random CAPTCHA for lang (falling back to 'en') and return
+    (websafe question, answer box HTML, "<lang>-<index>" token data)."""
+    # Fall back to 'en' when lang has no questions: key absent or list empty
+    # (random.randrange would raise ValueError on an empty list).
+    if not captchas.get(lang):
+        lang = 'en'
+    questions = captchas[lang]
+    idx = random.randrange(len(questions))
+    box_html = mlist.FormatBox('captcha_answer', size=30)
+    # The language is encoded in the token so captcha_verify can recover it.
+    return (websafe(questions[idx][0]), box_html, lang + "-" + str(idx))
+
+def captcha_verify(idx, given_answer, captchas):
+    """Return True iff given_answer fully matches the regex stored in
+    captchas for the "<lang>-<n>" token idx; bad tokens yield False."""
+    try:
+        (lang, idx) = idx.split("-")
+        idx = int(idx)
+    except ValueError:
+        return False
+    if lang not in captchas or not 0 <= idx < len(captchas[lang]):
+        return False
+    # Group the pattern so a top-level `|` cannot escape the anchor, and
+    # use \Z rather than $ so a trailing newline is not accepted.
+    # This emulates `re.fullmatch`, which Python 2 lacks.
+    pattern = "(?:" + captchas[lang][idx][1] + r")\Z"
+    return re.match(pattern, given_answer) is not None
author	Mark Sapiro <mark@msapiro.net>	2019-06-19 16:56:49 -0700
committer	Mark Sapiro <mark@msapiro.net>	2019-06-19 16:56:49 -0700
commit	1799a87556e18776e64df28ff2ac4fee190f2dc1 (patch)
tree	670eea11f001d2273da50af94c9f949b85ded058 /Mailman
parent	56188e427f80ed350b6608ce47124402c90b9d40 (diff)
parent	91203be694e4ca836b862b7921e119b2f55a8307 (diff)
download	mailman2-1799a87556e18776e64df28ff2ac4fee190f2dc1.tar.gz mailman2-1799a87556e18776e64df28ff2ac4fee190f2dc1.tar.xz mailman2-1799a87556e18776e64df28ff2ac4fee190f2dc1.zip