1 files changed, 8 insertions, 3 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index d4fc5d9a..ad51596e 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 1998-2007 by the Free Software Foundation, Inc.
+# Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -156,7 +156,12 @@ REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE)
 emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)')
 
 #  Argh!  This pattern is buggy, and will choke on URLs with GET parameters.
-urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text
+# MAS: Given that people are not constrained in how they write URIs in plain
+# text, it is not possible to have a single regexp to reliably match them.
+# The regexp below is intended to match straightforward cases.  Even humans
+# can't reliably tell whether various punctuation at the end of a URI is part
+# of the URI or not.
+urlpat = re.compile(r'([a-z]+://.*?)(?:_\s|_$|$|[]})>\'"\s])', re.IGNORECASE)
 
 # Blank lines
 blankpat = re.compile(r'^\s*$')
@@ -574,8 +579,8 @@ class Article(pipermail.Article):
         if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS:
             otrans = i18n.get_translation()
             try:
-                atmark = unicode(_(' at '), cset)
                 i18n.set_language(self._lang)
+                atmark = unicode(_(' at '), cset)
                 body = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
                               '\g<1>' + atmark + '\g<2>', body)
             finally: