diff options
Diffstat (limited to '')
-rw-r--r-- | Mailman/Archiver/HyperArch.py | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index d4fc5d9a..ad51596e 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -1,4 +1,4 @@
-# Copyright (C) 1998-2007 by the Free Software Foundation, Inc.
+# Copyright (C) 1998-2008 by the Free Software Foundation, Inc.
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
@@ -156,7 +156,12 @@
 REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE)
 emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)')
 # Argh! This pattern is buggy, and will choke on URLs with GET parameters.
-urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text
+# MAS: Given that people are not constrained in how they write URIs in plain
+# text, it is not possible to have a single regexp to reliably match them.
+# The regexp below is intended to match straightforward cases. Even humans
+# can't reliably tell whether various punctuation at the end of a URI is part
+# of the URI or not.
+urlpat = re.compile(r'([a-z]+://.*?)(?:_\s|_$|$|[]})>\'"\s])', re.IGNORECASE)
 # Blank lines
 blankpat = re.compile(r'^\s*$')
 
@@ -574,8 +579,8 @@ class Article(pipermail.Article):
         if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS:
             otrans = i18n.get_translation()
             try:
-                atmark = unicode(_(' at '), cset)
                 i18n.set_language(self._lang)
+                atmark = unicode(_(' at '), cset)
                 body = re.sub(r'([-+,.\w]+)@([-+.\w]+)',
                               '\g<1>' + atmark + '\g<2>', body)
             finally: