about | summary | refs | log | tree | commit | diff | stats
path: root/Mailman/Archiver
diff options
context:
space:
mode:
author Mark Sapiro <mark@msapiro.net> 2008-12-22 18:17:35 -0800
committer Mark Sapiro <mark@msapiro.net> 2008-12-22 18:17:35 -0800
commit 84d3a487a2654e8afa6a09c175bfd2e28ed8c5b9 (patch)
tree a0d8c75007e198ad18cc360e489a074fda7026f0 /Mailman/Archiver
parent d5646b28aed100dea3d15426645531ce00bbf085 (diff)
download mailman2-84d3a487a2654e8afa6a09c175bfd2e28ed8c5b9.tar.gz
mailman2-84d3a487a2654e8afa6a09c175bfd2e28ed8c5b9.tar.xz
mailman2-84d3a487a2654e8afa6a09c175bfd2e28ed8c5b9.zip
Changed the pattern used to recognize URLs in messages for the pipermail
archive in order to try to do a better job of making hyperlinks. Bug #310124.
Diffstat (limited to 'Mailman/Archiver')
-rw-r--r-- Mailman/Archiver/HyperArch.py 7
1 files changed, 6 insertions, 1 deletions
diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index 33a389ed..ad51596e 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -156,7 +156,12 @@ REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE)
emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)')
# Argh! This pattern is buggy, and will choke on URLs with GET parameters.
-urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text
+# MAS: Given that people are not constrained in how they write URIs in plain
+# text, it is not possible to have a single regexp to reliably match them.
+# The regexp below is intended to match straightforward cases. Even humans
+# can't reliably tell whether various punctuation at the end of a URI is part
+# of the URI or not.
+urlpat = re.compile(r'([a-z]+://.*?)(?:_\s|_$|$|[]})>\'"\s])', re.IGNORECASE)
# Blank lines
blankpat = re.compile(r'^\s*$')