From eddab860303c9a970a0428b0cda6561244e450b9 Mon Sep 17 00:00:00 2001
From: Mark Sapiro
Date: Mon, 22 Feb 2010 17:00:40 -0800
Subject: Additional cleanup of pipermail threading

---
 Mailman/Archiver/HyperArch.py | 3 +++
 Mailman/Archiver/pipermail.py | 8 +++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'Mailman/Archiver')

diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py
index a532c81e..2c22b33d 100644
--- a/Mailman/Archiver/HyperArch.py
+++ b/Mailman/Archiver/HyperArch.py
@@ -438,8 +438,11 @@ class Article(pipermail.Article):
             prefix_pat = re.sub(r'%\d*d', r'\s*\d+\s*', prefix_pat)
             subject = re.sub(prefix_pat, '', subject)
         subject = subject.lstrip()
+        # MAS Should we strip FW and FWD too?
         strip_pat = re.compile('^((RE|AW|SV|VS)(\[\d+\])?:\s*)+', re.I)
         stripped = strip_pat.sub('', subject)
+        # Also remove whitespace to avoid folding/unfolding differences
+        stripped = re.sub('\s', '', stripped)
         return stripped
 
     def decode_charset(self, field):
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
index 3e5ead66..939602ba 100644
--- a/Mailman/Archiver/pipermail.py
+++ b/Mailman/Archiver/pipermail.py
@@ -651,8 +651,10 @@ class T:
     def get_parent_info(self, archive, article):
         parentID = None
         if article.in_reply_to:
-            parentID = article.in_reply_to
-        elif article.references:
+            if self.database.hasArticle(archive, article.in_reply_to):
+                # Only use In-Reply-To if it's in the archive.
+                parentID = article.in_reply_to
+        if not parentID and article.references:
             refs = self._remove_external_references(article.references)
             if refs:
                 maxdate = self.database.getArticle(archive, refs[0])
@@ -661,7 +663,7 @@ class T:
                     if a.date > maxdate.date:
                         maxdate = a
                 parentID = maxdate.msgid
-        else:
+        if not parentID:
             # Get the oldest article with a matching subject, and
             # assume this is a follow-up to that article
             # But, use the subject that's in the database
-- 
cgit v1.2.3