From 41a9b83a0d0a1253aeb5d7aa108852128de504ee Mon Sep 17 00:00:00 2001 From: Mark Sapiro Date: Thu, 10 Mar 2016 17:53:32 -0800 Subject: Fixed _set_date() in pipermail.py do do a better job. --- Mailman/Archiver/pipermail.py | 26 ++++++++++++++++++-------- NEWS | 9 +++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py index 9c54bbd9..15decd40 100644 --- a/Mailman/Archiver/pipermail.py +++ b/Mailman/Archiver/pipermail.py @@ -16,6 +16,7 @@ __version__ = '0.09 (Mailman edition)' VERSION = __version__ CACHESIZE = 100 # Number of slots in the cache +from Mailman import mm_cfg from Mailman import Errors from Mailman.Mailbox import ArchiverMailbox from Mailman.Logging.Syslog import syslog @@ -230,21 +231,30 @@ class Article: self.body = s.readlines() def _set_date(self, message): - def floatdate(header): - missing = [] - datestr = message.get(header, missing) - if datestr is missing: + def floatdate(datestr): + if not datestr: return None date = parsedate_tz(datestr) try: - return mktime_tz(date) + date = mktime_tz(date) + if (date < 0 or + date - time.time() > + mm_cfg.ARCHIVER_ALLOWABLE_SANE_DATE_SKEW + ): + return None + return date except (TypeError, ValueError, OverflowError): return None - date = floatdate('date') + date = floatdate(message.get('date')) + if date is None: + date = floatdate(message.get('x-list-received-date')) + if date is None: + date = floatdate(re.sub(r'^.*;\s*', '', + message.get('received'), flags=re.S)) if date is None: - date = floatdate('x-list-received-date') + date = floatdate(re.sub(r'From \s*\S+\s+', '', + message.get_unixfrom())) if date is None: - # What's left to try? date = self._last_article_time + 1 self._last_article_time = date self.date = '%011i' % date diff --git a/NEWS b/NEWS index 26596c37..f3a3c886 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,15 @@ Here is a history of user visible changes to Mailman. 
Bug fixes and other patches + - Fixed the pipermail archiver to do a better job of figuring the date of + a post when its Date: header is missing, unparseable or has an obviously + out of range date. This should only affect bin/arch as ArchRunner has + code to fix dates at least if ARCHIVER_CLOBBER_DATE_POLICY has not been + set to 0 in mm_cfg.py. If posts have been added in the past to a list's + archive using bin/arch and an imported mbox, running bin/arch again could + result in some of those posts being archived with a different date. + (LP: #1555798) + - Fixed an issue with CommandRunner shunting a malformed message with a null byte in the body. (LP: #1553888) -- cgit v1.2.3