Diffstat (limited to 'Mailman/Archiver')
 -rw-r--r--  Mailman/Archiver/.cvsignore       |    1
 -rw-r--r--  Mailman/Archiver/Archiver.py      |  232
 -rw-r--r--  Mailman/Archiver/HyperArch.py     | 1224
 -rw-r--r--  Mailman/Archiver/HyperDatabase.py |  338
 -rw-r--r--  Mailman/Archiver/Makefile.in      |   72
 -rw-r--r--  Mailman/Archiver/__init__.py      |   17
 -rw-r--r--  Mailman/Archiver/pipermail.py     |  854
7 files changed, 2738 insertions, 0 deletions
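
The heart of this commit is the Archiver mixin (Archiver.py, first file below), which decides for each posting whether to append it to a raw mbox, pipe it to a site-configured external archiver command, or feed it to the bundled pipermail/HyperArch HTML archiver. As an orientation aid before the diff body, here is a minimal, self-contained sketch of that dispatch; it is not Mailman code, and the names archive_mail, append_to_mbox, pipe_to_external, and run_pipermail are hypothetical stand-ins for the real methods defined in the diff:

    # Mirrors mm_cfg.ARCHIVE_TO_MBOX in the code below:
    # -1: no archiving, 0: pipermail only, 1: mbox only, 2: both
    ARCHIVE_TO_MBOX = 2

    def archive_mail(mlist, msg):
        """Sketch of the dispatch done by Archiver.ArchiveMail()."""
        if ARCHIVE_TO_MBOX == -1:
            return                       # archiving disabled entirely
        if ARCHIVE_TO_MBOX in (1, 2):
            mlist.append_to_mbox(msg)    # keep a raw mbox copy
        if ARCHIVE_TO_MBOX == 1:
            return                       # mbox only; skip HTML archiving
        if mlist.external_archiver:      # site-configured command, if any
            mlist.pipe_to_external(str(msg))
        else:
            mlist.run_pipermail(str(msg))  # bundled HyperArch archiver

The real ArchiveMail() additionally distinguishes public from private external archivers and relies on the caller already holding the list lock; those details appear in context in Archiver.py below.
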
diff --git a/Mailman/Archiver/.cvsignore b/Mailman/Archiver/.cvsignore new file mode 100644 index 00000000..f3c7a7c5 --- /dev/null +++ b/Mailman/Archiver/.cvsignore @@ -0,0 +1 @@ +Makefile diff --git a/Mailman/Archiver/Archiver.py b/Mailman/Archiver/Archiver.py new file mode 100644 index 00000000..903031cd --- /dev/null +++ b/Mailman/Archiver/Archiver.py @@ -0,0 +1,232 @@ +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +"""Mixin class for putting new messages in the right place for archival. + +Public archives are separated from private ones. An external archival +mechanism (eg, pipermail) should be pointed to the right places, to do the +archival. +""" + +import os +import errno +import traceback +from cStringIO import StringIO + +from Mailman import mm_cfg +from Mailman import Mailbox +from Mailman import Utils +from Mailman import Site +from Mailman.SafeDict import SafeDict +from Mailman.Logging.Syslog import syslog +from Mailman.i18n import _ + + + +def makelink(old, new): + try: + os.symlink(old, new) + except os.error, e: + code, msg = e + if code <> errno.EEXIST: + raise + +def breaklink(link): + try: + os.unlink(link) + except os.error, e: + code, msg = e + if code <> errno.ENOENT: + raise + + + +class Archiver: + # + # Interface to Pipermail. HyperArch.py uses this method to get the + # archive directory for the mailing list + # + def InitVars(self): + # Configurable + self.archive = mm_cfg.DEFAULT_ARCHIVE + # 0=public, 1=private: + self.archive_private = mm_cfg.DEFAULT_ARCHIVE_PRIVATE + self.archive_volume_frequency = \ + mm_cfg.DEFAULT_ARCHIVE_VOLUME_FREQUENCY + # The archive file structure by default is: + # + # archives/ + # private/ + # listname.mbox/ + # listname.mbox + # listname/ + # lots-of-pipermail-stuff + # public/ + # listname.mbox@ -> ../private/listname.mbox + # listname@ -> ../private/listname + # + # IOW, the mbox and pipermail archives are always stored in the + # private archive for the list. This is safe because archives/private + # is always set to o-rx. Public archives have a symlink to get around + # the private directory, pointing directly to the private/listname + # which has o+rx permissions. Private archives do not have the + # symbolic links. + omask = os.umask(0) + try: + try: + os.mkdir(self.archive_dir()+'.mbox', 02775) + except OSError, e: + if e.errno <> errno.EEXIST: raise + # We also create an empty pipermail archive directory into + # which we'll drop an empty index.html file into. This is so + # that lists that have not yet received a posting have + # /something/ as their index.html, and don't just get a 404. 
+ try: + os.mkdir(self.archive_dir(), 02775) + except OSError, e: + if e.errno <> errno.EEXIST: raise + # See if there's an index.html file there already and if not, + # write in the empty archive notice. + indexfile = os.path.join(self.archive_dir(), 'index.html') + fp = None + try: + fp = open(indexfile) + except IOError, e: + if e.errno <> errno.ENOENT: raise + else: + fp = open(indexfile, 'w') + fp.write(Utils.maketext( + 'emptyarchive.html', + {'listname': self.real_name, + 'listinfo': self.GetScriptURL('listinfo', absolute=1), + }, mlist=self)) + if fp: + fp.close() + finally: + os.umask(omask) + + def archive_dir(self): + return Site.get_archpath(self.internal_name()) + + def ArchiveFileName(self): + """The mbox name where messages are left for archive construction.""" + return os.path.join(self.archive_dir() + '.mbox', + self.internal_name() + '.mbox') + + def GetBaseArchiveURL(self): + if self.archive_private: + return self.GetScriptURL('private', absolute=1) + '/' + else: + inv = {} + for k, v in mm_cfg.VIRTUAL_HOSTS.items(): + inv[v] = k + url = mm_cfg.PUBLIC_ARCHIVE_URL % { + 'listname': self.internal_name(), + 'hostname': inv.get(self.host_name, mm_cfg.DEFAULT_URL_HOST), + } + if not url.endswith('/'): + url += '/' + return url + + def __archive_file(self, afn): + """Open (creating, if necessary) the named archive file.""" + omask = os.umask(002) + try: + return Mailbox.Mailbox(open(afn, 'a+')) + finally: + os.umask(omask) + + # + # old ArchiveMail function, retained under a new name + # for optional archiving to an mbox + # + def __archive_to_mbox(self, post): + """Retain a text copy of the message in an mbox file.""" + try: + afn = self.ArchiveFileName() + mbox = self.__archive_file(afn) + mbox.AppendMessage(post) + mbox.fp.close() + except IOError, msg: + syslog('error', 'Archive file access failure:\n\t%s %s', afn, msg) + raise + + def ExternalArchive(self, ar, txt): + d = SafeDict({'listname': self.internal_name()}) + cmd = ar % d + extarch = os.popen(cmd, 'w') + extarch.write(txt) + status = extarch.close() + if status: + syslog('error', 'external archiver non-zero exit status: %d\n', + (status & 0xff00) >> 8) + + # + # archiving in real time this is called from list.post(msg) + # + def ArchiveMail(self, msg): + """Store postings in mbox and/or pipermail archive, depending.""" + # Fork so archival errors won't disrupt normal list delivery + if mm_cfg.ARCHIVE_TO_MBOX == -1: + return + # + # We don't need an extra archiver lock here because we know the list + # itself must be locked. + if mm_cfg.ARCHIVE_TO_MBOX in (1, 2): + self.__archive_to_mbox(msg) + if mm_cfg.ARCHIVE_TO_MBOX == 1: + # Archive to mbox only. + return + txt = str(msg) + # should we use the internal or external archiver? + private_p = self.archive_private + if mm_cfg.PUBLIC_EXTERNAL_ARCHIVER and not private_p: + self.ExternalArchive(mm_cfg.PUBLIC_EXTERNAL_ARCHIVER, txt) + elif mm_cfg.PRIVATE_EXTERNAL_ARCHIVER and private_p: + self.ExternalArchive(mm_cfg.PRIVATE_EXTERNAL_ARCHIVER, txt) + else: + # use the internal archiver + f = StringIO(txt) + import HyperArch + h = HyperArch.HyperArchive(self) + h.processUnixMailbox(f) + h.close() + f.close() + + # + # called from MailList.MailList.Save() + # + def CheckHTMLArchiveDir(self): + # We need to make sure that the archive directory has the right perms + # for public vs private. 
If it doesn't exist, or some weird + # permissions errors prevent us from stating the directory, it's + # pointless to try to fix the perms, so we just return -scott + if mm_cfg.ARCHIVE_TO_MBOX == -1: + # Archiving is completely disabled, don't require the skeleton. + return + pubdir = Site.get_archpath(self.internal_name(), public=1) + privdir = self.archive_dir() + pubmbox = pubdir + '.mbox' + privmbox = privdir + '.mbox' + if self.archive_private: + breaklink(pubdir) + breaklink(pubmbox) + else: + # BAW: privdir or privmbox could be nonexistant. We'd get an + # OSError, ENOENT which should be caught and reported properly. + makelink(privdir, pubdir) + makelink(privmbox, pubmbox) diff --git a/Mailman/Archiver/HyperArch.py b/Mailman/Archiver/HyperArch.py new file mode 100644 index 00000000..98fb5738 --- /dev/null +++ b/Mailman/Archiver/HyperArch.py @@ -0,0 +1,1224 @@ +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +"""HyperArch: Pipermail archiving for Mailman + + - The Dragon De Monsyne <dragondm@integral.org> + + TODO: + - Should be able to force all HTML to be regenerated next time the + archive is run, in case a template is changed. + - Run a command to generate tarball of html archives for downloading + (probably in the 'update_dirty_archives' method). +""" + +from __future__ import nested_scopes + +import sys +import re +import errno +import urllib +import time +import os +import types +import HyperDatabase +import pipermail +import weakref +import binascii + +from email.Header import decode_header, make_header + +from Mailman import mm_cfg +from Mailman import Utils +from Mailman import LockFile +from Mailman import MailList +from Mailman import i18n +from Mailman.SafeDict import SafeDict +from Mailman.Logging.Syslog import syslog +from Mailman.Mailbox import ArchiverMailbox + +# Set up i18n. Assume the current language has already been set in the caller. +_ = i18n._ + +gzip = None +if mm_cfg.GZIP_ARCHIVE_TXT_FILES: + try: + import gzip + except ImportError: + pass + +EMPTYSTRING = '' +NL = '\n' + +# MacOSX has a default stack size that is too small for deeply recursive +# regular expressions. We see this as crashes in the Python test suite when +# running test_re.py and test_sre.py. The fix is to set the stack limit to +# 2048; the general recommendation is to do in the shell before running the +# test suite. But that's inconvenient for a daemon like the qrunner. +# +# AFAIK, this problem only affects the archiver, so we're adding this work +# around to this file (it'll get imported by the bundled pipermail or by the +# bin/arch script. We also only do this on darwin, a.k.a. MacOSX. 
+if sys.platform == 'darwin': + try: + import resource + except ImportError: + pass + else: + soft, hard = resource.getrlimit(resource.RLIMIT_STACK) + newsoft = min(hard, max(soft, 1024*2048)) + resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard)) + + + +def html_quote(s, lang=None): + repls = ( ('&', '&'), + ("<", '<'), + (">", '>'), + ('"', '"')) + for thing, repl in repls: + s = s.replace(thing, repl) + return Utils.uncanonstr(s, lang) + + +def url_quote(s): + return urllib.quote(s) + + +def null_to_space(s): + return s.replace('\000', ' ') + + +def sizeof(filename, lang): + try: + size = os.path.getsize(filename) + except OSError, e: + # ENOENT can happen if the .mbox file was moved away or deleted, and + # an explicit mbox file name was given to bin/arch. + if e.errno <> errno.ENOENT: raise + return _('size not available') + if size < 1000: + # Avoid i18n side-effects + otrans = i18n.get_translation() + try: + i18n.set_language(lang) + out = _(' %(size)i bytes ') + finally: + i18n.set_translation(otrans) + return out + elif size < 1000000: + return ' %d KB ' % (size / 1000) + # GB?? :-) + return ' %d MB ' % (size / 1000000) + + +html_charset = '<META http-equiv="Content-Type" ' \ + 'content="text/html; charset=%s">' + +def CGIescape(arg, lang=None): + if isinstance(arg, types.UnicodeType): + s = Utils.websafe(arg) + else: + s = Utils.websafe(str(arg)) + return Utils.uncanonstr(s.replace('"', '"'), lang) + +# Parenthesized human name +paren_name_pat = re.compile(r'([(].*[)])') + +# Subject lines preceded with 'Re:' +REpat = re.compile( r"\s*RE\s*(\[\d+\]\s*)?:\s*", re.IGNORECASE) + +# E-mail addresses and URLs in text +emailpat = re.compile(r'([-+,.\w]+@[-+.\w]+)') + +# Argh! This pattern is buggy, and will choke on URLs with GET parameters. +urlpat = re.compile(r'(\w+://[^>)\s]+)') # URLs in text + +# Blank lines +blankpat = re.compile(r'^\s*$') + +# Starting <html> directive +htmlpat = re.compile(r'^\s*<HTML>\s*$', re.IGNORECASE) +# Ending </html> directive +nohtmlpat = re.compile(r'^\s*</HTML>\s*$', re.IGNORECASE) +# Match quoted text +quotedpat = re.compile(r'^([>|:]|>)+') + + + +# This doesn't need to be a weakref instance because it's just storing +# strings. Keys are (templatefile, lang) tuples. +_templatecache = {} + +def quick_maketext(templatefile, dict=None, lang=None, mlist=None): + if lang is None: + if mlist is None: + lang = mm_cfg.DEFAULT_SERVER_LANGUAGE + else: + lang = mlist.preferred_language + template = _templatecache.get((templatefile, lang)) + if template is None: + # Use the basic maketext, with defaults to get the raw template + template = Utils.maketext(templatefile, lang=lang, raw=1) + _templatecache[(templatefile, lang)] = template + # Copied from Utils.maketext() + text = template + if dict is not None: + try: + sdict = SafeDict(dict) + try: + text = sdict.interpolate(template) + except UnicodeError: + # Try again after coercing the template to unicode + utemplate = unicode(template, + Utils.GetCharSet(lang), + 'replace') + text = sdict.interpolate(utemplate) + except (TypeError, ValueError): + # The template is really screwed up + pass + # Make sure the text is in the given character set, or html-ify any bogus + # characters. + return Utils.uncanonstr(text, lang) + + + +# Note: I'm overriding most, if not all of the pipermail Article class +# here -ddm +# The Article class encapsulates a single posting. 
The attributes are: +# +# sequence : Sequence number, unique for each article in a set of archives +# subject : Subject +# datestr : The posting date, in human-readable format +# date : The posting date, in purely numeric format +# fromdate : The posting date, in `unixfrom' format +# headers : Any other headers of interest +# author : The author's name (and possibly organization) +# email : The author's e-mail address +# msgid : A unique message ID +# in_reply_to : If !="", this is the msgid of the article being replied to +# references: A (possibly empty) list of msgid's of earlier articles in +# the thread +# body : A list of strings making up the message body + +class Article(pipermail.Article): + __super_init = pipermail.Article.__init__ + __super_set_date = pipermail.Article._set_date + + _last_article_time = time.time() + + def __init__(self, message=None, sequence=0, keepHeaders=[], + lang=mm_cfg.DEFAULT_SERVER_LANGUAGE, mlist=None): + self.__super_init(message, sequence, keepHeaders) + self.prev = None + self.next = None + # Trim Re: from the subject line + i = 0 + while i != -1: + result = REpat.match(self.subject) + if result: + i = result.end(0) + self.subject = self.subject[i:] + else: + i = -1 + # Useful to keep around + self._lang = lang + self._mlist = mlist + + if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS: + # Avoid i18n side-effects. Note that the language for this + # article (for this list) could be different from the site-wide + # preferred language, so we need to ensure no side-effects will + # occur. Think what happens when executing bin/arch. + otrans = i18n.get_translation() + try: + i18n.set_language(lang) + self.email = re.sub('@', _(' at '), self.email) + finally: + i18n.set_translation(otrans) + + # Snag the content-* headers. RFC 1521 states that their values are + # case insensitive. + ctype = message.get('Content-Type', 'text/plain') + cenc = message.get('Content-Transfer-Encoding', '') + self.ctype = ctype.lower() + self.cenc = cenc.lower() + self.decoded = {} + charset = message.get_param('charset') + if charset: + charset = charset.lower().strip() + if charset[0]=='"' and charset[-1]=='"': + charset = charset[1:-1] + if charset[0]=="'" and charset[-1]=="'": + charset = charset[1:-1] + try: + body = message.get_payload(decode=1) + except binascii.Error: + body = None + if body and charset != Utils.GetCharSet(self._lang): + # decode body + try: + body = unicode(body, charset) + except (UnicodeError, LookupError): + body = None + if body: + self.body = [l + "\n" for l in body.splitlines()] + + self.decode_headers() + + # Mapping of listnames to MailList instances as a weak value dictionary. + # This code is copied from Runner.py but there's one important operational + # difference. In Runner.py, we always .Load() the MailList object for + # each _dispose() run, otherwise the object retrieved from the cache won't + # be up-to-date. Since we're creating a new HyperArchive instance for + # each message being archived, we don't need to worry about that -- but it + # does mean there are additional opportunities for optimization. + _listcache = weakref.WeakValueDictionary() + + def _open_list(self, listname): + # Cache the open list so that any use of the list within this process + # uses the same object. We use a WeakValueDictionary so that when the + # list is no longer necessary, its memory is freed. 
+ mlist = self._listcache.get(listname) + if not mlist: + try: + mlist = MailList.MailList(listname, lock=0) + except Errors.MMListError, e: + syslog('error', 'error opening list: %s\n%s', listname, e) + return None + else: + self._listcache[listname] = mlist + return mlist + + def __getstate__(self): + d = self.__dict__.copy() + # We definitely don't want to pickle the MailList instance, so just + # pickle a reference to it. + if d.has_key('_mlist'): + mlist = d['_mlist'] + del d['_mlist'] + else: + mlist = None + if mlist: + d['__listname'] = self._mlist.internal_name() + else: + d['__listname'] = None + # Delete a few other things we don't want in the pickle + for attr in ('prev', 'next', 'body'): + if d.has_key(attr): + del d[attr] + d['body'] = [] + return d + + def __setstate__(self, d): + # For loading older Articles via pickle. All this stuff was added + # when Simone Piunni and Tokio Kikuchi i18n'ified Pipermail. See SF + # patch #594771. + self.__dict__ = d + listname = d.get('__listname') + if listname: + del d['__listname'] + d['_mlist'] = self._open_list(listname) + if not d.has_key('_lang'): + if hasattr(self, '_mlist'): + self._lang = self._mlist.preferred_language + else: + self._lang = mm_cfg.DEFAULT_SERVER_LANGUAGE + if not d.has_key('cenc'): + self.cenc = None + if not d.has_key('decoded'): + self.decoded = {} + + def setListIfUnset(self, mlist): + if getattr(self, '_mlist', None) is None: + self._mlist = mlist + + def quote(self, buf): + return html_quote(buf, self._lang) + + def decode_headers(self): + """MIME-decode headers. + + If the email, subject, or author attributes contain non-ASCII + characters using the encoded-word syntax of RFC 2047, decoded versions + of those attributes are placed in the self.decoded (a dictionary). + + If the list's charset differs from the header charset, an attempt is + made to decode the headers as Unicode. If that fails, they are left + undecoded. 
+ """ + author = self.decode_charset(self.author) + subject = self.decode_charset(self.subject) + if author: + self.decoded['author'] = author + email = self.decode_charset(self.email) + if email: + self.decoded['email'] = email + if subject: + self.decoded['subject'] = subject + + def decode_charset(self, field): + if field.find("=?") == -1: + return None + # Get the decoded header as a list of (s, charset) tuples + pairs = decode_header(field) + # Use __unicode__() until we can guarantee Python 2.2 + try: + # Use a large number for maxlinelen so it won't get wrapped + h = make_header(pairs, 99999) + return h.__unicode__() + except (UnicodeError, LookupError): + # Unknown encoding + return None + # The last value for c will have the proper charset in it + return EMPTYSTRING.join([s for s, c in pairs]) + + def as_html(self): + d = self.__dict__.copy() + # avoid i18n side-effects + otrans = i18n.get_translation() + i18n.set_language(self._lang) + try: + d["prev"], d["prev_wsubj"] = self._get_prev() + d["next"], d["next_wsubj"] = self._get_next() + + d["email_html"] = self.quote(self.email) + d["title"] = self.quote(self.subject) + d["subject_html"] = self.quote(self.subject) + d["subject_url"] = url_quote(self.subject) + d["in_reply_to_url"] = url_quote(self.in_reply_to) + if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS: + # Point the mailto url back to the list + author = re.sub('@', _(' at '), self.author) + emailurl = self._mlist.GetListEmail() + else: + author = self.author + emailurl = self.email + d["author_html"] = self.quote(author) + d["email_url"] = url_quote(emailurl) + d["datestr_html"] = self.quote(i18n.ctime(int(self.date))) + d["body"] = self._get_body() + d['listurl'] = self._mlist.GetScriptURL('listinfo', absolute=1) + d['listname'] = self._mlist.real_name + d['encoding'] = '' + finally: + i18n.set_translation(otrans) + + charset = Utils.GetCharSet(self._lang) + d["encoding"] = html_charset % charset + + self._add_decoded(d) + return quick_maketext( + 'article.html', d, + lang=self._lang, mlist=self._mlist) + + def _get_prev(self): + """Return the href and subject for the previous message""" + if self.prev: + subject = self._get_subject_enc(self.prev) + prev = ('<LINK REL="Previous" HREF="%s">' + % (url_quote(self.prev.filename))) + prev_wsubj = ('<LI>' + _('Previous message:') + + ' <A HREF="%s">%s\n</A></li>' + % (url_quote(self.prev.filename), + self.quote(subject))) + else: + prev = prev_wsubj = "" + return prev, prev_wsubj + + def _get_subject_enc(self, art): + """Return the subject of art, decoded if possible. + + If the charset of the current message and art match and the + article's subject is encoded, decode it. 
+ """ + return art.decoded.get('subject', art.subject) + + def _get_next(self): + """Return the href and subject for the previous message""" + if self.next: + subject = self._get_subject_enc(self.next) + next = ('<LINK REL="Next" HREF="%s">' + % (url_quote(self.next.filename))) + next_wsubj = ('<LI>' + _('Next message:') + + ' <A HREF="%s">%s\n</A></li>' + % (url_quote(self.next.filename), + self.quote(subject))) + else: + next = next_wsubj = "" + return next, next_wsubj + + _rx_quote = re.compile('=([A-F0-9][A-F0-9])') + _rx_softline = re.compile('=[ \t]*$') + + def _get_body(self): + """Return the message body ready for HTML, decoded if necessary""" + try: + body = self.html_body + except AttributeError: + body = self.body + return null_to_space(EMPTYSTRING.join(body)) + + def _add_decoded(self, d): + """Add encoded-word keys to HTML output""" + for src, dst in (('author', 'author_html'), + ('email', 'email_html'), + ('subject', 'subject_html'), + ('subject', 'title')): + if self.decoded.has_key(src): + d[dst] = self.quote(self.decoded[src]) + + def as_text(self): + d = self.__dict__.copy() + # We need to guarantee a valid From_ line, even if there are + # bososities in the headers. + if not d.get('fromdate', '').strip(): + d['fromdate'] = time.ctime(time.time()) + if not d.get('email', '').strip(): + d['email'] = 'bogus@does.not.exist.com' + if not d.get('datestr', '').strip(): + d['datestr'] = time.ctime(time.time()) + # + headers = ['From %(email)s %(fromdate)s', + 'From: %(email)s (%(author)s)', + 'Date: %(datestr)s', + 'Subject: %(subject)s'] + if d['_in_reply_to']: + headers.append('In-Reply-To: %(_in_reply_to)s') + if d['_references']: + headers.append('References: %(_references)s') + if d['_message_id']: + headers.append('Message-ID: %(_message_id)s') + body = EMPTYSTRING.join(self.body) + if isinstance(body, types.UnicodeType): + body = body.encode(Utils.GetCharSet(self._lang), 'replace') + return NL.join(headers) % d + '\n\n' + body + + def _set_date(self, message): + self.__super_set_date(message) + self.fromdate = time.ctime(int(self.date)) + + def loadbody_fromHTML(self,fileobj): + self.body = [] + begin = 0 + while 1: + line = fileobj.readline() + if not line: + break + if not begin: + if line.strip() == '<!--beginarticle-->': + begin = 1 + continue + if line.strip() == '<!--endarticle-->': + break + self.body.append(line) + + + +class HyperArchive(pipermail.T): + __super_init = pipermail.T.__init__ + __super_update_archive = pipermail.T.update_archive + __super_update_dirty_archives = pipermail.T.update_dirty_archives + __super_add_article = pipermail.T.add_article + + # some defaults + DIRMODE = 02775 + FILEMODE = 0660 + + VERBOSE = 0 + DEFAULTINDEX = 'thread' + ARCHIVE_PERIOD = 'month' + + THREADLAZY = 0 + THREADLEVELS = 3 + + ALLOWHTML = 1 # "Lines between <html></html>" handled as is. + SHOWHTML = 0 # Eg, nuke leading whitespace in html manner. + IQUOTES = 1 # Italicize quoted text. + SHOWBR = 0 # Add <br> onto every line + + def __init__(self, maillist): + # can't init the database while other processes are writing to it! 
+ # XXX TODO- implement native locking + # with mailman's LockFile module for HyperDatabase.HyperDatabase + # + dir = maillist.archive_dir() + db = HyperDatabase.HyperDatabase(dir, maillist) + self.__super_init(dir, reload=1, database=db) + + self.maillist = maillist + self._lock_file = None + self.lang = maillist.preferred_language + self.charset = Utils.GetCharSet(maillist.preferred_language) + + if hasattr(self.maillist,'archive_volume_frequency'): + if self.maillist.archive_volume_frequency == 0: + self.ARCHIVE_PERIOD='year' + elif self.maillist.archive_volume_frequency == 2: + self.ARCHIVE_PERIOD='quarter' + elif self.maillist.archive_volume_frequency == 3: + self.ARCHIVE_PERIOD='week' + elif self.maillist.archive_volume_frequency == 4: + self.ARCHIVE_PERIOD='day' + else: + self.ARCHIVE_PERIOD='month' + + yre = r'(?P<year>[0-9]{4,4})' + mre = r'(?P<month>[01][0-9])' + dre = r'(?P<day>[0123][0-9])' + self._volre = { + 'year': '^' + yre + '$', + 'quarter': '^' + yre + r'q(?P<quarter>[1234])$', + 'month': '^' + yre + r'-(?P<month>[a-zA-Z]+)$', + 'week': r'^Week-of-Mon-' + yre + mre + dre, + 'day': '^' + yre + mre + dre + '$' + } + + def _makeArticle(self, msg, sequence): + return Article(msg, sequence, + lang=self.maillist.preferred_language, + mlist=self.maillist) + + def html_foot(self): + # avoid i18n side-effects + mlist = self.maillist + otrans = i18n.get_translation() + i18n.set_language(mlist.preferred_language) + # Convenience + def quotetime(s): + return html_quote(i18n.ctime(s), self.lang) + try: + d = {"lastdate": quotetime(self.lastdate), + "archivedate": quotetime(self.archivedate), + "listinfo": mlist.GetScriptURL('listinfo', absolute=1), + "version": self.version, + } + i = {"thread": _("thread"), + "subject": _("subject"), + "author": _("author"), + "date": _("date") + } + finally: + i18n.set_translation(otrans) + + for t in i.keys(): + cap = t[0].upper() + t[1:] + if self.type == cap: + d["%s_ref" % (t)] = "" + else: + d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>' + % (t, i[t])) + return quick_maketext( + 'archidxfoot.html', d, + mlist=mlist) + + def html_head(self): + # avoid i18n side-effects + mlist = self.maillist + otrans = i18n.get_translation() + i18n.set_language(mlist.preferred_language) + # Convenience + def quotetime(s): + return html_quote(i18n.ctime(s), self.lang) + try: + d = {"listname": html_quote(mlist.real_name, self.lang), + "archtype": self.type, + "archive": self.volNameToDesc(self.archive), + "listinfo": mlist.GetScriptURL('listinfo', absolute=1), + "firstdate": quotetime(self.firstdate), + "lastdate": quotetime(self.lastdate), + "size": self.size, + } + i = {"thread": _("thread"), + "subject": _("subject"), + "author": _("author"), + "date": _("date"), + } + finally: + i18n.set_translation(otrans) + + for t in i.keys(): + cap = t[0].upper() + t[1:] + if self.type == cap: + d["%s_ref" % (t)] = "" + d["archtype"] = i[t] + else: + d["%s_ref" % (t)] = ('<a href="%s.html#start">[ %s ]</a>' + % (t, i[t])) + if self.charset: + d["encoding"] = html_charset % self.charset + else: + d["encoding"] = "" + return quick_maketext( + 'archidxhead.html', d, + mlist=mlist) + + def html_TOC(self): + mlist = self.maillist + listname = mlist.internal_name() + mbox = os.path.join(mlist.archive_dir()+'.mbox', listname+'.mbox') + d = {"listname": mlist.real_name, + "listinfo": mlist.GetScriptURL('listinfo', absolute=1), + "fullarch": '../%s.mbox/%s.mbox' % (listname, listname), + "size": sizeof(mbox, mlist.preferred_language), + 'meta': '', + } + # Avoid i18n 
side-effects + otrans = i18n.get_translation() + i18n.set_language(mlist.preferred_language) + try: + if not self.archives: + d["noarchive_msg"] = _( + '<P>Currently, there are no archives. </P>') + d["archive_listing_start"] = "" + d["archive_listing_end"] = "" + d["archive_listing"] = "" + else: + d["noarchive_msg"] = "" + d["archive_listing_start"] = quick_maketext( + 'archliststart.html', + lang=mlist.preferred_language, + mlist=mlist) + d["archive_listing_end"] = quick_maketext( + 'archlistend.html', + mlist=mlist) + + accum = [] + for a in self.archives: + accum.append(self.html_TOC_entry(a)) + d["archive_listing"] = EMPTYSTRING.join(accum) + finally: + i18n.set_translation(otrans) + + # The TOC is always in the charset of the list's preferred language + d['meta'] += html_charset % Utils.GetCharSet(mlist.preferred_language) + + return quick_maketext( + 'archtoc.html', d, + mlist=mlist) + + def html_TOC_entry(self, arch): + # Check to see if the archive is gzip'd or not + txtfile = os.path.join(self.maillist.archive_dir(), arch + '.txt') + gzfile = txtfile + '.gz' + # which exists? .txt.gz first, then .txt + if os.path.exists(gzfile): + file = gzfile + url = arch + '.txt.gz' + templ = '<td><A href="%(url)s">[ ' + _('Gzip\'d Text%(sz)s') \ + + ']</a></td>' + elif os.path.exists(txtfile): + file = txtfile + url = arch + '.txt' + templ = '<td><A href="%(url)s">[ ' + _('Text%(sz)s') + ']</a></td>' + else: + # neither found? + file = None + # in Python 1.5.2 we have an easy way to get the size + if file: + textlink = templ % { + 'url': url, + 'sz' : sizeof(file, self.maillist.preferred_language) + } + else: + # there's no archive file at all... hmmm. + textlink = '' + return quick_maketext( + 'archtocentry.html', + {'archive': arch, + 'archivelabel': self.volNameToDesc(arch), + 'textlink': textlink + }, + mlist=self.maillist) + + def GetArchLock(self): + if self._lock_file: + return 1 + self._lock_file = LockFile.LockFile( + os.path.join(mm_cfg.LOCK_DIR, + self.maillist.internal_name() + '-arch.lock')) + try: + self._lock_file.lock(timeout=0.5) + except LockFile.TimeOutError: + return 0 + return 1 + + def DropArchLock(self): + if self._lock_file: + self._lock_file.unlock(unconditionally=1) + self._lock_file = None + + def processListArch(self): + name = self.maillist.ArchiveFileName() + wname= name+'.working' + ename= name+'.err_unarchived' + try: + os.stat(name) + except (IOError,os.error): + #no archive file, nothin to do -ddm + return + + #see if arch is locked here -ddm + if not self.GetArchLock(): + #another archiver is running, nothing to do. -ddm + return + + #if the working file is still here, the archiver may have + # crashed during archiving. Save it, log an error, and move on. + try: + wf = open(wname) + syslog('error', + 'Archive working file %s present. ' + 'Check %s for possibly unarchived msgs', + wname, ename) + omask = os.umask(007) + try: + ef = open(ename, 'a+') + finally: + os.umask(omask) + ef.seek(1,2) + if ef.read(1) <> '\n': + ef.write('\n') + ef.write(wf.read()) + ef.close() + wf.close() + os.unlink(wname) + except IOError: + pass + os.rename(name,wname) + archfile = open(wname) + self.processUnixMailbox(archfile) + archfile.close() + os.unlink(wname) + self.DropArchLock() + + def get_filename(self, article): + return '%06i.html' % (article.sequence,) + + def get_archives(self, article): + """Return a list of indexes where the article should be filed. 
+ A string can be returned if the list only contains one entry, + and the empty list is legal.""" + res = self.dateToVolName(float(article.date)) + self.message(_("figuring article archives\n")) + self.message(res + "\n") + return res + + def volNameToDesc(self, volname): + volname = volname.strip() + # Don't make these module global constants since we have to runtime + # translate them anyway. + monthdict = [ + '', + _('January'), _('February'), _('March'), _('April'), + _('May'), _('June'), _('July'), _('August'), + _('September'), _('October'), _('November'), _('December') + ] + for each in self._volre.keys(): + match = re.match(self._volre[each], volname) + # Let ValueErrors percolate up + if match: + year = int(match.group('year')) + if each == 'quarter': + d =["", _("First"), _("Second"), _("Third"), _("Fourth") ] + ord = d[int(match.group('quarter'))] + return _("%(ord)s quarter %(year)i") + elif each == 'month': + monthstr = match.group('month').lower() + for i in range(1, 13): + monthname = time.strftime("%B", (1999,i,1,0,0,0,0,1,0)) + if monthstr.lower() == monthname.lower(): + month = monthdict[i] + return _("%(month)s %(year)i") + raise ValueError, "%s is not a month!" % monthstr + elif each == 'week': + month = monthdict[int(match.group("month"))] + day = int(match.group("day")) + return _("The Week Of Monday %(day)i %(month)s %(year)i") + elif each == 'day': + month = monthdict[int(match.group("month"))] + day = int(match.group("day")) + return _("%(day)i %(month)s %(year)i") + else: + return match.group('year') + raise ValueError, "%s is not a valid volname" % volname + +# The following two methods should be inverses of each other. -ddm + + def dateToVolName(self,date): + datetuple=time.localtime(date) + if self.ARCHIVE_PERIOD=='year': + return time.strftime("%Y",datetuple) + elif self.ARCHIVE_PERIOD=='quarter': + if datetuple[1] in [1,2,3]: + return time.strftime("%Yq1",datetuple) + elif datetuple[1] in [4,5,6]: + return time.strftime("%Yq2",datetuple) + elif datetuple[1] in [7,8,9]: + return time.strftime("%Yq3",datetuple) + else: + return time.strftime("%Yq4",datetuple) + elif self.ARCHIVE_PERIOD == 'day': + return time.strftime("%Y%m%d", datetuple) + elif self.ARCHIVE_PERIOD == 'week': + # Reconstruct "seconds since epoch", and subtract weekday + # multiplied by the number of seconds in a day. + monday = time.mktime(datetuple) - datetuple[6] * 24 * 60 * 60 + # Build a new datetuple from this "seconds since epoch" value + datetuple = time.localtime(monday) + return time.strftime("Week-of-Mon-%Y%m%d", datetuple) + # month. 
-ddm + else: + return time.strftime("%Y-%B",datetuple) + + + def volNameToDate(self,volname): + volname = volname.strip() + for each in self._volre.keys(): + match=re.match(self._volre[each],volname) + if match: + year=int(match.group('year')) + month=1 + day = 1 + if each == 'quarter': + q=int(match.group('quarter')) + month=(q*3)-2 + elif each == 'month': + monthstr=match.group('month').lower() + m=[] + for i in range(1,13): + m.append( + time.strftime("%B",(1999,i,1,0,0,0,0,1,0)).lower()) + try: + month=m.index(monthstr)+1 + except ValueError: + pass + elif each == 'week' or each == 'day': + month = int(match.group("month")) + day = int(match.group("day")) + return time.mktime((year,month,1,0,0,0,0,1,-1)) + return 0.0 + + def sortarchives(self): + def sf(a,b,s=self): + al=s.volNameToDate(a) + bl=s.volNameToDate(b) + if al>bl: + return 1 + elif al<bl: + return -1 + else: + return 0 + if self.ARCHIVE_PERIOD in ('month','year','quarter'): + self.archives.sort(sf) + else: + self.archives.sort() + self.archives.reverse() + + def message(self, msg): + if self.VERBOSE: + f = sys.stderr + f.write(msg) + if msg[-1:] != '\n': + f.write('\n') + f.flush() + + def open_new_archive(self, archive, archivedir): + index_html = os.path.join(archivedir, 'index.html') + try: + os.unlink(index_html) + except: + pass + os.symlink(self.DEFAULTINDEX+'.html',index_html) + + def write_index_header(self): + self.depth=0 + print self.html_head() + if not self.THREADLAZY and self.type=='Thread': + self.message(_("Computing threaded index\n")) + self.updateThreadedIndex() + + def write_index_footer(self): + for i in range(self.depth): + print '</UL>' + print self.html_foot() + + def write_index_entry(self, article): + subject = self.get_header("subject", article) + author = self.get_header("author", article) + if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS: + author = re.sub('@', _(' at '), author) + subject = CGIescape(subject, self.lang) + author = CGIescape(author, self.lang) + + d = { + 'filename': urllib.quote(article.filename), + 'subject': subject, + 'sequence': article.sequence, + 'author': author + } + print quick_maketext( + 'archidxentry.html', d, + mlist=self.maillist) + + def get_header(self, field, article): + # if we have no decoded header, return the encoded one + result = article.decoded.get(field) + if result is None: + return getattr(article, field) + # otherwise, the decoded one will be Unicode + return result + + def write_threadindex_entry(self, article, depth): + if depth < 0: + self.message('depth<0') + depth = 0 + if depth > self.THREADLEVELS: + depth = self.THREADLEVELS + if depth < self.depth: + for i in range(self.depth-depth): + print '</UL>' + elif depth > self.depth: + for i in range(depth-self.depth): + print '<UL>' + print '<!--%i %s -->' % (depth, article.threadKey) + self.depth = depth + self.write_index_entry(article) + + def write_TOC(self): + self.sortarchives() + omask = os.umask(002) + try: + toc = open(os.path.join(self.basedir, 'index.html'), 'w') + finally: + os.umask(omask) + toc.write(self.html_TOC()) + toc.close() + + def write_article(self, index, article, path): + # called by add_article + omask = os.umask(002) + try: + f = open(path, 'w') + finally: + os.umask(omask) + f.write(article.as_html()) + f.close() + + # Write the text article to the text archive. 
+ path = os.path.join(self.basedir, "%s.txt" % index) + omask = os.umask(002) + try: + f = open(path, 'a+') + finally: + os.umask(omask) + f.write(article.as_text()) + f.close() + + def update_archive(self, archive): + self.__super_update_archive(archive) + # only do this if the gzip module was imported globally, and + # gzip'ing was enabled via mm_cfg.GZIP_ARCHIVE_TXT_FILES. See + # above. + if gzip: + archz = None + archt = None + txtfile = os.path.join(self.basedir, '%s.txt' % archive) + gzipfile = os.path.join(self.basedir, '%s.txt.gz' % archive) + oldgzip = os.path.join(self.basedir, '%s.old.txt.gz' % archive) + try: + # open the plain text file + archt = open(txtfile) + except IOError: + return + try: + os.rename(gzipfile, oldgzip) + archz = gzip.open(oldgzip) + except (IOError, RuntimeError, os.error): + pass + try: + ou = os.umask(002) + newz = gzip.open(gzipfile, 'w') + finally: + # XXX why is this a finally? + os.umask(ou) + if archz: + newz.write(archz.read()) + archz.close() + os.unlink(oldgzip) + # XXX do we really need all this in a try/except? + try: + newz.write(archt.read()) + newz.close() + archt.close() + except IOError: + pass + os.unlink(txtfile) + + _skip_attrs = ('maillist', '_lock_file', 'charset') + + def getstate(self): + d={} + for each in self.__dict__.keys(): + if not (each in self._skip_attrs + or each.upper() == each): + d[each] = self.__dict__[each] + return d + + # Add <A HREF="..."> tags around URLs and e-mail addresses. + + def __processbody_URLquote(self, lines): + # XXX a lot to do here: + # 1. use lines directly, rather than source and dest + # 2. make it clearer + # 3. make it faster + source = lines[:] + dest = lines + last_line_was_quoted = 0 + for i in xrange(0, len(source)): + Lorig = L = source[i] + prefix = suffix = "" + if L is None: + continue + # Italicise quoted text + if self.IQUOTES: + quoted = quotedpat.match(L) + if quoted is None: + last_line_was_quoted = 0 + else: + quoted = quoted.end(0) + prefix = CGIescape(L[:quoted], self.lang) + '<i>' + suffix = '</I>' + if self.SHOWHTML: + suffix += '<BR>' + if not last_line_was_quoted: + prefix = '<BR>' + prefix + L = L[quoted:] + last_line_was_quoted = 1 + # Check for an e-mail address + L2 = "" + jr = emailpat.search(L) + kr = urlpat.search(L) + while jr is not None or kr is not None: + if jr == None: + j = -1 + else: + j = jr.start(0) + if kr is None: + k = -1 + else: + k = kr.start(0) + if j != -1 and (j < k or k == -1): + text = jr.group(1) + length = len(text) + if mm_cfg.ARCHIVER_OBSCURES_EMAILADDRS: + text = re.sub('@', _(' at '), text) + URL = self.maillist.GetScriptURL( + 'listinfo', absolute=1) + else: + URL = 'mailto:' + text + pos = j + elif k != -1 and (j > k or j == -1): + text = URL = kr.group(1) + length = len(text) + pos = k + else: # j==k + raise ValueError, "j==k: This can't happen!" + #length = len(text) + #self.message("URL: %s %s %s \n" + # % (CGIescape(L[:pos]), URL, CGIescape(text))) + L2 += '%s<A HREF="%s">%s</A>' % ( + CGIescape(L[:pos], self.lang), + html_quote(URL), CGIescape(text, self.lang)) + L = L[pos+length:] + jr = emailpat.search(L) + kr = urlpat.search(L) + if jr is None and kr is None: + L = CGIescape(L, self.lang) + L = prefix + L2 + L + suffix + source[i] = None + dest[i] = L + + # Perform Hypermail-style processing of <HTML></HTML> directives + # in message bodies. Lines between <HTML> and </HTML> will be written + # out precisely as they are; other lines will be passed to func2 + # for further processing . 
+ + def __processbody_HTML(self, lines): + # XXX need to make this method modify in place + source = lines[:] + dest = lines + l = len(source) + i = 0 + while i < l: + while i < l and htmlpat.match(source[i]) is None: + i = i + 1 + if i < l: + source[i] = None + i = i + 1 + while i < l and nohtmlpat.match(source[i]) is None: + dest[i], source[i] = source[i], None + i = i + 1 + if i < l: + source[i] = None + i = i + 1 + + def format_article(self, article): + # called from add_article + # TBD: Why do the HTML formatting here and keep it in the + # pipermail database? It makes more sense to do the html + # formatting as the article is being written as html and toss + # the data after it has been written to the archive file. + lines = filter(None, article.body) + # Handle <HTML> </HTML> directives + if self.ALLOWHTML: + self.__processbody_HTML(lines) + self.__processbody_URLquote(lines) + if not self.SHOWHTML and lines: + lines.insert(0, '<PRE>') + lines.append('</PRE>') + else: + # Do fancy formatting here + if self.SHOWBR: + lines = map(lambda x:x + "<BR>", lines) + else: + for i in range(0, len(lines)): + s = lines[i] + if s[0:1] in ' \t\n': + lines[i] = '<P>' + s + article.html_body = lines + return article + + def update_article(self, arcdir, article, prev, next): + seq = article.sequence + filename = os.path.join(arcdir, article.filename) + self.message(_('Updating HTML for article %(seq)s')) + try: + f = open(filename) + article.loadbody_fromHTML(f) + f.close() + except IOError, e: + if e.errno <> errno.ENOENT: raise + self.message(_('article file %(filename)s is missing!')) + article.prev = prev + article.next = next + omask = os.umask(002) + try: + f = open(filename, 'w') + finally: + os.umask(omask) + f.write(article.as_html()) + f.close() diff --git a/Mailman/Archiver/HyperDatabase.py b/Mailman/Archiver/HyperDatabase.py new file mode 100644 index 00000000..ab41b824 --- /dev/null +++ b/Mailman/Archiver/HyperDatabase.py @@ -0,0 +1,338 @@ +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# +# site modules +# +import os +import marshal +import time +import errno + +# +# package/project modules +# +import pipermail +from Mailman import LockFile + +CACHESIZE = pipermail.CACHESIZE + +try: + import cPickle + pickle = cPickle +except ImportError: + import pickle + +# +# we're using a python dict in place of +# of bsddb.btree database. only defining +# the parts of the interface used by class HyperDatabase +# only one thing can access this at a time. +# +class DumbBTree: + """Stores pickles of Article objects + + This dictionary-like object stores pickles of all the Article + objects. The object itself is stored using marshal. It would be + much simpler, and probably faster, to store the actual objects in + the DumbBTree and pickle it. 
+ + TBD: Also needs a more sensible name, like IteratableDictionary or + SortedDictionary. + """ + + def __init__(self, path): + self.current_index = 0 + self.path = path + self.lockfile = LockFile.LockFile(self.path + ".lock") + self.lock() + self.__dirty = 0 + self.dict = {} + self.sorted = [] + self.load() + + def __repr__(self): + return "DumbBTree(%s)" % self.path + + def __sort(self, dirty=None): + if self.__dirty == 1 or dirty: + self.sorted = self.dict.keys() + self.sorted.sort() + self.__dirty = 0 + + def lock(self): + self.lockfile.lock() + + def unlock(self): + try: + self.lockfile.unlock() + except LockFile.NotLockedError: + pass + + def __delitem__(self, item): + # if first hasn't been called, we can skip the sort + if self.current_index == 0: + del self.dict[item] + self.__dirty = 1 + return + try: + ci = self.sorted[self.current_index] + except IndexError: + ci = None + if ci == item: + try: + ci = self.sorted[self.current_index + 1] + except IndexError: + ci = None + del self.dict[item] + self.__sort(dirty=1) + if ci is not None: + self.current_index = self.sorted.index(ci) + else: + self.current_index = self.current_index + 1 + + def clear(self): + # bulk clearing much faster than deleting each item, esp. with the + # implementation of __delitem__() above :( + self.dict = {} + + def first(self): + self.__sort() # guarantee that the list is sorted + if not self.sorted: + raise KeyError + else: + key = self.sorted[0] + self.current_index = 1 + return key, self.dict[key] + + def last(self): + if not self.sorted: + raise KeyError + else: + key = self.sorted[-1] + self.current_index = len(self.sorted) - 1 + return key, self.dict[key] + + def next(self): + try: + key = self.sorted[self.current_index] + except IndexError: + raise KeyError + self.current_index = self.current_index + 1 + return key, self.dict[key] + + def has_key(self, key): + return self.dict.has_key(key) + + def set_location(self, loc): + if not self.dict.has_key(loc): + raise KeyError + self.current_index = self.sorted.index(loc) + + def __getitem__(self, item): + return self.dict[item] + + def __setitem__(self, item, val): + # if first hasn't been called, then we don't need to worry + # about sorting again + if self.current_index == 0: + self.dict[item] = val + self.__dirty = 1 + return + try: + current_item = self.sorted[self.current_index] + except IndexError: + current_item = item + self.dict[item] = val + self.__sort(dirty=1) + self.current_index = self.sorted.index(current_item) + + def __len__(self): + return len(self.sorted) + + def load(self): + try: + fp = open(self.path) + try: + self.dict = marshal.load(fp) + finally: + fp.close() + except IOError, e: + if e.errno <> errno.ENOENT: raise + pass + except EOFError: + pass + else: + self.__sort(dirty=1) + + def close(self): + omask = os.umask(007) + try: + fp = open(self.path, 'w') + finally: + os.umask(omask) + fp.write(marshal.dumps(self.dict)) + fp.close() + self.unlock() + + +# this is lifted straight out of pipermail with +# the bsddb.btree replaced with above class. 
+# didn't use inheritance because of all the +# __internal stuff that needs to be here -scott +# +class HyperDatabase(pipermail.Database): + __super_addArticle = pipermail.Database.addArticle + + def __init__(self, basedir, mlist): + self.__cache = {} + self.__currentOpenArchive = None # The currently open indices + self._mlist = mlist + self.basedir = os.path.expanduser(basedir) + # Recently added articles, indexed only by message ID + self.changed={} + + def firstdate(self, archive): + self.__openIndices(archive) + date = 'None' + try: + datekey, msgid = self.dateIndex.first() + date = time.asctime(time.localtime(float(datekey[0]))) + except KeyError: + pass + return date + + def lastdate(self, archive): + self.__openIndices(archive) + date = 'None' + try: + datekey, msgid = self.dateIndex.last() + date = time.asctime(time.localtime(float(datekey[0]))) + except KeyError: + pass + return date + + def numArticles(self, archive): + self.__openIndices(archive) + return len(self.dateIndex) + + def addArticle(self, archive, article, subject=None, author=None, + date=None): + self.__openIndices(archive) + self.__super_addArticle(archive, article, subject, author, date) + + def __openIndices(self, archive): + if self.__currentOpenArchive == archive: + return + self.__closeIndices() + arcdir = os.path.join(self.basedir, 'database') + omask = os.umask(0) + try: + try: + os.mkdir(arcdir, 02770) + except OSError, e: + if e.errno <> errno.EEXIST: raise + finally: + os.umask(omask) + for i in ('date', 'author', 'subject', 'article', 'thread'): + t = DumbBTree(os.path.join(arcdir, archive + '-' + i)) + setattr(self, i + 'Index', t) + self.__currentOpenArchive = archive + + def __closeIndices(self): + for i in ('date', 'author', 'subject', 'thread', 'article'): + attr = i + 'Index' + if hasattr(self, attr): + index = getattr(self, attr) + if i == 'article': + if not hasattr(self, 'archive_length'): + self.archive_length = {} + l = len(index) + self.archive_length[self.__currentOpenArchive] = l + index.close() + delattr(self, attr) + self.__currentOpenArchive = None + + def close(self): + self.__closeIndices() + + def hasArticle(self, archive, msgid): + self.__openIndices(archive) + return self.articleIndex.has_key(msgid) + + def setThreadKey(self, archive, key, msgid): + self.__openIndices(archive) + self.threadIndex[key]=msgid + + def getArticle(self, archive, msgid): + self.__openIndices(archive) + if not self.__cache.has_key(msgid): + # get the pickled object out of the DumbBTree + buf = self.articleIndex[msgid] + article = self.__cache[msgid] = pickle.loads(buf) + # For upgrading older archives + article.setListIfUnset(self._mlist) + else: + article = self.__cache[msgid] + return article + + def first(self, archive, index): + self.__openIndices(archive) + index = getattr(self, index + 'Index') + try: + key, msgid = index.first() + return msgid + except KeyError: + return None + + def next(self, archive, index): + self.__openIndices(archive) + index = getattr(self, index + 'Index') + try: + key, msgid = index.next() + return msgid + except KeyError: + return None + + def getOldestArticle(self, archive, subject): + self.__openIndices(archive) + subject = subject.lower() + try: + key, tempid=self.subjectIndex.set_location(subject) + self.subjectIndex.next() + [subject2, date]= key.split('\0') + if subject!=subject2: return None + return tempid + except KeyError: + return None + + def newArchive(self, archive): + pass + + def clearIndex(self, archive, index): + self.__openIndices(archive) + if 
hasattr(self.threadIndex, 'clear'): + self.threadIndex.clear() + return + finished=0 + try: + key, msgid=self.threadIndex.first() + except KeyError: finished=1 + while not finished: + del self.threadIndex[key] + try: + key, msgid=self.threadIndex.next() + except KeyError: finished=1 diff --git a/Mailman/Archiver/Makefile.in b/Mailman/Archiver/Makefile.in new file mode 100644 index 00000000..fe56149d --- /dev/null +++ b/Mailman/Archiver/Makefile.in @@ -0,0 +1,72 @@ +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# NOTE: Makefile.in is converted into Makefile by the configure script +# in the parent directory. Once configure has run, you can recreate +# the Makefile by running just config.status. + +# Variables set by configure + +VPATH= @srcdir@ +srcdir= @srcdir@ +bindir= @bindir@ +prefix= @prefix@ +exec_prefix= @exec_prefix@ + +CC= @CC@ +CHMOD= @CHMOD@ +INSTALL= @INSTALL@ + +DEFS= @DEFS@ + +# Customizable but not set by configure + +OPT= @OPT@ +CFLAGS= $(OPT) $(DEFS) +PACKAGEDIR= $(prefix)/Mailman/Archiver +SHELL= /bin/sh + +MODULES= __init__.py Archiver.py HyperArch.py HyperDatabase.py \ +pipermail.py + + +# Modes for directories and executables created by the install +# process. Default to group-writable directories but +# user-only-writable for executables. +DIRMODE= 775 +EXEMODE= 755 +FILEMODE= 644 +INSTALL_PROGRAM=$(INSTALL) -m $(EXEMODE) + + +# Rules + +all: + +install: + for f in $(MODULES); \ + do \ + $(INSTALL) -m $(FILEMODE) $(srcdir)/$$f $(PACKAGEDIR); \ + done + +finish: + +clean: + +distclean: + -rm *.pyc + -rm Makefile + diff --git a/Mailman/Archiver/__init__.py b/Mailman/Archiver/__init__.py new file mode 100644 index 00000000..65ad7be7 --- /dev/null +++ b/Mailman/Archiver/__init__.py @@ -0,0 +1,17 @@ +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +from Archiver import * diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py new file mode 100644 index 00000000..2e1b226d --- /dev/null +++ b/Mailman/Archiver/pipermail.py @@ -0,0 +1,854 @@ +#! 
/usr/bin/env python + +from __future__ import nested_scopes + +import mailbox +import os +import re +import sys +import time +from email.Utils import parseaddr, parsedate_tz +import cPickle as pickle +from cStringIO import StringIO +from string import lowercase + +__version__ = '0.09 (Mailman edition)' +VERSION = __version__ +CACHESIZE = 100 # Number of slots in the cache + +from Mailman import Errors +from Mailman.Mailbox import ArchiverMailbox +from Mailman.Logging.Syslog import syslog +from Mailman.i18n import _ + +SPACE = ' ' + + + +msgid_pat = re.compile(r'(<.*>)') +def strip_separators(s): + "Remove quotes or parenthesization from a Message-ID string" + if not s: + return "" + if s[0] in '"<([' and s[-1] in '">)]': + s = s[1:-1] + return s + +smallNameParts = ['van', 'von', 'der', 'de'] + +def fixAuthor(author): + "Canonicalize a name into Last, First format" + # If there's a comma, guess that it's already in "Last, First" format + if ',' in author: + return author + L = author.split() + i = len(L) - 1 + if i == 0: + return author # The string's one word--forget it + if author.upper() == author or author.lower() == author: + # Damn, the name is all upper- or lower-case. + while i > 0 and L[i-1].lower() in smallNameParts: + i = i - 1 + else: + # Mixed case; assume that small parts of the last name will be + # in lowercase, and check them against the list. + while i>0 and (L[i-1][0] in lowercase or + L[i-1].lower() in smallNameParts): + i = i - 1 + author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i]) + return author + +# Abstract class for databases + +class DatabaseInterface: + def __init__(self): pass + def close(self): pass + def getArticle(self, archive, msgid): pass + def hasArticle(self, archive, msgid): pass + def addArticle(self, archive, article, subject=None, author=None, + date=None): pass + def firstdate(self, archive): pass + def lastdate(self, archive): pass + def first(self, archive, index): pass + def next(self, archive, index): pass + def numArticles(self, archive): pass + def newArchive(self, archive): pass + def setThreadKey(self, archive, key, msgid): pass + def getOldestArticle(self, subject): pass + +class Database(DatabaseInterface): + """Define the basic sorting logic for a database + + Assumes that the database internally uses dateIndex, authorIndex, + etc. + """ + + # TBD Factor out more of the logic shared between BSDDBDatabase + # and HyperDatabase and place it in this class. + + def __init__(self): + # This method need not be called by subclasses that do their + # own initialization. 
+ self.dateIndex = {} + self.authorIndex = {} + self.subjectIndex = {} + self.articleIndex = {} + self.changed = {} + + def addArticle(self, archive, article, subject=None, author=None, + date=None): + # create the keys; always end w/ msgid which will be unique + authorkey = (author or article.author, article.date, + article.msgid) + subjectkey = (subject or article.subject, article.date, + article.msgid) + datekey = date or article.date, article.msgid + + # Add the new article + self.dateIndex[datekey] = article.msgid + self.authorIndex[authorkey] = article.msgid + self.subjectIndex[subjectkey] = article.msgid + + self.store_article(article) + self.changed[archive, article.msgid] = None + + parentID = article.parentID + if parentID is not None and self.articleIndex.has_key(parentID): + parent = self.getArticle(archive, parentID) + myThreadKey = parent.threadKey + article.date + '-' + else: + myThreadKey = article.date + '-' + article.threadKey = myThreadKey + key = myThreadKey, article.msgid + self.setThreadKey(archive, key, article.msgid) + + def store_article(self, article): + """Store article without message body to save space""" + # TBD this is not thread safe! + temp = article.body + article.body = [] + self.articleIndex[article.msgid] = pickle.dumps(article) + article.body = temp + +# The Article class encapsulates a single posting. The attributes +# are: +# +# sequence : Sequence number, unique for each article in a set of archives +# subject : Subject +# datestr : The posting date, in human-readable format +# date : The posting date, in purely numeric format +# headers : Any other headers of interest +# author : The author's name (and possibly organization) +# email : The author's e-mail address +# msgid : A unique message ID +# in_reply_to: If != "", this is the msgid of the article being replied to +# references : A (possibly empty) list of msgid's of earlier articles +# in the thread +# body : A list of strings making up the message body + +class Article: + _last_article_time = time.time() + + def __init__(self, message = None, sequence = 0, keepHeaders = []): + if message is None: + return + self.sequence = sequence + + self.parentID = None + self.threadKey = None + # otherwise the current sequence number is used. + id = strip_separators(message['Message-Id']) + if id == "": + self.msgid = str(self.sequence) + else: self.msgid = id + + if message.has_key('Subject'): + self.subject = str(message['Subject']) + else: + self.subject = _('No subject') + if self.subject == "": self.subject = _('No subject') + + self._set_date(message) + + # Figure out the e-mail address and poster's name. Use the From: + # field first, followed by Reply-To: + self.author, self.email = parseaddr(message.get('From', '')) + e = message['Reply-To'] + if not self.email and e is not None: + ignoreauthor, self.email = parseaddr(e) + self.email = strip_separators(self.email) + self.author = strip_separators(self.author) + + if self.author == "": + self.author = self.email + + # Save the In-Reply-To:, References:, and Message-ID: lines + # + # TBD: The original code does some munging on these fields, which + # shouldn't be necessary, but changing this may break code. For + # safety, I save the original headers on different attributes for use + # in writing the plain text periodic flat files. 
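# Illustration (not from the diff) of the two helpers the header parsing
# below relies on:
#     msgid_pat.search('see <abc@example.com> for details').group(1)
#         -> '<abc@example.com>'
#     strip_separators('<abc@example.com>')
#         -> 'abc@example.com'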
+ self._in_reply_to = message['in-reply-to'] + self._references = message['references'] + self._message_id = message['message-id'] + + i_r_t = message['In-Reply-To'] + if i_r_t is None: + self.in_reply_to = '' + else: + match = msgid_pat.search(i_r_t) + if match is None: self.in_reply_to = '' + else: self.in_reply_to = strip_separators(match.group(1)) + + references = message['References'] + if references is None: + self.references = [] + else: + self.references = map(strip_separators, references.split()) + + # Save any other interesting headers + self.headers = {} + for i in keepHeaders: + if message.has_key(i): + self.headers[i] = message[i] + + # Read the message body + s = StringIO(message.get_payload()) + self.body = s.readlines() + + def _set_date(self, message): + def floatdate(header): + missing = [] + datestr = message.get(header, missing) + if datestr is missing: + return None + date = parsedate_tz(datestr) + try: + return time.mktime(date[:9]) + except (ValueError, OverflowError): + return None + date = floatdate('date') + if date is None: + date = floatdate('x-list-received-date') + if date is None: + # What's left to try? + date = self._last_article_time + 1 + self._last_article_time = date + self.date = '%011i' % date + + def __repr__(self): + return '<Article ID = '+repr(self.msgid)+'>' + +# Pipermail formatter class + +class T: + DIRMODE = 0755 # Mode to give to created directories + FILEMODE = 0644 # Mode to give to created files + INDEX_EXT = ".html" # Extension for indexes + + def __init__(self, basedir = None, reload = 1, database = None): + # If basedir isn't provided, assume the current directory + if basedir is None: + self.basedir = os.getcwd() + else: + basedir = os.path.expanduser(basedir) + self.basedir = basedir + self.database = database + + # If the directory doesn't exist, create it. This code shouldn't get + # run anymore, we create the directory in Archiver.py. It should only + # get used by legacy lists created that are only receiving their first + # message in the HTML archive now -- Marc + try: + os.stat(self.basedir) + except os.error, errdata: + errno, errmsg = errdata + if errno != 2: + raise os.error, errdata + else: + self.message(_('Creating archive directory ') + self.basedir) + omask = os.umask(0) + try: + os.mkdir(self.basedir, self.DIRMODE) + finally: + os.umask(omask) + + # Try to load previously pickled state + try: + if not reload: + raise IOError + f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r') + self.message(_('Reloading pickled archive state')) + d = pickle.load(f) + f.close() + for key, value in d.items(): + setattr(self, key, value) + except (IOError, EOFError): + # No pickled version, so initialize various attributes + self.archives = [] # Archives + self._dirty_archives = [] # Archives that will have to be updated + self.sequence = 0 # Sequence variable used for + # numbering articles + self.update_TOC = 0 # Does the TOC need updating? + # + # make the basedir variable work when passed in as an __init__ arg + # and different from the one in the pickle. Let the one passed in + # as an __init__ arg take precedence if it's stated. This way, an + # archive can be moved from one place to another and still work. + # + if basedir != self.basedir: + self.basedir = basedir + + def close(self): + "Close an archive, save its state, and update any changed archives." 
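# Illustration (not from the diff): because getstate() defaults to
# self.__dict__, the pipermail.pck written below can be inspected offline,
# e.g. (path assumed):
#     import cPickle
#     d = cPickle.load(open('archives/private/mylist/pipermail.pck'))
#     print d['sequence'], d['archives']   # next article number and the
#                                          # list of known archive volumes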
+        self.update_dirty_archives()
+        self.update_TOC = 0
+        self.write_TOC()
+        # Save the collective state
+        self.message(_('Pickling archive state into ')
+                     + os.path.join(self.basedir, 'pipermail.pck'))
+        self.database.close()
+        del self.database
+
+        omask = os.umask(007)
+        try:
+            f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
+        finally:
+            os.umask(omask)
+        pickle.dump(self.getstate(), f)
+        f.close()
+
+    def getstate(self):
+        # can override this in subclass
+        return self.__dict__
+
+    #
+    # Private methods
+    #
+    # These will be neither overridden nor called by custom archivers.
+    #
+
+    # Create a dictionary of various parameters that will be passed
+    # to the write_index_{header,footer} functions
+    def __set_parameters(self, archive):
+        # Determine the earliest and latest date in the archive
+        firstdate = self.database.firstdate(archive)
+        lastdate = self.database.lastdate(archive)
+
+        # Get the current time
+        now = time.asctime(time.localtime(time.time()))
+        self.firstdate = firstdate
+        self.lastdate = lastdate
+        self.archivedate = now
+        self.size = self.database.numArticles(archive)
+        self.archive = archive
+        self.version = __version__
+
+    # Find the message ID of an article's parent, or return None
+    # if no parent can be found.
+    def __findParent(self, article, children = []):
+        parentID = None
+        if article.in_reply_to:
+            parentID = article.in_reply_to
+        elif article.references:
+            # Remove article IDs that aren't in the archive
+            refs = [r for r in article.references
+                    if self.database.hasArticle(self.archive, r)]
+            if not refs:
+                return None
+            maxdate = self.database.getArticle(self.archive, refs[0])
+            for ref in refs[1:]:
+                a = self.database.getArticle(self.archive, ref)
+                if a.date > maxdate.date:
+                    maxdate = a
+            parentID = maxdate.msgid
+        else:
+            # Look for the oldest article with a matching subject, and
+            # assume this is a follow-up to that article
+            tempid = self.database.getOldestArticle(self.archive,
+                                                    article.subject)
+            if tempid is not None and tempid not in children:
+                parentID = tempid
+        return parentID
+
+    # Update the threaded index completely
+    def updateThreadedIndex(self):
+        # Erase the threaded index
+        self.database.clearIndex(self.archive, 'thread')
+
+        # Loop over all the articles
+        msgid = self.database.first(self.archive, 'date')
+        while msgid is not None:
+            try:
+                article = self.database.getArticle(self.archive, msgid)
+            except KeyError:
+                pass
+            else:
+                if article.parentID is not None and \
+                   self.database.hasArticle(self.archive,
+                                            article.parentID):
+                    parent = self.database.getArticle(self.archive,
+                                                      article.parentID)
+                    article.threadKey = parent.threadKey + article.date + '-'
+                    self.database.setThreadKey(self.archive,
+                                               (article.threadKey,
+                                                article.msgid),
+                                               msgid)
+            msgid = self.database.next(self.archive, 'date')
+
+    #
+    # Public methods:
+    #
+    # These are part of the public interface of the T class, but will
+    # never be overridden (unless you're trying to do something very new).
+
+    # Update a single archive's indices, whether the archive's been
+    # dirtied or not.
+ def update_archive(self, archive): + self.archive = archive + self.message(_("Updating index files for archive [%(archive)s]")) + arcdir = os.path.join(self.basedir, archive) + self.__set_parameters(archive) + + for hdr in ('Date', 'Subject', 'Author'): + self._update_simple_index(hdr, archive, arcdir) + + self._update_thread_index(archive, arcdir) + + def _update_simple_index(self, hdr, archive, arcdir): + self.message(" " + hdr) + self.type = hdr + hdr = hdr.lower() + + self._open_index_file_as_stdout(arcdir, hdr) + self.write_index_header() + count = 0 + # Loop over the index entries + msgid = self.database.first(archive, hdr) + while msgid is not None: + try: + article = self.database.getArticle(self.archive, msgid) + except KeyError: + pass + else: + count = count + 1 + self.write_index_entry(article) + msgid = self.database.next(archive, hdr) + # Finish up this index + self.write_index_footer() + self._restore_stdout() + + def _update_thread_index(self, archive, arcdir): + self.message(_(" Thread")) + self._open_index_file_as_stdout(arcdir, "thread") + self.type = 'Thread' + self.write_index_header() + + # To handle the prev./next in thread pointers, we need to + # track articles 5 at a time. + + # Get the first 5 articles + L = [None] * 5 + i = 2 + msgid = self.database.first(self.archive, 'thread') + + while msgid is not None and i < 5: + L[i] = self.database.getArticle(self.archive, msgid) + i = i + 1 + msgid = self.database.next(self.archive, 'thread') + + while L[2] is not None: + article = L[2] + artkey = None + if article is not None: + artkey = article.threadKey + if artkey is not None: + self.write_threadindex_entry(article, artkey.count('-') - 1) + if self.database.changed.has_key((archive,article.msgid)): + a1 = L[1] + a3 = L[3] + self.update_article(arcdir, article, a1, a3) + if a3 is not None: + self.database.changed[(archive, a3.msgid)] = None + if a1 is not None: + key = archive, a1.msgid + if not self.database.changed.has_key(key): + self.update_article(arcdir, a1, L[0], L[2]) + else: + del self.database.changed[key] + L = L[1:] # Rotate the list + if msgid is None: + L.append(msgid) + else: + L.append(self.database.getArticle(self.archive, msgid)) + msgid = self.database.next(self.archive, 'thread') + + self.write_index_footer() + self._restore_stdout() + + def _open_index_file_as_stdout(self, arcdir, index_name): + path = os.path.join(arcdir, index_name + self.INDEX_EXT) + omask = os.umask(002) + try: + self.__f = open(path, 'w') + finally: + os.umask(omask) + self.__stdout = sys.stdout + sys.stdout = self.__f + + def _restore_stdout(self): + sys.stdout = self.__stdout + self.__f.close() + del self.__f + del self.__stdout + + # Update only archives that have been marked as "changed". + def update_dirty_archives(self): + for i in self._dirty_archives: + self.update_archive(i) + self._dirty_archives = [] + + # Read a Unix mailbox file from the file object <input>, + # and create a series of Article objects. Each article + # object will then be archived. 
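# A worked illustration (not from the diff) of the threadKey scheme used
# throughout: dates are the fixed-width '%011i' strings from _set_date, so
# lexicographic order is chronological order, and a reply's key is its
# parent's key plus its own date --
#     root        '01000000000-'
#     reply       '01000000000-01000000100-'
#     later root  '01000000050-'
# Sorted as plain strings, the reply lands directly under its root (the
# parent's key is a prefix) and before the later root; the indentation
# depth used by write_threadindex_entry is key.count('-') - 1.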
+ + def _makeArticle(self, msg, sequence): + return Article(msg, sequence) + + def processUnixMailbox(self, input, start=None, end=None): + mbox = ArchiverMailbox(input, self.maillist) + if start is None: + start = 0 + counter = 0 + while counter < start: + try: + m = mbox.next() + except Errors.DiscardMessage: + continue + if m is None: + return + counter += 1 + while 1: + try: + pos = input.tell() + m = mbox.next() + except Errors.DiscardMessage: + continue + except Exception: + syslog('error', 'uncaught archiver exception at filepos: %s', + pos) + raise + if m is None: + break + if m == '': + # It was an unparseable message + continue + msgid = m.get('message-id', 'n/a') + self.message(_('#%(counter)05d %(msgid)s')) + a = self._makeArticle(m, self.sequence) + self.sequence += 1 + self.add_article(a) + if end is not None and counter >= end: + break + counter += 1 + + def new_archive(self, archive, archivedir): + self.archives.append(archive) + self.update_TOC = 1 + self.database.newArchive(archive) + # If the archive directory doesn't exist, create it + try: + os.stat(archivedir) + except os.error, errdata: + errno, errmsg = errdata + if errno == 2: + omask = os.umask(0) + try: + os.mkdir(archivedir, self.DIRMODE) + finally: + os.umask(omask) + else: + raise os.error, errdata + self.open_new_archive(archive, archivedir) + + def add_article(self, article): + archives = self.get_archives(article) + if not archives: + return + if type(archives) == type(''): + archives = [archives] + + article.filename = filename = self.get_filename(article) + temp = self.format_article(article) + for arch in archives: + self.archive = arch # why do this??? + archivedir = os.path.join(self.basedir, arch) + if arch not in self.archives: + self.new_archive(arch, archivedir) + + # Write the HTML-ized article + self.write_article(arch, temp, os.path.join(archivedir, + filename)) + + author = fixAuthor(article.author) + subject = article.subject.lower() + + article.parentID = parentID = self.get_parent_info(arch, article) + if parentID: + parent = self.database.getArticle(arch, parentID) + article.threadKey = parent.threadKey + article.date + '-' + else: + article.threadKey = article.date + '-' + key = article.threadKey, article.msgid + + self.database.setThreadKey(arch, key, article.msgid) + self.database.addArticle(arch, temp, author=author, + subject=subject) + + if arch not in self._dirty_archives: + self._dirty_archives.append(arch) + + def get_parent_info(self, archive, article): + parentID = None + if article.in_reply_to: + parentID = article.in_reply_to + elif article.references: + refs = self._remove_external_references(article.references) + if refs: + maxdate = self.database.getArticle(archive, refs[0]) + for ref in refs[1:]: + a = self.database.getArticle(archive, ref) + if a.date > maxdate.date: + maxdate = a + parentID = maxdate.msgid + else: + # Get the oldest article with a matching subject, and + # assume this is a follow-up to that article + parentID = self.database.getOldestArticle(archive, + article.subject) + + if parentID and not self.database.hasArticle(archive, parentID): + parentID = None + return parentID + + def write_article(self, index, article, path): + omask = os.umask(002) + try: + f = open(path, 'w') + finally: + os.umask(omask) + temp_stdout, sys.stdout = sys.stdout, f + self.write_article_header(article) + sys.stdout.writelines(article.body) + self.write_article_footer(article) + sys.stdout = temp_stdout + f.close() + + def _remove_external_references(self, refs): + keep = [] + 
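# (Not in the original: "external" here means a References: ID that was
#  never archived -- e.g. a reply to off-list mail -- which would otherwise
#  become a dangling parentID in get_parent_info above.)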
for ref in refs: + if self.database.hasArticle(self.archive, ref): + keep.append(ref) + return keep + + # Abstract methods: these will need to be overridden by subclasses + # before anything useful can be done. + + def get_filename(self, article): + pass + def get_archives(self, article): + """Return a list of indexes where the article should be filed. + A string can be returned if the list only contains one entry, + and the empty list is legal.""" + pass + def format_article(self, article): + pass + def write_index_header(self): + pass + def write_index_footer(self): + pass + def write_index_entry(self, article): + pass + def write_threadindex_entry(self, article, depth): + pass + def write_article_header(self, article): + pass + def write_article_footer(self, article): + pass + def write_article_entry(self, article): + pass + def update_article(self, archivedir, article, prev, next): + pass + def write_TOC(self): + pass + def open_new_archive(self, archive, dir): + pass + def message(self, msg): + pass + + +class BSDDBdatabase(Database): + __super_addArticle = Database.addArticle + + def __init__(self, basedir): + self.__cachekeys = [] + self.__cachedict = {} + self.__currentOpenArchive = None # The currently open indices + self.basedir = os.path.expanduser(basedir) + self.changed = {} # Recently added articles, indexed only by + # message ID + + def firstdate(self, archive): + self.__openIndices(archive) + date = 'None' + try: + date, msgid = self.dateIndex.first() + date = time.asctime(time.localtime(float(date))) + except KeyError: + pass + return date + + def lastdate(self, archive): + self.__openIndices(archive) + date = 'None' + try: + date, msgid = self.dateIndex.last() + date = time.asctime(time.localtime(float(date))) + except KeyError: + pass + return date + + def numArticles(self, archive): + self.__openIndices(archive) + return len(self.dateIndex) + + def addArticle(self, archive, article, subject=None, author=None, + date=None): + self.__openIndices(archive) + self.__super_addArticle(archive, article, subject, author, date) + + # Open the BSDDB files that are being used as indices + # (dateIndex, authorIndex, subjectIndex, articleIndex) + def __openIndices(self, archive): + if self.__currentOpenArchive == archive: + return + + import bsddb + self.__closeIndices() + arcdir = os.path.join(self.basedir, 'database') + omask = os.umask(0) + try: + try: + os.mkdir(arcdir, 02775) + except OSError: + # BAW: Hmm... 
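# (Not in the original: the bare "except OSError" above swallows the
#  expected EEXIST when database/ already exists, but also any other mkdir
#  failure.  A stricter sketch, assuming errno were imported here:
#      except OSError, e:
#          if e.errno <> errno.EEXIST:
#              raise
# )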
+                pass
+        finally:
+            os.umask(omask)
+        for hdr in ('date', 'author', 'subject', 'article', 'thread'):
+            path = os.path.join(arcdir, archive + '-' + hdr)
+            t = bsddb.btopen(path, 'c')
+            setattr(self, hdr + 'Index', t)
+        self.__currentOpenArchive = archive
+
+    # Close the BSDDB files that are being used as indices (if they're
+    # open--this is safe to call if they're already closed)
+    def __closeIndices(self):
+        for hdr in ('date', 'author', 'subject', 'thread', 'article'):
+            attr = hdr + 'Index'
+            if hasattr(self, attr):
+                index = getattr(self, attr)
+                if hdr == 'article':
+                    if not hasattr(self, 'archive_length'):
+                        self.archive_length = {}
+                    self.archive_length[self.__currentOpenArchive] = \
+                        len(index)
+                index.close()
+                delattr(self, attr)
+        self.__currentOpenArchive = None
+
+    def close(self):
+        self.__closeIndices()
+
+    def hasArticle(self, archive, msgid):
+        self.__openIndices(archive)
+        return self.articleIndex.has_key(msgid)
+
+    def setThreadKey(self, archive, key, msgid):
+        self.__openIndices(archive)
+        self.threadIndex[key] = msgid
+
+    def getArticle(self, archive, msgid):
+        self.__openIndices(archive)
+        if self.__cachedict.has_key(msgid):
+            # Move the hit to the most-recently-used end of the cache
+            self.__cachekeys.remove(msgid)
+            self.__cachekeys.append(msgid)
+            return self.__cachedict[msgid]
+        if len(self.__cachekeys) == CACHESIZE:
+            # Evict the least recently used article
+            delkey, self.__cachekeys = (self.__cachekeys[0],
+                                        self.__cachekeys[1:])
+            del self.__cachedict[delkey]
+        s = self.articleIndex[msgid]
+        article = pickle.loads(s)
+        self.__cachekeys.append(msgid)
+        self.__cachedict[msgid] = article
+        return article
+
+    def first(self, archive, index):
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        try:
+            key, msgid = index.first()
+            return msgid
+        except KeyError:
+            return None
+
+    def next(self, archive, index):
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        try:
+            key, msgid = index.next()
+        except KeyError:
+            return None
+        else:
+            return msgid
+
+    def getOldestArticle(self, archive, subject):
+        self.__openIndices(archive)
+        subject = subject.lower()
+        try:
+            key, tempid = self.subjectIndex.set_location(subject)
+            self.subjectIndex.next()
+            [subject2, date] = key.split('\0')
+            if subject != subject2:
+                return None
+            return tempid
+        except KeyError:
+            # set_location() raises KeyError when nothing sorts at or
+            # after subject; next() raises it at the end of the index
+            return None
+
+    def newArchive(self, archive):
+        pass
+
+    def clearIndex(self, archive, index):
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        finished = 0
+        try:
+            key, msgid = index.first()
+        except KeyError:
+            finished = 1
+        while not finished:
+            del index[key]
+            try:
+                key, msgid = index.next()
+            except KeyError:
+                finished = 1
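The first()/next() pairs above all follow the same bsddb cursor idiom, with
KeyError as the end-of-records signal. A standalone sketch of that idiom,
assuming a throwaway scratch path (not part of the diff):

    import bsddb

    db = bsddb.btopen('/tmp/pipermail-demo.db', 'c')   # assumed scratch file
    db['01000000000-<a@example.com>'] = '<a@example.com>'
    db['01000000000-01000000100-<b@example.com>'] = '<b@example.com>'
    try:
        key, msgid = db.first()        # position the cursor at the lowest key
        while 1:
            print key, '->', msgid
            key, msgid = db.next()     # advance the cursor
    except KeyError:                   # raised when no records remain
        pass
    db.close()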
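Putting it together, a hypothetical driver for a concrete T subclass
(HyperArchive in HyperArch.py is the real one). Every "my"/"Some"/"the_"
name and path here is an assumption for illustration; note in particular
that processUnixMailbox expects a maillist attribute, since ArchiverMailbox
is constructed with the mailing list:

    basedir = '/var/mailman/archives/private/mylist'   # assumed layout
    f = open('/var/mailman/archives/private/mylist.mbox/mylist.mbox')
    archiver = SomeConcreteT(basedir=basedir,
                             database=BSDDBdatabase(basedir))
    archiver.maillist = the_mlist      # ArchiverMailbox needs the MailList
    archiver.processUnixMailbox(f)     # parse, thread, and write each article
    f.close()
    archiver.close()                   # update indices and pickle the state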