Diffstat (limited to 'Mailman/Archiver/pipermail.py')
-rw-r--r-- | Mailman/Archiver/pipermail.py | 854 |
1 file changed, 854 insertions, 0 deletions
diff --git a/Mailman/Archiver/pipermail.py b/Mailman/Archiver/pipermail.py
new file mode 100644
index 00000000..2e1b226d
--- /dev/null
+++ b/Mailman/Archiver/pipermail.py
@@ -0,0 +1,854 @@
+#! /usr/bin/env python
+
+from __future__ import nested_scopes
+
+import mailbox
+import os
+import re
+import sys
+import time
+from email.Utils import parseaddr, parsedate_tz
+import cPickle as pickle
+from cStringIO import StringIO
+from string import lowercase
+
+__version__ = '0.09 (Mailman edition)'
+VERSION = __version__
+CACHESIZE = 100    # Number of slots in the cache
+
+from Mailman import Errors
+from Mailman.Mailbox import ArchiverMailbox
+from Mailman.Logging.Syslog import syslog
+from Mailman.i18n import _
+
+SPACE = ' '
+
+
+
+msgid_pat = re.compile(r'(<.*>)')
+def strip_separators(s):
+    "Remove quotes or parenthesization from a Message-ID string"
+    if not s:
+        return ""
+    if s[0] in '"<([' and s[-1] in '">)]':
+        s = s[1:-1]
+    return s
+
+smallNameParts = ['van', 'von', 'der', 'de']
+
+def fixAuthor(author):
+    "Canonicalize a name into Last, First format"
+    # If there's a comma, guess that it's already in "Last, First" format
+    if ',' in author:
+        return author
+    L = author.split()
+    i = len(L) - 1
+    if i == 0:
+        return author           # The string's one word--forget it
+    if author.upper() == author or author.lower() == author:
+        # Damn, the name is all upper- or lower-case.
+        while i > 0 and L[i-1].lower() in smallNameParts:
+            i = i - 1
+    else:
+        # Mixed case; assume that small parts of the last name will be
+        # in lowercase, and check them against the list.
+        while i > 0 and (L[i-1][0] in lowercase or
+                         L[i-1].lower() in smallNameParts):
+            i = i - 1
+    author = SPACE.join(L[-1:] + L[i:-1]) + ', ' + SPACE.join(L[:i])
+    return author
+
+# Abstract class for databases
+
+class DatabaseInterface:
+    def __init__(self): pass
+    def close(self): pass
+    def getArticle(self, archive, msgid): pass
+    def hasArticle(self, archive, msgid): pass
+    def addArticle(self, archive, article, subject=None, author=None,
+                   date=None): pass
+    def firstdate(self, archive): pass
+    def lastdate(self, archive): pass
+    def first(self, archive, index): pass
+    def next(self, archive, index): pass
+    def numArticles(self, archive): pass
+    def newArchive(self, archive): pass
+    def setThreadKey(self, archive, key, msgid): pass
+    def getOldestArticle(self, subject): pass
+
+class Database(DatabaseInterface):
+    """Define the basic sorting logic for a database
+
+    Assumes that the database internally uses dateIndex, authorIndex,
+    etc.
+    """
+
+    # TBD Factor out more of the logic shared between BSDDBDatabase
+    # and HyperDatabase and place it in this class.
+
+    def __init__(self):
+        # This method need not be called by subclasses that do their
+        # own initialization.
+        self.dateIndex = {}
+        self.authorIndex = {}
+        self.subjectIndex = {}
+        self.articleIndex = {}
+        self.changed = {}
+
+    def addArticle(self, archive, article, subject=None, author=None,
+                   date=None):
+        # create the keys; always end w/ msgid which will be unique
+        authorkey = (author or article.author, article.date,
+                     article.msgid)
+        subjectkey = (subject or article.subject, article.date,
+                      article.msgid)
+        datekey = date or article.date, article.msgid
+
+        # Add the new article
+        self.dateIndex[datekey] = article.msgid
+        self.authorIndex[authorkey] = article.msgid
+        self.subjectIndex[subjectkey] = article.msgid
+
+        self.store_article(article)
+        self.changed[archive, article.msgid] = None
+
+        parentID = article.parentID
+        if parentID is not None and self.articleIndex.has_key(parentID):
+            parent = self.getArticle(archive, parentID)
+            myThreadKey = parent.threadKey + article.date + '-'
+        else:
+            myThreadKey = article.date + '-'
+        article.threadKey = myThreadKey
+        key = myThreadKey, article.msgid
+        self.setThreadKey(archive, key, article.msgid)
+
+    def store_article(self, article):
+        """Store article without message body to save space"""
+        # TBD this is not thread safe!
+        temp = article.body
+        article.body = []
+        self.articleIndex[article.msgid] = pickle.dumps(article)
+        article.body = temp
+
+# The Article class encapsulates a single posting.  The attributes
+# are:
+#
+#  sequence : Sequence number, unique for each article in a set of archives
+#  subject  : Subject
+#  datestr  : The posting date, in human-readable format
+#  date     : The posting date, in purely numeric format
+#  headers  : Any other headers of interest
+#  author   : The author's name (and possibly organization)
+#  email    : The author's e-mail address
+#  msgid    : A unique message ID
+#  in_reply_to : If != "", this is the msgid of the article being replied to
+#  references  : A (possibly empty) list of msgid's of earlier articles
+#                in the thread
+#  body     : A list of strings making up the message body
+
+class Article:
+    _last_article_time = time.time()
+
+    def __init__(self, message = None, sequence = 0, keepHeaders = []):
+        if message is None:
+            return
+        self.sequence = sequence
+
+        self.parentID = None
+        self.threadKey = None
+        # If no Message-ID is given, the current sequence number is used.
+        id = strip_separators(message['Message-Id'])
+        if id == "":
+            self.msgid = str(self.sequence)
+        else:
+            self.msgid = id
+
+        if message.has_key('Subject'):
+            self.subject = str(message['Subject'])
+        else:
+            self.subject = _('No subject')
+        if self.subject == "":
+            self.subject = _('No subject')
+
+        self._set_date(message)
+
+        # Figure out the e-mail address and poster's name.  Use the From:
+        # field first, followed by Reply-To:
+        self.author, self.email = parseaddr(message.get('From', ''))
+        e = message['Reply-To']
+        if not self.email and e is not None:
+            ignoreauthor, self.email = parseaddr(e)
+        self.email = strip_separators(self.email)
+        self.author = strip_separators(self.author)
+
+        if self.author == "":
+            self.author = self.email
+
+        # Save the In-Reply-To:, References:, and Message-ID: lines
+        #
+        # TBD: The original code does some munging on these fields, which
+        # shouldn't be necessary, but changing this may break code.  For
+        # safety, I save the original headers on different attributes for use
+        # in writing the plain text periodic flat files.
+        self._in_reply_to = message['in-reply-to']
+        self._references = message['references']
+        self._message_id = message['message-id']
+
+        i_r_t = message['In-Reply-To']
+        if i_r_t is None:
+            self.in_reply_to = ''
+        else:
+            match = msgid_pat.search(i_r_t)
+            if match is None:
+                self.in_reply_to = ''
+            else:
+                self.in_reply_to = strip_separators(match.group(1))
+
+        references = message['References']
+        if references is None:
+            self.references = []
+        else:
+            self.references = map(strip_separators, references.split())
+
+        # Save any other interesting headers
+        self.headers = {}
+        for i in keepHeaders:
+            if message.has_key(i):
+                self.headers[i] = message[i]
+
+        # Read the message body
+        s = StringIO(message.get_payload())
+        self.body = s.readlines()
+
+    def _set_date(self, message):
+        def floatdate(header):
+            missing = []
+            datestr = message.get(header, missing)
+            if datestr is missing:
+                return None
+            date = parsedate_tz(datestr)
+            try:
+                # date is None when the header is unparseable; the
+                # resulting TypeError is caught along with the others.
+                return time.mktime(date[:9])
+            except (TypeError, ValueError, OverflowError):
+                return None
+        date = floatdate('date')
+        if date is None:
+            date = floatdate('x-list-received-date')
+        if date is None:
+            # What's left to try?
+            date = self._last_article_time + 1
+        self._last_article_time = date
+        self.date = '%011i' % date
+
+    def __repr__(self):
+        return '<Article ID = '+repr(self.msgid)+'>'
+
+# Pipermail formatter class
+
+class T:
+    DIRMODE = 0755              # Mode to give to created directories
+    FILEMODE = 0644             # Mode to give to created files
+    INDEX_EXT = ".html"         # Extension for indexes
+
+    def __init__(self, basedir = None, reload = 1, database = None):
+        # If basedir isn't provided, assume the current directory
+        if basedir is None:
+            self.basedir = os.getcwd()
+        else:
+            basedir = os.path.expanduser(basedir)
+            self.basedir = basedir
+        self.database = database
+
+        # If the directory doesn't exist, create it.  This code shouldn't get
+        # run anymore, we create the directory in Archiver.py.  It should only
+        # get used by legacy lists created that are only receiving their first
+        # message in the HTML archive now -- Marc
+        try:
+            os.stat(self.basedir)
+        except os.error, errdata:
+            errno, errmsg = errdata
+            if errno != 2:
+                raise os.error, errdata
+            else:
+                self.message(_('Creating archive directory ') + self.basedir)
+                omask = os.umask(0)
+                try:
+                    os.mkdir(self.basedir, self.DIRMODE)
+                finally:
+                    os.umask(omask)
+
+        # Try to load previously pickled state
+        try:
+            if not reload:
+                raise IOError
+            f = open(os.path.join(self.basedir, 'pipermail.pck'), 'r')
+            self.message(_('Reloading pickled archive state'))
+            d = pickle.load(f)
+            f.close()
+            for key, value in d.items():
+                setattr(self, key, value)
+        except (IOError, EOFError):
+            # No pickled version, so initialize various attributes
+            self.archives = []          # Archives
+            self._dirty_archives = []   # Archives that will have to be updated
+            self.sequence = 0           # Sequence variable used for
+                                        #   numbering articles
+            self.update_TOC = 0         # Does the TOC need updating?
+        #
+        # make the basedir variable work when passed in as an __init__ arg
+        # and different from the one in the pickle.  Let the one passed in
+        # as an __init__ arg take precedence if it's stated.  This way, an
+        # archive can be moved from one place to another and still work.
+        #
+        if basedir != self.basedir:
+            self.basedir = basedir
+
+    def close(self):
+        "Close an archive, save its state, and update any changed archives."
+        self.update_dirty_archives()
+        self.update_TOC = 0
+        self.write_TOC()
+        # Save the collective state
+        self.message(_('Pickling archive state into ') +
+                     os.path.join(self.basedir, 'pipermail.pck'))
+        self.database.close()
+        del self.database
+
+        omask = os.umask(007)
+        try:
+            f = open(os.path.join(self.basedir, 'pipermail.pck'), 'w')
+        finally:
+            os.umask(omask)
+        pickle.dump(self.getstate(), f)
+        f.close()
+
+    def getstate(self):
+        # can override this in subclass
+        return self.__dict__
+
+    #
+    # Private methods
+    #
+    # These will be neither overridden nor called by custom archivers.
+    #
+
+    # Create a dictionary of various parameters that will be passed
+    # to the write_index_{header,footer} functions
+    def __set_parameters(self, archive):
+        # Determine the earliest and latest date in the archive
+        firstdate = self.database.firstdate(archive)
+        lastdate = self.database.lastdate(archive)
+
+        # Get the current time
+        now = time.asctime(time.localtime(time.time()))
+        self.firstdate = firstdate
+        self.lastdate = lastdate
+        self.archivedate = now
+        self.size = self.database.numArticles(archive)
+        self.archive = archive
+        self.version = __version__
+
+    # Find the message ID of an article's parent, or return None
+    # if no parent can be found.
+    def __findParent(self, article, children = []):
+        parentID = None
+        if article.in_reply_to:
+            parentID = article.in_reply_to
+        elif article.references:
+            # Remove article IDs that aren't in the archive
+            refs = filter(self.articleIndex.has_key, article.references)
+            if not refs:
+                return None
+            maxdate = self.database.getArticle(self.archive,
+                                               refs[0])
+            for ref in refs[1:]:
+                a = self.database.getArticle(self.archive, ref)
+                if a.date > maxdate.date:
+                    maxdate = a
+            parentID = maxdate.msgid
+        else:
+            # Look for the oldest matching subject
+            try:
+                key, tempid = \
+                     self.subjectIndex.set_location(article.subject)
+                print key, tempid
+                self.subjectIndex.next()
+                [subject, date] = key.split('\0')
+                print article.subject, subject, date
+                if subject == article.subject and tempid not in children:
+                    parentID = tempid
+            except KeyError:
+                pass
+        return parentID
+
+    # Update the threaded index completely
+    def updateThreadedIndex(self):
+        # Erase the threaded index
+        self.database.clearIndex(self.archive, 'thread')
+
+        # Loop over all the articles
+        msgid = self.database.first(self.archive, 'date')
+        while msgid is not None:
+            try:
+                article = self.database.getArticle(self.archive, msgid)
+            except KeyError:
+                pass
+            else:
+                if article.parentID is None or \
+                   not self.database.hasArticle(self.archive,
+                                                article.parentID):
+                    # then
+                    pass
+                else:
+                    parent = self.database.getArticle(self.archive,
+                                                      article.parentID)
+                    article.threadKey = parent.threadKey+article.date+'-'
+                self.database.setThreadKey(self.archive,
+                                           (article.threadKey, article.msgid),
+                                           msgid)
+            msgid = self.database.next(self.archive, 'date')
+
+    #
+    # Public methods:
+    #
+    # These are part of the public interface of the T class, but will
+    # never be overridden (unless you're trying to do something very new).
+
+    # Update a single archive's indices, whether the archive's been
+    # dirtied or not.
+    def update_archive(self, archive):
+        self.archive = archive
+        self.message(_("Updating index files for archive [%(archive)s]"))
+        arcdir = os.path.join(self.basedir, archive)
+        self.__set_parameters(archive)
+
+        for hdr in ('Date', 'Subject', 'Author'):
+            self._update_simple_index(hdr, archive, arcdir)
+
+        self._update_thread_index(archive, arcdir)
+
+    def _update_simple_index(self, hdr, archive, arcdir):
+        self.message("  " + hdr)
+        self.type = hdr
+        hdr = hdr.lower()
+
+        self._open_index_file_as_stdout(arcdir, hdr)
+        self.write_index_header()
+        count = 0
+        # Loop over the index entries
+        msgid = self.database.first(archive, hdr)
+        while msgid is not None:
+            try:
+                article = self.database.getArticle(self.archive, msgid)
+            except KeyError:
+                pass
+            else:
+                count = count + 1
+                self.write_index_entry(article)
+            msgid = self.database.next(archive, hdr)
+        # Finish up this index
+        self.write_index_footer()
+        self._restore_stdout()
+
+    def _update_thread_index(self, archive, arcdir):
+        self.message(_("  Thread"))
+        self._open_index_file_as_stdout(arcdir, "thread")
+        self.type = 'Thread'
+        self.write_index_header()
+
+        # To handle the prev./next in thread pointers, we need to
+        # track articles 5 at a time.
+
+        # Get the first 5 articles
+        L = [None] * 5
+        i = 2
+        msgid = self.database.first(self.archive, 'thread')
+
+        while msgid is not None and i < 5:
+            L[i] = self.database.getArticle(self.archive, msgid)
+            i = i + 1
+            msgid = self.database.next(self.archive, 'thread')
+
+        while L[2] is not None:
+            article = L[2]
+            artkey = None
+            if article is not None:
+                artkey = article.threadKey
+            if artkey is not None:
+                self.write_threadindex_entry(article, artkey.count('-') - 1)
+                if self.database.changed.has_key((archive, article.msgid)):
+                    a1 = L[1]
+                    a3 = L[3]
+                    self.update_article(arcdir, article, a1, a3)
+                    if a3 is not None:
+                        self.database.changed[(archive, a3.msgid)] = None
+                    if a1 is not None:
+                        key = archive, a1.msgid
+                        if not self.database.changed.has_key(key):
+                            self.update_article(arcdir, a1, L[0], L[2])
+                        else:
+                            del self.database.changed[key]
+            L = L[1:]                   # Rotate the list
+            if msgid is None:
+                L.append(msgid)
+            else:
+                L.append(self.database.getArticle(self.archive, msgid))
+            msgid = self.database.next(self.archive, 'thread')
+
+        self.write_index_footer()
+        self._restore_stdout()
+
+    def _open_index_file_as_stdout(self, arcdir, index_name):
+        path = os.path.join(arcdir, index_name + self.INDEX_EXT)
+        omask = os.umask(002)
+        try:
+            self.__f = open(path, 'w')
+        finally:
+            os.umask(omask)
+        self.__stdout = sys.stdout
+        sys.stdout = self.__f
+
+    def _restore_stdout(self):
+        sys.stdout = self.__stdout
+        self.__f.close()
+        del self.__f
+        del self.__stdout
+
+    # Update only archives that have been marked as "changed".
+    def update_dirty_archives(self):
+        for i in self._dirty_archives:
+            self.update_archive(i)
+        self._dirty_archives = []
+
+    # Read a Unix mailbox file from the file object <input>,
+    # and create a series of Article objects.  Each article
+    # object will then be archived.
+
+    def _makeArticle(self, msg, sequence):
+        return Article(msg, sequence)
+
+    def processUnixMailbox(self, input, start=None, end=None):
+        mbox = ArchiverMailbox(input, self.maillist)
+        if start is None:
+            start = 0
+        counter = 0
+        while counter < start:
+            try:
+                m = mbox.next()
+            except Errors.DiscardMessage:
+                continue
+            if m is None:
+                return
+            counter += 1
+        while 1:
+            try:
+                pos = input.tell()
+                m = mbox.next()
+            except Errors.DiscardMessage:
+                continue
+            except Exception:
+                syslog('error', 'uncaught archiver exception at filepos: %s',
+                       pos)
+                raise
+            if m is None:
+                break
+            if m == '':
+                # It was an unparseable message
+                continue
+            msgid = m.get('message-id', 'n/a')
+            self.message(_('#%(counter)05d %(msgid)s'))
+            a = self._makeArticle(m, self.sequence)
+            self.sequence += 1
+            self.add_article(a)
+            if end is not None and counter >= end:
+                break
+            counter += 1
+
+    def new_archive(self, archive, archivedir):
+        self.archives.append(archive)
+        self.update_TOC = 1
+        self.database.newArchive(archive)
+        # If the archive directory doesn't exist, create it
+        try:
+            os.stat(archivedir)
+        except os.error, errdata:
+            errno, errmsg = errdata
+            if errno == 2:
+                omask = os.umask(0)
+                try:
+                    os.mkdir(archivedir, self.DIRMODE)
+                finally:
+                    os.umask(omask)
+            else:
+                raise os.error, errdata
+        self.open_new_archive(archive, archivedir)
+
+    def add_article(self, article):
+        archives = self.get_archives(article)
+        if not archives:
+            return
+        if type(archives) == type(''):
+            archives = [archives]
+
+        article.filename = filename = self.get_filename(article)
+        temp = self.format_article(article)
+        for arch in archives:
+            self.archive = arch # why do this???
+            archivedir = os.path.join(self.basedir, arch)
+            if arch not in self.archives:
+                self.new_archive(arch, archivedir)
+
+            # Write the HTML-ized article
+            self.write_article(arch, temp, os.path.join(archivedir,
+                                                        filename))
+
+            author = fixAuthor(article.author)
+            subject = article.subject.lower()
+
+            article.parentID = parentID = self.get_parent_info(arch, article)
+            if parentID:
+                parent = self.database.getArticle(arch, parentID)
+                article.threadKey = parent.threadKey + article.date + '-'
+            else:
+                article.threadKey = article.date + '-'
+            key = article.threadKey, article.msgid
+
+            self.database.setThreadKey(arch, key, article.msgid)
+            self.database.addArticle(arch, temp, author=author,
+                                     subject=subject)
+
+            if arch not in self._dirty_archives:
+                self._dirty_archives.append(arch)
+
+    def get_parent_info(self, archive, article):
+        parentID = None
+        if article.in_reply_to:
+            parentID = article.in_reply_to
+        elif article.references:
+            refs = self._remove_external_references(article.references)
+            if refs:
+                maxdate = self.database.getArticle(archive, refs[0])
+                for ref in refs[1:]:
+                    a = self.database.getArticle(archive, ref)
+                    if a.date > maxdate.date:
+                        maxdate = a
+                parentID = maxdate.msgid
+        else:
+            # Get the oldest article with a matching subject, and
+            # assume this is a follow-up to that article
+            parentID = self.database.getOldestArticle(archive,
+                                                      article.subject)
+
+        if parentID and not self.database.hasArticle(archive, parentID):
+            parentID = None
+        return parentID
+
+    def write_article(self, index, article, path):
+        omask = os.umask(002)
+        try:
+            f = open(path, 'w')
+        finally:
+            os.umask(omask)
+        temp_stdout, sys.stdout = sys.stdout, f
+        self.write_article_header(article)
+        sys.stdout.writelines(article.body)
+        self.write_article_footer(article)
+        sys.stdout = temp_stdout
+        f.close()
+
+    def _remove_external_references(self, refs):
+        keep = []
+        for ref in refs:
+            if self.database.hasArticle(self.archive, ref):
+                keep.append(ref)
+        return keep
+
+    # Abstract methods: these will need to be overridden by subclasses
+    # before anything useful can be done.
+
+    def get_filename(self, article):
+        pass
+    def get_archives(self, article):
+        """Return a list of indexes where the article should be filed.
+        A string can be returned if the list only contains one entry,
+        and the empty list is legal."""
+        pass
+    def format_article(self, article):
+        pass
+    def write_index_header(self):
+        pass
+    def write_index_footer(self):
+        pass
+    def write_index_entry(self, article):
+        pass
+    def write_threadindex_entry(self, article, depth):
+        pass
+    def write_article_header(self, article):
+        pass
+    def write_article_footer(self, article):
+        pass
+    def write_article_entry(self, article):
+        pass
+    def update_article(self, archivedir, article, prev, next):
+        pass
+    def write_TOC(self):
+        pass
+    def open_new_archive(self, archive, dir):
+        pass
+    def message(self, msg):
+        pass
+
+
+class BSDDBdatabase(Database):
+    __super_addArticle = Database.addArticle
+
+    def __init__(self, basedir):
+        self.__cachekeys = []
+        self.__cachedict = {}
+        self.__currentOpenArchive = None    # The currently open indices
+        self.basedir = os.path.expanduser(basedir)
+        self.changed = {}                   # Recently added articles, indexed
+                                            # only by message ID
+
+    def firstdate(self, archive):
+        self.__openIndices(archive)
+        date = 'None'
+        try:
+            date, msgid = self.dateIndex.first()
+            date = time.asctime(time.localtime(float(date)))
+        except KeyError:
+            pass
+        return date
+
+    def lastdate(self, archive):
+        self.__openIndices(archive)
+        date = 'None'
+        try:
+            date, msgid = self.dateIndex.last()
+            date = time.asctime(time.localtime(float(date)))
+        except KeyError:
+            pass
+        return date
+
+    def numArticles(self, archive):
+        self.__openIndices(archive)
+        return len(self.dateIndex)
+
+    def addArticle(self, archive, article, subject=None, author=None,
+                   date=None):
+        self.__openIndices(archive)
+        self.__super_addArticle(archive, article, subject, author, date)
+
+    # Open the BSDDB files that are being used as indices
+    # (dateIndex, authorIndex, subjectIndex, articleIndex)
+    def __openIndices(self, archive):
+        if self.__currentOpenArchive == archive:
+            return
+
+        import bsddb
+        self.__closeIndices()
+        arcdir = os.path.join(self.basedir, 'database')
+        omask = os.umask(0)
+        try:
+            try:
+                os.mkdir(arcdir, 02775)
+            except OSError:
+                # BAW: Hmm...
+                pass
+        finally:
+            os.umask(omask)
+        for hdr in ('date', 'author', 'subject', 'article', 'thread'):
+            path = os.path.join(arcdir, archive + '-' + hdr)
+            t = bsddb.btopen(path, 'c')
+            setattr(self, hdr + 'Index', t)
+        self.__currentOpenArchive = archive
+
+    # Close the BSDDB files that are being used as indices (if they're
+    # open--this is safe to call if they're already closed)
+    def __closeIndices(self):
+        if self.__currentOpenArchive is not None:
+            pass
+        for hdr in ('date', 'author', 'subject', 'thread', 'article'):
+            attr = hdr + 'Index'
+            if hasattr(self, attr):
+                index = getattr(self, attr)
+                if hdr == 'article':
+                    if not hasattr(self, 'archive_length'):
+                        self.archive_length = {}
+                    self.archive_length[self.__currentOpenArchive] = len(index)
+                index.close()
+                delattr(self, attr)
+        self.__currentOpenArchive = None
+
+    def close(self):
+        self.__closeIndices()
+    def hasArticle(self, archive, msgid):
+        self.__openIndices(archive)
+        return self.articleIndex.has_key(msgid)
+    def setThreadKey(self, archive, key, msgid):
+        self.__openIndices(archive)
+        self.threadIndex[key] = msgid
+    def getArticle(self, archive, msgid):
+        self.__openIndices(archive)
+        if self.__cachedict.has_key(msgid):
+            self.__cachekeys.remove(msgid)
+            self.__cachekeys.append(msgid)
+            return self.__cachedict[msgid]
+        if len(self.__cachekeys) == CACHESIZE:
+            delkey, self.__cachekeys = (self.__cachekeys[0],
+                                        self.__cachekeys[1:])
+            del self.__cachedict[delkey]
+        s = self.articleIndex[msgid]
+        article = pickle.loads(s)
+        self.__cachekeys.append(msgid)
+        self.__cachedict[msgid] = article
+        return article
+
+    def first(self, archive, index):
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        try:
+            key, msgid = index.first()
+            return msgid
+        except KeyError:
+            return None
+    def next(self, archive, index):
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        try:
+            key, msgid = index.next()
+        except KeyError:
+            return None
+        else:
+            return msgid
+
+    def getOldestArticle(self, archive, subject):
+        self.__openIndices(archive)
+        subject = subject.lower()
+        try:
+            key, tempid = self.subjectIndex.set_location(subject)
+            self.subjectIndex.next()
+            [subject2, date] = key.split('\0')
+            if subject != subject2:
+                return None
+            return tempid
+        except KeyError:        # XXX what line raises the KeyError?
+            return None
+
+    def newArchive(self, archive):
+        pass
+
+    def clearIndex(self, archive, index):
+        self.__openIndices(archive)
+        index = getattr(self, index + 'Index')
+        finished = 0
+        try:
+            key, msgid = self.threadIndex.first()
+        except KeyError:
+            finished = 1
+        while not finished:
+            del self.threadIndex[key]
+            try:
+                key, msgid = self.threadIndex.next()
+            except KeyError:
+                finished = 1
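
fixAuthor() is worth a worked example: it reorders "First [von] Last" names into surname-first form, and small name particles travel with the surname. An illustrative interactive session (not part of the commit):

    >>> fixAuthor('Jane Doe')
    'Doe, Jane'
    >>> fixAuthor('Guido van Rossum')   # the particle sorts with the surname
    'Rossum van, Guido'
    >>> fixAuthor('Doe, Jane')          # comma present: assumed already canonical
    'Doe, Jane'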
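The threading scheme in Database.addArticle() and setThreadKey() rests on one invariant: a child's threadKey is its parent's threadKey plus the child's zero-padded date plus '-'. Because child keys extend their parent's key, a plain lexicographic sort of (threadKey, msgid) pairs produces a depth-first walk of each thread, date-ordered among siblings, and the number of '-' separators gives the nesting depth used by write_threadindex_entry(). A standalone sketch of the invariant (illustrative Python, not part of the commit):

    def thread_key(parent_key, date):
        # date is the 11-digit string produced by Article._set_date()
        return (parent_key or '') + date + '-'

    root  = thread_key(None, '01000000000')
    reply = thread_key(root, '01000000600')
    later = thread_key(None, '01000000300')
    # Sorting groups the reply under its root, ahead of the later thread:
    assert sorted([later, reply, root]) == [root, reply, later]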
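Article._set_date() never fails: it tries Date:, then X-List-Received-Date:, then falls back to one second past the previous article's timestamp, and stores the result zero-padded to 11 digits so string comparison matches numeric comparison. A sketch of the same chain in modern Python (email.utils replaces the old email.Utils module; as in the original, the timezone offset is ignored and mktime interprets the fields as local time):

    import time
    from email.utils import parsedate_tz

    def floatdate(datestr):
        # Return a Unix timestamp for an RFC 2822 date string, or None.
        parsed = parsedate_tz(datestr)      # None if unparseable
        try:
            return time.mktime(parsed[:9])
        except (TypeError, ValueError, OverflowError):
            return None

    date = floatdate('Tue, 10 Sep 2002 12:00:00 -0400') or time.time()
    print('%011i' % date)    # e.g. '01031673600'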
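T._update_thread_index() walks the thread index through a five-slot window so that, by the time the middle article is written, its previous and next thread neighbors are already in hand for the prev/next pointers. The rotation is easier to see in isolation (sketch, illustrative only):

    def windows(articles):
        it = iter(articles)
        L = [None] * 5
        for i in (2, 3, 4):                 # preload the window
            L[i] = next(it, None)
        while L[2] is not None:
            yield L[1], L[2], L[3]          # prev, current, next
            L = L[1:] + [next(it, None)]    # rotate the list

    list(windows('abc'))
    # [(None, 'a', 'b'), ('a', 'b', 'c'), ('b', 'c', None)]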
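T is a template: everything under "Abstract methods" must be overridden before the class does anything useful, and the write_* hooks print to a sys.stdout that has been redirected into the index file. A hypothetical minimal subclass (the name PlainArchiver and the single 'all' archive are illustrative, not part of this commit):

    class PlainArchiver(T):
        def get_archives(self, article):
            return 'all'                    # file every article in one archive
        def get_filename(self, article):
            return '%06d.html' % article.sequence
        def format_article(self, article):
            return article                  # no HTML conversion in this sketch
        def write_index_header(self):
            print('Archive %s: %d articles' % (self.archive, self.size))
        def write_index_entry(self, article):
            print('%s %s (%s)' % (article.date, article.subject, article.author))

Indexing then runs through the usual calls: construct the archiver with a basedir and a database (e.g. BSDDBdatabase), feed a mailbox to processUnixMailbox(), and call close(). In Mailman proper the subclass also carries the maillist attribute that processUnixMailbox() expects.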
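BSDDBdatabase.getArticle() keeps a hand-rolled LRU cache: __cachekeys records recency, __cachedict holds up to CACHESIZE unpickled articles, and the oldest key is evicted when the cache is full. The same policy with a modern container (sketch, not a drop-in replacement):

    from collections import OrderedDict

    class LRUCache:
        def __init__(self, size=100):           # CACHESIZE above
            self.size = size
            self.data = OrderedDict()
        def get(self, key):
            self.data.move_to_end(key)          # mark as most recently used
            return self.data[key]               # KeyError if absent, like the index
        def put(self, key, value):
            if len(self.data) >= self.size:
                self.data.popitem(last=False)   # evict the least recently used
            self.data[key] = value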