aboutsummaryrefslogtreecommitdiffstats
path: root/Mailman/Archiver/HyperDatabase.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--Mailman/Archiver/HyperDatabase.py338
1 files changed, 338 insertions, 0 deletions
diff --git a/Mailman/Archiver/HyperDatabase.py b/Mailman/Archiver/HyperDatabase.py
new file mode 100644
index 00000000..ab41b824
--- /dev/null
+++ b/Mailman/Archiver/HyperDatabase.py
@@ -0,0 +1,338 @@
+# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+#
+# site modules
+#
+import os
+import marshal
+import time
+import errno
+
+#
+# package/project modules
+#
+import pipermail
+from Mailman import LockFile
+
+CACHESIZE = pipermail.CACHESIZE
+
+try:
+ import cPickle
+ pickle = cPickle
+except ImportError:
+ import pickle
+
#
# We're using a Python dict in place of a bsddb.btree database, defining
# only the parts of the interface used by class HyperDatabase.  Only one
# thing can access this at a time.
#
class DumbBTree:
    """Stores pickles of Article objects

    This dictionary-like object stores pickles of all the Article
    objects.  The object itself is stored using marshal.  It would be
    much simpler, and probably faster, to store the actual objects in
    the DumbBTree and pickle it.

    Besides the mapping (self.dict) the object keeps a sorted list of its
    keys (self.sorted) and a cursor into that list (self.current_index),
    which first(), last(), next() and set_location() use to walk the keys
    in order.  The sorted list is rebuilt lazily: while the cursor is still
    at 0, modifications only set a dirty flag, and the list is re-sorted
    the next time an ordered access needs it.

    Constructing an instance acquires the lock file `path + ".lock"`;
    close() writes the mapping back to `path` and releases the lock.

    TBD: Also needs a more sensible name, like IteratableDictionary or
    SortedDictionary.
    """

    def __init__(self, path):
        """Lock and load the database stored at `path`.

        A missing or empty file yields an empty database.
        """
        self.current_index = 0
        self.path = path
        self.lockfile = LockFile.LockFile(self.path + ".lock")
        self.lock()
        self.__dirty = 0
        self.dict = {}
        self.sorted = []
        try:
            self.load()
        except Exception:
            # Don't keep holding the lock if the data file is unreadable.
            self.unlock()
            raise

    def __repr__(self):
        return "DumbBTree(%s)" % self.path

    def __sort(self, dirty=None):
        """Rebuild the sorted key list if it is stale, or if `dirty` is true."""
        if self.__dirty == 1 or dirty:
            self.sorted = sorted(self.dict.keys())
            self.__dirty = 0

    def lock(self):
        """Acquire the lock file guarding this database."""
        self.lockfile.lock()

    def unlock(self):
        """Release the lock file; not holding it is not an error."""
        try:
            self.lockfile.unlock()
        except LockFile.NotLockedError:
            pass

    def __delitem__(self, item):
        """Delete `item`, keeping the cursor on the same logical position.

        Raises KeyError if `item` is not present.
        """
        # if first hasn't been called, we can skip the sort
        if self.current_index == 0:
            del self.dict[item]
            self.__dirty = 1
            return
        # Remember which key the cursor points at so that we can find it
        # again after re-sorting.
        try:
            ci = self.sorted[self.current_index]
        except IndexError:
            ci = None
        if ci == item:
            # The cursor's own key is going away; move on to its successor.
            try:
                ci = self.sorted[self.current_index + 1]
            except IndexError:
                ci = None
        del self.dict[item]
        self.__sort(dirty=1)
        if ci is not None:
            self.current_index = self.sorted.index(ci)
        else:
            # Nothing left after the cursor; keep it past the end so that
            # next() raises KeyError.
            self.current_index = self.current_index + 1

    def clear(self):
        """Remove every item and reset the cursor."""
        # bulk clearing much faster than deleting each item, esp. with the
        # implementation of __delitem__() above :(
        self.dict = {}
        # The key list and the cursor describe the old contents; leaving
        # them behind would make len(), last() and next() report keys that
        # no longer exist.
        self.sorted = []
        self.current_index = 0
        self.__dirty = 0

    def first(self):
        """Return the (key, value) pair with the smallest key.

        The cursor is left on the second key, so that next() continues the
        walk.  Raises KeyError if the database is empty.
        """
        self.__sort()  # guarantee that the list is sorted
        if not self.sorted:
            raise KeyError
        else:
            key = self.sorted[0]
            self.current_index = 1
            return key, self.dict[key]

    def last(self):
        """Return the (key, value) pair with the largest key.

        The cursor is left on that last key.  Raises KeyError if the
        database is empty.
        """
        self.__sort()  # pick up keys added since the last sort
        if not self.sorted:
            raise KeyError
        else:
            key = self.sorted[-1]
            self.current_index = len(self.sorted) - 1
            return key, self.dict[key]

    def next(self):
        """Return the (key, value) pair under the cursor and advance it.

        Raises KeyError once the cursor has run past the last key.
        """
        self.__sort()  # no-op unless items changed before first() was called
        try:
            key = self.sorted[self.current_index]
        except IndexError:
            raise KeyError
        self.current_index = self.current_index + 1
        return key, self.dict[key]

    def has_key(self, key):
        """Return true if `key` is stored in the database."""
        return key in self.dict

    def set_location(self, loc):
        """Move the cursor onto key `loc` and return its (key, value) pair.

        The cursor is left on `loc` itself, so a following next() returns
        the same pair again.  Raises KeyError if `loc` is not a stored key
        (only exact matches are supported).
        """
        if loc not in self.dict:
            raise KeyError(loc)
        self.__sort()  # the key may have been added since the last sort
        self.current_index = self.sorted.index(loc)
        return loc, self.dict[loc]

    def __getitem__(self, item):
        return self.dict[item]

    def __setitem__(self, item, val):
        """Store `val` under `item`, keeping the cursor on the same key."""
        # if first hasn't been called, then we don't need to worry
        # about sorting again
        if self.current_index == 0:
            self.dict[item] = val
            self.__dirty = 1
            return
        try:
            current_item = self.sorted[self.current_index]
        except IndexError:
            # The cursor was past the end; park it on the new item.
            current_item = item
        self.dict[item] = val
        self.__sort(dirty=1)
        self.current_index = self.sorted.index(current_item)

    def __len__(self):
        # Count the mapping itself: the sorted key list is only refreshed
        # lazily and may lag behind recent insertions and deletions.
        return len(self.dict)

    def load(self):
        """Read the marshalled dictionary from self.path.

        A missing file (ENOENT) or an empty one (EOFError) leaves the
        current contents untouched; any other IOError is re-raised.
        """
        try:
            # marshal data is binary
            fp = open(self.path, 'rb')
            try:
                self.dict = marshal.load(fp)
            finally:
                fp.close()
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
        except EOFError:
            pass
        else:
            self.__sort(dirty=1)

    def close(self):
        """Write the dictionary back to self.path and release the lock."""
        try:
            # Create the file with no access for "other".
            omask = os.umask(0o007)
            try:
                fp = open(self.path, 'wb')
            finally:
                os.umask(omask)
            try:
                fp.write(marshal.dumps(self.dict))
            finally:
                fp.close()
        finally:
            # Release the lock even if the file could not be written.
            self.unlock()
+
+
# This is lifted straight out of pipermail, with the bsddb.btree replaced
# by the DumbBTree class above.  Didn't use inheritance because of all the
# __internal stuff that needs to be here. -scott
#
class HyperDatabase(pipermail.Database):
    """Archive database that keeps its indices in DumbBTree files.

    For every archive there are five indices -- date, author, subject,
    article and thread -- each stored in its own DumbBTree file named
    `<archive>-<index>` inside the `database` directory under `basedir`.
    The indices of only one archive are open at a time; asking for a
    different archive closes (and thereby saves) the currently open ones.
    """

    __super_addArticle = pipermail.Database.addArticle

    def __init__(self, basedir, mlist):
        self.__cache = {}
        self.__currentOpenArchive = None   # The currently open indices
        self._mlist = mlist
        self.basedir = os.path.expanduser(basedir)
        # Recently added articles, indexed only by message ID
        self.changed = {}

    def firstdate(self, archive):
        """Return the asctime() string for the first date-index entry.

        Returns the string 'None' when the date index is empty.
        """
        self.__openIndices(archive)
        date = 'None'
        try:
            datekey, msgid = self.dateIndex.first()
            date = time.asctime(time.localtime(float(datekey[0])))
        except KeyError:
            pass
        return date

    def lastdate(self, archive):
        """Return the asctime() string for the last date-index entry.

        Returns the string 'None' when the date index is empty.
        """
        self.__openIndices(archive)
        date = 'None'
        try:
            datekey, msgid = self.dateIndex.last()
            date = time.asctime(time.localtime(float(datekey[0])))
        except KeyError:
            pass
        return date

    def numArticles(self, archive):
        """Return the number of entries in the archive's date index."""
        self.__openIndices(archive)
        return len(self.dateIndex)

    def addArticle(self, archive, article, subject=None, author=None,
                   date=None):
        """Open the archive's indices, then defer to the base class."""
        self.__openIndices(archive)
        self.__super_addArticle(archive, article, subject, author, date)

    def __openIndices(self, archive):
        """Make `archive` the open archive, creating <index>Index attributes."""
        if self.__currentOpenArchive == archive:
            return
        self.__closeIndices()
        arcdir = os.path.join(self.basedir, 'database')
        # Clear the umask so the directory really gets mode 02770.
        omask = os.umask(0)
        try:
            try:
                os.mkdir(arcdir, 0o2770)
            except OSError as e:
                # An already existing directory is fine.
                if e.errno != errno.EEXIST:
                    raise
        finally:
            os.umask(omask)
        for i in ('date', 'author', 'subject', 'article', 'thread'):
            t = DumbBTree(os.path.join(arcdir, archive + '-' + i))
            setattr(self, i + 'Index', t)
        self.__currentOpenArchive = archive

    def __closeIndices(self):
        """Close (and thereby save) whichever indices are currently open."""
        for i in ('date', 'author', 'subject', 'thread', 'article'):
            attr = i + 'Index'
            if hasattr(self, attr):
                index = getattr(self, attr)
                if i == 'article':
                    # Remember how many articles the archive held when it
                    # was closed.
                    if not hasattr(self, 'archive_length'):
                        self.archive_length = {}
                    l = len(index)
                    self.archive_length[self.__currentOpenArchive] = l
                index.close()
                delattr(self, attr)
        self.__currentOpenArchive = None

    def close(self):
        """Close the open indices, if any."""
        self.__closeIndices()

    def hasArticle(self, archive, msgid):
        """Return true if `msgid` is in the archive's article index."""
        self.__openIndices(archive)
        return self.articleIndex.has_key(msgid)

    def setThreadKey(self, archive, key, msgid):
        """Store `msgid` under `key` in the archive's thread index."""
        self.__openIndices(archive)
        self.threadIndex[key] = msgid

    def getArticle(self, archive, msgid):
        """Return the Article for `msgid`, unpickling it on first use.

        Unpickled articles are cached by message id.  Raises KeyError if
        the article index has no entry for `msgid`.
        """
        self.__openIndices(archive)
        if msgid not in self.__cache:
            # get the pickled object out of the DumbBTree
            buf = self.articleIndex[msgid]
            article = self.__cache[msgid] = pickle.loads(buf)
            # For upgrading older archives
            article.setListIfUnset(self._mlist)
        else:
            article = self.__cache[msgid]
        return article

    def first(self, archive, index):
        """Return the message id of the first entry in the named index.

        `index` is the index name without the 'Index' suffix, e.g. 'date'.
        Returns None when the index is empty.
        """
        self.__openIndices(archive)
        index = getattr(self, index + 'Index')
        try:
            key, msgid = index.first()
            return msgid
        except KeyError:
            return None

    def next(self, archive, index):
        """Return the message id of the next entry in the named index.

        Returns None when the index has no further entries.
        """
        self.__openIndices(archive)
        index = getattr(self, index + 'Index')
        try:
            key, msgid = index.next()
            return msgid
        except KeyError:
            return None

    def getOldestArticle(self, archive, subject):
        """Return the message id stored under `subject`, or None.

        NOTE(review): this relies on set_location() returning a
        (key, value) pair, as bsddb's btree does, and on subject-index keys
        being strings of the form 'subject\\0date'.  Confirm that the index
        class in use honours both; otherwise the unpacking or the split
        below raises instead of returning None.
        """
        self.__openIndices(archive)
        subject = subject.lower()
        try:
            key, tempid = self.subjectIndex.set_location(subject)
            self.subjectIndex.next()
            [subject2, date] = key.split('\0')
            if subject != subject2:
                return None
            return tempid
        except KeyError:
            return None

    def newArchive(self, archive):
        pass

    def clearIndex(self, archive, index):
        """Remove every entry from the named index of `archive`.

        `index` is the index name without the 'Index' suffix, e.g. 'thread'.
        """
        self.__openIndices(archive)
        # Clear the index that was asked for rather than always the thread
        # index.
        idx = getattr(self, index + 'Index')
        if hasattr(idx, 'clear'):
            idx.clear()
            return
        # No bulk clear available: walk the index and delete item by item.
        try:
            key, msgid = idx.first()
        except KeyError:
            return
        while 1:
            del idx[key]
            try:
                key, msgid = idx.next()
            except KeyError:
                break