diff options
Diffstat (limited to '')
-rw-r--r-- | Mailman/Archiver/HyperDatabase.py | 338 |
1 files changed, 338 insertions, 0 deletions
diff --git a/Mailman/Archiver/HyperDatabase.py b/Mailman/Archiver/HyperDatabase.py new file mode 100644 index 00000000..ab41b824 --- /dev/null +++ b/Mailman/Archiver/HyperDatabase.py @@ -0,0 +1,338 @@ +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# +# site modules +# +import os +import marshal +import time +import errno + +# +# package/project modules +# +import pipermail +from Mailman import LockFile + +CACHESIZE = pipermail.CACHESIZE + +try: + import cPickle + pickle = cPickle +except ImportError: + import pickle + +# +# we're using a python dict in place of +# of bsddb.btree database. only defining +# the parts of the interface used by class HyperDatabase +# only one thing can access this at a time. +# +class DumbBTree: + """Stores pickles of Article objects + + This dictionary-like object stores pickles of all the Article + objects. The object itself is stored using marshal. It would be + much simpler, and probably faster, to store the actual objects in + the DumbBTree and pickle it. + + TBD: Also needs a more sensible name, like IteratableDictionary or + SortedDictionary. + """ + + def __init__(self, path): + self.current_index = 0 + self.path = path + self.lockfile = LockFile.LockFile(self.path + ".lock") + self.lock() + self.__dirty = 0 + self.dict = {} + self.sorted = [] + self.load() + + def __repr__(self): + return "DumbBTree(%s)" % self.path + + def __sort(self, dirty=None): + if self.__dirty == 1 or dirty: + self.sorted = self.dict.keys() + self.sorted.sort() + self.__dirty = 0 + + def lock(self): + self.lockfile.lock() + + def unlock(self): + try: + self.lockfile.unlock() + except LockFile.NotLockedError: + pass + + def __delitem__(self, item): + # if first hasn't been called, we can skip the sort + if self.current_index == 0: + del self.dict[item] + self.__dirty = 1 + return + try: + ci = self.sorted[self.current_index] + except IndexError: + ci = None + if ci == item: + try: + ci = self.sorted[self.current_index + 1] + except IndexError: + ci = None + del self.dict[item] + self.__sort(dirty=1) + if ci is not None: + self.current_index = self.sorted.index(ci) + else: + self.current_index = self.current_index + 1 + + def clear(self): + # bulk clearing much faster than deleting each item, esp. with the + # implementation of __delitem__() above :( + self.dict = {} + + def first(self): + self.__sort() # guarantee that the list is sorted + if not self.sorted: + raise KeyError + else: + key = self.sorted[0] + self.current_index = 1 + return key, self.dict[key] + + def last(self): + if not self.sorted: + raise KeyError + else: + key = self.sorted[-1] + self.current_index = len(self.sorted) - 1 + return key, self.dict[key] + + def next(self): + try: + key = self.sorted[self.current_index] + except IndexError: + raise KeyError + self.current_index = self.current_index + 1 + return key, self.dict[key] + + def has_key(self, key): + return self.dict.has_key(key) + + def set_location(self, loc): + if not self.dict.has_key(loc): + raise KeyError + self.current_index = self.sorted.index(loc) + + def __getitem__(self, item): + return self.dict[item] + + def __setitem__(self, item, val): + # if first hasn't been called, then we don't need to worry + # about sorting again + if self.current_index == 0: + self.dict[item] = val + self.__dirty = 1 + return + try: + current_item = self.sorted[self.current_index] + except IndexError: + current_item = item + self.dict[item] = val + self.__sort(dirty=1) + self.current_index = self.sorted.index(current_item) + + def __len__(self): + return len(self.sorted) + + def load(self): + try: + fp = open(self.path) + try: + self.dict = marshal.load(fp) + finally: + fp.close() + except IOError, e: + if e.errno <> errno.ENOENT: raise + pass + except EOFError: + pass + else: + self.__sort(dirty=1) + + def close(self): + omask = os.umask(007) + try: + fp = open(self.path, 'w') + finally: + os.umask(omask) + fp.write(marshal.dumps(self.dict)) + fp.close() + self.unlock() + + +# this is lifted straight out of pipermail with +# the bsddb.btree replaced with above class. +# didn't use inheritance because of all the +# __internal stuff that needs to be here -scott +# +class HyperDatabase(pipermail.Database): + __super_addArticle = pipermail.Database.addArticle + + def __init__(self, basedir, mlist): + self.__cache = {} + self.__currentOpenArchive = None # The currently open indices + self._mlist = mlist + self.basedir = os.path.expanduser(basedir) + # Recently added articles, indexed only by message ID + self.changed={} + + def firstdate(self, archive): + self.__openIndices(archive) + date = 'None' + try: + datekey, msgid = self.dateIndex.first() + date = time.asctime(time.localtime(float(datekey[0]))) + except KeyError: + pass + return date + + def lastdate(self, archive): + self.__openIndices(archive) + date = 'None' + try: + datekey, msgid = self.dateIndex.last() + date = time.asctime(time.localtime(float(datekey[0]))) + except KeyError: + pass + return date + + def numArticles(self, archive): + self.__openIndices(archive) + return len(self.dateIndex) + + def addArticle(self, archive, article, subject=None, author=None, + date=None): + self.__openIndices(archive) + self.__super_addArticle(archive, article, subject, author, date) + + def __openIndices(self, archive): + if self.__currentOpenArchive == archive: + return + self.__closeIndices() + arcdir = os.path.join(self.basedir, 'database') + omask = os.umask(0) + try: + try: + os.mkdir(arcdir, 02770) + except OSError, e: + if e.errno <> errno.EEXIST: raise + finally: + os.umask(omask) + for i in ('date', 'author', 'subject', 'article', 'thread'): + t = DumbBTree(os.path.join(arcdir, archive + '-' + i)) + setattr(self, i + 'Index', t) + self.__currentOpenArchive = archive + + def __closeIndices(self): + for i in ('date', 'author', 'subject', 'thread', 'article'): + attr = i + 'Index' + if hasattr(self, attr): + index = getattr(self, attr) + if i == 'article': + if not hasattr(self, 'archive_length'): + self.archive_length = {} + l = len(index) + self.archive_length[self.__currentOpenArchive] = l + index.close() + delattr(self, attr) + self.__currentOpenArchive = None + + def close(self): + self.__closeIndices() + + def hasArticle(self, archive, msgid): + self.__openIndices(archive) + return self.articleIndex.has_key(msgid) + + def setThreadKey(self, archive, key, msgid): + self.__openIndices(archive) + self.threadIndex[key]=msgid + + def getArticle(self, archive, msgid): + self.__openIndices(archive) + if not self.__cache.has_key(msgid): + # get the pickled object out of the DumbBTree + buf = self.articleIndex[msgid] + article = self.__cache[msgid] = pickle.loads(buf) + # For upgrading older archives + article.setListIfUnset(self._mlist) + else: + article = self.__cache[msgid] + return article + + def first(self, archive, index): + self.__openIndices(archive) + index = getattr(self, index + 'Index') + try: + key, msgid = index.first() + return msgid + except KeyError: + return None + + def next(self, archive, index): + self.__openIndices(archive) + index = getattr(self, index + 'Index') + try: + key, msgid = index.next() + return msgid + except KeyError: + return None + + def getOldestArticle(self, archive, subject): + self.__openIndices(archive) + subject = subject.lower() + try: + key, tempid=self.subjectIndex.set_location(subject) + self.subjectIndex.next() + [subject2, date]= key.split('\0') + if subject!=subject2: return None + return tempid + except KeyError: + return None + + def newArchive(self, archive): + pass + + def clearIndex(self, archive, index): + self.__openIndices(archive) + if hasattr(self.threadIndex, 'clear'): + self.threadIndex.clear() + return + finished=0 + try: + key, msgid=self.threadIndex.first() + except KeyError: finished=1 + while not finished: + del self.threadIndex[key] + try: + key, msgid=self.threadIndex.next() + except KeyError: finished=1 |