# Copyright (C) 2001-2008 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
"""Reading and writing message objects and message metadata.
"""
# enqueue() and dequeue() are not symmetric. enqueue() takes a Message
# object. dequeue() returns a email.Message object tree.
#
# Message metadata is represented internally as a Python dictionary. Keys and
# values must be strings. When written to a queue directory, the metadata is
# written into an externally represented format, as defined here. Because
# components of the Mailman system may be written in something other than
# Python, the external interchange format should be chosen based on what those
# other components can read and write.
#
# Most efficient, and recommended if everything is Python, is Python marshal
# format. Also supported by default is Berkeley db format (using the default
# bsddb module compiled into your Python executable -- usually Berkeley db
# 2), and rfc822 style plain text. You can write your own if you have other
# needs.
import os
import sha
import time
import email
import errno
import cPickle
import marshal
from Mailman import mm_cfg
from Mailman import Utils
from Mailman import Message
from Mailman.Logging.Syslog import syslog
# 20 bytes of all bits set, maximum sha.digest() value
shamax = 0xffffffffffffffffffffffffffffffffffffffffL
try:
True, False
except NameError:
True = 1
False = 0
# This flag causes messages to be written as pickles (when True) or text files
# (when False). Pickles are more efficient because the message doesn't need
# to be re-parsed every time it's unqueued, but pickles are not human readable.
SAVE_MSGS_AS_PICKLES = True
# Small increment to add to time in case two entries have the same time. This
# prevents skipping one of two entries with the same time until the next pass.
DELTA = .0001
# We count the number of times a file has been moved to .bak and recovered.
# In order to prevent loops and a message flood, when the count reaches this
# value, we move the file to the shunt queue as a .psv.
MAX_BAK_COUNT = 3
class Switchboard:
def __init__(self, whichq, slice=None, numslices=1, recover=False):
self.__whichq = whichq
# Create the directory if it doesn't yet exist.
# FIXME
omask = os.umask(0) # rwxrws---
try:
try:
os.mkdir(self.__whichq, 0770)
except OSError, e:
if e.errno <> errno.EEXIST: raise
finally:
os.umask(omask)
# Fast track for no slices
self.__lower = None
self.__upper = None
# BAW: test performance and end-cases of this algorithm
if numslices <> 1:
self.__lower = ((shamax+1) * slice) / numslices
self.__upper = (((shamax+1) * (slice+1)) / numslices) - 1
if recover:
self.recover_backup_files()
def whichq(self):
return self.__whichq
def enqueue(self, _msg, _metadata={}, **_kws):
# Calculate the SHA hexdigest of the message to get a unique base
# filename. We're also going to use the digest as a hash into the set
# of parallel qrunner processes.
data = _metadata.copy()
data.update(_kws)
listname = data.get('listname', '--nolist--')
# Get some data for the input to the sha hash
now = time.time()
if SAVE_MSGS_AS_PICKLES and not data.get('_plaintext'):
protocol = 1
msgsave = cPickle.dumps(_msg, protocol)
else:
protocol = 0
msgsave = cPickle.dumps(str(_msg), protocol)
hashfood = msgsave + listname + `now`
# Encode the current time into the file name for FIFO sorting in
# files(). The file name consists of two parts separated by a `+':
# the received time for this message (i.e. when it first showed up on
# this system) and the sha hex digest.
#rcvtime = data.setdefault('received_time', now)
rcvtime = data.setdefault('received_time', now)
filebase = `rcvtime` + '+' + sha.new(hashfood).hexdigest()
filename = os.path.join(self.__whichq, filebase + '.pck')
tmpfile = filename + '.tmp'
# Always add the metadata schema version number
data['version'] = mm_cfg.QFILE_SCHEMA_VERSION
# Filter out volatile entries
for k in data.keys():
if k.startswith('_'):
del data[k]
# We have to tell the dequeue() method whether to parse the message
# object or not.
data['_parsemsg'] = (protocol == 0)
# Write to the pickle file the message object and metadata.
omask = os.umask(007) # -rw-rw----
try:
fp = open(tmpfile, 'w')
try:
fp.write(msgsave)
cPickle.dump(data, fp, protocol)
fp.flush()
os.fsync(fp.fileno())
finally:
fp.close()
finally:
os.umask(omask)
os.rename(tmpfile, filename)
return filebase
def dequeue(self, filebase):
# Calculate the filename from the given filebase.
filename = os.path.join(self.__whichq, filebase + '.pck')
backfile = os.path.join(self.__whichq, filebase + '.bak')
# Read the message object and metadata.
fp = open(filename)
# Move the file to the backup file name for processing. If this
# process crashes uncleanly the .bak file will be used to re-instate
# the .pck file in order to try again.
os.rename(filename, backfile)
try:
msg = cPickle.load(fp)
data = cPickle.load(fp)
finally:
fp.close()
if data.get('_parsemsg'):
msg = email.message_from_string(msg, Message.Message)
return msg, data
def finish(self, filebase, preserve=False):
bakfile = os.path.join(self.__whichq, filebase + '.bak')
try:
if preserve:
psvfile = os.path.join(mm_cfg.SHUNTQUEUE_DIR, filebase + '.psv')
# Create the directory if it doesn't yet exist.
# Copied from __init__.
omask = os.umask(0) # rwxrws---
try:
try:
os.mkdir(mm_cfg.SHUNTQUEUE_DIR, 0770)
except OSError, e:
if e.errno <> errno.EEXIST: raise
finally:
os.umask(omask)
os.rename(bakfile, psvfile)
else:
os.unlink(bakfile)
except EnvironmentError, e:
syslog('error', 'Failed to unlink/preserve backup file: %s',
bakfile)
def files(self, extension='.pck'):
times = {}
lower = self.__lower
upper = self.__upper
for f in os.listdir(self.__whichq):
# By ignoring anything that doesn't end in .pck, we ignore
# tempfiles and avoid a race condition.
filebase, ext = os.path.splitext(f)
if ext <> extension:
continue
when, digest = filebase.split('+')
# Throw out any files which don't match our bitrange. BAW: test
# performance and end-cases of this algorithm. MAS: both
# comparisons need to be <= to get complete range.
if lower is None or (lower <= long(digest, 16) <= upper):
key = float(when)
while times.has_key(key):
key += DELTA
times[key] = filebase
# FIFO sort
keys = times.keys()
keys.sort()
return [times[k] for k in keys]
def recover_backup_files(self):
# Move all .bak files in our slice to .pck. It's impossible for both
# to exist at the same time, so the move is enough to ensure that our
# normal dequeuing process will handle them. We keep count in
# _bak_count in the metadata of the number of times we recover this
# file. When the count reaches MAX_BAK_COUNT, we move the .bak file
# to a .psv file in the shunt queue.
for filebase in self.files('.bak'):
src = os.path.join(self.__whichq, filebase + '.bak')
dst = os.path.join(self.__whichq, filebase + '.pck')
fp = open(src, 'rb+')
try:
msg = cPickle.load(fp)
data_pos = fp.tell()
data = cPickle.load(fp)
data['_bak_count'] = data.setdefault('_bak_count', 0) + 1
fp.seek(data_pos)
if data.get('_parsemsg'):
protocol = 0
else:
protocol = 1
cPickle.dump(data, fp, protocol)
fp.truncate()
fp.flush()
os.fsync(fp.fileno())
finally:
fp.close()
if data['_bak_count'] >= MAX_BAK_COUNT:
syslog('error', '.bak file max count, preserving file: %s',
filebase)
self.finish(filebase, preserve=True)
else:
os.rename(src, dst)