From b132a73f15e432eaf43310fce9196ca0c0651465 Mon Sep 17 00:00:00 2001
From:  <>
Date: Thu, 2 Jan 2003 05:25:50 +0000
Subject: This commit was manufactured by cvs2svn to create branch
 'Release_2_1-maint'.

---
 Mailman/LockFile.py | 596 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 596 insertions(+)
 create mode 100644 Mailman/LockFile.py

(limited to 'Mailman/LockFile.py')

diff --git a/Mailman/LockFile.py b/Mailman/LockFile.py
new file mode 100644
index 00000000..796a81eb
--- /dev/null
+++ b/Mailman/LockFile.py
@@ -0,0 +1,596 @@
+# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software 
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""Portable, NFS-safe file locking with timeouts.
+
+This code implements an NFS-safe file-based locking algorithm influenced by
+the GNU/Linux open(2) manpage, under the description of the O_EXCL option.
+From RH6.1:
+
+        [...] O_EXCL is broken on NFS file systems, programs which rely on it
+        for performing locking tasks will contain a race condition.  The
+        solution for performing atomic file locking using a lockfile is to
+        create a unique file on the same fs (e.g., incorporating hostname and
+        pid), use link(2) to make a link to the lockfile.  If link() returns
+        0, the lock is successful.  Otherwise, use stat(2) on the unique file
+        to check if its link count has increased to 2, in which case the lock
+        is also successful.
+
+The assumption made here is that there will be no `outside interference',
+e.g. no agent external to this code will have access to link() to the affected
+lock files.
+
+LockFile objects support lock-breaking so that you can't wedge a process
+forever.  This is especially helpful in a web environment, but may not be
+appropriate for all applications.
+
+Locks have a `lifetime', which is the maximum length of time the process
+expects to retain the lock.  It is important to pick a good number here
+because other processes will not break an existing lock until the expected
+lifetime has expired.  Too long and other processes will hang; too short and
+you'll end up trampling on existing process locks -- and possibly corrupting
+data.  In a distributed (NFS) environment, you also need to make sure that
+your clocks are properly synchronized.
+
+Locks can also log their state to a log file.  When running under Mailman, the
+log file is placed in a Mailman-specific location, otherwise, the log file is
+called `LockFile.log' and placed in the temp directory (calculated from
+tempfile.mktemp()).
+
+"""
+
+# This code has undergone several revisions, with contributions from Barry
+# Warsaw, Thomas Wouters, Harald Meland, and John Viega.  It should also work
+# well outside of Mailman so it could be used for other Python projects
+# requiring file locking.  See the __main__ section at the bottom of the file
+# for unit testing.
+
+import os
+import socket
+import time
+import errno
+import random
+import traceback
+from stat import ST_NLINK, ST_MTIME
+
+# Units are floating-point seconds.
+DEFAULT_LOCK_LIFETIME  = 15
+# Allowable a bit of clock skew
+CLOCK_SLOP = 10
+
+
+
+# Figure out what logfile to use.  This is different depending on whether
+# we're running in a Mailman context or not.
+_logfile = None
+
+def _get_logfile():
+    global _logfile
+    if _logfile is None:
+        try:
+            from Mailman.Logging.StampedLogger import StampedLogger
+            _logfile = StampedLogger('locks')
+        except ImportError:
+            # not running inside Mailman
+            import tempfile
+            dir = os.path.split(tempfile.mktemp())[0]
+            path = os.path.join(dir, 'LockFile.log')
+            # open in line-buffered mode
+            class SimpleUserFile:
+                def __init__(self, path):
+                    self.__fp = open(path, 'a', 1)
+                    self.__prefix = '(%d) ' % os.getpid()
+                def write(self, msg):
+                    now = '%.3f' % time.time()
+                    self.__fp.write(self.__prefix + now + ' ' + msg)
+            _logfile = SimpleUserFile(path)
+    return _logfile
+
+
+
+# Exceptions that can be raised by this module
+class LockError(Exception):
+    """Base class for all exceptions in this module."""
+
+class AlreadyLockedError(LockError):
+    """An attempt is made to lock an already locked object."""
+
+class NotLockedError(LockError):
+    """An attempt is made to unlock an object that isn't locked."""
+
+class TimeOutError(LockError):
+    """The timeout interval elapsed before the lock succeeded."""
+
+
+
+class LockFile:
+    """A portable way to lock resources by way of the file system.
+
+    This class supports the following methods:
+
+    __init__(lockfile[, lifetime[, withlogging]]):
+        Create the resource lock using lockfile as the global lock file.  Each
+        process laying claim to this resource lock will create their own
+        temporary lock files based on the path specified by lockfile.
+        Optional lifetime is the number of seconds the process expects to hold
+        the lock.  Optional withlogging, when true, turns on lockfile logging
+        (see the module docstring for details).
+
+    set_lifetime(lifetime):
+        Set a new lock lifetime.  This takes affect the next time the file is
+        locked, but does not refresh a locked file.
+
+    get_lifetime():
+        Return the lock's lifetime.
+
+    refresh([newlifetime[, unconditionally]]):
+        Refreshes the lifetime of a locked file.  Use this if you realize that
+        you need to keep a resource locked longer than you thought.  With
+        optional newlifetime, set the lock's lifetime.   Raises NotLockedError
+        if the lock is not set, unless optional unconditionally flag is set to
+        true.
+
+    lock([timeout]):
+        Acquire the lock.  This blocks until the lock is acquired unless
+        optional timeout is greater than 0, in which case, a TimeOutError is
+        raised when timeout number of seconds (or possibly more) expires
+        without lock acquisition.  Raises AlreadyLockedError if the lock is
+        already set.
+
+    unlock([unconditionally]):
+        Relinquishes the lock.  Raises a NotLockedError if the lock is not
+        set, unless optional unconditionally is true.
+
+    locked():
+        Return 1 if the lock is set, otherwise 0.  To avoid race conditions,
+        this refreshes the lock (on set locks).
+
+    """
+    # BAW: We need to watch out for two lock objects in the same process
+    # pointing to the same lock file.  Without this, if you lock lf1 and do
+    # not lock lf2, lf2.locked() will still return true.  NOTE: this gimmick
+    # probably does /not/ work in a multithreaded world, but we don't have to
+    # worry about that, do we? <1 wink>.
+    COUNTER = 0
+
+    def __init__(self, lockfile,
+                 lifetime=DEFAULT_LOCK_LIFETIME,
+                 withlogging=0):
+        """Create the resource lock using lockfile as the global lock file.
+
+        Each process laying claim to this resource lock will create their own
+        temporary lock files based on the path specified by lockfile.
+        Optional lifetime is the number of seconds the process expects to hold
+        the lock.  Optional withlogging, when true, turns on lockfile logging
+        (see the module docstring for details).
+
+        """
+        self.__lockfile = lockfile
+        self.__lifetime = lifetime
+        # This works because we know we're single threaded
+        self.__counter = LockFile.COUNTER
+        LockFile.COUNTER += 1
+        self.__tmpfname = '%s.%s.%d.%d' % (
+            lockfile, socket.gethostname(), os.getpid(), self.__counter)
+        self.__withlogging = withlogging
+        self.__logprefix = os.path.split(self.__lockfile)[1]
+        # For transferring ownership across a fork.
+        self.__owned = 1
+	
+    def __repr__(self):
+        return '<LockFile %s: %s [%s: %ssec] pid=%s>' % (
+            id(self), self.__lockfile,
+            self.locked() and 'locked' or 'unlocked',
+            self.__lifetime, os.getpid())
+
+    def set_lifetime(self, lifetime):
+        """Set a new lock lifetime.
+
+        This takes affect the next time the file is locked, but does not
+        refresh a locked file.
+        """
+        self.__lifetime = lifetime
+
+    def get_lifetime(self):
+        """Return the lock's lifetime."""
+        return self.__lifetime
+
+    def refresh(self, newlifetime=None, unconditionally=0):
+        """Refreshes the lifetime of a locked file.
+
+        Use this if you realize that you need to keep a resource locked longer
+        than you thought.  With optional newlifetime, set the lock's lifetime.
+        Raises NotLockedError if the lock is not set, unless optional
+        unconditionally flag is set to true.
+        """
+        if newlifetime is not None:
+            self.set_lifetime(newlifetime)
+        # Do we have the lock?  As a side effect, this refreshes the lock!
+        if not self.locked() and not unconditionally:
+            raise NotLockedError, '%s: %s' % (repr(self), self.__read())
+
+    def lock(self, timeout=0):
+        """Acquire the lock.
+
+        This blocks until the lock is acquired unless optional timeout is
+        greater than 0, in which case, a TimeOutError is raised when timeout
+        number of seconds (or possibly more) expires without lock acquisition.
+        Raises AlreadyLockedError if the lock is already set.
+
+        """
+        if timeout:
+            timeout_time = time.time() + timeout
+        # Make sure my temp lockfile exists, and that its contents are
+        # up-to-date (e.g. the temp file name, and the lock lifetime).
+        self.__write()
+        # TBD: This next call can fail with an EPERM.  I have no idea why, but
+        # I'm nervous about wrapping this in a try/except.  It seems to be a
+        # very rare occurence, only happens from cron, and (only?) on Solaris
+        # 2.6.
+        self.__touch()
+        self.__writelog('laying claim')
+        # for quieting the logging output
+        loopcount = -1
+        while 1:
+            loopcount = loopcount + 1
+            # Create the hard link and test for exactly 2 links to the file
+            try:
+                os.link(self.__tmpfname, self.__lockfile)
+                # If we got here, we know we know we got the lock, and never
+                # had it before, so we're done.  Just touch it again for the
+                # fun of it.
+                self.__writelog('got the lock')
+                self.__touch()
+                break
+            except OSError, e:
+                # The link failed for some reason, possibly because someone
+                # else already has the lock (i.e. we got an EEXIST), or for
+                # some other bizarre reason.
+                if e.errno == errno.ENOENT:
+                    # TBD: in some Linux environments, it is possible to get
+                    # an ENOENT, which is truly strange, because this means
+                    # that self.__tmpfname doesn't exist at the time of the
+                    # os.link(), but self.__write() is supposed to guarantee
+                    # that this happens!  I don't honestly know why this
+                    # happens, but for now we just say we didn't acquire the
+                    # lock, and try again next time.
+                    pass
+                elif e.errno <> errno.EEXIST:
+                    # Something very bizarre happened.  Clean up our state and
+                    # pass the error on up.
+                    self.__writelog('unexpected link error: %s' % e)
+                    os.unlink(self.__tmpfname)
+                    raise
+                elif self.__linkcount() <> 2:
+                    # Somebody's messin' with us!  Log this, and try again
+                    # later.  TBD: should we raise an exception?
+                    self.__writelog('unexpected linkcount: %d' %
+                                    self.__linkcount())
+                elif self.__read() == self.__tmpfname:
+                    # It was us that already had the link.
+                    self.__writelog('already locked')
+                    raise AlreadyLockedError
+                # otherwise, someone else has the lock
+                pass
+            # We did not acquire the lock, because someone else already has
+            # it.  Have we timed out in our quest for the lock?
+            if timeout and timeout_time < time.time():
+                os.unlink(self.__tmpfname)
+                self.__writelog('timed out')
+                raise TimeOutError
+            # Okay, we haven't timed out, but we didn't get the lock.  Let's
+            # find if the lock lifetime has expired.
+            if time.time() > self.__releasetime() + CLOCK_SLOP:
+                # Yes, so break the lock.
+                self.__break()
+                self.__writelog('lifetime has expired, breaking')
+            # Okay, someone else has the lock, our claim hasn't timed out yet,
+            # and the expected lock lifetime hasn't expired yet.  So let's
+            # wait a while for the owner of the lock to give it up.
+            elif not loopcount % 100:
+                self.__writelog('waiting for claim')
+            self.__sleep()
+
+    def unlock(self, unconditionally=0):
+        """Unlock the lock.
+
+        If we don't already own the lock (either because of unbalanced unlock
+        calls, or because the lock was stolen out from under us), raise a
+        NotLockedError, unless optional `unconditionally' is true.
+        """
+        islocked = self.locked()
+        if not islocked and not unconditionally:
+            raise NotLockedError
+        # If we owned the lock, remove the global file, relinquishing it.
+        if islocked:
+            try:
+                os.unlink(self.__lockfile)
+            except OSError, e:
+                if e.errno <> errno.ENOENT: raise
+        # Remove our tempfile
+        try:
+            os.unlink(self.__tmpfname)
+        except OSError, e:
+            if e.errno <> errno.ENOENT: raise
+        self.__writelog('unlocked')
+
+    def locked(self):
+        """Returns 1 if we own the lock, 0 if we do not.
+
+        Checking the status of the lockfile resets the lock's lifetime, which
+        helps avoid race conditions during the lock status test.
+        """
+        # Discourage breaking the lock for a while.
+        try:
+            self.__touch()
+        except OSError, e:
+            if e.errno == errno.EPERM:
+                # We can't touch the file because we're not the owner.  I
+                # don't see how we can own the lock if we're not the owner.
+                return 0
+            else:
+                raise
+        # TBD: can the link count ever be > 2?
+        if self.__linkcount() <> 2:
+            return 0
+        return self.__read() == self.__tmpfname
+
+    def finalize(self):
+        self.unlock(unconditionally=1)
+
+    def __del__(self):
+        if self.__owned:
+            self.finalize()
+
+    # Use these only if you're transfering ownership to a child process across
+    # a fork.  Use at your own risk, but it should be race-condition safe.
+    # _transfer_to() is called in the parent, passing in the pid of the
+    # child.  _take_possession() is called in the child, and blocks until the
+    # parent has transferred possession to the child.  _disown() is used to
+    # set the __owned flag to 0, and it is a disgusting wart necessary to make
+    # forced lock acquisition work in mailmanctl. :(
+    def _transfer_to(self, pid):
+        # First touch it so it won't get broken while we're fiddling about.
+        self.__touch()
+        # Find out current claim's temp filename
+        winner = self.__read()
+        # Now twiddle ours to the given pid
+        self.__tmpfname = '%s.%s.%d' % (
+            self.__lockfile, socket.gethostname(), pid)
+        # Create a hard link from the global lock file to the temp file.  This
+        # actually does things in reverse order of normal operation because we
+        # know that lockfile exists, and tmpfname better not!
+        os.link(self.__lockfile, self.__tmpfname)
+        # Now update the lock file to contain a reference to the new owner
+        self.__write()
+        # Toggle off our ownership of the file so we don't try to finalize it
+        # in our __del__()
+        self.__owned = 0
+        # Unlink the old winner, completing the transfer
+        os.unlink(winner)
+        # And do some sanity checks
+        assert self.__linkcount() == 2
+        assert self.locked()
+        self.__writelog('transferred the lock')
+
+    def _take_possession(self):
+        self.__tmpfname = tmpfname = '%s.%s.%d' % (
+            self.__lockfile, socket.gethostname(), os.getpid())
+        # Wait until the linkcount is 2, indicating the parent has completed
+        # the transfer.
+        while self.__linkcount() <> 2 or self.__read() <> tmpfname:
+            time.sleep(0.25)
+        self.__writelog('took possession of the lock')
+
+    def _disown(self):
+        self.__owned = 0
+
+    #
+    # Private interface
+    #
+
+    def __writelog(self, msg):
+        if self.__withlogging:
+            logf = _get_logfile()
+            logf.write('%s %s\n' % (self.__logprefix, msg))
+            traceback.print_stack(file=logf)
+
+    def __write(self):
+        # Make sure it's group writable
+        oldmask = os.umask(002)
+        try:
+            fp = open(self.__tmpfname, 'w')
+            fp.write(self.__tmpfname)
+            fp.close()
+        finally:
+            os.umask(oldmask)
+
+    def __read(self):
+        try:
+            fp = open(self.__lockfile)
+            filename = fp.read()
+            fp.close()
+            return filename
+        except EnvironmentError, e:
+            if e.errno <> errno.ENOENT: raise
+            return None
+
+    def __touch(self, filename=None):
+        t = time.time() + self.__lifetime
+        try:
+            # TBD: We probably don't need to modify atime, but this is easier.
+            os.utime(filename or self.__tmpfname, (t, t))
+        except OSError, e:
+            if e.errno <> errno.ENOENT: raise
+
+    def __releasetime(self):
+        try:
+            return os.stat(self.__lockfile)[ST_MTIME]
+        except OSError, e:
+            if e.errno <> errno.ENOENT: raise
+            return -1
+
+    def __linkcount(self):
+        try:
+            return os.stat(self.__lockfile)[ST_NLINK]
+        except OSError, e:
+            if e.errno <> errno.ENOENT: raise
+            return -1
+
+    def __break(self):
+        # First, touch the global lock file.  This reduces but does not
+        # eliminate the chance for a race condition during breaking.  Two
+        # processes could both pass the test for lock expiry in lock() before
+        # one of them gets to touch the global lockfile.  This shouldn't be
+        # too bad because all they'll do in this function is wax the lock
+        # files, not claim the lock, and we can be defensive for ENOENTs
+        # here.
+        #
+        # Touching the lock could fail if the process breaking the lock and
+        # the process that claimed the lock have different owners.  We could
+        # solve this by set-uid'ing the CGI and mail wrappers, but I don't
+        # think it's that big a problem.
+        try:
+            self.__touch(self.__lockfile)
+        except OSError, e:
+            if e.errno <> errno.EPERM: raise
+        # Get the name of the old winner's temp file.
+        winner = self.__read()
+        # Remove the global lockfile, which actually breaks the lock.
+        try:
+            os.unlink(self.__lockfile)
+        except OSError, e:
+            if e.errno <> errno.ENOENT: raise
+        # Try to remove the old winner's temp file, since we're assuming the
+        # winner process has hung or died.  Don't worry too much if we can't
+        # unlink their temp file -- this doesn't wreck the locking algorithm,
+        # but will leave temp file turds laying around, a minor inconvenience.
+        try:
+            if winner:
+                os.unlink(winner)
+        except OSError, e:
+            if e.errno <> errno.ENOENT: raise
+
+    def __sleep(self):
+        interval = random.random() * 2.0 + 0.01
+        time.sleep(interval)
+
+
+
+# Unit test framework
+def _dochild():
+    prefix = '[%d]' % os.getpid()
+    # Create somewhere between 1 and 1000 locks
+    lockfile = LockFile('/tmp/LockTest', withlogging=1, lifetime=120)
+    # Use a lock lifetime of between 1 and 15 seconds.  Under normal
+    # situations, Mailman's usage patterns (untested) shouldn't be much longer
+    # than this.
+    workinterval = 5 * random.random()
+    hitwait = 20 * random.random()
+    print prefix, 'workinterval:', workinterval
+    islocked = 0
+    t0 = 0
+    t1 = 0
+    t2 = 0
+    try:
+        try:
+            t0 = time.time()
+            print prefix, 'acquiring...'
+            lockfile.lock()
+            print prefix, 'acquired...'
+            islocked = 1
+        except TimeOutError:
+            print prefix, 'timed out'
+        else:
+            t1 = time.time()
+            print prefix, 'acquisition time:', t1-t0, 'seconds'
+            time.sleep(workinterval)
+    finally:
+        if islocked:
+            try:
+                lockfile.unlock()
+                t2 = time.time()
+                print prefix, 'lock hold time:', t2-t1, 'seconds'
+            except NotLockedError:
+                print prefix, 'lock was broken'
+    # wait for next web hit
+    print prefix, 'webhit sleep:', hitwait
+    time.sleep(hitwait)
+
+
+def _seed():
+    try:
+        fp = open('/dev/random')
+        d = fp.read(40)
+        fp.close()
+    except EnvironmentError, e:
+        if e.errno <> errno.ENOENT:
+            raise
+        import sha
+        d = sha.new(`os.getpid()`+`time.time()`).hexdigest()
+    random.seed(d)
+
+
+def _onetest():
+    loopcount = random.randint(1, 100)
+    for i in range(loopcount):
+        print 'Loop %d of %d' % (i+1, loopcount)
+        pid = os.fork()
+        if pid:
+            # parent, wait for child to exit
+            pid, status = os.waitpid(pid, 0)
+        else:
+            # child
+            _seed()
+            try:
+                _dochild()
+            except KeyboardInterrupt:
+                pass
+            os._exit(0)
+    
+
+def _reap(kids):
+    if not kids:
+        return
+    pid, status = os.waitpid(-1, os.WNOHANG)
+    if pid <> 0:
+        del kids[pid]
+
+
+def _test(numtests):
+    kids = {}
+    for i in range(numtests):
+        pid = os.fork()
+        if pid:
+            # parent
+            kids[pid] = pid
+        else:
+            # child
+            _seed()
+            try:
+                _onetest()
+            except KeyboardInterrupt:
+                pass
+            os._exit(0)
+        # slightly randomize each kid's seed
+    while kids:
+        _reap(kids)
+
+
+if __name__ == '__main__':
+    import sys
+    import random
+    _test(int(sys.argv[1]))
-- 
cgit v1.2.3