Diffstat (limited to 'bin/mailmanctl')
-rw-r--r-- | bin/mailmanctl | 524 |
1 files changed, 524 insertions, 0 deletions
diff --git a/bin/mailmanctl b/bin/mailmanctl
new file mode 100644
index 00000000..0292e1f3
--- /dev/null
+++ b/bin/mailmanctl
@@ -0,0 +1,524 @@
+#! @PYTHON@
+
+# Copyright (C) 2001,2002 by the Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""Primary start-up and shutdown script for Mailman's qrunner daemon.
+
+This script starts, stops, and restarts the main Mailman queue runners, making
+sure that the various long-running qrunners are still alive and kicking. It
+does this by forking and exec'ing the qrunners and waiting on their pids.
+When it detects a subprocess has exited, it may restart it.
+
+The qrunners respond to SIGINT, SIGTERM, and SIGHUP. SIGINT and SIGTERM both
+cause the qrunners to exit cleanly, but the master will only restart qrunners
+that have exited due to a SIGINT. SIGHUP causes the master and the qrunners
+to close their log files, and reopen them upon the next printed message.
+
+The master also responds to SIGINT, SIGTERM, and SIGHUP, which it simply
+passes on to the qrunners (note that the master will close and reopen its own
+log files on receipt of a SIGHUP). The master also leaves its own process id
+in the file data/master-qrunner.pid but you normally don't need to use this
+pid directly. The `start', `stop', `restart', and `reopen' commands handle
+everything for you.
+
+Usage: %(PROGRAM)s [options] [ start | stop | restart | reopen ]
+
+Options:
+
+    -n/--no-restart
+        Don't restart the qrunners when they exit because of an error or a
+        SIGINT. They are never restarted if they exit in response to a
+        SIGTERM. Use this only for debugging. Only useful if the `start'
+        command is given.
+
+    -u/--run-as-user
+        Normally, this script will refuse to run if the user id and group id
+        are not set to the `mailman' user and group (as defined when you
+        configured Mailman). If run as root, this script will change to this
+        user and group before the check is made.
+
+        This can be inconvenient for testing and debugging purposes, so the -u
+        flag means that the step that sets and checks the uid/gid is skipped,
+        and the program is run as the current user and group. This flag is
+        not recommended for normal production environments.
+
+        Note though, that if you run with -u and are not in the mailman group,
+        you may have permission problems, such as being unable to delete a
+        list's archives through the web. Tough luck!
+
+    -s/--stale-lock-cleanup
+        If mailmanctl finds an existing master lock, it will normally exit
+        with an error message. With this option, mailmanctl will perform an
+        extra level of checking. If a process matching the host/pid described
+        in the lock file is running, mailmanctl will still exit, but if no
+        matching process is found, mailmanctl will remove the apparently stale
+        lock and make another attempt to claim the master lock.
+
+    -q/--quiet
+        Don't print status messages. Error messages are still printed to
+        standard error.
+
+    -h/--help
+        Print this message and exit.
+
+Commands:
+
+    start   - Start the master daemon and all qrunners. Prints a message and
+              exits if the master daemon is already running.
+
+    stop    - Stops the master daemon and all qrunners. After stopping, no
+              more messages will be processed.
+
+    restart - Restarts the qrunners, but not the master process. Use this
+              whenever you upgrade or update Mailman so that the qrunners will
+              use the newly installed code.
+
+    reopen  - This will close all log files, causing them to be re-opened the
+              next time a message is written to them.
+"""
+
+import sys
+import os
+import time
+import getopt
+import signal
+import errno
+import pwd
+import grp
+import socket
+
+import paths
+from Mailman import mm_cfg
+from Mailman import Utils
+from Mailman import LockFile
+from Mailman.i18n import _
+from Mailman.Logging.Syslog import syslog
+from Mailman.Logging.Utils import LogStdErr
+
+PROGRAM = sys.argv[0]
+COMMASPACE = ', '
+DOT = '.'
+
+# Locking constants
+LOCKFILE = os.path.join(mm_cfg.LOCK_DIR, 'master-qrunner')
+# Since we wake up once per day and refresh the lock, the LOCK_LIFETIME
+# needn't be (much) longer than SNOOZE. We pad it 6 hours just to be safe.
+LOCK_LIFETIME = mm_cfg.days(1) + mm_cfg.hours(6)
+SNOOZE = mm_cfg.days(1)
+MAX_RESTARTS = 10
+
+LogStdErr('error', 'mailmanctl', manual_reprime=0)
+
+
+
+def usage(code, msg=''):
+    if code:
+        fd = sys.stderr
+    else:
+        fd = sys.stdout
+    print >> fd, _(__doc__)
+    if msg:
+        print >> fd, msg
+    sys.exit(code)
+
+
+
+def kill_watcher(sig):
+    try:
+        fp = open(mm_cfg.PIDFILE)
+        pidstr = fp.read()
+        fp.close()
+        pid = int(pidstr.strip())
+    except (IOError, ValueError), e:
+        # For i18n convenience
+        pidfile = mm_cfg.PIDFILE
+        print >> sys.stderr, _('PID unreadable in: %(pidfile)s')
+        print >> sys.stderr, e
+        print >> sys.stderr, _('Is qrunner even running?')
+        return
+    try:
+        os.kill(pid, sig)
+    except OSError, e:
+        if e.errno <> errno.ESRCH: raise
+        print >> sys.stderr, _('No child with pid: %(pid)s')
+        print >> sys.stderr, e
+        print >> sys.stderr, _('Stale pid file removed.')
+        os.unlink(mm_cfg.PIDFILE)
+
+
+
+def get_lock_data():
+    # Return the hostname, pid, and tempfile
+    fp = open(LOCKFILE)
+    filename = os.path.split(fp.read().strip())[1]
+    fp.close()
+    parts = filename.split('.')
+    hostname = DOT.join(parts[1:-1])
+    pid = int(parts[-1])
+    return hostname, pid, filename
+
+
+def qrunner_state():
+    # 1 if proc exists on host (but is it qrunner? ;)
+    # 0 if host matches but no proc
+    # hostname if hostname doesn't match
+    hostname, pid, tempfile = get_lock_data()
+    if hostname <> socket.gethostname():
+        return hostname
+    # Find out if the process exists by calling kill with a signal 0.
+    try:
+        os.kill(pid, 0)
+    except OSError, e:
+        if e.errno <> errno.ESRCH: raise
+        return 0
+    return 1
+
+
+def acquire_lock_1(force):
+    # Be sure we can acquire the master qrunner lock. If not, it means some
+    # other master qrunner daemon is already going.
+    lock = LockFile.LockFile(LOCKFILE, LOCK_LIFETIME)
+    try:
+        lock.lock(0.1)
+        return lock
+    except LockFile.TimeOutError:
+        if not force:
+            raise
+        # Force removal of lock first
+        lock._disown()
+        hostname, pid, tempfile = get_lock_data()
+        os.unlink(LOCKFILE)
+        os.unlink(os.path.join(mm_cfg.LOCK_DIR, tempfile))
+        return acquire_lock_1(force=0)
+
+
+def acquire_lock(force):
+    try:
+        lock = acquire_lock_1(force)
+        return lock
+    except LockFile.TimeOutError:
+        status = qrunner_state()
+        if status == 1:
+            # host matches and proc exists
+            print >> sys.stderr, _("""\
+The master qrunner lock could not be acquired because it appears as if another
+master qrunner is already running.
+""")
+        elif status == 0:
+            # host matches but no proc
+            print >> sys.stderr, _("""\
+The master qrunner lock could not be acquired. It appears as though there is
+a stale master qrunner lock. Try re-running mailmanctl with the -s flag.
+""")
+        else:
+            # host doesn't even match
+            print >> sys.stderr, _("""\
+The master qrunner lock could not be acquired, because it appears as if some
+process on some other host may have acquired it. We can't test for stale
+locks across host boundaries, so you'll have to do this manually. Or, if you
+know the lock is stale, re-run mailmanctl with the -s flag.
+
+Lock file: %(LOCKFILE)s
+Lock host: %(status)s
+
+Exiting.""")
+
+
+
+def start_runner(qrname, slice, count):
+    pid = os.fork()
+    if pid:
+        # parent
+        return pid
+    # child
+    #
+    # Craft the command line arguments for the exec() call.
+    rswitch = '--runner=%s:%d:%d' % (qrname, slice, count)
+    # BAW: should argv[0] be `python'?
+    exe = os.path.join(mm_cfg.BIN_DIR, 'qrunner')
+    os.execl(mm_cfg.PYTHON, 'qrunner', exe, rswitch, '-s')
+    # Should never get here
+    raise RuntimeError, 'os.execl() failed'
+
+
+def start_all_runners():
+    kids = {}
+    for qrname, count in mm_cfg.QRUNNERS:
+        for slice in range(count):
+            # queue runner name, slice, numslices, restart count
+            info = (qrname, slice, count, 0)
+            pid = start_runner(qrname, slice, count)
+            kids[pid] = info
+    return kids
+
+
+
+def check_privs():
+    # If we're running as root (uid == 0), coerce the uid and gid to that
+    # which Mailman was configured for, and refuse to run if we didn't coerce
+    # the uid/gid.
+    gid = grp.getgrnam(mm_cfg.MAILMAN_GROUP)[2]
+    uid = pwd.getpwnam(mm_cfg.MAILMAN_USER)[2]
+    myuid = os.getuid()
+    if myuid == 0:
+        os.setgid(gid)
+        os.setuid(uid)
+    elif myuid <> uid:
+        name = mm_cfg.MAILMAN_USER
+        usage(1, _(
+            'Run this program as root or as the %(name)s user, or use -u.'))
+
+
+
+def main():
+    global quiet
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], 'hnusq',
+                                   ['help', 'no-restart', 'run-as-user',
+                                    'stale-lock-cleanup', 'quiet'])
+    except getopt.error, msg:
+        usage(1, msg)
+
+    restart = 1
+    checkprivs = 1
+    force = 0
+    quiet = 0
+    for opt, arg in opts:
+        if opt in ('-h', '--help'):
+            usage(0)
+        elif opt in ('-n', '--no-restart'):
+            restart = 0
+        elif opt in ('-u', '--run-as-user'):
+            checkprivs = 0
+        elif opt in ('-s', '--stale-lock-cleanup'):
+            force = 1
+        elif opt in ('-q', '--quiet'):
+            quiet = 1
+
+    if len(args) < 1:
+        usage(1, _('No command given.'))
+    elif len(args) > 1:
+        command = COMMASPACE.join(args)
+        usage(1, _('Bad command: %(command)s'))
+
+    if checkprivs:
+        check_privs()
+    else:
+        print _('Warning! You may encounter permission problems.')
+
+    # Handle the commands
+    command = args[0].lower()
+    if command == 'stop':
+        # Send the master qrunner process a SIGTERM, which is equivalent to
+        # giving cron/qrunner a ctrl-c or KeyboardInterrupt. This will
+        # effectively shut everything down.
+        if not quiet:
+            print _("Shutting down Mailman's master qrunner")
+        kill_watcher(signal.SIGTERM)
+    elif command == 'restart':
+        # Send the master qrunner process a SIGINT. This will cause the
+        # master qrunner to kill and restart all the worker qrunners, so
+        # that they pick up the newly installed code.
+        if not quiet:
+            print _("Restarting Mailman's master qrunner")
+        kill_watcher(signal.SIGINT)
+    elif command == 'reopen':
+        if not quiet:
+            print _('Re-opening all log files')
+        kill_watcher(signal.SIGHUP)
+    elif command == 'start':
+        # Here's the scoop on the processes we're about to create. We'll need
+        # one for each qrunner, and one for a master child process watcher /
+        # lock refresher process.
+        #
+        # The child watcher process simply waits on the pids of the child
+        # qrunners. Unless explicitly disabled by a mailmanctl switch (or the
+        # children are killed with SIGTERM instead of SIGINT), the watcher
+        # will automatically restart any child process that exits. This
+        # allows us to be more robust, and also to implement restart by simply
+        # SIGINT'ing the qrunner children, and letting the watcher restart
+        # them.
+        #
+        # Under normal operation, we have a child per queue. This lets us get
+        # the most out of the available resources, since a qrunner with no
+        # files in its queue directory is pretty cheap, but having a separate
+        # runner process per queue allows for a very responsive system. Some
+        # people want a more traditional (i.e. MM2.0.x) cron-invoked qrunner.
+        # No problem, but using mailmanctl isn't the answer. So while
+        # mailmanctl hard codes some things, others, such as the number of
+        # qrunners per queue, are configurable in mm_cfg.py.
+        #
+        # First, acquire the master mailmanctl lock
+        lock = acquire_lock(force)
+        if not lock:
+            return
+        # Daemon process startup according to Stevens, Advanced Programming in
+        # the UNIX Environment, Chapter 13.
+        pid = os.fork()
+        if pid:
+            # parent
+            if not quiet:
+                print _("Starting Mailman's master qrunner.")
+            # Give up the lock "ownership". This just means the foreground
+            # process won't close/unlock the lock when it finalizes this lock
+            # instance. We'll let the master watcher subproc own the lock.
+            lock._transfer_to(pid)
+            return
+        # child
+        lock._take_possession()
+        # First, save our pid in a file for "mailmanctl stop" rendezvous. We
+        # want the perms on the .pid file to be rw-rw----
+        omask = os.umask(6)
+        try:
+            fp = open(mm_cfg.PIDFILE, 'w')
+            print >> fp, os.getpid()
+            fp.close()
+        finally:
+            os.umask(omask)
+        # Create a new session and become the session leader, but since we
+        # won't be opening any terminal devices, don't do the ultra-paranoid
+        # suggestion of doing a second fork after the setsid() call.
+        os.setsid()
+        # Instead of cd'ing to root, cd to the Mailman installation home
+        os.chdir(mm_cfg.PREFIX)
+        # Clear our file mode creation umask
+        os.umask(0)
+        # I don't think we have any unneeded file descriptors.
+        #
+        # Now start all the qrunners. This returns a dictionary where the
+        # keys are qrunner pids and the values are tuples of the form
+        # (qrname, slice, count, restarts). This does its own fork and exec,
+        # and sets up its own signal handlers.
+        kids = start_all_runners()
+        # Set up a SIGALRM handler to refresh the lock once per day. The lock
+        # lifetime is 1day+6hours so this should be plenty.
+        def sigalrm_handler(signum, frame, lock=lock):
+            lock.refresh()
+            signal.alarm(mm_cfg.days(1))
+        signal.signal(signal.SIGALRM, sigalrm_handler)
+        signal.alarm(mm_cfg.days(1))
+        # Set up a SIGHUP handler so that if we get one, we'll pass it along
+        # to all the qrunner children. This will tell them to close and
+        # reopen their log files.
+        def sighup_handler(signum, frame, kids=kids):
+            # Closing our syslog will cause it to be re-opened at the next
+            # logged message.
+            syslog.close()
+            for pid in kids.keys():
+                os.kill(pid, signal.SIGHUP)
+            # And just to tweak things...
+            syslog('qrunner',
+                   'Master watcher caught SIGHUP. Re-opening log files.')
+        signal.signal(signal.SIGHUP, sighup_handler)
+        # We also need to install a SIGTERM handler because that's what init
+        # will kill this process with when changing run levels.
+        def sigterm_handler(signum, frame, kids=kids):
+            for pid in kids.keys():
+                try:
+                    os.kill(pid, signal.SIGTERM)
+                except OSError, e:
+                    if e.errno <> errno.ESRCH: raise
+            syslog('qrunner', 'Master watcher caught SIGTERM. Exiting.')
+        signal.signal(signal.SIGTERM, sigterm_handler)
+        # Finally, we need a SIGINT handler which will cause the sub-qrunners
+        # to exit, but the master will restart SIGINT'd sub-processes unless
+        # the -n flag was given.
+        def sigint_handler(signum, frame, kids=kids):
+            for pid in kids.keys():
+                os.kill(pid, signal.SIGINT)
+            syslog('qrunner', 'Master watcher caught SIGINT. Restarting.')
+        signal.signal(signal.SIGINT, sigint_handler)
+        # Now we're ready to simply do our wait/restart loop. This is the
+        # master qrunner watcher.
+        try:
+            while 1:
+                try:
+                    pid, status = os.wait()
+                except OSError, e:
+                    # No children? We're done
+                    if e.errno == errno.ECHILD:
+                        break
+                    # If the system call got interrupted, just restart it.
+                    elif e.errno <> errno.EINTR:
+                        raise
+                    continue
+                killsig = exitstatus = None
+                if os.WIFSIGNALED(status):
+                    killsig = os.WTERMSIG(status)
+                if os.WIFEXITED(status):
+                    exitstatus = os.WEXITSTATUS(status)
+                # We'll restart the process unless we were given the
+                # "no-restart" switch, or if the process was SIGTERM'd or
+                # exited with a SIGTERM exit status. This lets us better
+                # handle runaway restarts (say, if the subproc had a syntax
+                # error!)
+                restarting = ''
+                if restart:
+                    if (exitstatus == None and killsig <> signal.SIGTERM) or \
+                       (killsig == None and exitstatus <> signal.SIGTERM):
+                        # Then
+                        restarting = '[restarting]'
+                qrname, slice, count, restarts = kids[pid]
+                del kids[pid]
+                syslog('qrunner', """\
+Master qrunner detected subprocess exit
+(pid: %d, sig: %s, sts: %s, class: %s, slice: %d/%d) %s""",
+                       pid, killsig, exitstatus, qrname,
+                       slice+1, count, restarting)
+                # See if we've reached the maximum number of allowable restarts
+                if exitstatus <> signal.SIGINT:
+                    restarts += 1
+                if restarts > MAX_RESTARTS:
+                    syslog('qrunner', """\
+Qrunner %s reached maximum restart limit of %d, not restarting.""",
+                           qrname, MAX_RESTARTS)
+                    restarting = ''
+                # Now perhaps restart the process unless it exited with a
+                # SIGTERM or we aren't restarting.
+                if restarting:
+                    newpid = start_runner(qrname, slice, count)
+                    kids[newpid] = (qrname, slice, count, restarts)
+        finally:
+            # Should we leave the main loop for any reason, we want to be sure
+            # all of our children exit cleanly. Send SIGTERMs to all
+            # the child processes and wait for them all to exit.
+            for pid in kids.keys():
+                try:
+                    os.kill(pid, signal.SIGTERM)
+                except OSError, e:
+                    if e.errno == errno.ESRCH:
+                        # The child has already exited
+                        syslog('qrunner', 'ESRCH on pid: %d', pid)
+                        del kids[pid]
+            # Wait for all the children to go away
+            while 1:
+                try:
+                    pid, status = os.wait()
+                except OSError, e:
+                    if e.errno == errno.ECHILD:
+                        break
+                    elif e.errno <> errno.EINTR:
+                        raise
+                    continue
+            # Finally, give up the lock
+            lock.unlock(unconditionally=1)
+            os._exit(0)
+
+
+
+if __name__ == '__main__':
+    main()