diff options
Diffstat (limited to 'cron/gate_news')
-rwxr-xr-x | cron/gate_news | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/cron/gate_news b/cron/gate_news new file mode 100755 index 00000000..3fe466d4 --- /dev/null +++ b/cron/gate_news @@ -0,0 +1,274 @@ +#! @PYTHON@ +# +# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +"""Poll the NNTP servers for messages to be gatewayed to mailing lists. + +Usage: gate_news [options] + +Where options are + + --help + -h + Print this text and exit. + +""" + +import sys +import os +import time +import getopt +import socket +import nntplib + +import paths +# Import this /after/ paths so that the sys.path is properly hacked +import email.Errors +from email.Parser import Parser + +from Mailman import mm_cfg +from Mailman import MailList +from Mailman import Utils +from Mailman import Message +from Mailman import LockFile +from Mailman.i18n import _ +from Mailman.Queue.sbcache import get_switchboard +from Mailman.Logging.Utils import LogStdErr +from Mailman.Logging.Syslog import syslog + +# Work around known problems with some RedHat cron daemons +import signal +signal.signal(signal.SIGCHLD, signal.SIG_DFL) + +GATENEWS_LOCK_FILE = os.path.join(mm_cfg.LOCK_DIR, 'gate_news.lock') + +LogStdErr('error', 'gate_news', manual_reprime=0) + +LOCK_LIFETIME = mm_cfg.hours(2) +NL = '\n' + +# Continues inside try: block are not allowed in Python versions before 2.1. +# This exception is used to work around that. +class _ContinueLoop(Exception): + pass + + + +def usage(status, msg=''): + if code: + fd = sys.stderr + else: + fd = sys.stdout + print >> fd, _(__doc__) + if msg: + print >> fd, msg + sys.exit(code) + + + +_hostcache = {} + +def open_newsgroup(mlist): + # Open up a "mode reader" connection to nntp server. This will be shared + # for all the gated lists having the same nntp_host. + conn = _hostcache.get(mlist.nntp_host) + if conn is None: + try: + conn = nntplib.NNTP(mlist.nntp_host, readermode=1, + user=mm_cfg.NNTP_USERNAME, + password=mm_cfg.NNTP_PASSWORD) + except (socket.error, nntplib.NNTPError, IOError), e: + syslog('fromusenet', + 'error opening connection to nntp_host: %s\n%s', + mlist.nntp_host, e) + raise + _hostcache[mlist.nntp_host] = conn + # Get the GROUP information for the list, but we're only really interested + # in the first article number and the last article number + r,c,f,l,n = conn.group(mlist.linked_newsgroup) + return conn, int(f), int(l) + + +def clearcache(): + reverse = {} + for conn in _hostcache.values(): + reverse[conn] = 1 + for conn in reverse.keys(): + conn.quit() + _hostcache.clear() + + + +# This function requires the list to be locked. +def poll_newsgroup(mlist, conn, first, last, glock): + listname = mlist.internal_name() + # NEWNEWS is not portable and has synchronization issues. + for num in range(first, last): + glock.refresh() + try: + headers = conn.head(`num`)[3] + found_to = 0 + beenthere = 0 + for header in headers: + i = header.find(':') + value = header[:i].lower() + if i > 0 and value == 'to': + found_to = 1 + if value <> 'x-beenthere': + continue + if header[i:] == ': %s' % mlist.GetListEmail(): + beenthere = 1 + break + if not beenthere: + body = conn.body(`num`)[3] + # Usenet originated messages will not have a Unix envelope + # (i.e. "From " header). This breaks Pipermail archiving, so + # we will synthesize one. Be sure to use the format searched + # for by mailbox.UnixMailbox._isrealfromline(). BAW: We use + # the -bounces address here in case any downstream clients use + # the envelope sender for bounces; I'm not sure about this, + # but it's the closest to the old semantics. + lines = ['From %s %s' % (mlist.GetBouncesEmail(), + time.ctime(time.time()))] + lines.extend(headers) + lines.append('') + lines.extend(body) + lines.append('') + p = Parser(Message.Message) + try: + msg = p.parsestr(NL.join(lines)) + except email.Errors.MessageError, e: + syslog('fromusenet', + 'email package exception for %s:%d\n%s', + mlist.linked_newsgroup, num, e) + raise _ContinueLoop + if found_to: + del msg['X-Originally-To'] + msg['X-Originally-To'] = msg['To'] + del msg['To'] + msg['To'] = mlist.GetListEmail() + # Post the message to the locked list + inq = get_switchboard(mm_cfg.INQUEUE_DIR) + inq.enqueue(msg, + listname = mlist.internal_name(), + fromusenet = 1) + syslog('fromusenet', + 'posted to list %s: %7d' % (listname, num)) + except nntplib.NNTPError, e: + syslog('fromusenet', + 'NNTP error for list %s: %7d' % (listname, num)) + syslog('fromusenet', str(e)) + except _ContinueLoop: + continue + # Even if we don't post the message because it was seen on the + # list already, update the watermark + mlist.usenet_watermark = num + + + +def process_lists(glock): + for listname in Utils.list_names(): + glock.refresh() + # Open the list unlocked just to check to see if it is gating news to + # mail. If not, we're done with the list. Otherwise, lock the list + # and gate the group. + mlist = MailList.MailList(listname, lock=0) + if not mlist.gateway_to_mail: + continue + # Get the list's watermark, i.e. the last article number that we gated + # from news to mail. `None' means that this list has never polled its + # newsgroup and that we should do a catch up. + watermark = getattr(mlist, 'usenet_watermark', None) + # Open the newsgroup, but let most exceptions percolate up. + try: + conn, first, last = open_newsgroup(mlist) + except (socket.error, nntplib.NNTPError): + break + syslog('fromusenet', '%s: [%d..%d]' % (listname, first, last)) + try: + try: + if watermark is None: + mlist.Lock(timeout=mm_cfg.LIST_LOCK_TIMEOUT) + # This is the first time we've tried to gate this + # newsgroup. We essentially do a mass catch-up, otherwise + # we'd flood the mailing list. + mlist.usenet_watermark = last + syslog('fromusenet', '%s caught up to article %d' % + (listname, last)) + else: + # The list has been polled previously, so now we simply + # grab all the messages on the newsgroup that have not + # been seen by the mailing list. The first such article + # is the maximum of the lowest article available in the + # newsgroup and the watermark. It's possible that some + # articles have been expired since the last time gate_news + # has run. Not much we can do about that. + start = max(watermark+1, first) + if start > last: + syslog('fromusenet', 'nothing new for list %s' % + listname) + else: + mlist.Lock(timeout=mm_cfg.LIST_LOCK_TIMEOUT) + syslog('fromusenet', 'gating %s articles [%d..%d]' % + (listname, start, last)) + # Use last+1 because poll_newsgroup() employes a for + # loop over range, and this will not include the last + # element in the list. + poll_newsgroup(mlist, conn, start, last+1, glock) + except LockFile.TimeOutError: + syslog('fromusenet', 'Could not acquire list lock: %s' % + listname) + finally: + if mlist.Locked(): + mlist.Save() + mlist.Unlock() + syslog('fromusenet', '%s watermark: %d' % + (listname, mlist.usenet_watermark)) + + + +def main(): + lock = LockFile.LockFile(GATENEWS_LOCK_FILE, + # it's okay to hijack this + lifetime=LOCK_LIFETIME) + try: + lock.lock(timeout=0.5) + except LockFile.TimeOutError: + syslog('fromusenet', 'Could not acquire gate_news lock') + return + try: + process_lists(lock) + finally: + clearcache() + lock.unlock(unconditionally=1) + + + +if __name__ == '__main__': + try: + opts, args = getopt.getopt(sys.argv[1:], 'h', ['help']) + except getopt.error, msg: + usage(1, msg) + + if args: + usage(1, 'No args are expected') + + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + + main() |