diff options
Diffstat (limited to 'bin/cleanarch')
-rw-r--r-- | bin/cleanarch | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/bin/cleanarch b/bin/cleanarch new file mode 100644 index 00000000..85a8df6a --- /dev/null +++ b/bin/cleanarch @@ -0,0 +1,165 @@ +#! @PYTHON@ + +# Copyright (C) 2001,2002 by the Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +"""Clean up an .mbox archive file. + +The archiver looks for Unix-From lines separating messages in an mbox archive +file. For compatibility, it specifically looks for lines that start with +"From " -- i.e. the letters capital-F, lowercase-r, o, m, space, ignoring +everything else on the line. + +Normally, any lines that start "From " in the body of a message should be +escaped such that a > character is actually the first on a line. It is +possible though that body lines are not actually escaped. This script +attempts to fix these by doing a stricter test of the Unix-From lines. Any +lines that start "From " but do not pass this stricter test are escaped with a +> character. + +Usage: cleanarch [options] < inputfile > outputfile +Options: + -s n + --status=n + Print a # character every n lines processed + + -q / --quiet + Don't print changed line information to standard error. + + -n / --dry-run + Don't actually output anything. + + -h / --help + Print this message and exit +""" + +import sys +import re +import getopt +import mailbox + +import paths +from Mailman.i18n import _ + +cre = re.compile(mailbox.UnixMailbox._fromlinepattern) + +# From RFC 2822, a header field name must contain only characters from 33-126 +# inclusive, excluding colon. I.e. from oct 41 to oct 176 less oct 072. Must +# use re.match() so that it's anchored at the beginning of the line. +fre = re.compile(r'[\041-\071\073-\0176]+') + + + +def usage(code, msg=''): + if code: + fd = sys.stderr + else: + fd = sys.stdout + print >> fd, _(__doc__) + if msg: + print >> fd, msg + sys.exit(code) + + + +def escape_line(line, lineno, quiet, output): + if output: + sys.stdout.write('>' + line) + if not quiet: + print >> sys.stderr, _('Unix-From line changed: %(lineno)d') + print >> sys.stderr, line[:-1] + + + +def main(): + try: + opts, args = getopt.getopt( + sys.argv[1:], 'hqns:', + ['help', 'quiet', 'dry-run', 'status=']) + except getopt.error, msg: + usage(1, msg) + + quiet = 0 + output = 1 + status = -1 + + for opt, arg in opts: + if opt in ('-h', '--help'): + usage(0) + elif opt in ('-q', '--quiet'): + quiet = 1 + elif opt in ('-n', '--dry-run'): + output = 0 + elif opt in ('-s', '--status'): + try: + status = int(arg) + except ValueError: + usage(1, _('Bad status number: %(arg)s')) + + if args: + usage(1) + + lineno = 0 + statuscnt = 0 + messages = 0 + while 1: + lineno += 1 + line = sys.stdin.readline() + if not line: + break + if line.startswith('From '): + if cre.match(line): + # This is a real Unix-From line. But it could be a message + # /about/ Unix-From lines, so as a second order test, make + # sure there's at least one RFC 2822 header following + nextline = sys.stdin.readline() + lineno += 1 + if not nextline: + # It was the last line of the mbox, so it couldn't have + # been a Unix-From + escape_line(line, lineno, quiet, output) + break + fieldname = nextline.split(':', 1) + if len(fieldname) < 2 or not fre.match(nextline): + # The following line was not a header, so this wasn't a + # valid Unix-From + escape_line(line, lineno, quiet, output) + if output: + sys.stdout.write(nextline) + else: + # It's a valid Unix-From line + messages += 1 + if output: + sys.stdout.write(line) + sys.stdout.write(nextline) + else: + # This is a bogus Unix-From line + escape_line(line, lineno, quiet, output) + elif output: + # Any old line + sys.stdout.write(line) + if status > 0 and (lineno % status) == 0: + sys.stderr.write('#') + statuscnt += 1 + if statuscnt > 50: + print >> sys.stderr + statuscnt = 0 + print >> sys.stderr, _('%(messages)d messages found') + + + +if __name__ == '__main__': + main() |