aboutsummaryrefslogtreecommitdiffstats
path: root/bin/cleanarch
diff options
context:
space:
mode:
Diffstat (limited to 'bin/cleanarch')
-rw-r--r--bin/cleanarch165
1 files changed, 165 insertions, 0 deletions
diff --git a/bin/cleanarch b/bin/cleanarch
new file mode 100644
index 00000000..85a8df6a
--- /dev/null
+++ b/bin/cleanarch
@@ -0,0 +1,165 @@
+#! @PYTHON@
+
+# Copyright (C) 2001,2002 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""Clean up an .mbox archive file.
+
+The archiver looks for Unix-From lines separating messages in an mbox archive
+file. For compatibility, it specifically looks for lines that start with
+"From " -- i.e. the letters capital-F, lowercase-r, o, m, space, ignoring
+everything else on the line.
+
+Normally, any lines that start "From " in the body of a message should be
+escaped such that a > character is actually the first on a line. It is
+possible though that body lines are not actually escaped. This script
+attempts to fix these by doing a stricter test of the Unix-From lines. Any
+lines that start "From " but do not pass this stricter test are escaped with a
+> character.
+
+Usage: cleanarch [options] < inputfile > outputfile
+Options:
+ -s n
+ --status=n
+ Print a # character every n lines processed
+
+ -q / --quiet
+ Don't print changed line information to standard error.
+
+ -n / --dry-run
+ Don't actually output anything.
+
+ -h / --help
+ Print this message and exit
+"""
+
+import sys
+import re
+import getopt
+import mailbox
+
+import paths
+from Mailman.i18n import _
+
# Strict matcher for genuine Unix-From separator lines.  It reuses the
# From-line pattern attribute carried by the standard library's UnixMailbox
# class rather than spelling out a pattern of its own.
cre = re.compile(
    mailbox.UnixMailbox._fromlinepattern)
+
# From RFC 2822, a header field name must contain only characters from 33-126
# inclusive, excluding colon.  I.e. from oct 41 to oct 176 less oct 072.  Must
# use re.match() so that it's anchored at the beginning of the line.
#
# The upper bound is deliberately written with exactly three octal digits
# (\176).  The re module reads at most three digits of an octal escape, so a
# four digit spelling such as \0176 is taken as \017 followed by a literal
# '6', which turns the second range into an invalid (reversed) one.
fre = re.compile(r'[\041-\071\073-\176]+')
+
+
+
def usage(code, msg=''):
    """Print the module's help text, then exit with status `code`.

    code -- process exit status; a true (non-zero) value also sends the text
            to standard error instead of standard output.
    msg  -- optional extra message printed after the help text.

    This function never returns; it always ends in sys.exit(code).
    """
    # Default to stdout for a plain help request; errors go to stderr.
    stream = sys.stdout
    if code:
        stream = sys.stderr
    print >> stream, _(__doc__)
    if msg:
        print >> stream, msg
    sys.exit(code)
+
+
+
def escape_line(line, lineno, quiet, output):
    """Emit `line` escaped with a leading '>' and report the change.

    line   -- the offending "From " line exactly as read from the mbox
              (normally, but not necessarily, newline terminated).
    lineno -- line number used in the report written to standard error.
    quiet  -- when true, nothing is written to standard error.
    output -- when true, the escaped line is written to standard output.
    """
    if output:
        sys.stdout.write('>' + line)
    if quiet:
        return
    # NOTE(review): the %(lineno)d placeholder is not interpolated here, so
    # _() presumably fills it in from this function's local variables --
    # keep the parameter named `lineno`; confirm against Mailman.i18n.
    print >> sys.stderr, _('Unix-From line changed: %(lineno)d')
    # Strip only a real trailing newline.  The last line of a file may not
    # have one, and blindly dropping the final character would then cut off
    # part of the text being reported.
    if line.endswith('\n'):
        line = line[:-1]
    print >> sys.stderr, line
+
+
+
def _is_header_line(text):
    """Return true if `text` begins with an RFC 2822 style header field.

    The line must start with a non-empty run of field-name characters (as
    accepted by `fre`) that is followed, after optional white space, by a
    colon.  An empty string (end of file) is never a header.
    """
    mo = fre.match(text)
    if not mo:
        return 0
    # Checking what follows the field name matters: merely requiring a colon
    # somewhere on the line would accept e.g. another Unix-From line, whose
    # time stamp contains colons.
    return text[mo.end():].lstrip().startswith(':')


def main():
    """Filter an mbox from standard input to standard output.

    Command line options (see the module docstring) select quiet mode,
    dry-run mode and the progress indicator interval.  Every input line
    starting with "From " is kept as is only when it matches the strict
    Unix-From pattern `cre` AND the line after it looks like a message
    header; otherwise it is passed to escape_line().  All other lines are
    copied through unchanged (nothing is copied in dry-run mode).  A count of
    the accepted Unix-From lines is printed to standard error at the end.

    Exits through usage() on bad options, a bad status number, or any
    positional argument.
    """
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hqns:',
            ['help', 'quiet', 'dry-run', 'status='])
    except getopt.error:
        # sys.exc_info() fetches the exception object without relying on a
        # version specific spelling of the except clause.
        usage(1, sys.exc_info()[1])

    quiet = 0
    output = 1
    status = -1

    # NOTE(review): the %(arg)s and %(messages)d placeholders below are not
    # interpolated here, so _() presumably fills them in from this function's
    # local variables -- keep the names `arg` and `messages`; confirm against
    # Mailman.i18n.
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-q', '--quiet'):
            quiet = 1
        elif opt in ('-n', '--dry-run'):
            output = 0
        elif opt in ('-s', '--status'):
            try:
                status = int(arg)
            except ValueError:
                usage(1, _('Bad status number: %(arg)s'))

    if args:
        usage(1)

    lineno = 0
    statuscnt = 0
    messages = 0
    # A look-ahead line that has been read from stdin but not yet processed.
    # None means "nothing pending"; an empty string records that end of file
    # was already seen while looking ahead.
    pending = None
    while 1:
        if pending is None:
            line = sys.stdin.readline()
        else:
            line = pending
            pending = None
        if not line:
            break
        # Count the line only once it is actually being processed, so that
        # `lineno` is always the number of `line` itself, both for the
        # reports made by escape_line() and for the progress indicator.
        lineno += 1
        if line.startswith('From '):
            if cre.match(line):
                # This is a real Unix-From line.  But it could be a message
                # /about/ Unix-From lines, so as a second order test, make
                # sure there's at least one RFC 2822 header following.  The
                # look-ahead line is pushed back rather than written out, so
                # that it goes through the very same checks on the next
                # iteration (it may itself start with "From ").
                pending = sys.stdin.readline()
                if _is_header_line(pending):
                    # It's a valid Unix-From line
                    messages += 1
                    if output:
                        sys.stdout.write(line)
                else:
                    # Either this was the last line of the mbox or the
                    # following line was not a header, so this wasn't a
                    # valid Unix-From
                    escape_line(line, lineno, quiet, output)
            else:
                # This is a bogus Unix-From line
                escape_line(line, lineno, quiet, output)
        elif output:
            # Any old line
            sys.stdout.write(line)
        if status > 0 and (lineno % status) == 0:
            sys.stderr.write('#')
            statuscnt += 1
            if statuscnt > 50:
                # Start a new row of progress marks
                print >> sys.stderr
                statuscnt = 0
    print >> sys.stderr, _('%(messages)d messages found')
+
+
+
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()