#! @PYTHON@
#
# For a given listname, this script generates sitemap.xml.gz files
# under archives/private/<listname>/
#
# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
#
# graciously hacked from bin/sync_members
#
# Provenance: added as contrib/sitemapgen by commit
# 04b2ea680f17d85a87d4fd570b9d40bc5f9932a0 (Jim Popovitch,
# Thu, 14 Jun 2018 01:59:15 +0000), "Added contrib/sitemapgen a python
# script to generate sitemap.xml.gz files".  The same commit added
# contrib/README.sitemapgen, which reads:
#
#     Hacked from bin/sync_members
#
#     Copy mailman/contrib/sitemapgen to mailman/bin/ and execute it as so:
#
#         /path/to/mailman/bin/sitemapgen -l <listname>
#
#     Alternatively add it to cron like so:
#
#     5 0 * * * for l in `/path/to/mailman/bin/list_lists -apb`; do
#       /path/to/mailman/bin/sitemapgen $l; done
#
# NOTE(review): the "<listname>" placeholders above (and in the path at the
# top of this header) were missing from the copy this file was rebuilt from;
# they are presumed to have been angle-bracket tokens lost in extraction.
# Confirm against the upstream README.

"""Build Sitemap files for an archive

Usage: %(program)s [options] listname

Where `options' are:

    --help
    -h
        Print this message.

    -l
        Optional.  Accepted (and ignored) in front of listname.

    listname
        Required.  This specifies the list to generate sitemaps for.
"""

import os
import sys
import paths
# Import this /after/ paths so that the sys.path is properly hacked
import email.Utils
from Mailman import MailList
from Mailman import Errors
from Mailman import Utils
from Mailman.UserDesc import UserDesc
from Mailman import mm_cfg
from Mailman.i18n import _
import getopt
import re
import time
from stat import *
from datetime import datetime, timedelta
import gzip
from xml.sax.saxutils import escape as xml_escape


# Sitemap hints as [max age in weeks, priority, changefreq] rows, youngest
# first.  A page is given the first row whose age limit it is still within;
# a page older than every limit is given the last row.
priorities = ([1,   1.0, "daily"],
              [4,   1.0, "weekly"],
              [30,  1.0, "monthly"],
              [52,  0.9, "never"],
              [100, 0.8, "never"],
              [200, 0.7, "never"],
              [300, 0.6, "never"],
              [400, 0.5, "never"])

# Name of every sitemap file written, both per-folder and the top-level index.
SITEMAP_FILE = "sitemap.xml.gz"

# XML boilerplate required by the sitemap protocol.
XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>\n'
SITEMAP_NS = 'http://www.sitemaps.org/schemas/sitemap/0.9'

# Archive sub-directories start with digits; message pages are <digits>.html.
reArcPath = re.compile(r'^\d+')
reArcFile = re.compile(r'\d+\.html')


program = sys.argv[0]

def usage(code, msg=''):
    """Print the usage text, then `msg' if given, and exit with `code'.

    Output goes to stderr when `code' is non-zero and to stdout otherwise.
    """
    if code:
        fd = sys.stderr
    else:
        fd = sys.stdout
    print >> fd, _(__doc__)
    if msg:
        print >> fd, msg
    sys.exit(code)



def _url_join(base, *parts):
    """Join URL path components with exactly one '/' after `base'."""
    return '/'.join([base.rstrip('/')] + list(parts))


def _sitemap_hints(ts, now):
    """Return the (priority, changefreq) pair for a page last modified at ts."""
    for weeks, priority, changefreq in priorities:
        if ts > now - timedelta(weeks=weeks):
            break
    # Falling off the end of the loop deliberately keeps the last (oldest) row.
    return priority, changefreq


def _urlset_lines(urls):
    """Return the lines of a <urlset> document.

    `urls' is a sequence of (loc, mtime, priority, changefreq) tuples, where
    mtime is a timestamp in seconds since the epoch.
    """
    lines = [XML_DECLARATION, '<urlset xmlns="%s">\n' % SITEMAP_NS]
    for loc, mtime, priority, changefreq in urls:
        lastmod = time.strftime("%Y-%m-%d", time.gmtime(mtime))
        lines.append('  <url>\n    <loc>' + xml_escape(loc) + '</loc>\n')
        lines.append('    <lastmod>' + lastmod + '</lastmod>\n')
        lines.append('    <changefreq>' + changefreq + '</changefreq>\n')
        lines.append('    <priority>' + str(priority) + '</priority>\n')
        lines.append('  </url>\n')
    lines.append('</urlset>\n')
    return lines


def _sitemapindex_lines(sitemaps, lastmod):
    """Return the lines of a <sitemapindex> document listing `sitemaps'.

    `sitemaps' is a sequence of sitemap URLs; `lastmod' is the YYYY-MM-DD
    date recorded for every entry.
    """
    lines = [XML_DECLARATION, '<sitemapindex xmlns="%s">\n' % SITEMAP_NS]
    for loc in sitemaps:
        lines.append('  <sitemap>\n    <loc>' + xml_escape(loc) + '</loc>\n')
        lines.append('    <lastmod>' + lastmod + '</lastmod>\n')
        lines.append('  </sitemap>\n')
    lines.append('</sitemapindex>\n')
    return lines


def _write_gzipped(path, lines):
    """Write `lines' to a gzip file at `path', closing it even on error."""
    f = gzip.open(path, 'wb')
    try:
        for line in lines:
            f.write(line)
    finally:
        f.close()


def _folder_urls(path, baseurl, now):
    """Collect the sitemap entries for the message pages found in `path'.

    Returns a list of (loc, mtime, priority, changefreq) tuples, one for each
    file in `path' whose name matches reArcFile, with loc built on `baseurl'.
    """
    urls = []
    for name in sorted(os.listdir(path)):
        if not reArcFile.search(name):
            continue
        try:
            mtime = os.stat(os.path.join(path, name)).st_mtime
        except OSError:
            # The file vanished between listdir() and stat(); skip it rather
            # than abort the whole run.
            continue
        priority, changefreq = _sitemap_hints(datetime.fromtimestamp(mtime),
                                              now)
        urls.append((_url_join(baseurl, name), mtime, priority, changefreq))
    return urls



def main():
    """Generate sitemap.xml.gz files for the list named on the command line.

    One urlset file is written into every archive sub-directory whose name
    starts with digits, and one sitemapindex file referencing all of them is
    written into the list's archive directory.  Exits with status 1 when no
    list name is given or the list does not exist.
    """
    listname = None

    # Hand-rolled parsing: the first argument that is not an option is the
    # list name and anything after it is ignored.
    for opt in sys.argv[1:]:
        if opt in ('-h', '--help'):
            usage(0)
        if opt == '-l':
            # contrib/README.sitemapgen shows the script being invoked with
            # -l; without this it would be mistaken for the list name.
            continue
        listname = opt.lower()
        break

    if listname is None:
        usage(1, _('Must have a listname'))

    # Only the archive directory and archive URL are read from the list, so
    # it is opened without taking the list lock.
    try:
        mlist = MailList.MailList(listname, lock=0)
    except Errors.MMListError:
        print >> sys.stderr, _('No such list: %(listname)s')
        sys.exit(1)

    rootdir = mlist.archive_dir()
    rooturl = mlist.GetBaseArchiveURL()
    now = datetime.now()

    # Mask 022 (no group/other write), set once so that it also applies to
    # the index file when there are no archive folders at all.
    os.umask(S_IWGRP | S_IWOTH)

    sitemaps = []
    for folder in sorted(os.listdir(rootdir)):
        path = os.path.join(rootdir, folder)
        if not os.path.isdir(path) or not reArcPath.search(folder):
            continue

        urls = _folder_urls(path, _url_join(rooturl, folder), now)
        _write_gzipped(os.path.join(path, SITEMAP_FILE), _urlset_lines(urls))
        sitemaps.append(_url_join(rooturl, folder, SITEMAP_FILE))

    # write out the sitemapindex file
    today = time.strftime("%Y-%m-%d", now.timetuple())
    _write_gzipped(os.path.join(rootdir, SITEMAP_FILE),
                   _sitemapindex_lines(sitemaps, today))


if __name__ == '__main__':
    main()