diff options
author | Mark Sapiro <mark@msapiro.net> | 2018-06-14 10:04:45 -0700 |
---|---|---|
committer | Mark Sapiro <mark@msapiro.net> | 2018-06-14 10:04:45 -0700 |
commit | 022af5c3d16c45b2787da343ffd856e8d65ffb5f (patch) | |
tree | 581f008342ee022f0a08b88f693a8528966a9aa4 | |
parent | d4bf95bd97ebaeebb5291c4f7d9f1d90ff7414fd (diff) | |
parent | 04b2ea680f17d85a87d4fd570b9d40bc5f9932a0 (diff) | |
download | mailman2-022af5c3d16c45b2787da343ffd856e8d65ffb5f.tar.gz mailman2-022af5c3d16c45b2787da343ffd856e8d65ffb5f.tar.xz mailman2-022af5c3d16c45b2787da343ffd856e8d65ffb5f.zip |
Added contrib/sitemapgen.
Diffstat (limited to '')
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | contrib/README.sitemapgen | 11 | ||||
-rwxr-xr-x | contrib/sitemapgen | 164 |
3 files changed, 180 insertions, 0 deletions
#! @PYTHON@

# For a given listname, this script generates sitemap.xml.gz files
# under archives/private/<listname>/
#
# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
#
# graciously hacked from bin/sync_members
#
# Contributed by Jim Popovitch (see the "Miscellaneous" entry in NEWS).
#
# Installation (from contrib/README.sitemapgen): copy
# mailman/contrib/sitemapgen to mailman/bin/ and execute it like so:
#
#     /path/to/mailman/bin/sitemapgen <listname>
#
# Alternatively add it to cron like so:
#
#     5 0 * * * for l in `/path/to/mailman/bin/list_lists -apb`; do
#         /path/to/mailman/bin/sitemapgen $l; done

"""Build Sitemap files for an archive

Usage: %(program)s [options] listname

Where `options' are:

    --help
    -h
        Print this message.

    --listname=listname
    -l listname
        Alternative way to name the list; equivalent to giving listname as
        the positional argument.

    listname
        Required.  This specifies the list to generate sitemaps for.
"""

import os
import sys
import paths
# Import this /after/ paths so that the sys.path is properly hacked
import email.Utils
from Mailman import MailList
from Mailman import Errors
from Mailman import Utils
from Mailman.UserDesc import UserDesc
from Mailman import mm_cfg
from Mailman.i18n import _
import getopt
import posixpath
import re
import time
from stat import *
from datetime import datetime, timedelta
import gzip
from xml.sax.saxutils import escape


# Sitemap priorities as [age-in-weeks, priority, changefreq] entries, ordered
# from youngest to oldest.  A file gets the first entry whose age limit it is
# younger than; a file older than every limit gets the last entry.
priorities = ([1, 1.0, "daily"],
              [4, 1.0, "weekly"],
              [30, 1.0, "monthly"],
              [52, 0.9, "never"],
              [100, 0.8, "never"],
              [200, 0.7, "never"],
              [300, 0.6, "never"],
              [400, 0.5, "never"])

# Name of the sitemap written in each archive folder and at the archive root.
SITEMAP_NAME = 'sitemap.xml.gz'
SITEMAP_XMLNS = 'http://www.sitemaps.org/schemas/sitemap/0.9'

# Archive folders are the directories whose name starts with a digit; the
# pages listed are the files whose name contains "<digits>.html".
_ARC_DIR_RE = re.compile(r'^\d+')
_ARC_FILE_RE = re.compile(r'\d+\.html')

# Referenced by the usage text above as %(program)s.
program = sys.argv[0]


def usage(code, msg=''):
    """Print the usage text, and msg if given, then exit with status code.

    Output goes to stderr when code is non-zero and to stdout otherwise.
    """
    if code:
        fd = sys.stderr
    else:
        fd = sys.stdout
    print >> fd, _(__doc__)
    if msg:
        print >> fd, msg
    sys.exit(code)


def _age_bucket(stamp, now):
    """Return the (priority, changefreq) pair for a file modified at stamp."""
    for weeks, priority, changefreq in priorities:
        if stamp > now - timedelta(weeks=weeks):
            break
    # When no limit matched, the loop variables still hold the last entry.
    return priority, changefreq


def _write_gzipped(filename, chunks):
    """Write each string in chunks to a new gzip file, always closing it."""
    fp = gzip.open(filename, 'wb')
    try:
        for chunk in chunks:
            fp.write(chunk)
    finally:
        fp.close()


def _urlset_chunks(folderdir, folderurl, now):
    """Yield the XML text of a <urlset> listing the pages in folderdir."""
    yield '<?xml version="1.0" encoding="UTF-8"?>\n'
    yield '<urlset xmlns="' + SITEMAP_XMLNS + '">\n'
    # Sorted so that repeated runs produce the same file.
    for name in sorted(os.listdir(folderdir)):
        if not _ARC_FILE_RE.search(name):
            continue
        mtime = os.stat(os.path.join(folderdir, name))[ST_MTIME]
        priority, changefreq = _age_bucket(datetime.fromtimestamp(mtime), now)
        # URLs are joined with '/' and XML-escaped (&, <, >) before writing.
        loc = escape(posixpath.join(folderurl, name))
        lastmod = time.strftime("%Y-%m-%d", time.gmtime(mtime))
        yield ' <url>\n <loc>' + loc + '</loc>\n'
        yield ' <lastmod>' + lastmod + '</lastmod>\n'
        yield ' <changefreq>' + changefreq + '</changefreq>\n'
        yield ' <priority>' + str(priority) + '</priority>\n'
        yield ' </url>\n'
    yield '</urlset>\n'


def _sitemapindex_chunks(sitemap_urls, now):
    """Yield the XML text of a <sitemapindex> naming every URL given."""
    lastmod = now.strftime("%Y-%m-%d")
    yield '<?xml version="1.0" encoding="UTF-8"?>\n'
    yield '<sitemapindex xmlns="' + SITEMAP_XMLNS + '">\n'
    for url in sitemap_urls:
        yield ' <sitemap>\n <loc>' + escape(url) + '</loc>\n'
        yield ' <lastmod>' + lastmod + '</lastmod>\n'
        yield ' </sitemap>\n'
    yield '</sitemapindex>\n'


def main():
    """Parse the command line and rebuild the sitemap files for one list.

    Writes one sitemap.xml.gz into every archive folder of the list and a
    sitemap index, also named sitemap.xml.gz, into the archive root.  Exits
    with status 1 on a usage error or when the list does not exist.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hl:',
                                   ['help', 'listname='])
    except getopt.error:
        usage(1, str(sys.exc_info()[1]))

    listname = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--listname'):
            listname = arg

    # Without -l the first positional argument names the list; any further
    # arguments are ignored.
    if listname is None and args:
        listname = args[0]
    if listname is None:
        usage(1, _('Must have a listname'))
    listname = listname.lower()

    # Open the list without locking it (lock=0); it is only asked for its
    # archive directory and base archive URL.
    try:
        mlist = MailList.MailList(listname, lock=0)
    except Errors.MMListError:
        print >> sys.stderr, _('No such list: %(listname)s')
        sys.exit(1)

    rootdir = mlist.archive_dir()
    rooturl = mlist.GetBaseArchiveURL()
    now = datetime.now()

    # umask 022, set once so that it also covers the index file written
    # after the loop, even when no archive folder is found.
    os.umask(S_IWGRP | S_IWOTH)

    sitemaps = []
    for folder in sorted(os.listdir(rootdir)):
        path = os.path.join(rootdir, folder)
        if not os.path.isdir(path) or not _ARC_DIR_RE.search(folder):
            continue
        folderurl = posixpath.join(rooturl, folder)
        _write_gzipped(os.path.join(path, SITEMAP_NAME),
                       _urlset_chunks(path, folderurl, now))
        sitemaps.append(posixpath.join(folderurl, SITEMAP_NAME))

    # write out the sitemapindex file
    _write_gzipped(os.path.join(rootdir, SITEMAP_NAME),
                   _sitemapindex_chunks(sitemaps, now))


if __name__ == '__main__':
    main()