aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--NEWS5
-rw-r--r--contrib/README.sitemapgen11
-rwxr-xr-xcontrib/sitemapgen164
3 files changed, 180 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 23cb0271..b52e8be8 100644
--- a/NEWS
+++ b/NEWS
@@ -87,6 +87,11 @@ Here is a history of user visible changes to Mailman.
- Quoting in the mailman-config command has been changed from double to
single quotes to allow double-quoted parameters. (LP:1774986)
+ Miscellaneous
+
+ - Added to the contrib directory, a script from Jim Popovitch to generate
+ Sitemap files for a list's archive.
+
2.1.26 (04-Feb-2018)
Security
diff --git a/contrib/README.sitemapgen b/contrib/README.sitemapgen
new file mode 100644
index 00000000..d4daa401
--- /dev/null
+++ b/contrib/README.sitemapgen
@@ -0,0 +1,11 @@
+Hacked from bin/sync_members
+
+Copy mailman/contrib/sitemapgen to mailman/bin/ and execute it as so:
+
+ /path/to/mailman/bin/sitemapgen -l <listname>
+
+Alternatively add it to cron like so:
+
+5 0 * * * for l in `/path/to/mailman/bin/list_lists -apb`; do
+ /path/to/mailman/bin/sitemapgen $l; done
+
diff --git a/contrib/sitemapgen b/contrib/sitemapgen
new file mode 100755
index 00000000..21a4ffb2
--- /dev/null
+++ b/contrib/sitemapgen
@@ -0,0 +1,164 @@
+#! @PYTHON@
+
+# For a given listname, this script generates sitemap.xml.gz files
+# under archives/private/<listname>/
+#
+# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
+#
+# graciously hacked from bin/sync_members
+#
+
+"""Build Sitemap files for an archive
+
+Usage: %(program)s [options] listname
+
+Where `options' are:
+
+ --help
+ -h
+ Print this message.
+
+ listname
+ Required. This specifies the list to generate sitemaps for.
+"""
+
+import os
+import sys
+import paths
+# Import this /after/ paths so that the sys.path is properly hacked
+import email.Utils
+from Mailman import MailList
+from Mailman import Errors
+from Mailman import Utils
+from Mailman.UserDesc import UserDesc
+from Mailman import mm_cfg
+from Mailman.i18n import _
+import getopt
+import re
+import time
+from stat import *
+from datetime import datetime, timedelta
+import gzip
+
+
+# sitemap priorities in age-in-weeks/priority/changefreq tuples
+priorities = ([1, 1.0, "daily"],
+ [4, 1.0, "weekly"],
+ [30, 1.0, "monthly"],
+ [52, 0.9, "never"],
+ [100, 0.8, "never"],
+ [200, 0.7, "never"],
+ [300, 0.6, "never"],
+ [400, 0.5, "never"])
+
+
+program = sys.argv[0]
+
+def usage(code, msg=''):
+ if code:
+ fd = sys.stderr
+ else:
+ fd = sys.stdout
+ print >> fd, _(__doc__)
+ if msg:
+ print >> fd, msg
+ sys.exit(code)
+
+
+
+def main():
+ listname = None
+
+ # TBD: can't use getopt with this command line syntax, which is broken and
+ # should be changed to be getopt compatible.
+ i = 1
+ while i < len(sys.argv):
+ opt = sys.argv[i]
+ if opt in ('-h', '--help'):
+ usage(0)
+ else:
+ try:
+ listname = sys.argv[i].lower()
+ i += 1
+ except IndexError:
+ usage(1, _('No listname given'))
+ break
+
+ if listname is None:
+ usage(1, _('Must have a listname'))
+
+ # get the locked list object
+ try:
+ mlist = MailList.MailList(listname, lock=0)
+ except Errors.MMListError, e:
+ print _('No such list: %(listname)s')
+ sys.exit(1)
+
+ rootdir = mlist.archive_dir()
+ rooturl = mlist.GetBaseArchiveURL()
+
+ reArcPath = re.compile(r'^\d+')
+ reArcFile = re.compile(r'\d+\.html')
+
+ sitemaps = []
+
+ now = datetime.now()
+
+ for folder in os.listdir(rootdir):
+ path = os.path.join(rootdir,folder)
+ if not os.path.isdir(path) or not reArcPath.search(folder):
+ continue
+
+ dirtime = os.path.getmtime(path)
+
+ os.umask(0022)
+ sitemap = os.path.join(rootdir,folder,"sitemap.xml.gz")
+ f = gzip.open(sitemap, 'wb')
+
+ f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ f.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
+
+ for file in os.listdir(path):
+ if not reArcFile.search(file):
+ continue
+
+ # get timestamp of file
+ st = os.stat(os.path.join(rootdir,folder,file))
+ mtime = st[ST_MTIME] #modification time
+
+ ts = datetime.fromtimestamp(mtime)
+ for weeks, priority, changefreq in priorities:
+ if ts > now - timedelta(weeks = weeks):
+ break
+
+ f.write(' <url>\n <loc>' + os.path.join(rooturl,folder,file) + '</loc>\n')
+ f.write(' <lastmod>' + time.strftime("%Y-%m-%d",time.gmtime(mtime)) + '</lastmod>\n')
+ f.write(' <changefreq>' + changefreq + '</changefreq>\n')
+ f.write(' <priority>' + str(priority) + '</priority>\n')
+ f.write(' </url>\n')
+
+ f.write('</urlset>\n')
+ f.close()
+
+ sitemaps.append((os.path.join(rooturl,folder,"sitemap.xml.gz")))
+
+
+ # write out the sitemapindex file
+ sitemapindex = os.path.join(rootdir,"sitemap.xml.gz")
+ f = gzip.open(sitemapindex, 'wb')
+
+ f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ f.write('<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
+
+ for sitemap in sitemaps:
+ f.write(' <sitemap>\n <loc>' + sitemap + '</loc>\n')
+ f.write(' <lastmod>' + time.strftime("%Y-%m-%d", now.timetuple()) + '</lastmod>\n')
+ f.write(' </sitemap>\n')
+
+ f.write('</sitemapindex>\n')
+ f.close()
+
+
+if __name__ == '__main__':
+ main()
+