aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/sitemapgen
diff options
context:
space:
mode:
authorJim Popovitch <jimpop@domainmail.org>2018-06-14 01:59:15 +0000
committerJim Popovitch <jimpop@domainmail.org>2018-06-14 01:59:15 +0000
commit04b2ea680f17d85a87d4fd570b9d40bc5f9932a0 (patch)
tree25466ceba6a092ea37577e18910fa131786e5224 /contrib/sitemapgen
parentd4bf95bd97ebaeebb5291c4f7d9f1d90ff7414fd (diff)
downloadmailman2-04b2ea680f17d85a87d4fd570b9d40bc5f9932a0.tar.gz
mailman2-04b2ea680f17d85a87d4fd570b9d40bc5f9932a0.tar.xz
mailman2-04b2ea680f17d85a87d4fd570b9d40bc5f9932a0.zip
Added contrib/sitemapgen, a Python script to generate sitemap.xml.gz files
Diffstat (limited to 'contrib/sitemapgen')
-rwxr-xr-xcontrib/sitemapgen164
1 files changed, 164 insertions, 0 deletions
diff --git a/contrib/sitemapgen b/contrib/sitemapgen
new file mode 100755
index 00000000..21a4ffb2
--- /dev/null
+++ b/contrib/sitemapgen
@@ -0,0 +1,164 @@
+#! @PYTHON@
+
+# For a given listname, this script generates sitemap.xml.gz files
+# under archives/private/<listname>/
+#
+# Copyright (C) 1998-2018 by the Free Software Foundation, Inc.
+#
+# graciously hacked from bin/sync_members
+#
+
+"""Build Sitemap files for an archive
+
+Usage: %(program)s [options] listname
+
+Where `options' are:
+
+ --help
+ -h
+ Print this message.
+
+ listname
+ Required. This specifies the list to generate sitemaps for.
+"""
+
+import os
+import sys
+import paths
+# Import this /after/ paths so that the sys.path is properly hacked
+import email.Utils
+from Mailman import MailList
+from Mailman import Errors
+from Mailman import Utils
+from Mailman.UserDesc import UserDesc
+from Mailman import mm_cfg
+from Mailman.i18n import _
+import getopt
+import re
+import time
+from stat import *
+from datetime import datetime, timedelta
+import gzip
+
+
+# Sitemap hints as [max-age-in-weeks, priority, changefreq] rows, ordered from
+# the youngest age window to the oldest.  main() walks the rows in order and
+# stops at the first row whose window still contains the file's modification
+# time; a file older than every window ends up with the last row's values.
+priorities = ([1, 1.0, "daily"],
+ [4, 1.0, "weekly"],
+ [30, 1.0, "monthly"],
+ [52, 0.9, "never"],
+ [100, 0.8, "never"],
+ [200, 0.7, "never"],
+ [300, 0.6, "never"],
+ [400, 0.5, "never"])
+
+
+# Name the script was invoked as; the module docstring refers to it through
+# its %(program)s placeholder.
+program = sys.argv[0]
+
+def usage(code, msg=''):
+ if code:
+ fd = sys.stderr
+ else:
+ fd = sys.stdout
+ print >> fd, _(__doc__)
+ if msg:
+ print >> fd, msg
+ sys.exit(code)
+
+
+
+def main():
+ listname = None
+
+ # TBD: can't use getopt with this command line syntax, which is broken and
+ # should be changed to be getopt compatible.
+ i = 1
+ while i < len(sys.argv):
+ opt = sys.argv[i]
+ if opt in ('-h', '--help'):
+ usage(0)
+ else:
+ try:
+ listname = sys.argv[i].lower()
+ i += 1
+ except IndexError:
+ usage(1, _('No listname given'))
+ break
+
+ if listname is None:
+ usage(1, _('Must have a listname'))
+
+ # get the locked list object
+ try:
+ mlist = MailList.MailList(listname, lock=0)
+ except Errors.MMListError, e:
+ print _('No such list: %(listname)s')
+ sys.exit(1)
+
+ rootdir = mlist.archive_dir()
+ rooturl = mlist.GetBaseArchiveURL()
+
+ reArcPath = re.compile(r'^\d+')
+ reArcFile = re.compile(r'\d+\.html')
+
+ sitemaps = []
+
+ now = datetime.now()
+
+ for folder in os.listdir(rootdir):
+ path = os.path.join(rootdir,folder)
+ if not os.path.isdir(path) or not reArcPath.search(folder):
+ continue
+
+ dirtime = os.path.getmtime(path)
+
+ os.umask(0022)
+ sitemap = os.path.join(rootdir,folder,"sitemap.xml.gz")
+ f = gzip.open(sitemap, 'wb')
+
+ f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ f.write('<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
+
+ for file in os.listdir(path):
+ if not reArcFile.search(file):
+ continue
+
+ # get timestamp of file
+ st = os.stat(os.path.join(rootdir,folder,file))
+ mtime = st[ST_MTIME] #modification time
+
+ ts = datetime.fromtimestamp(mtime)
+ for weeks, priority, changefreq in priorities:
+ if ts > now - timedelta(weeks = weeks):
+ break
+
+ f.write(' <url>\n <loc>' + os.path.join(rooturl,folder,file) + '</loc>\n')
+ f.write(' <lastmod>' + time.strftime("%Y-%m-%d",time.gmtime(mtime)) + '</lastmod>\n')
+ f.write(' <changefreq>' + changefreq + '</changefreq>\n')
+ f.write(' <priority>' + str(priority) + '</priority>\n')
+ f.write(' </url>\n')
+
+ f.write('</urlset>\n')
+ f.close()
+
+ sitemaps.append((os.path.join(rooturl,folder,"sitemap.xml.gz")))
+
+
+ # write out the sitemapindex file
+ sitemapindex = os.path.join(rootdir,"sitemap.xml.gz")
+ f = gzip.open(sitemapindex, 'wb')
+
+ f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+ f.write('<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n')
+
+ for sitemap in sitemaps:
+ f.write(' <sitemap>\n <loc>' + sitemap + '</loc>\n')
+ f.write(' <lastmod>' + time.strftime("%Y-%m-%d", now.timetuple()) + '</lastmod>\n')
+ f.write(' </sitemap>\n')
+
+ f.write('</sitemapindex>\n')
+ f.close()
+
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+ main()
+