author    Rob Austein <sra@hactrn.net>    2021-02-14 16:56:57 +0000
committer Rob Austein <sra@hactrn.net>    2021-02-14 16:56:57 +0000
commit    68e18ad1f44e9a6fab66adc38e97d027a58de8a4 (patch)
tree      22eb915c53760c6b6f0f5254d027de9dee58abd1 /references
parent    7bf10bb74babd8f16b7a3942607f3d1007aa2324 (diff)
Another reorg, and pelican samples
Diffstat (limited to 'references')
-rwxr-xr-x  references/convert-and-slurp-attachments.sh    18
-rw-r--r--  references/extract-wiki-content.xsl           177
-rwxr-xr-x  references/generate-json.py                   154
-rw-r--r--  references/pelicanconf.py                      42
-rw-r--r--  references/publishconf.py                      19
-rw-r--r--  references/rpki-wiki-to-markdown.py           341
-rw-r--r--  references/schema.sql                         177
-rw-r--r--  references/trac-wiki-to-markdown.rb            51
-rw-r--r--  references/trac2down.py                        61
9 files changed, 1040 insertions, 0 deletions
diff --git a/references/convert-and-slurp-attachments.sh b/references/convert-and-slurp-attachments.sh
new file mode 100755
index 0000000..ce7f34d
--- /dev/null
+++ b/references/convert-and-slurp-attachments.sh
@@ -0,0 +1,18 @@
+#!/bin/sh -
+
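+# Run this in a directory whose entries are Trac wiki page names: the
+# fgrep below keeps only names without a dot, so ordinary files such as
+# foo.md and foo.zip left over from earlier runs are skipped.
+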
+ls | fgrep -v . |
+while read page
+do
+ base="https://trac.rpki.net"
+ path="/wiki/$(echo "$page" | sed s=%2F=/=g)"
+
+ # Fetch the Wiki page, extract the useful portion of the HTML, convert that into Markdown
+ curl "${base}${path}" |
+ xsltproc --html extract-wiki-content.xsl - |
+ html2markdown --no-skip-internal-links --reference-links >"$page.md"
+
+ # Fetch a ZIP file containing any attachments, clean up if result is empty or broken
+ curl "${base}/zip-attachment${path}/" >"$page.zip"
+ zipinfo "$page.zip" >/dev/null 2>&1 || rm -f "$page.zip"
+
+done
diff --git a/references/extract-wiki-content.xsl b/references/extract-wiki-content.xsl
new file mode 100644
index 0000000..e4376e8
--- /dev/null
+++ b/references/extract-wiki-content.xsl
@@ -0,0 +1,177 @@
+<!--
+ - XSL transform to extract useful content of a Trac Wiki page.
+ -
+ - Django generates weird HTML for ordered lists: it sometimes breaks
+ - up a single ordered list into multiple adjacent <ol/> elements,
+ - using the @start attribute to try to make the result look like a
+ - single ordered list. This looks OK in Firefox but confuses the
+ - bejesus out of both html2markdown and htmldoc. In some cases this is
+ - probably unavoidable, but most of the uses of this I've seen look
+ - gratuitous, and are probably the result of code modularity issues
+ - in Django.
+ -
+ - So we try to clean this up, by merging adjacent <ol/> elements where
+ - we can. The merge incantation is an adaptation of:
+ -
+ - http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0
+ -
+ - There may be a more efficient way to do this, but I don't think
+ - we care, and this seems to work.
+ -
+ - Original author's explanation:
+ -
+ - The rather convoluted XPath expression for selecting the following
+ - sibling aaa nodes which are merged with the current one:
+ -
+ - following-sibling::aaa[ # following 'aaa' siblings
+ - not(preceding-sibling::*[ # if they are not preceded by
+ - not(self::aaa) and # a non-'aaa' node
+ - not(following-sibling::aaa = current()) # after the current node
+ - ])
+ - ]
+ -->
+
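+<!--
+ - For example (hypothetical input, not from an actual Wiki page), Trac
+ - can emit adjacent lists like:
+ -
+ -   <ol><li>foo</li><li>bar</li></ol>
+ -   <ol start="3"><li>baz</li></ol>
+ -
+ - The <ol/> merging template at the end of this transform folds these
+ - into a single list:
+ -
+ -   <ol><li>foo</li><li>bar</li><li>baz</li></ol>
+ -->
+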
+ <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <xsl:output method="xml" encoding="us-ascii" omit-xml-declaration="yes" />
+
+ <xsl:param name="basename"/>
+ <xsl:param name="path"/>
+
+ <xsl:template match="/">
+ <xsl:message><xsl:value-of select="concat('Got path: ', $path)"/></xsl:message>
+ <xsl:variable name="id">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="$path"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <xsl:message><xsl:value-of select="concat('Got id: ', $id)"/></xsl:message>
+ <xsl:comment>NEW PAGE</xsl:comment>
+ <html>
+ <body>
+ <div id="{$id}">
+ <xsl:apply-templates select="//div[@id = 'wikipage']/*"/>
+ </div>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template match="//div[contains(@class, 'wiki-toc')]"/>
+
+ <xsl:template match="//span[@class = 'icon' and not(*)]"/>
+
+ <xsl:template match="a[contains(@class, 'wiki') and
+ starts-with(@href, '/wiki/')]">
+ <xsl:variable name="href">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="@href"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <a href="#{$href}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="a[starts-with(@href, '/attachment/wiki/')]">
+ <a href="{concat($basename, @href)}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="img[starts-with(@src, '/raw-attachment/wiki/')]">
+ <img src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </img>
+ </xsl:template>
+
+ <xsl:template match="object[starts-with(@data, '/raw-attachment/wiki/') or
+ starts-with(@data, '/graphviz/')]">
+ <object data="{concat($basename, @data)}">
+ <xsl:apply-templates select="@*[name() != 'data']"/>
+ <xsl:apply-templates/>
+ </object>
+ </xsl:template>
+
+ <xsl:template match="embed[starts-with(@src, '/raw-attachment/wiki/') or
+ starts-with(@src, '/graphviz/')]">
+ <embed src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </embed>
+ </xsl:template>
+
+ <xsl:template match="text()[contains(., '&#8203;')]">
+ <xsl:call-template name="remove-zero-width-spaces">
+ <xsl:with-param name="s" select="."/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template match="@*|node()">
+ <xsl:copy>
+ <xsl:copy-of select="@*"/>
+ <xsl:apply-templates/>
+ </xsl:copy>
+ </xsl:template>
+
+ <xsl:template name="path-to-id">
+ <xsl:param name="p"/>
+ <xsl:text>_</xsl:text>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$p"/>
+ <xsl:with-param name="old">/</xsl:with-param>
+ <xsl:with-param name="new">.</xsl:with-param>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="remove-zero-width-spaces">
+ <xsl:param name="s"/>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$s"/>
+ <xsl:with-param name="old">&#8203;</xsl:with-param>
+ <xsl:with-param name="new"/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="replace">
+ <xsl:param name="s"/>
+ <xsl:param name="old"/>
+ <xsl:param name="new"/>
+ <xsl:choose>
+ <xsl:when test="contains($s, $old)">
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="concat(substring-before($s, $old),
+ $new,
+ substring-after($s, $old))"/>
+ <xsl:with-param name="old" select="$old"/>
+ <xsl:with-param name="new" select="$new"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$s"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template match="ol">
+ <xsl:if test="not(preceding-sibling::*[1]/self::ol)">
+ <xsl:variable name="following"
+ select="following-sibling::ol[
+ not(preceding-sibling::*[
+ not(self::ol) and
+ not(following-sibling::ol = current())
+ ])
+ ]"/>
+ <xsl:copy>
+ <xsl:apply-templates select="$following/@*[name() != 'start']"/>
+ <xsl:apply-templates select="@*"/>
+ <xsl:apply-templates select="node()"/>
+ <xsl:apply-templates select="$following/node()"/>
+ </xsl:copy>
+ </xsl:if>
+ </xsl:template>
+
+ </xsl:transform>
+
diff --git a/references/generate-json.py b/references/generate-json.py
new file mode 100755
index 0000000..b8b1f38
--- /dev/null
+++ b/references/generate-json.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+
+# Generate JSON to import Trac tickets into GitHub issues using the new import API
+# described at https://gist.github.com/jonmagic/5282384165e0f86ef105
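+#
+# Each ticket becomes one ticket_NNN.json file shaped roughly the way
+# that API expects:
+#
+#   {"issue":    {"title": ..., "body": ..., "labels": [...], ...},
+#    "comments": [{"created_at": ..., "body": ...}, ...]}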
+
+import os
+import time
+import json
+import yaml
+import sqlite3
+import hashlib
+import argparse
+import subprocess
+
+ticket_query = '''
+SELECT
+ id,
+ type,
+ owner,
+ reporter,
+ milestone,
+ status,
+ resolution,
+ summary,
+ description,
+ component,
+ priority,
+ time / 1000000 AS createdtime,
+ changetime / 1000000 AS modifiedtime
+FROM
+ ticket
+ORDER BY
+ id
+'''
+
+comment_query = '''
+SELECT
+ time / 1000000 AS createdtime,
+ author,
+ newvalue
+FROM
+ ticket_change
+WHERE
+ ticket = ?
+AND
+ field = 'comment'
+AND
+ newvalue <> ''
+ORDER BY
+ time
+'''
+
+attachment_query = '''
+SELECT
+ id,
+ filename,
+ size,
+ author,
+ description,
+ ipnr,
+ time / 1000000 AS createdtime
+FROM
+ attachment
+WHERE
+ id = ?
+AND
+ type = 'ticket'
+ORDER BY
+ time, filename
+'''
+
+def isotime(t):
+    return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t))
+
+def hashname(whatever):
+    return hashlib.sha1(unicode(whatever)).hexdigest()
+
+def ticket_text(ticket):
+    d = dict(ticket, createdtime = isotime(ticket["createdtime"]), modifiedtime = isotime(ticket["modifiedtime"]))
+    return u"{description}\n\n" \
+           u"_Trac ticket #{id} component {component} priority {priority}, owner {owner}," \
+           u" created by {reporter} on {createdtime}, last modified {modifiedtime}_\n".format(**d)
+
+def comment_text(comment):
+    d = dict(comment, createdtime = isotime(comment["createdtime"]))
+    return u"{newvalue}\n\n_Trac comment by {author} on {createdtime}_\n".format(**d)
+
+def attachment_text(attachment):
+    h1 = hashname(attachment["id"])
+    h2 = hashname(attachment["filename"])
+    fn2 = os.path.splitext(attachment["filename"])[1]
+    fn = os.path.join(gist_url, h1[:3], h1, h2 + fn2)
+    url = "{}/raw/{}/ticket.{}.{}{}".format(gist_url.rstrip("/"), gist_commit, h1, h2, fn2)
+    d = dict(attachment, createdtime = isotime(attachment["createdtime"]), url = url)
+    return u"[{filename}]({url}) {description}\n_Trac attachment by {author} on {createdtime}_\n".format(**d)
+
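+# Merge two lists of dicts, each already sorted by "created_at", into a
+# single list sorted by "created_at".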
+def comment_merge(comments, attachments):
+    result = []
+    while comments and attachments:
+        result.append(comments.pop(0) if comments[0]["created_at"] <= attachments[0]["created_at"] else attachments.pop(0))
+    return result + comments + attachments
+
+parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-c", "--config", type = argparse.FileType(),
+                    default = "generate-json.yaml",
+                    help = "YAML config mappings")
+args = parser.parse_args()
+
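+# The config file is YAML along these lines (illustrative names and
+# values, not from any real setup):
+#
+#   database:            trac.db
+#   ticket_directory:    tickets
+#   attachment_gist_url: https://gist.github.com/example/0123456789abcdef
+#   assignees:
+#     alice-trac:  alice-github
+#   type_labels:
+#     defect:      bug
+#   resolution_labels:
+#     wontfix:     wontfix
+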
+cfg = yaml.safe_load(args.config)
+assignee_map = cfg["assignees"]
+type_map = cfg["type_labels"]
+resolution_map = cfg["resolution_labels"]
+
+gist_url = cfg.get("attachment_gist_url")
+if gist_url is not None:
+    gist_commit = subprocess.check_output(("git", "ls-remote", gist_url, "HEAD")).split()[0]
+
+db = sqlite3.connect(cfg["database"])
+db.row_factory = sqlite3.Row
+ticket_cursor = db.cursor()
+comment_cursor = db.cursor()
+attachment_cursor = db.cursor()
+
+if not os.path.isdir(cfg["ticket_directory"]):
+    os.makedirs(cfg["ticket_directory"])
+
+for ticket in ticket_cursor.execute(ticket_query):
+    comments = comment_merge([dict(created_at = isotime(comment["createdtime"]), body = comment_text(comment))
+                              for comment in comment_cursor.execute(comment_query, (ticket["id"],))],
+                             [] if gist_url is None else
+                             [dict(created_at = isotime(attachment["createdtime"]), body = attachment_text(attachment))
+                              for attachment in attachment_cursor.execute(attachment_query, (ticket["id"],))])
+    issue = dict(
+        title = ticket["summary"],
+        body = ticket_text(ticket),
+        created_at = isotime(ticket["createdtime"]),
+        updated_at = isotime(ticket["modifiedtime"]))
+    if ticket["status"] == "closed":
+        issue["closed"] = True
+        issue["closed_at"] = isotime(ticket["modifiedtime"])
+        comments.append(dict(created_at = isotime(ticket["modifiedtime"]),
+                             body = "_Closed with resolution {resolution}_\n".format(**ticket)))
+    if ticket["owner"] in assignee_map:
+        issue["assignee"] = assignee_map[ticket["owner"]]
+    labels = [type_map.get(ticket["type"]), resolution_map.get(ticket["resolution"])]
+    while None in labels:
+        del labels[labels.index(None)]
+    if labels:
+        issue["labels"] = labels
+    issue = dict(issue = issue)
+    if comments:
+        issue["comments"] = comments
+    with open(os.path.join(cfg["ticket_directory"], "ticket_{:03d}.json".format(ticket["id"])), "wb") as f:
+        json.dump(issue, f, indent = 4, sort_keys = True, separators = (",", ": "))
diff --git a/references/pelicanconf.py b/references/pelicanconf.py
new file mode 100644
index 0000000..a28721d
--- /dev/null
+++ b/references/pelicanconf.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*- #
+from __future__ import unicode_literals
+
+AUTHOR = u'Rob Austein'
+SITENAME = u'Your Bug Report Will Be Graded'
+
+# Apparently this is much longer than the theme designer expected. Skip it for now.
+SITESUBTITLE = u'"I\'m not proud of being a congenital pain in the ass. But I will take money for it."'
+
+PATH = 'content'
+TIMEZONE = 'UTC'
+DEFAULT_LANG = u'English'
+
+# Hack article URLs to match what Blogofile did, to avoid breaking links.
+
+ARTICLE_URL = '{date:%Y}/{date:%m}/{date:%d}/{slug}/'
+ARTICLE_SAVE_AS = '{date:%Y}/{date:%m}/{date:%d}/{slug}/index.html'
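+
+# e.g. a post dated 2021-02-14 with slug "example-post" is written to
+# 2021/02/14/example-post/index.html and linked as 2021/02/14/example-post/.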
+
+# Feed generation is usually not desired when developing
+SITEURL = ''
+RELATIVE_URLS = True
+FEED_ALL_ATOM = None
+CATEGORY_FEED_ATOM = None
+TRANSLATION_FEED_ATOM = None
+AUTHOR_FEED_ATOM = None
+AUTHOR_FEED_RSS = None
+
+# Blogroll
+LINKS = (('Pelican', 'http://getpelican.com/'),
+ ('Python.org', 'http://python.org/'),
+ ('Jinja2', 'http://jinja.pocoo.org/'))
+LINKS_WIDGET_NAME = "Links"
+
+# Social widget. Can't get rid of this with the default theme, can only
+# change its name. Fiddle with themes later.
+SOCIAL = ()
+SOCIAL_WIDGET_NAME = "Subscribe"
+
+DEFAULT_PAGINATION = 10
+
+THEME = "/home/blog/pelican-themes/sundown"
diff --git a/references/publishconf.py b/references/publishconf.py
new file mode 100644
index 0000000..f0fb21d
--- /dev/null
+++ b/references/publishconf.py
@@ -0,0 +1,19 @@
+#!/usr/local/bin/python2.7
+# -*- coding: utf-8 -*- #
+from __future__ import unicode_literals
+
+# This file is only used if you use `make publish` or
+# explicitly specify it as your config file.
+
+import os
+import sys
+sys.path.append(os.curdir)
+from pelicanconf import *
+
+SITEURL = 'https://www.hactrn.net/blog'
+RELATIVE_URLS = False
+
+FEED_ALL_ATOM = 'feeds/all.atom.xml'
+CATEGORY_FEED_ATOM = 'feeds/{slug}.atom.xml'
+
+DELETE_OUTPUT_DIRECTORY = True
diff --git a/references/rpki-wiki-to-markdown.py b/references/rpki-wiki-to-markdown.py
new file mode 100644
index 0000000..dff87e6
--- /dev/null
+++ b/references/rpki-wiki-to-markdown.py
@@ -0,0 +1,341 @@
+# Copyright (C) 2016 Parsons Government Services ("PARSONS")
+# Portions copyright (C) 2014 Dragon Research Labs ("DRL")
+# Portions copyright (C) 2012 Internet Systems Consortium ("ISC")
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notices and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND PARSONS, DRL, AND ISC DISCLAIM
+# ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+# PARSONS, DRL, OR ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+"""
+Trac Wiki -> Markdown converter, hacked from an old Trac Wiki ->
+PDF/flat-text converter.
+
+Pull HTML pages from a Trac Wiki, feed the useful bits to
+html2text to generate Markdown.
+
+Assumes you're using the TracNav plugin for the Wiki pages, and uses
+the same list as the TracNav plugin does to determine the set of pages
+to convert.
+"""
+
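+# Example invocation (these flags are defined in main() below):
+#
+#   python rpki-wiki-to-markdown.py --base_url https://trac.rpki.net \
+#       --toc https://trac.rpki.net/wiki/doc/RPKI/TOC --directory .
+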
+# Dependencies, at least on Ubuntu Xenial:
+#
+# apt-get install python-lxml python-html2text
+#
+# Be warned that there are many unrelated packages named "html2text",
+# installed under various names on various platforms. This one
+# happens to be a useful HTML-to-Markdown converter.
+
+# Most of the work of massaging the HTML is done using XSL transforms,
+# because the template-driven style makes that easy. There's probably
+# some clever way to use lxml's XPath code to do the same thing in a
+# more pythonic way with ElementTrees, but I already had the XSL
+# transforms and there's a point of diminishing returns on this sort of
+# thing.
+
+import sys
+import os
+import argparse
+import lxml.etree
+import urllib
+import urlparse
+import subprocess
+import zipfile
+
+# Main program, up front so it doesn't get lost under all the XSL
+
+def main():
+
+    base = "https://trac.rpki.net"
+
+    parser = argparse.ArgumentParser(description = __doc__, formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-b", "--base_url",
+                        default = base,
+                        help = "base URL for documentation web site")
+    parser.add_argument("-t", "--toc",
+                        default = base + "/wiki/doc/RPKI/TOC",
+                        help = "table of contents URL")
+    parser.add_argument("-d", "--directory",
+                        default = ".",
+                        help = "output directory")
+    parser.add_argument("-p", "--prefix",
+                        default = "/wiki/doc",
+                        help = "page name prefix on wiki")
+    args = parser.parse_args()
+
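+    # repr() of a Python string doubles here as XPath string-literal
+    # quoting, which is how string parameters reach the XSL transforms.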
+    urls = str(xsl_get_toc(lxml.etree.parse(urllib.urlopen(args.toc)).getroot(),
+                           basename = repr(args.base_url))).splitlines()
+
+    assert all(urlparse.urlparse(url).path.startswith(args.prefix) for url in urls)
+
+    for pagenum, url in enumerate(urls):
+        path = urlparse.urlparse(url).path
+        page = xsl_get_page(lxml.etree.parse(urllib.urlopen(url)).getroot(),
+                            basename = repr(args.base_url),
+                            path = repr(path))
+
+        fn_base = os.path.join(args.directory, "{:02d}{}".format(pagenum, path[len(args.prefix):].replace("/", ".")))
+
+        fn = fn_base + ".zip"
+        zip_url = urlparse.urljoin(url, "/zip-attachment{}/".format(path))
+        urllib.urlretrieve(zip_url, fn)
+        with zipfile.ZipFile(fn, "r") as z:
+            if len(z.namelist()) == 0:
+                os.unlink(fn)
+            else:
+                sys.stderr.write("Wrote {}\n".format(fn))
+
+        for imgnum, img in enumerate(page.xpath("//img | //object | //embed")):
+            img_url = img.get("data" if img.tag == "object" else "src")
+            img_url = urlparse.urljoin(url, img_url)
+            fn = "{}.{:02d}{}".format(fn_base, imgnum, os.path.splitext(img_url)[1])
+            urllib.urlretrieve(img_url, fn)
+            sys.stderr.write("Wrote {}\n".format(fn))
+
+        html2markdown = subprocess.Popen(("html2markdown", "--no-skip-internal-links", "--reference-links"),
+                                         stdin = subprocess.PIPE, stdout = subprocess.PIPE)
+        page.write(html2markdown.stdin)
+        html2markdown.stdin.close()
+        lines = html2markdown.stdout.readlines()
+        html2markdown.stdout.close()
+        html2markdown.wait()
+
+        while lines and lines[0].isspace():
+            del lines[0]
+
+        fn = fn_base + ".md"
+        with open(fn, "w") as f:
+            want_blank = False
+            for line in lines:
+                blank = line.isspace()
+                if want_blank and not blank:
+                    f.write("\n")
+                if not blank:
+                    f.write(line)
+                want_blank = blank
+        sys.stderr.write("Wrote {}\n".format(fn))
+
+        fn = fn[:-3] + ".wiki"
+        urllib.urlretrieve(url + "?format=txt", fn)
+        sys.stderr.write("Wrote {}\n".format(fn))
+
+
+# XSL transform to extract list of Wiki page URLs from the TOC Wiki page
+
+xsl_get_toc = lxml.etree.XSLT(lxml.etree.XML('''\
+ <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+ <xsl:output method="text" encoding="us-ascii"/>
+
+ <xsl:param name="basename"/>
+
+ <xsl:template match="/">
+ <xsl:for-each select="//div[@id = 'wikipage']/ul//a">
+ <xsl:value-of select="concat($basename, @href, '&#10;')"/>
+ </xsl:for-each>
+ </xsl:template>
+
+ </xsl:transform>
+'''))
+
+# XSL transform to extract useful content of a Wiki page.
+
+# Django generates weird HTML for ordered lists: it sometimes breaks
+# up a single ordered list into multiple adjacent <ol/> elements,
+# using the @start attribute to try to make the result look like a
+# single ordered list. This looks OK in Firefox but confuses the
+# bejesus out of both html2markdown and htmldoc. In some cases this is
+# probably unavoidable, but most of the uses of this I've seen look
+# gratuitous, and are probably the result of code modularity issues
+# in Django.
+#
+# So we try to clean this up, by merging adjacent <ol/> elements where
+# we can. The merge incantation is an adaptation of:
+#
+# http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0
+#
+# There may be a more efficient way to do this, but I don't think
+# we care, and this seems to work.
+#
+# Original author's explanation:
+#
+# The rather convoluted XPath expression for selecting the following
+# sibling aaa nodes which are merged with the current one:
+#
+# following-sibling::aaa[ # following 'aaa' siblings
+# not(preceding-sibling::*[ # if they are not preceded by
+# not(self::aaa) and # a non-'aaa' node
+# not(following-sibling::aaa = current()) # after the current node
+# ])
+# ]
+
+xsl_get_page = lxml.etree.XSLT(lxml.etree.XML('''\
+ <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+
+ <xsl:output method="xml" encoding="us-ascii" omit-xml-declaration="yes" />
+
+ <xsl:param name="basename"/>
+ <xsl:param name="path"/>
+
+ <xsl:template match="/">
+ <xsl:message><xsl:value-of select="concat('Got path: ', $path)"/></xsl:message>
+ <xsl:variable name="id">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="$path"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <xsl:message><xsl:value-of select="concat('Got id: ', $id)"/></xsl:message>
+ <xsl:comment>NEW PAGE</xsl:comment>
+ <html>
+ <body>
+ <div id="{$id}">
+ <xsl:apply-templates select="//div[@id = 'wikipage']/*"/>
+ </div>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template match="//div[contains(@class, 'wiki-toc')]"/>
+
+ <xsl:template match="//span[@class = 'icon' and not(*)]"/>
+
+ <xsl:template match="a[contains(@class, 'wiki') and
+ starts-with(@href, '/wiki/')]">
+ <xsl:variable name="href">
+ <xsl:call-template name="path-to-id">
+ <xsl:with-param name="p" select="@href"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <a href="#{$href}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="a[starts-with(@href, '/attachment/wiki/')]">
+ <a href="{concat($basename, @href)}">
+ <xsl:apply-templates select="@*[name() != 'href']"/>
+ <xsl:apply-templates/>
+ </a>
+ </xsl:template>
+
+ <xsl:template match="img[starts-with(@src, '/raw-attachment/wiki/')]">
+ <img src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </img>
+ </xsl:template>
+
+ <xsl:template match="object[starts-with(@data, '/raw-attachment/wiki/') or
+ starts-with(@data, '/graphviz/')]">
+ <object data="{concat($basename, @data)}">
+ <xsl:apply-templates select="@*[name() != 'data']"/>
+ <xsl:apply-templates/>
+ </object>
+ </xsl:template>
+
+ <xsl:template match="embed[starts-with(@src, '/raw-attachment/wiki/') or
+ starts-with(@src, '/graphviz/')]">
+ <embed src="{concat($basename, @src)}">
+ <xsl:apply-templates select="@*[name() != 'src']"/>
+ <xsl:apply-templates/>
+ </embed>
+ </xsl:template>
+
+ <xsl:template match="text()[contains(., '&#8203;')]">
+ <xsl:call-template name="remove-zero-width-spaces">
+ <xsl:with-param name="s" select="."/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template match="@*|node()">
+ <xsl:copy>
+ <xsl:copy-of select="@*"/>
+ <xsl:apply-templates/>
+ </xsl:copy>
+ </xsl:template>
+
+ <xsl:template name="path-to-id">
+ <xsl:param name="p"/>
+ <xsl:text>_</xsl:text>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$p"/>
+ <xsl:with-param name="old">/</xsl:with-param>
+ <xsl:with-param name="new">.</xsl:with-param>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="remove-zero-width-spaces">
+ <xsl:param name="s"/>
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="$s"/>
+ <xsl:with-param name="old">&#8203;</xsl:with-param>
+ <xsl:with-param name="new"/>
+ </xsl:call-template>
+ </xsl:template>
+
+ <xsl:template name="replace">
+ <xsl:param name="s"/>
+ <xsl:param name="old"/>
+ <xsl:param name="new"/>
+ <xsl:choose>
+ <xsl:when test="contains($s, $old)">
+ <xsl:call-template name="replace">
+ <xsl:with-param name="s" select="concat(substring-before($s, $old),
+ $new,
+ substring-after($s, $old))"/>
+ <xsl:with-param name="old" select="$old"/>
+ <xsl:with-param name="new" select="$new"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$s"/>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template match="ol">
+ <xsl:if test="not(preceding-sibling::*[1]/self::ol)">
+ <xsl:variable name="following"
+ select="following-sibling::ol[
+ not(preceding-sibling::*[
+ not(self::ol) and
+ not(following-sibling::ol = current())
+ ])
+ ]"/>
+ <xsl:copy>
+ <xsl:apply-templates select="$following/@*[name() != 'start']"/>
+ <xsl:apply-templates select="@*"/>
+ <xsl:apply-templates select="node()"/>
+ <xsl:apply-templates select="$following/node()"/>
+ </xsl:copy>
+ </xsl:if>
+ </xsl:template>
+
+ </xsl:transform>
+'''))
+
+# All the files we want to parse are HTML, so make HTML the default
+# parser. In theory the HTML produced by Trac is XHTML and thus should
+# parse correctly (in fact, better) as XML, but in practice this seems
+# not to work properly at the moment, while parsing as HTML does.
+# Haven't bothered to figure out why, life is too short.
+#
+# If you're reading this comment because this script stopped working
+# after a Trac upgrade, try commenting out this line to see whether
+# things have changed and Trac's HTML now parses better as XML.
+
+lxml.etree.set_default_parser(lxml.etree.HTMLParser())
+
+# Run the main program.
+main()
diff --git a/references/schema.sql b/references/schema.sql
new file mode 100644
index 0000000..1515dbb
--- /dev/null
+++ b/references/schema.sql
@@ -0,0 +1,177 @@
+CREATE TABLE system (
+ name text PRIMARY KEY,
+ value text
+);
+CREATE TABLE permission (
+ username text,
+ action text,
+ UNIQUE (username,action)
+);
+CREATE TABLE auth_cookie (
+ cookie text,
+ name text,
+ ipnr text,
+ time integer,
+ UNIQUE (cookie,ipnr,name)
+);
+CREATE TABLE session (
+ sid text,
+ authenticated integer,
+ last_visit integer,
+ UNIQUE (sid,authenticated)
+);
+CREATE INDEX session_last_visit_idx ON session (last_visit);
+CREATE INDEX session_authenticated_idx ON session (authenticated);
+CREATE TABLE session_attribute (
+ sid text,
+ authenticated integer,
+ name text,
+ value text,
+ UNIQUE (sid,authenticated,name)
+);
+CREATE TABLE cache (
+ id integer PRIMARY KEY,
+ generation integer,
+ key text
+);
+CREATE TABLE attachment (
+ type text,
+ id text,
+ filename text,
+ size integer,
+ time integer,
+ description text,
+ author text,
+ ipnr text,
+ UNIQUE (type,id,filename)
+);
+CREATE TABLE wiki (
+ name text,
+ version integer,
+ time integer,
+ author text,
+ ipnr text,
+ text text,
+ comment text,
+ readonly integer,
+ UNIQUE (name,version)
+);
+CREATE INDEX wiki_time_idx ON wiki (time);
+CREATE TABLE repository (
+ id integer,
+ name text,
+ value text,
+ UNIQUE (id,name)
+);
+CREATE TABLE revision (
+ repos integer,
+ rev text,
+ time integer,
+ author text,
+ message text,
+ UNIQUE (repos,rev)
+);
+CREATE INDEX revision_repos_time_idx ON revision (repos,time);
+CREATE TABLE ticket (
+ id integer PRIMARY KEY,
+ type text,
+ time integer,
+ changetime integer,
+ component text,
+ severity text,
+ priority text,
+ owner text,
+ reporter text,
+ cc text,
+ version text,
+ milestone text,
+ status text,
+ resolution text,
+ summary text,
+ description text,
+ keywords text
+);
+CREATE INDEX ticket_time_idx ON ticket (time);
+CREATE INDEX ticket_status_idx ON ticket (status);
+CREATE TABLE ticket_change (
+ ticket integer,
+ time integer,
+ author text,
+ field text,
+ oldvalue text,
+ newvalue text,
+ UNIQUE (ticket,time,field)
+);
+CREATE INDEX ticket_change_ticket_idx ON ticket_change (ticket);
+CREATE INDEX ticket_change_time_idx ON ticket_change (time);
+CREATE TABLE ticket_custom (
+ ticket integer,
+ name text,
+ value text,
+ UNIQUE (ticket,name)
+);
+CREATE TABLE enum (
+ type text,
+ name text,
+ value text,
+ UNIQUE (type,name)
+);
+CREATE TABLE component (
+ name text PRIMARY KEY,
+ owner text,
+ description text
+);
+CREATE TABLE milestone (
+ name text PRIMARY KEY,
+ due integer,
+ completed integer,
+ description text
+);
+CREATE TABLE version (
+ name text PRIMARY KEY,
+ time integer,
+ description text
+);
+CREATE TABLE report (
+ id integer PRIMARY KEY,
+ author text,
+ title text,
+ query text,
+ description text
+);
+CREATE TABLE notify_subscription (
+ id integer PRIMARY KEY,
+ time integer,
+ changetime integer,
+ class text,
+ sid text,
+ authenticated integer,
+ distributor text,
+ format text,
+ priority integer,
+ adverb text
+);
+CREATE INDEX notify_subscription_sid_authenticated_idx ON notify_subscription (sid,authenticated);
+CREATE INDEX notify_subscription_class_idx ON notify_subscription (class);
+CREATE TABLE notify_watch (
+ id integer PRIMARY KEY,
+ sid text,
+ authenticated integer,
+ class text,
+ realm text,
+ target text
+);
+CREATE INDEX notify_watch_sid_authenticated_class_idx ON notify_watch (sid,authenticated,class);
+CREATE INDEX notify_watch_class_realm_target_idx ON notify_watch (class,realm,target);
+CREATE TABLE node_change (
+ id integer PRIMARY KEY,
+ repos integer,
+ rev text,
+ path text,
+ node_type text,
+ change_type text,
+ base_path text,
+ base_rev text
+);
+CREATE INDEX node_change_repos_rev_path_idx ON node_change (repos,rev,path);
+CREATE INDEX node_change_repos_path_rev_idx ON node_change (repos,path,rev);
diff --git a/references/trac-wiki-to-markdown.rb b/references/trac-wiki-to-markdown.rb
new file mode 100644
index 0000000..f7d41ae
--- /dev/null
+++ b/references/trac-wiki-to-markdown.rb
@@ -0,0 +1,51 @@
+# Untested code snippet from https://gist.github.com/somebox/619537
+
+class String
+ def trac_to_markdown!
+ gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`')
+ gsub!(/\{\{\{(.+?)\}\}\}/m){|m| m.each_line.map{|x| "\t#{x}".gsub(/[\{\}]{3}/,'')}.join}
+ gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1')
+ gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1')
+ gsub!(/\=\=\s(.+?)\s\=\=/, '# \1')
+ gsub!(/\=\s(.+?)\s\=[\s\n]*/, '')
+ gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)')
+ gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1')
+ gsub!(/'''(.+)'''/, '*\1*')
+ gsub!(/''(.+)''/, '_\1_')
+ gsub!(/^\s\*/, '*')
+ gsub!(/^\s\d\./, '1.')
+
+ gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`')
+ gsub!(/'''(.+?)'''/, '**\1**')
+ gsub!(/''(.+?)''/, '*\1*')
+ gsub!(/((^\|\|[^\n\r]+\|\|[ \t]*\r?(\n|$))+)/m) do |m|
+ m = m.each_line.map do |x|
+ x.gsub(/\t/, ' ')
+ .gsub(/(\|\|){2,}/){|k| k.gsub(/\|\|/, '|| ')}
+ .gsub(/ {3,}/, ' ')
+ end.join
+ lines = m.each_line.to_a
+ line1 = lines.shift
+ line2 = line1.dup.gsub(/[^\n\r\|]/, '-')
+ lines.unshift(line1, line2)
+ c = lines.join
+ c = c.each_line.map do |x|
+ x.gsub(/\=\s?(.+?)\s?=/, ' \1 ')
+ .gsub(/\|\|/, '|')
+ end.join
+ end
+ gsub!(/^\{\{\{(.+?)^\}\}\}/m, '```\1```')
+ gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1')
+ gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1')
+ gsub!(/\=\=\s(.+?)\s\=\=/, '# \1')
+ gsub!(/\=\s(.+?)\s\=[\s\n]*/, '')
+ gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)')
+ gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1')
+ gsub!(/^\s\*/, '*')
+ gsub!(/^\s\d\./, '1.')
+ end
+end
+
+some_trac = 'my document'
+
+# trac_to_markdown! mutates the string in place (and, like gsub!, returns
+# nil when the last substitution matches nothing), so print the string itself.
+some_trac.trac_to_markdown!
+puts some_trac
diff --git a/references/trac2down.py b/references/trac2down.py
new file mode 100644
index 0000000..c66a201
--- /dev/null
+++ b/references/trac2down.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python2
+
+# Untested code from https://gist.githubusercontent.com/sgk/1286682/raw/b744dd2e47a68d60373ad39df87cfe8256f517af/trac2down.py
+
+# vim:set fileencoding=utf-8 sw=2 ai:
+
+import sqlite3
+import datetime
+import re
+
+SQL = '''
+  select
+    name, version, time, author, text
+  from
+    wiki w
+  where
+    version = (select max(version) from wiki where name = w.name)
+'''
+
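+# NB: this gist predates the switch (Trac 0.12) to storing timestamps in
+# microseconds; with a newer trac.db, divide "time" by 1000000 (as
+# generate-json.py does) before handing it to fromtimestamp().
+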
+conn = sqlite3.connect('../trac.db')
+result = conn.execute(SQL)
+for row in result:
+  name = row[0]
+  version = row[1]
+  time = row[2]
+  author = row[3]
+  text = row[4]
+
+  text = re.sub('\r\n', '\n', text)
+  text = re.sub(r'{{{(.*?)}}}', r'`\1`', text)
+  def indent4(m):
+    return '\n    ' + m.group(1).replace('\n', '\n    ')
+  text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text)
+  text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text)
+  text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text)
+  text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text)
+  text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text)
+  text = re.sub(r'(?m)^       \* ', r'****', text)
+  text = re.sub(r'(?m)^     \* ', r'***', text)
+  text = re.sub(r'(?m)^   \* ', r'**', text)
+  text = re.sub(r'(?m)^ \* ', r'*', text)
+  text = re.sub(r'(?m)^ \d+\. ', r'1.', text)
+
+  a = []
+  for line in text.split('\n'):
+    if not line.startswith('    '):
+      line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
+      line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](/\1/)', line)
+      line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
+      line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
+      line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
+    a.append(line)
+  text = '\n'.join(a)
+
+  fp = file('%s.md' % name, 'w')
+  print >>fp, '<!-- Name: %s -->' % name
+  print >>fp, '<!-- Version: %d -->' % version
+  print >>fp, '<!-- Last-Modified: %s -->' % datetime.datetime.fromtimestamp(time).strftime('%Y/%m/%d %H:%M:%S')
+  print >>fp, '<!-- Author: %s -->' % author
+  fp.write(text.encode('utf-8'))
+  fp.close()