diff options
author | Rob Austein <sra@hactrn.net> | 2021-02-14 01:35:10 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2021-02-14 01:35:10 +0000 |
commit | 23bb68fe7e9cc8af176ff60b56e8a51a70f05a89 (patch) | |
tree | 27c87a0c157e6eb343518031c4c3afd1e95bc488 /tools/references | |
parent | fa8b4d0e872d182ee878020fb1b066ce0da621ae (diff) |
Now generating pages directly from sqlite3
Diffstat (limited to 'tools/references')
-rwxr-xr-x | tools/references/convert-and-slurp-attachments.sh | 18 | ||||
-rw-r--r-- | tools/references/extract-wiki-content.xsl | 177 | ||||
-rwxr-xr-x | tools/references/generate-json.py | 154 | ||||
-rw-r--r-- | tools/references/rpki-wiki-to-markdown.py | 341 | ||||
-rw-r--r-- | tools/references/schema.sql | 177 | ||||
-rw-r--r-- | tools/references/trac-wiki-to-markdown.rb | 51 | ||||
-rw-r--r-- | tools/references/trac2down.py | 61 |
7 files changed, 979 insertions, 0 deletions
diff --git a/tools/references/convert-and-slurp-attachments.sh b/tools/references/convert-and-slurp-attachments.sh new file mode 100755 index 0000000..ce7f34d --- /dev/null +++ b/tools/references/convert-and-slurp-attachments.sh @@ -0,0 +1,18 @@ +#!/bin/sh - + +ls | fgrep -v . | +while read page +do + base="https://trac.rpki.net" + path="/wiki/$(echo $page | sed s=%2F=/=g)" + + # Fetch the Wiki page, extract the useful portion of the HTML, convert that into Markdown + curl "${base}${path}" | + xsltproc --html extract-wiki-content.xsl - | + html2markdown --no-skip-internal-links --reference-links >"$page.md" + + # Fetch a ZIP file containing any attachments, clean up if result is empty or broken + curl "${base}/zip-attachment${path}/" >"$page.zip" + zipinfo "$page.zip" >/dev/null 2>&1 || rm -f "$page.zip" + +done diff --git a/tools/references/extract-wiki-content.xsl b/tools/references/extract-wiki-content.xsl new file mode 100644 index 0000000..e4376e8 --- /dev/null +++ b/tools/references/extract-wiki-content.xsl @@ -0,0 +1,177 @@ +<!-- + - XSL transform to extract useful content of a Trac Wiki page. + - + - Django generates weird HTML for ordered lists: it sometimes breaks + - up a single ordered list into multiple adjacent <ol/> elements, + - using the @start attribute to try to make the result look like a + - single ordered list. This looks OK in Firefox but confuses the + - bejesus out of both html2markdown and htmldoc. In some cases this is + - probably unavoidable, but most of the uses of this I've seen look + - gratuitous, and are probably the result of code modulararity issues + - in Django. + - + - So we try to clean this up, by merging adjacent <ol/> elements where + - we can. The merge incantation is an adaptation of: + - + - http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0 + - + - There may be a more efficient way to do this, but I don't think + - we care, and this seems to work. + - + - Original author's explanation: + - + - The rather convoluted XPath expression for selecting the following + - sibling aaa nodes which are merged with the current one: + - + - following-sibling::aaa[ # following 'aaa' siblings + - not(preceding-sibling::*[ # if they are not preceded by + - not(self::aaa) and # a non-'aaa' node + - not(following-sibling::aaa = current()) # after the current node + - ]) + - ] + --> + + <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + + <xsl:output method="xml" encoding="us-ascii" omit-xml-declaration="yes" /> + + <xsl:param name="basename"/> + <xsl:param name="path"/> + + <xsl:template match="/"> + <xsl:message><xsl:value-of select="concat('Got path: ', $path)"/></xsl:message> + <xsl:variable name="id"> + <xsl:call-template name="path-to-id"> + <xsl:with-param name="p" select="$path"/> + </xsl:call-template> + </xsl:variable> + <xsl:message><xsl:value-of select="concat('Got id: ', $id)"/></xsl:message> + <xsl:comment>NEW PAGE</xsl:comment> + <html> + <body> + <div id="{$id}"> + <xsl:apply-templates select="//div[@id = 'wikipage']/*"/> + </div> + </body> + </html> + </xsl:template> + + <xsl:template match="//div[contains(@class, 'wiki-toc')]"/> + + <xsl:template match="//span[@class = 'icon' and not(*)]"/> + + <xsl:template match="a[contains(@class, 'wiki') and + starts-with(@href, '/wiki/')]"> + <xsl:variable name="href"> + <xsl:call-template name="path-to-id"> + <xsl:with-param name="p" select="@href"/> + </xsl:call-template> + </xsl:variable> + <a href="#{$href}"> + <xsl:apply-templates select="@*[name() != 'href']"/> + <xsl:apply-templates/> + </a> + </xsl:template> + + <xsl:template match="a[starts-with(@href, '/attachment/wiki/')]"> + <a href="{concat($basename, @href)}"> + <xsl:apply-templates select="@*[name() != 'href']"/> + <xsl:apply-templates/> + </a> + </xsl:template> + + <xsl:template match="img[starts-with(@src, '/raw-attachment/wiki/')]"> + <img src="{concat($basename, @src)}"> + <xsl:apply-templates select="@*[name() != 'src']"/> + <xsl:apply-templates/> + </img> + </xsl:template> + + <xsl:template match="object[starts-with(@data, '/raw-attachment/wiki/') or + starts-with(@data, '/graphviz/')]"> + <object data="{concat($basename, @data)}"> + <xsl:apply-templates select="@*[name() != 'data']"/> + <xsl:apply-templates/> + </object> + </xsl:template> + + <xsl:template match="embed[starts-with(@src, '/raw-attachment/wiki/') or + starts-with(@src, '/graphviz/')]"> + <embed src="{concat($basename, @src)}"> + <xsl:apply-templates select="@*[name() != 'src']"/> + <xsl:apply-templates/> + </embed> + </xsl:template> + + <xsl:template match="text()[contains(., '​')]"> + <xsl:call-template name="remove-zero-width-spaces"> + <xsl:with-param name="s" select="."/> + </xsl:call-template> + </xsl:template> + + <xsl:template match="@*|node()"> + <xsl:copy> + <xsl:copy-of select="@*"/> + <xsl:apply-templates/> + </xsl:copy> + </xsl:template> + + <xsl:template name="path-to-id"> + <xsl:param name="p"/> + <xsl:text>_</xsl:text> + <xsl:call-template name="replace"> + <xsl:with-param name="s" select="$p"/> + <xsl:with-param name="old">/</xsl:with-param> + <xsl:with-param name="new">.</xsl:with-param> + </xsl:call-template> + </xsl:template> + + <xsl:template name="remove-zero-width-spaces"> + <xsl:param name="s"/> + <xsl:call-template name="replace"> + <xsl:with-param name="s" select="$s"/> + <xsl:with-param name="old">​</xsl:with-param> + <xsl:with-param name="new"/> + </xsl:call-template> + </xsl:template> + + <xsl:template name="replace"> + <xsl:param name="s"/> + <xsl:param name="old"/> + <xsl:param name="new"/> + <xsl:choose> + <xsl:when test="contains($s, $old)"> + <xsl:call-template name="replace"> + <xsl:with-param name="s" select="concat(substring-before($s, $old), + $new, + substring-after($s, $old))"/> + <xsl:with-param name="old" select="$old"/> + <xsl:with-param name="new" select="$new"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$s"/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template match="ol"> + <xsl:if test="not(preceding-sibling::*[1]/self::ol)"> + <xsl:variable name="following" + select="following-sibling::ol[ + not(preceding-sibling::*[ + not(self::ol) and + not(following-sibling::ol = current()) + ]) + ]"/> + <xsl:copy> + <xsl:apply-templates select="$following/@*[name() != 'start']"/> + <xsl:apply-templates select="@*"/> + <xsl:apply-templates select="node()"/> + <xsl:apply-templates select="$following/node()"/> + </xsl:copy> + </xsl:if> + </xsl:template> + + </xsl:transform> + diff --git a/tools/references/generate-json.py b/tools/references/generate-json.py new file mode 100755 index 0000000..b8b1f38 --- /dev/null +++ b/tools/references/generate-json.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +# Generate JSON to import Trac tickets into GitHub issues using the new import API +# described at https://gist.github.com/jonmagic/5282384165e0f86ef105 + +import os +import time +import json +import yaml +import sqlite3 +import hashlib +import argparse +import subprocess + +ticket_query = ''' +SELECT + id, + type, + owner, + reporter, + milestone, + status, + resolution, + summary, + description, + component, + priority, + time / 1000000 AS createdtime, + changetime / 1000000 AS modifiedtime +FROM + ticket +ORDER BY + id +''' + +comment_query = ''' +SELECT + time / 1000000 AS createdtime, + author, + newvalue +FROM + ticket_change +WHERE + ticket = ? +AND + field = 'comment' +AND + newvalue <> '' +ORDER BY + time +''' + +attachment_query = ''' +SELECT + id, + filename, + size, + author, + description, + ipnr, + time / 1000000 AS createdtime +FROM + attachment +WHERE + id = ? +AND + type = 'ticket' +ORDER BY + time, filename +''' + +def isotime(t): + return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) + +def hashname(whatever): + return hashlib.sha1(unicode(whatever)).hexdigest() + +def ticket_text(ticket): + d = dict(ticket, createdtime = isotime(ticket["createdtime"]), modifiedtime = isotime(ticket["modifiedtime"])) + return u"{description}\n\n" \ + u"_Trac ticket #{id} component {component} priority {priority}, owner {owner}," \ + u" created by {reporter} on {createdtime}, last modified {modifiedtime}_\n".format(**d) + +def comment_text(comment): + d = dict(comment, createdtime = isotime(comment["createdtime"])) + return u"{newvalue}\n\n_Trac comment by {author} on {createdtime}_\n".format(**d) + +def attachment_text(attachment): + h1 = hashname(attachment["id"]) + h2 = hashname(attachment["filename"]) + fn2 = os.path.splitext(attachment["filename"])[1] + fn = os.path.join(gist_url, h1[:3], h1, h2 + fn2) + url = "{}/raw/{}/ticket.{}.{}{}".format(gist_url.rstrip("/"), gist_commit, h1, h2, fn2) + d = dict(attachment, createdtime = isotime(comment["createdtime"]), url = url) + return u"[{filename}]({url}) {description}\n_Trac attachment by {author} on {createdtime}_\n".format(**d) + +def comment_merge(comments, attachments): + result = [] + while comments and attachments: + result.append(comments.pop(0) if comments[0]["created_at"] <= attachments[0]["created_at"] else attachments.pop(0)) + return result + comments + attachments + +parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-c", "--config", type = argparse.FileType(), + default = "generate-json.yaml", + help = "YAML config mappings") +args = parser.parse_args() + +cfg = yaml.safe_load(args.config) +assignee_map = cfg["assignees"] +type_map = cfg["type_labels"] +resolution_map = cfg["resolution_labels"] + +gist_url = cfg.get("attachment_gist_url") +if gist_url is not None: + gist_commit = subprocess.check_output(("git", "ls-remote", gist_url, "HEAD")).split()[0] + +db = sqlite3.connect(cfg["database"]) +db.row_factory = sqlite3.Row +ticket_cursor = db.cursor() +comment_cursor = db.cursor() +attachment_cursor = db.cursor() + +if not os.path.isdir(cfg["ticket_directory"]): + os.makedirs(cfg["ticket_directory"]) + +for ticket in ticket_cursor.execute(ticket_query): + comments = comment_merge([dict(created_at = isotime(comment["createdtime"]), body = comment_text(comment)) + for comment in comment_cursor.execute(comment_query, (ticket["id"],))], + [] if gist_url is None else + [dict(created_at = isotime(attachment["createdtime"]), body = attachment_text(attachment)) + for attachment in attachment_cursor.execute(attachment_query, (ticket["id"],))]) + issue = dict( + title = ticket["summary"], + body = ticket_text(ticket), + created_at = isotime(ticket["createdtime"]), + updated_at = isotime(ticket["modifiedtime"])) + if ticket["status"] == "closed": + issue["closed"] = True + issue["closed_at"] = isotime(ticket["modifiedtime"]) + comments.append(dict(created_at = isotime(ticket["modifiedtime"]), + body = "_Closed with resolution {resolution}_\n".format(**ticket))) + if ticket["owner"] in assignee_map: + issue["assignee"] = assignee_map[ticket["owner"]] + labels = [type_map.get(ticket["type"]), resolution_map.get(ticket["resolution"])] + while None in labels: + del labels[labels.index(None)] + if labels: + issue["labels"] = labels + issue = dict(issue = issue) + if comments: + issue["comments"] = comments + with open(os.path.join(cfg["ticket_directory"], "ticket_{:03d}.json".format(ticket["id"])), "wb") as f: + json.dump(issue, f, indent = 4, sort_keys = True, separators=(",", ": ")) diff --git a/tools/references/rpki-wiki-to-markdown.py b/tools/references/rpki-wiki-to-markdown.py new file mode 100644 index 0000000..dff87e6 --- /dev/null +++ b/tools/references/rpki-wiki-to-markdown.py @@ -0,0 +1,341 @@ +# Copyright (C) 2016 Parsons Government Services ("PARSONS") +# Portions copyright (C) 2014 Dragon Research Labs ("DRL") +# Portions copyright (C) 2012 Internet Systems Consortium ("ISC") +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notices and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND PARSONS, DRL, AND ISC DISCLAIM +# ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +# PARSONS, DRL, OR ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR +# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION +# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +Trac Wiki -> Markdown converter, hacked from old Trac Wiki -> PDF/flat +text converter. + +Pull HTML pages from a Trac Wiki, feed the useful bits to +html2text to generate Markdown. + +Assumes you're using the TracNav plugin for the Wiki pages, and uses +the same list as the TracNav plugin does to determine the set of pages +to convert. +""" + +# Dependencies, at least on Ubuntu Xenial: +# +# apt-get install python-lxml python-html2text +# +# Be warned that there are many unrelated packages named "html2text", +# installed under various names on various platforms. This one +# happens to be a useful HTML-to-Markdown converter. + +# Most of the work of massaging the HTML is done using XSL transforms, +# because the template-driven style makes that easy. There's probably +# some clever way to use lxml's XPath code to do the same thing in a +# more pythonic way with ElementTrees, but I already had the XSL +# transforms and there's a point of diminishing returns on this sort of +# thing. + +import sys +import os +import argparse +import lxml.etree +import urllib +import urlparse +import subprocess +import zipfile + +# Main program, up front so it doesn't get lost under all the XSL + +def main(): + + base = "https://trac.rpki.net" + + parser = argparse.ArgumentParser(description = __doc__, formatter_class = argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-b", "--base_url", + default = base, + help = "base URL for documentation web site") + parser.add_argument("-t", "--toc", + default = base + "/wiki/doc/RPKI/TOC", + help = "table of contents URL") + parser.add_argument("-d", "--directory", + default = ".", + help = "output directory") + parser.add_argument("-p", "--prefix", + default = "/wiki/doc", + help = "page name prefix on wiki") + args = parser.parse_args() + + urls = str(xsl_get_toc(lxml.etree.parse(urllib.urlopen(args.toc)).getroot(), + basename = repr(args.base_url))).splitlines() + + assert all(urlparse.urlparse(url).path.startswith(args.prefix) for url in urls) + + for pagenum, url in enumerate(urls): + path = urlparse.urlparse(url).path + page = xsl_get_page(lxml.etree.parse(urllib.urlopen(url)).getroot(), + basename = repr(args.base_url), + path = repr(path)) + + fn_base = os.path.join(args.directory, "{:02d}{}".format(pagenum, path[len(args.prefix):].replace("/", "."))) + + fn = fn_base + ".zip" + zip_url = urlparse.urljoin(url, "/zip-attachment{}/".format(path)) + urllib.urlretrieve(zip_url, fn) + with zipfile.ZipFile(fn, "r") as z: + if len(z.namelist()) == 0: + os.unlink(fn) + else: + sys.stderr.write("Wrote {}\n".format(fn)) + + for imgnum, img in enumerate(page.xpath("//img | //object | //embed")): + img_url = img.get("data" if img.tag == "object" else "src") + img_url = urlparse.urljoin(url, img_url) + fn = "{}.{:02d}{}".format(fn_base, imgnum, os.path.splitext(img_url)[1]) + urllib.urlretrieve(img_url, fn) + sys.stderr.write("Wrote {}\n".format(fn)) + + html2markdown = subprocess.Popen(("html2markdown", "--no-skip-internal-links", "--reference-links"), + stdin = subprocess.PIPE, stdout = subprocess.PIPE) + page.write(html2markdown.stdin) + html2markdown.stdin.close() + lines = html2markdown.stdout.readlines() + html2markdown.stdout.close() + html2markdown.wait() + + while lines and lines[0].isspace(): + del lines[0] + + fn = fn_base + ".md" + with open(fn, "w") as f: + want_blank = False + for line in lines: + blank = line.isspace() + if want_blank and not blank: + f.write("\n") + if not blank: + f.write(line) + want_blank = blank + sys.stderr.write("Wrote {}\n".format(fn)) + + fn = fn[:-3] + ".wiki" + urllib.urlretrieve(url + "?format=txt", fn) + sys.stderr.write("Wrote {}\n".format(fn)) + + +# XSL transform to extract list of Wiki page URLs from the TOC Wiki page + +xsl_get_toc = lxml.etree.XSLT(lxml.etree.XML('''\ + <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + + <xsl:output method="text" encoding="us-ascii"/> + + <xsl:param name="basename"/> + + <xsl:template match="/"> + <xsl:for-each select="//div[@id = 'wikipage']/ul//a"> + <xsl:value-of select="concat($basename, @href, ' ')"/> + </xsl:for-each> + </xsl:template> + + </xsl:transform> +''')) + +# XSL transform to extract useful content of a Wiki page. + +# Django generates weird HTML for ordered lists: it sometimes breaks +# up a single ordered list into multiple adjacent <ol/> elements, +# using the @start attribute to try to make the result look like a +# single ordered list. This looks OK in Firefox but confuses the +# bejesus out of both html2markdown and htmldoc. In some cases this is +# probably unavoidable, but most of the uses of this I've seen look +# gratuitous, and are probably the result of code modulararity issues +# in Django. +# +# So we try to clean this up, by merging adjacent <ol/> elements where +# we can. The merge incantation is an adaptation of: +# +# http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0 +# +# There may be a more efficient way to do this, but I don't think +# we care, and this seems to work. +# +# Original author's explanation: +# +# The rather convoluted XPath expression for selecting the following +# sibling aaa nodes which are merged with the current one: +# +# following-sibling::aaa[ # following 'aaa' siblings +# not(preceding-sibling::*[ # if they are not preceded by +# not(self::aaa) and # a non-'aaa' node +# not(following-sibling::aaa = current()) # after the current node +# ]) +# ] + +xsl_get_page = lxml.etree.XSLT(lxml.etree.XML('''\ + <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> + + <xsl:output method="xml" encoding="us-ascii" omit-xml-declaration="yes" /> + + <xsl:param name="basename"/> + <xsl:param name="path"/> + + <xsl:template match="/"> + <xsl:message><xsl:value-of select="concat('Got path: ', $path)"/></xsl:message> + <xsl:variable name="id"> + <xsl:call-template name="path-to-id"> + <xsl:with-param name="p" select="$path"/> + </xsl:call-template> + </xsl:variable> + <xsl:message><xsl:value-of select="concat('Got id: ', $id)"/></xsl:message> + <xsl:comment>NEW PAGE</xsl:comment> + <html> + <body> + <div id="{$id}"> + <xsl:apply-templates select="//div[@id = 'wikipage']/*"/> + </div> + </body> + </html> + </xsl:template> + + <xsl:template match="//div[contains(@class, 'wiki-toc')]"/> + + <xsl:template match="//span[@class = 'icon' and not(*)]"/> + + <xsl:template match="a[contains(@class, 'wiki') and + starts-with(@href, '/wiki/')]"> + <xsl:variable name="href"> + <xsl:call-template name="path-to-id"> + <xsl:with-param name="p" select="@href"/> + </xsl:call-template> + </xsl:variable> + <a href="#{$href}"> + <xsl:apply-templates select="@*[name() != 'href']"/> + <xsl:apply-templates/> + </a> + </xsl:template> + + <xsl:template match="a[starts-with(@href, '/attachment/wiki/')]"> + <a href="{concat($basename, @href)}"> + <xsl:apply-templates select="@*[name() != 'href']"/> + <xsl:apply-templates/> + </a> + </xsl:template> + + <xsl:template match="img[starts-with(@src, '/raw-attachment/wiki/')]"> + <img src="{concat($basename, @src)}"> + <xsl:apply-templates select="@*[name() != 'src']"/> + <xsl:apply-templates/> + </img> + </xsl:template> + + <xsl:template match="object[starts-with(@data, '/raw-attachment/wiki/') or + starts-with(@data, '/graphviz/')]"> + <object data="{concat($basename, @data)}"> + <xsl:apply-templates select="@*[name() != 'data']"/> + <xsl:apply-templates/> + </object> + </xsl:template> + + <xsl:template match="embed[starts-with(@src, '/raw-attachment/wiki/') or + starts-with(@src, '/graphviz/')]"> + <embed src="{concat($basename, @src)}"> + <xsl:apply-templates select="@*[name() != 'src']"/> + <xsl:apply-templates/> + </embed> + </xsl:template> + + <xsl:template match="text()[contains(., '​')]"> + <xsl:call-template name="remove-zero-width-spaces"> + <xsl:with-param name="s" select="."/> + </xsl:call-template> + </xsl:template> + + <xsl:template match="@*|node()"> + <xsl:copy> + <xsl:copy-of select="@*"/> + <xsl:apply-templates/> + </xsl:copy> + </xsl:template> + + <xsl:template name="path-to-id"> + <xsl:param name="p"/> + <xsl:text>_</xsl:text> + <xsl:call-template name="replace"> + <xsl:with-param name="s" select="$p"/> + <xsl:with-param name="old">/</xsl:with-param> + <xsl:with-param name="new">.</xsl:with-param> + </xsl:call-template> + </xsl:template> + + <xsl:template name="remove-zero-width-spaces"> + <xsl:param name="s"/> + <xsl:call-template name="replace"> + <xsl:with-param name="s" select="$s"/> + <xsl:with-param name="old">​</xsl:with-param> + <xsl:with-param name="new"/> + </xsl:call-template> + </xsl:template> + + <xsl:template name="replace"> + <xsl:param name="s"/> + <xsl:param name="old"/> + <xsl:param name="new"/> + <xsl:choose> + <xsl:when test="contains($s, $old)"> + <xsl:call-template name="replace"> + <xsl:with-param name="s" select="concat(substring-before($s, $old), + $new, + substring-after($s, $old))"/> + <xsl:with-param name="old" select="$old"/> + <xsl:with-param name="new" select="$new"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$s"/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template match="ol"> + <xsl:if test="not(preceding-sibling::*[1]/self::ol)"> + <xsl:variable name="following" + select="following-sibling::ol[ + not(preceding-sibling::*[ + not(self::ol) and + not(following-sibling::ol = current()) + ]) + ]"/> + <xsl:copy> + <xsl:apply-templates select="$following/@*[name() != 'start']"/> + <xsl:apply-templates select="@*"/> + <xsl:apply-templates select="node()"/> + <xsl:apply-templates select="$following/node()"/> + </xsl:copy> + </xsl:if> + </xsl:template> + + </xsl:transform> +''')) + +# All the files we want to parse are HTML, so make HTML the default +# parser. In theory the HTML produced by Trac is XHTML thus should +# parse correctly (in fact, better) as XML, but in practice this seems +# not to work properly at the moment, while parsing as HTML does. +# Haven't bothered to figure out why, life is too short. +# +# If you're reading this comment because this script stopped working +# after a Trac upgrade, try commenting out this line to see whether +# things have changed and Trac's HTML now parses better as XML. + +lxml.etree.set_default_parser(lxml.etree.HTMLParser()) + +# Run the main program. +main() diff --git a/tools/references/schema.sql b/tools/references/schema.sql new file mode 100644 index 0000000..1515dbb --- /dev/null +++ b/tools/references/schema.sql @@ -0,0 +1,177 @@ +CREATE TABLE system ( + name text PRIMARY KEY, + value text +); +CREATE TABLE permission ( + username text, + action text, + UNIQUE (username,action) +); +CREATE TABLE auth_cookie ( + cookie text, + name text, + ipnr text, + time integer, + UNIQUE (cookie,ipnr,name) +); +CREATE TABLE session ( + sid text, + authenticated integer, + last_visit integer, + UNIQUE (sid,authenticated) +); +CREATE INDEX session_last_visit_idx ON session (last_visit); +CREATE INDEX session_authenticated_idx ON session (authenticated); +CREATE TABLE session_attribute ( + sid text, + authenticated integer, + name text, + value text, + UNIQUE (sid,authenticated,name) +); +CREATE TABLE cache ( + id integer PRIMARY KEY, + generation integer, + key text +); +CREATE TABLE attachment ( + type text, + id text, + filename text, + size integer, + time integer, + description text, + author text, + ipnr text, + UNIQUE (type,id,filename) +); +CREATE TABLE wiki ( + name text, + version integer, + time integer, + author text, + ipnr text, + text text, + comment text, + readonly integer, + UNIQUE (name,version) +); +CREATE INDEX wiki_time_idx ON wiki (time); +CREATE TABLE repository ( + id integer, + name text, + value text, + UNIQUE (id,name) +); +CREATE TABLE revision ( + repos integer, + rev text, + time integer, + author text, + message text, + UNIQUE (repos,rev) +); +CREATE INDEX revision_repos_time_idx ON revision (repos,time); +CREATE TABLE ticket ( + id integer PRIMARY KEY, + type text, + time integer, + changetime integer, + component text, + severity text, + priority text, + owner text, + reporter text, + cc text, + version text, + milestone text, + status text, + resolution text, + summary text, + description text, + keywords text +); +CREATE INDEX ticket_time_idx ON ticket (time); +CREATE INDEX ticket_status_idx ON ticket (status); +CREATE TABLE ticket_change ( + ticket integer, + time integer, + author text, + field text, + oldvalue text, + newvalue text, + UNIQUE (ticket,time,field) +); +CREATE INDEX ticket_change_ticket_idx ON ticket_change (ticket); +CREATE INDEX ticket_change_time_idx ON ticket_change (time); +CREATE TABLE ticket_custom ( + ticket integer, + name text, + value text, + UNIQUE (ticket,name) +); +CREATE TABLE enum ( + type text, + name text, + value text, + UNIQUE (type,name) +); +CREATE TABLE component ( + name text PRIMARY KEY, + owner text, + description text +); +CREATE TABLE milestone ( + name text PRIMARY KEY, + due integer, + completed integer, + description text +); +CREATE TABLE version ( + name text PRIMARY KEY, + time integer, + description text +); +CREATE TABLE report ( + id integer PRIMARY KEY, + author text, + title text, + query text, + description text +); +CREATE TABLE notify_subscription ( + id integer PRIMARY KEY, + time integer, + changetime integer, + class text, + sid text, + authenticated integer, + distributor text, + format text, + priority integer, + adverb text +); +CREATE INDEX notify_subscription_sid_authenticated_idx ON notify_subscription (sid,authenticated); +CREATE INDEX notify_subscription_class_idx ON notify_subscription (class); +CREATE TABLE notify_watch ( + id integer PRIMARY KEY, + sid text, + authenticated integer, + class text, + realm text, + target text +); +CREATE INDEX notify_watch_sid_authenticated_class_idx ON notify_watch (sid,authenticated,class); +CREATE INDEX notify_watch_class_realm_target_idx ON notify_watch (class,realm,target); +CREATE TABLE node_change ( + id integer PRIMARY KEY, + repos integer, + rev text, + path text, + node_type text, + change_type text, + base_path text, + base_rev text +); +CREATE INDEX node_change_repos_rev_path_idx ON node_change (repos,rev,path); +CREATE INDEX node_change_repos_path_rev_idx ON node_change (repos,path,rev); diff --git a/tools/references/trac-wiki-to-markdown.rb b/tools/references/trac-wiki-to-markdown.rb new file mode 100644 index 0000000..f7d41ae --- /dev/null +++ b/tools/references/trac-wiki-to-markdown.rb @@ -0,0 +1,51 @@ +# Untested code snippet from https://gist.github.com/somebox/619537 + +class String + def trac_to_markdown! + gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`') + gsub!(/\{\{\{(.+?)\}\}\}/m){|m| m.each_line.map{|x| "\t#{x}".gsub(/[\{\}]{3}/,'')}.join} + gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1') + gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1') + gsub!(/\=\=\s(.+?)\s\=\=/, '# \1') + gsub!(/\=\s(.+?)\s\=[\s\n]*/, '') + gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)') + gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1') + gsub!(/'''(.+)'''/, '*\1*') + gsub!(/''(.+)''/, '_\1_') + gsub!(/^\s\*/, '*') + gsub!(/^\s\d\./, '1.') + + gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`') + gsub!(/'''(.+?)'''/, '**\1**') + gsub!(/''(.+?)''/, '*\1*') + gsub!(/((^\|\|[^\n\r]+\|\|[ \t]*\r?(\n|$))+)/m) do |m| + m = m.each_line.map do |x| + x.gsub(/\t/, ' ') + .gsub(/(\|\|){2,}/){|k| k.gsub(/\|\|/, '|| ')} + .gsub(/ {3,}/, ' ') + end.join + lines = m.each_line.to_a + line1 = lines.shift + line2 = line1.dup.gsub(/[^\n\r\|]/, '-') + lines.unshift(line1, line2) + c = lines.join + c = c.each_line.map do |x| + x.gsub(/\=\s?(.+?)\s?=/, ' \1 ') + .gsub(/\|\|/, '|') + end.join + end + gsub!(/^\{\{\{(.+?)^\}\}\}/m, '```\1```') + gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1') + gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1') + gsub!(/\=\=\s(.+?)\s\=\=/, '# \1') + gsub!(/\=\s(.+?)\s\=[\s\n]*/, '') + gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)') + gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1') + gsub!(/^\s\*/, '*') + gsub!(/^\s\d\./, '1.') + end +end + +some_trac = 'my document' + +puts some_trac.trac_to_markdown! diff --git a/tools/references/trac2down.py b/tools/references/trac2down.py new file mode 100644 index 0000000..c66a201 --- /dev/null +++ b/tools/references/trac2down.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python2 + +# Untested code from https://gist.githubusercontent.com/sgk/1286682/raw/b744dd2e47a68d60373ad39df87cfe8256f517af/trac2down.py + +# vim:set fileencoding=utf-8 sw=2 ai: + +import sqlite3 +import datetime +import re + +SQL = ''' + select + name, version, time, author, text + from + wiki w + where + version = (select max(version) from wiki where name = w.name) +''' + +conn = sqlite3.connect('../trac.db') +result = conn.execute(SQL) +for row in result: + name = row[0] + version = row[1] + time = row[2] + author = row[3] + text = row[4] + + text = re.sub('\r\n', '\n', text) + text = re.sub(r'{{{(.*?)}}}', r'`\1`', text) + def indent4(m): + return '\n ' + m.group(1).replace('\n', '\n ') + text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text) + text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text) + text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text) + text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text) + text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text) + text = re.sub(r'^ * ', r'****', text) + text = re.sub(r'^ * ', r'***', text) + text = re.sub(r'^ * ', r'**', text) + text = re.sub(r'^ * ', r'*', text) + text = re.sub(r'^ \d+. ', r'1.', text) + + a = [] + for line in text.split('\n'): + if not line.startswith(' '): + line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line) + line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](/\1/)', line) + line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line) + line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line) + line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line) + a.append(line) + text = '\n'.join(a) + + fp = file('%s.md' % name, 'w') + print >>fp, '<!-- Name: %s -->' % name + print >>fp, '<!-- Version: %d -->' % version + print >>fp, '<!-- Last-Modified: %s -->' % datetime.datetime.fromtimestamp(time).strftime('%Y/%m/%d %H:%M:%S') + print >>fp, '<!-- Author: %s -->' % author + fp.write(text.encode('utf-8')) + fp.close() |