From 68e18ad1f44e9a6fab66adc38e97d027a58de8a4 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sun, 14 Feb 2021 16:56:57 +0000 Subject: Another reorg, and pelican samples --- .gitignore | 6 +- GNUmakefile | 2 +- extract.py | 105 +++++++ references/convert-and-slurp-attachments.sh | 18 ++ references/extract-wiki-content.xsl | 177 +++++++++++ references/generate-json.py | 154 ++++++++++ references/pelicanconf.py | 42 +++ references/publishconf.py | 19 ++ references/rpki-wiki-to-markdown.py | 341 ++++++++++++++++++++++ references/schema.sql | 177 +++++++++++ references/trac-wiki-to-markdown.rb | 51 ++++ references/trac2down.py | 61 ++++ tools/extract.py | 105 ------- tools/references/convert-and-slurp-attachments.sh | 18 -- tools/references/extract-wiki-content.xsl | 177 ----------- tools/references/generate-json.py | 154 ---------- tools/references/rpki-wiki-to-markdown.py | 341 ---------------------- tools/references/schema.sql | 177 ----------- tools/references/trac-wiki-to-markdown.rb | 51 ---- tools/references/trac2down.py | 61 ---- tools/trac2md.py | 234 --------------- trac2md.py | 234 +++++++++++++++ 22 files changed, 1383 insertions(+), 1322 deletions(-) create mode 100755 extract.py create mode 100755 references/convert-and-slurp-attachments.sh create mode 100644 references/extract-wiki-content.xsl create mode 100755 references/generate-json.py create mode 100644 references/pelicanconf.py create mode 100644 references/publishconf.py create mode 100644 references/rpki-wiki-to-markdown.py create mode 100644 references/schema.sql create mode 100644 references/trac-wiki-to-markdown.rb create mode 100644 references/trac2down.py delete mode 100755 tools/extract.py delete mode 100755 tools/references/convert-and-slurp-attachments.sh delete mode 100644 tools/references/extract-wiki-content.xsl delete mode 100755 tools/references/generate-json.py delete mode 100644 tools/references/rpki-wiki-to-markdown.py delete mode 100644 tools/references/schema.sql delete mode 100644 tools/references/trac-wiki-to-markdown.rb delete mode 100644 tools/references/trac2down.py delete mode 100755 tools/trac2md.py create mode 100755 trac2md.py diff --git a/.gitignore b/.gitignore index fdd24ca..02eff3e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ +#pelican +#wiki TAGS +__pycache__ attachments trac.db -tools/__pycache__ -#pelican -#wiki diff --git a/GNUmakefile b/GNUmakefile index ade39b2..2136f0b 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -1,5 +1,5 @@ all: - tools/extract.py + ./extract.py fetch: rsync -aP --delete bikeshed.cryptech.is:/home/trac/db/trac.db bikeshed.cryptech.is:/home/trac/files/attachments . 
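(The two targets above are meant to be run in sequence: "make fetch" pulls a current snapshot of trac.db and the attachments tree down from bikeshed.cryptech.is, and a bare "make" then runs ./extract.py, now at the top level, against that local snapshot.)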
diff --git a/extract.py b/extract.py new file mode 100755 index 0000000..7f76fe9 --- /dev/null +++ b/extract.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +import fnmatch +import hashlib +import json +import os +import shutil +import sqlite3 +import sys +import time +import urllib.parse + +import trac2md + +wiki_query = ''' + SELECT + name, + version, + time / 1000000 AS time, + text + FROM wiki + ORDER BY + name, version +''' + +attachment_query = ''' + SELECT + id, + filename, + size, + author, + description, + ipnr, + time / 1000000 AS createdtime + FROM + attachment + WHERE + type = 'wiki' + ORDER BY + filename, time +''' + +def isotime(t): + return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) + +def attachment_link(row): + h = lambda whatever: hashlib.sha1(whatever.encode()).hexdigest() + h1 = h(row.id) + h2 = h(row.filename) + fn2 = os.path.splitext(row["filename"])[1] + return \ + os.path.join("attachments", "wiki", h1[:3], h1, h2 + fn2), \ + os.path.join(urllib.parse.quote(row.id, ""), urllib.parse.quote(row.filename, "")) + +class Filter: + + def __init__(self, filename = "filter.json"): + with open(filename) as f: + filter = json.load(f) + if not all(action in "-+" for action, pattern in filter): + sys.exit("Bad action \"{}\" in filter".format(action)) + self.filter = tuple((action == "+", pattern) for action, pattern in filter) + + def __call__(self, name): + for action, pattern in self.filter: + if fnmatch.fnmatch(name, pattern): + return action + return True + +class Row(sqlite3.Row): + def __getattr__(self, name): + return self[name] + +def main(): + + for dn in ("wiki", "pelican"): + shutil.rmtree(dn) + + for dn in ("wiki", "pelican/content/images", "pelican/content/pages"): + os.makedirs(dn) + + #os.link("pelican.conf", "pelican/pelican.conf") + + keep = Filter() + + db = sqlite3.connect("trac.db") + db.row_factory = Row + + for row in db.execute(wiki_query): + if keep(row.name): + slug = urllib.parse.quote(row.name, "") + print(slug, row.version) + with open("wiki/{}.trac".format(slug), "w") as f: + f.write(row.text) + md = trac2md.WikiToMD(row.text) + with open("pelican/content/{}.md".format(slug), "w") as f: + f.write(md) + + for row in db.execute(attachment_query): + print("{} => {}".format(*attachment_link(row))) + + db.close() + +if __name__ == "__main__": + main() diff --git a/references/convert-and-slurp-attachments.sh b/references/convert-and-slurp-attachments.sh new file mode 100755 index 0000000..ce7f34d --- /dev/null +++ b/references/convert-and-slurp-attachments.sh @@ -0,0 +1,18 @@ +#!/bin/sh - + +ls | fgrep -v . | +while read page +do + base="https://trac.rpki.net" + path="/wiki/$(echo $page | sed s=%2F=/=g)" + + # Fetch the Wiki page, extract the useful portion of the HTML, convert that into Markdown + curl "${base}${path}" | + xsltproc --html extract-wiki-content.xsl - | + html2markdown --no-skip-internal-links --reference-links >"$page.md" + + # Fetch a ZIP file containing any attachments, clean up if result is empty or broken + curl "${base}/zip-attachment${path}/" >"$page.zip" + zipinfo "$page.zip" >/dev/null 2>&1 || rm -f "$page.zip" + +done diff --git a/references/extract-wiki-content.xsl b/references/extract-wiki-content.xsl new file mode 100644 index 0000000..e4376e8 --- /dev/null +++ b/references/extract-wiki-content.xsl @@ -0,0 +1,177 @@ + + + + + + + + + + + + + + + + + + NEW PAGE + + +
+ [remainder of this 177-line XSLT stylesheet lost in extraction: the XML element markup was stripped, leaving only literal text nodes ("NEW PAGE", "_", "/", ".")]
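A note on extract.py above, since the format of the filter.json it loads is implied by the Filter class rather than documented anywhere: the file is a JSON list of [action, pattern] pairs, where action is "+" (keep) or "-" (drop), pattern is an fnmatch-style glob matched against the wiki page name, the first matching pair wins, and a name that matches no pattern is kept. A minimal sketch, with hypothetical patterns:

    [
        ["-", "Trac*"],
        ["-", "Sandbox*"],
        ["+", "*"]
    ]

The final ["+", "*"] entry is redundant given the keep-by-default fallback, but it makes the intent explicit.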
+ diff --git a/references/generate-json.py b/references/generate-json.py new file mode 100755 index 0000000..b8b1f38 --- /dev/null +++ b/references/generate-json.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +# Generate JSON to import Trac tickets into GitHub issues using the new import API +# described at https://gist.github.com/jonmagic/5282384165e0f86ef105 + +import os +import time +import json +import yaml +import sqlite3 +import hashlib +import argparse +import subprocess + +ticket_query = ''' +SELECT + id, + type, + owner, + reporter, + milestone, + status, + resolution, + summary, + description, + component, + priority, + time / 1000000 AS createdtime, + changetime / 1000000 AS modifiedtime +FROM + ticket +ORDER BY + id +''' + +comment_query = ''' +SELECT + time / 1000000 AS createdtime, + author, + newvalue +FROM + ticket_change +WHERE + ticket = ? +AND + field = 'comment' +AND + newvalue <> '' +ORDER BY + time +''' + +attachment_query = ''' +SELECT + id, + filename, + size, + author, + description, + ipnr, + time / 1000000 AS createdtime +FROM + attachment +WHERE + id = ? +AND + type = 'ticket' +ORDER BY + time, filename +''' + +def isotime(t): + return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) + +def hashname(whatever): + return hashlib.sha1(unicode(whatever)).hexdigest() + +def ticket_text(ticket): + d = dict(ticket, createdtime = isotime(ticket["createdtime"]), modifiedtime = isotime(ticket["modifiedtime"])) + return u"{description}\n\n" \ + u"_Trac ticket #{id} component {component} priority {priority}, owner {owner}," \ + u" created by {reporter} on {createdtime}, last modified {modifiedtime}_\n".format(**d) + +def comment_text(comment): + d = dict(comment, createdtime = isotime(comment["createdtime"])) + return u"{newvalue}\n\n_Trac comment by {author} on {createdtime}_\n".format(**d) + +def attachment_text(attachment): + h1 = hashname(attachment["id"]) + h2 = hashname(attachment["filename"]) + fn2 = os.path.splitext(attachment["filename"])[1] + fn = os.path.join(gist_url, h1[:3], h1, h2 + fn2) + url = "{}/raw/{}/ticket.{}.{}{}".format(gist_url.rstrip("/"), gist_commit, h1, h2, fn2) + d = dict(attachment, createdtime = isotime(comment["createdtime"]), url = url) + return u"[{filename}]({url}) {description}\n_Trac attachment by {author} on {createdtime}_\n".format(**d) + +def comment_merge(comments, attachments): + result = [] + while comments and attachments: + result.append(comments.pop(0) if comments[0]["created_at"] <= attachments[0]["created_at"] else attachments.pop(0)) + return result + comments + attachments + +parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-c", "--config", type = argparse.FileType(), + default = "generate-json.yaml", + help = "YAML config mappings") +args = parser.parse_args() + +cfg = yaml.safe_load(args.config) +assignee_map = cfg["assignees"] +type_map = cfg["type_labels"] +resolution_map = cfg["resolution_labels"] + +gist_url = cfg.get("attachment_gist_url") +if gist_url is not None: + gist_commit = subprocess.check_output(("git", "ls-remote", gist_url, "HEAD")).split()[0] + +db = sqlite3.connect(cfg["database"]) +db.row_factory = sqlite3.Row +ticket_cursor = db.cursor() +comment_cursor = db.cursor() +attachment_cursor = db.cursor() + +if not os.path.isdir(cfg["ticket_directory"]): + os.makedirs(cfg["ticket_directory"]) + +for ticket in ticket_cursor.execute(ticket_query): + comments = comment_merge([dict(created_at = isotime(comment["createdtime"]), 
body = comment_text(comment)) + for comment in comment_cursor.execute(comment_query, (ticket["id"],))], + [] if gist_url is None else + [dict(created_at = isotime(attachment["createdtime"]), body = attachment_text(attachment)) + for attachment in attachment_cursor.execute(attachment_query, (ticket["id"],))]) + issue = dict( + title = ticket["summary"], + body = ticket_text(ticket), + created_at = isotime(ticket["createdtime"]), + updated_at = isotime(ticket["modifiedtime"])) + if ticket["status"] == "closed": + issue["closed"] = True + issue["closed_at"] = isotime(ticket["modifiedtime"]) + comments.append(dict(created_at = isotime(ticket["modifiedtime"]), + body = "_Closed with resolution {resolution}_\n".format(**ticket))) + if ticket["owner"] in assignee_map: + issue["assignee"] = assignee_map[ticket["owner"]] + labels = [type_map.get(ticket["type"]), resolution_map.get(ticket["resolution"])] + while None in labels: + del labels[labels.index(None)] + if labels: + issue["labels"] = labels + issue = dict(issue = issue) + if comments: + issue["comments"] = comments + with open(os.path.join(cfg["ticket_directory"], "ticket_{:03d}.json".format(ticket["id"])), "wb") as f: + json.dump(issue, f, indent = 4, sort_keys = True, separators=(",", ": ")) diff --git a/references/pelicanconf.py b/references/pelicanconf.py new file mode 100644 index 0000000..a28721d --- /dev/null +++ b/references/pelicanconf.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- # +from __future__ import unicode_literals + +AUTHOR = u'Rob Austein' +SITENAME = u'Your Bug Report Will Be Graded' + +# Apparently this is much longer than theme designer expected. Skip it for now. +SITESUBTITLE = u'"I\'m not proud of being a congenital pain in the ass. But I will take money for it."' + +PATH = 'content' +TIMEZONE = 'UTC' +DEFAULT_LANG = u'English' + +# Hack article URLs to match what Blogofile did, to avoid breaking links. + +ARTICLE_URL = '{date:%Y}/{date:%m}/{date:%d}/{slug}/' +ARTICLE_SAVE_AS = '{date:%Y}/{date:%m}/{date:%d}/{slug}/index.html' + +# Feed generation is usually not desired when developing +SITEURL = '' +RELATIVE_URLS = True +FEED_ALL_ATOM = None +CATEGORY_FEED_ATOM = None +TRANSLATION_FEED_ATOM = None +AUTHOR_FEED_ATOM = None +AUTHOR_FEED_RSS = None + +# Blogroll +LINKS = (('Pelican', 'http://getpelican.com/'), + ('Python.org', 'http://python.org/'), + ('Jinja2', 'http://jinja.pocoo.org/')) +LINKS_WIDGET_NAME = "Links" + +# Social widget. Can't get rid of this with default theme, only change its name. +# Fiddle with themes later +SOCIAL = () +SOCIAL_WIDGET_NAME = "Subscribe" + +DEFAULT_PAGINATION = 10 + +THEME = "/home/blog/pelican-themes/sundown" diff --git a/references/publishconf.py b/references/publishconf.py new file mode 100644 index 0000000..f0fb21d --- /dev/null +++ b/references/publishconf.py @@ -0,0 +1,19 @@ +#!/usr/local/bin/python2.7 +# -*- coding: utf-8 -*- # +from __future__ import unicode_literals + +# This file is only used if you use `make publish` or +# explicitly specify it as your config file. 
+ +import os +import sys +sys.path.append(os.curdir) +from pelicanconf import * + +SITEURL = 'https://www.hactrn.net/blog' +RELATIVE_URLS = False + +FEED_ALL_ATOM = 'feeds/all.atom.xml' +CATEGORY_FEED_ATOM = 'feeds/{slug}.atom.xml' + +DELETE_OUTPUT_DIRECTORY = True diff --git a/references/rpki-wiki-to-markdown.py b/references/rpki-wiki-to-markdown.py new file mode 100644 index 0000000..dff87e6 --- /dev/null +++ b/references/rpki-wiki-to-markdown.py @@ -0,0 +1,341 @@ +# Copyright (C) 2016 Parsons Government Services ("PARSONS") +# Portions copyright (C) 2014 Dragon Research Labs ("DRL") +# Portions copyright (C) 2012 Internet Systems Consortium ("ISC") +# +# Permission to use, copy, modify, and distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notices and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND PARSONS, DRL, AND ISC DISCLAIM +# ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +# PARSONS, DRL, OR ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR +# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION +# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +Trac Wiki -> Markdown converter, hacked from old Trac Wiki -> PDF/flat +text converter. + +Pull HTML pages from a Trac Wiki, feed the useful bits to +html2text to generate Markdown. + +Assumes you're using the TracNav plugin for the Wiki pages, and uses +the same list as the TracNav plugin does to determine the set of pages +to convert. +""" + +# Dependencies, at least on Ubuntu Xenial: +# +# apt-get install python-lxml python-html2text +# +# Be warned that there are many unrelated packages named "html2text", +# installed under various names on various platforms. This one +# happens to be a useful HTML-to-Markdown converter. + +# Most of the work of massaging the HTML is done using XSL transforms, +# because the template-driven style makes that easy. There's probably +# some clever way to use lxml's XPath code to do the same thing in a +# more pythonic way with ElementTrees, but I already had the XSL +# transforms and there's a point of diminishing returns on this sort of +# thing. 
+ +import sys +import os +import argparse +import lxml.etree +import urllib +import urlparse +import subprocess +import zipfile + +# Main program, up front so it doesn't get lost under all the XSL + +def main(): + + base = "https://trac.rpki.net" + + parser = argparse.ArgumentParser(description = __doc__, formatter_class = argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-b", "--base_url", + default = base, + help = "base URL for documentation web site") + parser.add_argument("-t", "--toc", + default = base + "/wiki/doc/RPKI/TOC", + help = "table of contents URL") + parser.add_argument("-d", "--directory", + default = ".", + help = "output directory") + parser.add_argument("-p", "--prefix", + default = "/wiki/doc", + help = "page name prefix on wiki") + args = parser.parse_args() + + urls = str(xsl_get_toc(lxml.etree.parse(urllib.urlopen(args.toc)).getroot(), + basename = repr(args.base_url))).splitlines() + + assert all(urlparse.urlparse(url).path.startswith(args.prefix) for url in urls) + + for pagenum, url in enumerate(urls): + path = urlparse.urlparse(url).path + page = xsl_get_page(lxml.etree.parse(urllib.urlopen(url)).getroot(), + basename = repr(args.base_url), + path = repr(path)) + + fn_base = os.path.join(args.directory, "{:02d}{}".format(pagenum, path[len(args.prefix):].replace("/", "."))) + + fn = fn_base + ".zip" + zip_url = urlparse.urljoin(url, "/zip-attachment{}/".format(path)) + urllib.urlretrieve(zip_url, fn) + with zipfile.ZipFile(fn, "r") as z: + if len(z.namelist()) == 0: + os.unlink(fn) + else: + sys.stderr.write("Wrote {}\n".format(fn)) + + for imgnum, img in enumerate(page.xpath("//img | //object | //embed")): + img_url = img.get("data" if img.tag == "object" else "src") + img_url = urlparse.urljoin(url, img_url) + fn = "{}.{:02d}{}".format(fn_base, imgnum, os.path.splitext(img_url)[1]) + urllib.urlretrieve(img_url, fn) + sys.stderr.write("Wrote {}\n".format(fn)) + + html2markdown = subprocess.Popen(("html2markdown", "--no-skip-internal-links", "--reference-links"), + stdin = subprocess.PIPE, stdout = subprocess.PIPE) + page.write(html2markdown.stdin) + html2markdown.stdin.close() + lines = html2markdown.stdout.readlines() + html2markdown.stdout.close() + html2markdown.wait() + + while lines and lines[0].isspace(): + del lines[0] + + fn = fn_base + ".md" + with open(fn, "w") as f: + want_blank = False + for line in lines: + blank = line.isspace() + if want_blank and not blank: + f.write("\n") + if not blank: + f.write(line) + want_blank = blank + sys.stderr.write("Wrote {}\n".format(fn)) + + fn = fn[:-3] + ".wiki" + urllib.urlretrieve(url + "?format=txt", fn) + sys.stderr.write("Wrote {}\n".format(fn)) + + +# XSL transform to extract list of Wiki page URLs from the TOC Wiki page + +xsl_get_toc = lxml.etree.XSLT(lxml.etree.XML('''\ + + + + + + + + + + + + + +''')) + +# XSL transform to extract useful content of a Wiki page. + +# Django generates weird HTML for ordered lists: it sometimes breaks +# up a single ordered list into multiple adjacent
<ol> elements,
+# using the @start attribute to try to make the result look like a
+# single ordered list. This looks OK in Firefox but confuses the
+# bejesus out of both html2markdown and htmldoc. In some cases this is
+# probably unavoidable, but most of the uses of this I've seen look
+# gratuitous, and are probably the result of code modularity issues
+# in Django.
+#
+# So we try to clean this up, by merging adjacent
<ol> elements where
+# we can. The merge incantation is an adaptation of:
+#
+# http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0
+#
+# There may be a more efficient way to do this, but I don't think
+# we care, and this seems to work.
+#
+# Original author's explanation:
+#
+# The rather convoluted XPath expression for selecting the following
+# sibling aaa nodes which are merged with the current one:
+#
+# following-sibling::aaa[                       # following 'aaa' siblings
+#   not(preceding-sibling::*[                   # if they are not preceded by
+#     not(self::aaa) and                        # a non-'aaa' node
+#     not(following-sibling::aaa = current())   # after the current node
+#   ])
+# ]
+
+xsl_get_page = lxml.etree.XSLT(lxml.etree.XML('''\
+ [XSLT transform body lost in extraction: the XML markup was stripped, leaving only literal text nodes ("NEW PAGE", "_", "/", ".")]
      +''')) + +# All the files we want to parse are HTML, so make HTML the default +# parser. In theory the HTML produced by Trac is XHTML thus should +# parse correctly (in fact, better) as XML, but in practice this seems +# not to work properly at the moment, while parsing as HTML does. +# Haven't bothered to figure out why, life is too short. +# +# If you're reading this comment because this script stopped working +# after a Trac upgrade, try commenting out this line to see whether +# things have changed and Trac's HTML now parses better as XML. + +lxml.etree.set_default_parser(lxml.etree.HTMLParser()) + +# Run the main program. +main() diff --git a/references/schema.sql b/references/schema.sql new file mode 100644 index 0000000..1515dbb --- /dev/null +++ b/references/schema.sql @@ -0,0 +1,177 @@ +CREATE TABLE system ( + name text PRIMARY KEY, + value text +); +CREATE TABLE permission ( + username text, + action text, + UNIQUE (username,action) +); +CREATE TABLE auth_cookie ( + cookie text, + name text, + ipnr text, + time integer, + UNIQUE (cookie,ipnr,name) +); +CREATE TABLE session ( + sid text, + authenticated integer, + last_visit integer, + UNIQUE (sid,authenticated) +); +CREATE INDEX session_last_visit_idx ON session (last_visit); +CREATE INDEX session_authenticated_idx ON session (authenticated); +CREATE TABLE session_attribute ( + sid text, + authenticated integer, + name text, + value text, + UNIQUE (sid,authenticated,name) +); +CREATE TABLE cache ( + id integer PRIMARY KEY, + generation integer, + key text +); +CREATE TABLE attachment ( + type text, + id text, + filename text, + size integer, + time integer, + description text, + author text, + ipnr text, + UNIQUE (type,id,filename) +); +CREATE TABLE wiki ( + name text, + version integer, + time integer, + author text, + ipnr text, + text text, + comment text, + readonly integer, + UNIQUE (name,version) +); +CREATE INDEX wiki_time_idx ON wiki (time); +CREATE TABLE repository ( + id integer, + name text, + value text, + UNIQUE (id,name) +); +CREATE TABLE revision ( + repos integer, + rev text, + time integer, + author text, + message text, + UNIQUE (repos,rev) +); +CREATE INDEX revision_repos_time_idx ON revision (repos,time); +CREATE TABLE ticket ( + id integer PRIMARY KEY, + type text, + time integer, + changetime integer, + component text, + severity text, + priority text, + owner text, + reporter text, + cc text, + version text, + milestone text, + status text, + resolution text, + summary text, + description text, + keywords text +); +CREATE INDEX ticket_time_idx ON ticket (time); +CREATE INDEX ticket_status_idx ON ticket (status); +CREATE TABLE ticket_change ( + ticket integer, + time integer, + author text, + field text, + oldvalue text, + newvalue text, + UNIQUE (ticket,time,field) +); +CREATE INDEX ticket_change_ticket_idx ON ticket_change (ticket); +CREATE INDEX ticket_change_time_idx ON ticket_change (time); +CREATE TABLE ticket_custom ( + ticket integer, + name text, + value text, + UNIQUE (ticket,name) +); +CREATE TABLE enum ( + type text, + name text, + value text, + UNIQUE (type,name) +); +CREATE TABLE component ( + name text PRIMARY KEY, + owner text, + description text +); +CREATE TABLE milestone ( + name text PRIMARY KEY, + due integer, + completed integer, + description text +); +CREATE TABLE version ( + name text PRIMARY KEY, + time integer, + description text +); +CREATE TABLE report ( + id integer PRIMARY KEY, + author text, + title text, + query text, + description text +); +CREATE TABLE 
notify_subscription ( + id integer PRIMARY KEY, + time integer, + changetime integer, + class text, + sid text, + authenticated integer, + distributor text, + format text, + priority integer, + adverb text +); +CREATE INDEX notify_subscription_sid_authenticated_idx ON notify_subscription (sid,authenticated); +CREATE INDEX notify_subscription_class_idx ON notify_subscription (class); +CREATE TABLE notify_watch ( + id integer PRIMARY KEY, + sid text, + authenticated integer, + class text, + realm text, + target text +); +CREATE INDEX notify_watch_sid_authenticated_class_idx ON notify_watch (sid,authenticated,class); +CREATE INDEX notify_watch_class_realm_target_idx ON notify_watch (class,realm,target); +CREATE TABLE node_change ( + id integer PRIMARY KEY, + repos integer, + rev text, + path text, + node_type text, + change_type text, + base_path text, + base_rev text +); +CREATE INDEX node_change_repos_rev_path_idx ON node_change (repos,rev,path); +CREATE INDEX node_change_repos_path_rev_idx ON node_change (repos,path,rev); diff --git a/references/trac-wiki-to-markdown.rb b/references/trac-wiki-to-markdown.rb new file mode 100644 index 0000000..f7d41ae --- /dev/null +++ b/references/trac-wiki-to-markdown.rb @@ -0,0 +1,51 @@ +# Untested code snippet from https://gist.github.com/somebox/619537 + +class String + def trac_to_markdown! + gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`') + gsub!(/\{\{\{(.+?)\}\}\}/m){|m| m.each_line.map{|x| "\t#{x}".gsub(/[\{\}]{3}/,'')}.join} + gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1') + gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1') + gsub!(/\=\=\s(.+?)\s\=\=/, '# \1') + gsub!(/\=\s(.+?)\s\=[\s\n]*/, '') + gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)') + gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1') + gsub!(/'''(.+)'''/, '*\1*') + gsub!(/''(.+)''/, '_\1_') + gsub!(/^\s\*/, '*') + gsub!(/^\s\d\./, '1.') + + gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`') + gsub!(/'''(.+?)'''/, '**\1**') + gsub!(/''(.+?)''/, '*\1*') + gsub!(/((^\|\|[^\n\r]+\|\|[ \t]*\r?(\n|$))+)/m) do |m| + m = m.each_line.map do |x| + x.gsub(/\t/, ' ') + .gsub(/(\|\|){2,}/){|k| k.gsub(/\|\|/, '|| ')} + .gsub(/ {3,}/, ' ') + end.join + lines = m.each_line.to_a + line1 = lines.shift + line2 = line1.dup.gsub(/[^\n\r\|]/, '-') + lines.unshift(line1, line2) + c = lines.join + c = c.each_line.map do |x| + x.gsub(/\=\s?(.+?)\s?=/, ' \1 ') + .gsub(/\|\|/, '|') + end.join + end + gsub!(/^\{\{\{(.+?)^\}\}\}/m, '```\1```') + gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1') + gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1') + gsub!(/\=\=\s(.+?)\s\=\=/, '# \1') + gsub!(/\=\s(.+?)\s\=[\s\n]*/, '') + gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)') + gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1') + gsub!(/^\s\*/, '*') + gsub!(/^\s\d\./, '1.') + end +end + +some_trac = 'my document' + +puts some_trac.trac_to_markdown! 
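Read as a specification rather than working code, the gsub! calls above encode the intended Trac-to-Markdown mapping: {{{...}}} becomes backticked inline code or an indented block, == Heading == becomes # Heading with deeper = nesting giving deeper #, '''text''' becomes bold and ''text'' italic, [http://url label] becomes [label](http://url), !CamelCase loses its escaping !, and ||-delimited tables become |-delimited ones with the delimiter row Markdown requires. Note that the two passes overlap: the first pass's ''' rule emits single asterisks before the second pass's ** rule can match, one more reason to treat this as the untested snippet it says it is.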
diff --git a/references/trac2down.py b/references/trac2down.py new file mode 100644 index 0000000..c66a201 --- /dev/null +++ b/references/trac2down.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python2 + +# Untested code from https://gist.githubusercontent.com/sgk/1286682/raw/b744dd2e47a68d60373ad39df87cfe8256f517af/trac2down.py + +# vim:set fileencoding=utf-8 sw=2 ai: + +import sqlite3 +import datetime +import re + +SQL = ''' + select + name, version, time, author, text + from + wiki w + where + version = (select max(version) from wiki where name = w.name) +''' + +conn = sqlite3.connect('../trac.db') +result = conn.execute(SQL) +for row in result: + name = row[0] + version = row[1] + time = row[2] + author = row[3] + text = row[4] + + text = re.sub('\r\n', '\n', text) + text = re.sub(r'{{{(.*?)}}}', r'`\1`', text) + def indent4(m): + return '\n ' + m.group(1).replace('\n', '\n ') + text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text) + text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text) + text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text) + text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text) + text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text) + text = re.sub(r'^ * ', r'****', text) + text = re.sub(r'^ * ', r'***', text) + text = re.sub(r'^ * ', r'**', text) + text = re.sub(r'^ * ', r'*', text) + text = re.sub(r'^ \d+. ', r'1.', text) + + a = [] + for line in text.split('\n'): + if not line.startswith(' '): + line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line) + line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](/\1/)', line) + line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line) + line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line) + line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line) + a.append(line) + text = '\n'.join(a) + + fp = file('%s.md' % name, 'w') + print >>fp, '' % name + print >>fp, '' % version + print >>fp, '' % datetime.datetime.fromtimestamp(time).strftime('%Y/%m/%d %H:%M:%S') + print >>fp, '' % author + fp.write(text.encode('utf-8')) + fp.close() diff --git a/tools/extract.py b/tools/extract.py deleted file mode 100755 index 7f76fe9..0000000 --- a/tools/extract.py +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env python3 - -import fnmatch -import hashlib -import json -import os -import shutil -import sqlite3 -import sys -import time -import urllib.parse - -import trac2md - -wiki_query = ''' - SELECT - name, - version, - time / 1000000 AS time, - text - FROM wiki - ORDER BY - name, version -''' - -attachment_query = ''' - SELECT - id, - filename, - size, - author, - description, - ipnr, - time / 1000000 AS createdtime - FROM - attachment - WHERE - type = 'wiki' - ORDER BY - filename, time -''' - -def isotime(t): - return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) - -def attachment_link(row): - h = lambda whatever: hashlib.sha1(whatever.encode()).hexdigest() - h1 = h(row.id) - h2 = h(row.filename) - fn2 = os.path.splitext(row["filename"])[1] - return \ - os.path.join("attachments", "wiki", h1[:3], h1, h2 + fn2), \ - os.path.join(urllib.parse.quote(row.id, ""), urllib.parse.quote(row.filename, "")) - -class Filter: - - def __init__(self, filename = "filter.json"): - with open(filename) as f: - filter = json.load(f) - if not all(action in "-+" for action, pattern in filter): - sys.exit("Bad action \"{}\" in filter".format(action)) - self.filter = tuple((action == "+", pattern) for action, pattern in filter) - - def __call__(self, name): - for action, pattern in self.filter: - if fnmatch.fnmatch(name, 
pattern): - return action - return True - -class Row(sqlite3.Row): - def __getattr__(self, name): - return self[name] - -def main(): - - for dn in ("wiki", "pelican"): - shutil.rmtree(dn) - - for dn in ("wiki", "pelican/content/images", "pelican/content/pages"): - os.makedirs(dn) - - #os.link("pelican.conf", "pelican/pelican.conf") - - keep = Filter() - - db = sqlite3.connect("trac.db") - db.row_factory = Row - - for row in db.execute(wiki_query): - if keep(row.name): - slug = urllib.parse.quote(row.name, "") - print(slug, row.version) - with open("wiki/{}.trac".format(slug), "w") as f: - f.write(row.text) - md = trac2md.WikiToMD(row.text) - with open("pelican/content/{}.md".format(slug), "w") as f: - f.write(md) - - for row in db.execute(attachment_query): - print("{} => {}".format(*attachment_link(row))) - - db.close() - -if __name__ == "__main__": - main() diff --git a/tools/references/convert-and-slurp-attachments.sh b/tools/references/convert-and-slurp-attachments.sh deleted file mode 100755 index ce7f34d..0000000 --- a/tools/references/convert-and-slurp-attachments.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - - -ls | fgrep -v . | -while read page -do - base="https://trac.rpki.net" - path="/wiki/$(echo $page | sed s=%2F=/=g)" - - # Fetch the Wiki page, extract the useful portion of the HTML, convert that into Markdown - curl "${base}${path}" | - xsltproc --html extract-wiki-content.xsl - | - html2markdown --no-skip-internal-links --reference-links >"$page.md" - - # Fetch a ZIP file containing any attachments, clean up if result is empty or broken - curl "${base}/zip-attachment${path}/" >"$page.zip" - zipinfo "$page.zip" >/dev/null 2>&1 || rm -f "$page.zip" - -done diff --git a/tools/references/extract-wiki-content.xsl b/tools/references/extract-wiki-content.xsl deleted file mode 100644 index e4376e8..0000000 --- a/tools/references/extract-wiki-content.xsl +++ /dev/null @@ -1,177 +0,0 @@ - - - - - - - - - - - - - - - - - - NEW PAGE - - -
- [remainder of this 177-line XSLT stylesheet lost in extraction: the XML element markup was stripped, leaving only literal text nodes ("NEW PAGE", "_", "/", ".")]
      - diff --git a/tools/references/generate-json.py b/tools/references/generate-json.py deleted file mode 100755 index b8b1f38..0000000 --- a/tools/references/generate-json.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python - -# Generate JSON to import Trac tickets into GitHub issues using the new import API -# described at https://gist.github.com/jonmagic/5282384165e0f86ef105 - -import os -import time -import json -import yaml -import sqlite3 -import hashlib -import argparse -import subprocess - -ticket_query = ''' -SELECT - id, - type, - owner, - reporter, - milestone, - status, - resolution, - summary, - description, - component, - priority, - time / 1000000 AS createdtime, - changetime / 1000000 AS modifiedtime -FROM - ticket -ORDER BY - id -''' - -comment_query = ''' -SELECT - time / 1000000 AS createdtime, - author, - newvalue -FROM - ticket_change -WHERE - ticket = ? -AND - field = 'comment' -AND - newvalue <> '' -ORDER BY - time -''' - -attachment_query = ''' -SELECT - id, - filename, - size, - author, - description, - ipnr, - time / 1000000 AS createdtime -FROM - attachment -WHERE - id = ? -AND - type = 'ticket' -ORDER BY - time, filename -''' - -def isotime(t): - return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) - -def hashname(whatever): - return hashlib.sha1(unicode(whatever)).hexdigest() - -def ticket_text(ticket): - d = dict(ticket, createdtime = isotime(ticket["createdtime"]), modifiedtime = isotime(ticket["modifiedtime"])) - return u"{description}\n\n" \ - u"_Trac ticket #{id} component {component} priority {priority}, owner {owner}," \ - u" created by {reporter} on {createdtime}, last modified {modifiedtime}_\n".format(**d) - -def comment_text(comment): - d = dict(comment, createdtime = isotime(comment["createdtime"])) - return u"{newvalue}\n\n_Trac comment by {author} on {createdtime}_\n".format(**d) - -def attachment_text(attachment): - h1 = hashname(attachment["id"]) - h2 = hashname(attachment["filename"]) - fn2 = os.path.splitext(attachment["filename"])[1] - fn = os.path.join(gist_url, h1[:3], h1, h2 + fn2) - url = "{}/raw/{}/ticket.{}.{}{}".format(gist_url.rstrip("/"), gist_commit, h1, h2, fn2) - d = dict(attachment, createdtime = isotime(comment["createdtime"]), url = url) - return u"[{filename}]({url}) {description}\n_Trac attachment by {author} on {createdtime}_\n".format(**d) - -def comment_merge(comments, attachments): - result = [] - while comments and attachments: - result.append(comments.pop(0) if comments[0]["created_at"] <= attachments[0]["created_at"] else attachments.pop(0)) - return result + comments + attachments - -parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("-c", "--config", type = argparse.FileType(), - default = "generate-json.yaml", - help = "YAML config mappings") -args = parser.parse_args() - -cfg = yaml.safe_load(args.config) -assignee_map = cfg["assignees"] -type_map = cfg["type_labels"] -resolution_map = cfg["resolution_labels"] - -gist_url = cfg.get("attachment_gist_url") -if gist_url is not None: - gist_commit = subprocess.check_output(("git", "ls-remote", gist_url, "HEAD")).split()[0] - -db = sqlite3.connect(cfg["database"]) -db.row_factory = sqlite3.Row -ticket_cursor = db.cursor() -comment_cursor = db.cursor() -attachment_cursor = db.cursor() - -if not os.path.isdir(cfg["ticket_directory"]): - os.makedirs(cfg["ticket_directory"]) - -for ticket in ticket_cursor.execute(ticket_query): - comments = comment_merge([dict(created_at = 
isotime(comment["createdtime"]), body = comment_text(comment)) - for comment in comment_cursor.execute(comment_query, (ticket["id"],))], - [] if gist_url is None else - [dict(created_at = isotime(attachment["createdtime"]), body = attachment_text(attachment)) - for attachment in attachment_cursor.execute(attachment_query, (ticket["id"],))]) - issue = dict( - title = ticket["summary"], - body = ticket_text(ticket), - created_at = isotime(ticket["createdtime"]), - updated_at = isotime(ticket["modifiedtime"])) - if ticket["status"] == "closed": - issue["closed"] = True - issue["closed_at"] = isotime(ticket["modifiedtime"]) - comments.append(dict(created_at = isotime(ticket["modifiedtime"]), - body = "_Closed with resolution {resolution}_\n".format(**ticket))) - if ticket["owner"] in assignee_map: - issue["assignee"] = assignee_map[ticket["owner"]] - labels = [type_map.get(ticket["type"]), resolution_map.get(ticket["resolution"])] - while None in labels: - del labels[labels.index(None)] - if labels: - issue["labels"] = labels - issue = dict(issue = issue) - if comments: - issue["comments"] = comments - with open(os.path.join(cfg["ticket_directory"], "ticket_{:03d}.json".format(ticket["id"])), "wb") as f: - json.dump(issue, f, indent = 4, sort_keys = True, separators=(",", ": ")) diff --git a/tools/references/rpki-wiki-to-markdown.py b/tools/references/rpki-wiki-to-markdown.py deleted file mode 100644 index dff87e6..0000000 --- a/tools/references/rpki-wiki-to-markdown.py +++ /dev/null @@ -1,341 +0,0 @@ -# Copyright (C) 2016 Parsons Government Services ("PARSONS") -# Portions copyright (C) 2014 Dragon Research Labs ("DRL") -# Portions copyright (C) 2012 Internet Systems Consortium ("ISC") -# -# Permission to use, copy, modify, and distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notices and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND PARSONS, DRL, AND ISC DISCLAIM -# ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL -# PARSONS, DRL, OR ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION -# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -""" -Trac Wiki -> Markdown converter, hacked from old Trac Wiki -> PDF/flat -text converter. - -Pull HTML pages from a Trac Wiki, feed the useful bits to -html2text to generate Markdown. - -Assumes you're using the TracNav plugin for the Wiki pages, and uses -the same list as the TracNav plugin does to determine the set of pages -to convert. -""" - -# Dependencies, at least on Ubuntu Xenial: -# -# apt-get install python-lxml python-html2text -# -# Be warned that there are many unrelated packages named "html2text", -# installed under various names on various platforms. This one -# happens to be a useful HTML-to-Markdown converter. - -# Most of the work of massaging the HTML is done using XSL transforms, -# because the template-driven style makes that easy. There's probably -# some clever way to use lxml's XPath code to do the same thing in a -# more pythonic way with ElementTrees, but I already had the XSL -# transforms and there's a point of diminishing returns on this sort of -# thing. 
- -import sys -import os -import argparse -import lxml.etree -import urllib -import urlparse -import subprocess -import zipfile - -# Main program, up front so it doesn't get lost under all the XSL - -def main(): - - base = "https://trac.rpki.net" - - parser = argparse.ArgumentParser(description = __doc__, formatter_class = argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("-b", "--base_url", - default = base, - help = "base URL for documentation web site") - parser.add_argument("-t", "--toc", - default = base + "/wiki/doc/RPKI/TOC", - help = "table of contents URL") - parser.add_argument("-d", "--directory", - default = ".", - help = "output directory") - parser.add_argument("-p", "--prefix", - default = "/wiki/doc", - help = "page name prefix on wiki") - args = parser.parse_args() - - urls = str(xsl_get_toc(lxml.etree.parse(urllib.urlopen(args.toc)).getroot(), - basename = repr(args.base_url))).splitlines() - - assert all(urlparse.urlparse(url).path.startswith(args.prefix) for url in urls) - - for pagenum, url in enumerate(urls): - path = urlparse.urlparse(url).path - page = xsl_get_page(lxml.etree.parse(urllib.urlopen(url)).getroot(), - basename = repr(args.base_url), - path = repr(path)) - - fn_base = os.path.join(args.directory, "{:02d}{}".format(pagenum, path[len(args.prefix):].replace("/", "."))) - - fn = fn_base + ".zip" - zip_url = urlparse.urljoin(url, "/zip-attachment{}/".format(path)) - urllib.urlretrieve(zip_url, fn) - with zipfile.ZipFile(fn, "r") as z: - if len(z.namelist()) == 0: - os.unlink(fn) - else: - sys.stderr.write("Wrote {}\n".format(fn)) - - for imgnum, img in enumerate(page.xpath("//img | //object | //embed")): - img_url = img.get("data" if img.tag == "object" else "src") - img_url = urlparse.urljoin(url, img_url) - fn = "{}.{:02d}{}".format(fn_base, imgnum, os.path.splitext(img_url)[1]) - urllib.urlretrieve(img_url, fn) - sys.stderr.write("Wrote {}\n".format(fn)) - - html2markdown = subprocess.Popen(("html2markdown", "--no-skip-internal-links", "--reference-links"), - stdin = subprocess.PIPE, stdout = subprocess.PIPE) - page.write(html2markdown.stdin) - html2markdown.stdin.close() - lines = html2markdown.stdout.readlines() - html2markdown.stdout.close() - html2markdown.wait() - - while lines and lines[0].isspace(): - del lines[0] - - fn = fn_base + ".md" - with open(fn, "w") as f: - want_blank = False - for line in lines: - blank = line.isspace() - if want_blank and not blank: - f.write("\n") - if not blank: - f.write(line) - want_blank = blank - sys.stderr.write("Wrote {}\n".format(fn)) - - fn = fn[:-3] + ".wiki" - urllib.urlretrieve(url + "?format=txt", fn) - sys.stderr.write("Wrote {}\n".format(fn)) - - -# XSL transform to extract list of Wiki page URLs from the TOC Wiki page - -xsl_get_toc = lxml.etree.XSLT(lxml.etree.XML('''\ - - - - - - - - - - - - - -''')) - -# XSL transform to extract useful content of a Wiki page. - -# Django generates weird HTML for ordered lists: it sometimes breaks -# up a single ordered list into multiple adjacent
<ol> elements,
-# using the @start attribute to try to make the result look like a
-# single ordered list. This looks OK in Firefox but confuses the
-# bejesus out of both html2markdown and htmldoc. In some cases this is
-# probably unavoidable, but most of the uses of this I've seen look
-# gratuitous, and are probably the result of code modularity issues
-# in Django.
-#
-# So we try to clean this up, by merging adjacent
<ol> elements where
-# we can. The merge incantation is an adaptation of:
-#
-# http://stackoverflow.com/questions/1806123/merging-adjacent-nodes-of-same-type-xslt-1-0
-#
-# There may be a more efficient way to do this, but I don't think
-# we care, and this seems to work.
-#
-# Original author's explanation:
-#
-# The rather convoluted XPath expression for selecting the following
-# sibling aaa nodes which are merged with the current one:
-#
-# following-sibling::aaa[                       # following 'aaa' siblings
-#   not(preceding-sibling::*[                   # if they are not preceded by
-#     not(self::aaa) and                        # a non-'aaa' node
-#     not(following-sibling::aaa = current())   # after the current node
-#   ])
-# ]
-
-xsl_get_page = lxml.etree.XSLT(lxml.etree.XML('''\
- [XSLT transform body lost in extraction: the XML markup was stripped, leaving only literal text nodes ("NEW PAGE", "_", "/", ".")]
          -''')) - -# All the files we want to parse are HTML, so make HTML the default -# parser. In theory the HTML produced by Trac is XHTML thus should -# parse correctly (in fact, better) as XML, but in practice this seems -# not to work properly at the moment, while parsing as HTML does. -# Haven't bothered to figure out why, life is too short. -# -# If you're reading this comment because this script stopped working -# after a Trac upgrade, try commenting out this line to see whether -# things have changed and Trac's HTML now parses better as XML. - -lxml.etree.set_default_parser(lxml.etree.HTMLParser()) - -# Run the main program. -main() diff --git a/tools/references/schema.sql b/tools/references/schema.sql deleted file mode 100644 index 1515dbb..0000000 --- a/tools/references/schema.sql +++ /dev/null @@ -1,177 +0,0 @@ -CREATE TABLE system ( - name text PRIMARY KEY, - value text -); -CREATE TABLE permission ( - username text, - action text, - UNIQUE (username,action) -); -CREATE TABLE auth_cookie ( - cookie text, - name text, - ipnr text, - time integer, - UNIQUE (cookie,ipnr,name) -); -CREATE TABLE session ( - sid text, - authenticated integer, - last_visit integer, - UNIQUE (sid,authenticated) -); -CREATE INDEX session_last_visit_idx ON session (last_visit); -CREATE INDEX session_authenticated_idx ON session (authenticated); -CREATE TABLE session_attribute ( - sid text, - authenticated integer, - name text, - value text, - UNIQUE (sid,authenticated,name) -); -CREATE TABLE cache ( - id integer PRIMARY KEY, - generation integer, - key text -); -CREATE TABLE attachment ( - type text, - id text, - filename text, - size integer, - time integer, - description text, - author text, - ipnr text, - UNIQUE (type,id,filename) -); -CREATE TABLE wiki ( - name text, - version integer, - time integer, - author text, - ipnr text, - text text, - comment text, - readonly integer, - UNIQUE (name,version) -); -CREATE INDEX wiki_time_idx ON wiki (time); -CREATE TABLE repository ( - id integer, - name text, - value text, - UNIQUE (id,name) -); -CREATE TABLE revision ( - repos integer, - rev text, - time integer, - author text, - message text, - UNIQUE (repos,rev) -); -CREATE INDEX revision_repos_time_idx ON revision (repos,time); -CREATE TABLE ticket ( - id integer PRIMARY KEY, - type text, - time integer, - changetime integer, - component text, - severity text, - priority text, - owner text, - reporter text, - cc text, - version text, - milestone text, - status text, - resolution text, - summary text, - description text, - keywords text -); -CREATE INDEX ticket_time_idx ON ticket (time); -CREATE INDEX ticket_status_idx ON ticket (status); -CREATE TABLE ticket_change ( - ticket integer, - time integer, - author text, - field text, - oldvalue text, - newvalue text, - UNIQUE (ticket,time,field) -); -CREATE INDEX ticket_change_ticket_idx ON ticket_change (ticket); -CREATE INDEX ticket_change_time_idx ON ticket_change (time); -CREATE TABLE ticket_custom ( - ticket integer, - name text, - value text, - UNIQUE (ticket,name) -); -CREATE TABLE enum ( - type text, - name text, - value text, - UNIQUE (type,name) -); -CREATE TABLE component ( - name text PRIMARY KEY, - owner text, - description text -); -CREATE TABLE milestone ( - name text PRIMARY KEY, - due integer, - completed integer, - description text -); -CREATE TABLE version ( - name text PRIMARY KEY, - time integer, - description text -); -CREATE TABLE report ( - id integer PRIMARY KEY, - author text, - title text, - query text, - description text -); 
-CREATE TABLE notify_subscription ( - id integer PRIMARY KEY, - time integer, - changetime integer, - class text, - sid text, - authenticated integer, - distributor text, - format text, - priority integer, - adverb text -); -CREATE INDEX notify_subscription_sid_authenticated_idx ON notify_subscription (sid,authenticated); -CREATE INDEX notify_subscription_class_idx ON notify_subscription (class); -CREATE TABLE notify_watch ( - id integer PRIMARY KEY, - sid text, - authenticated integer, - class text, - realm text, - target text -); -CREATE INDEX notify_watch_sid_authenticated_class_idx ON notify_watch (sid,authenticated,class); -CREATE INDEX notify_watch_class_realm_target_idx ON notify_watch (class,realm,target); -CREATE TABLE node_change ( - id integer PRIMARY KEY, - repos integer, - rev text, - path text, - node_type text, - change_type text, - base_path text, - base_rev text -); -CREATE INDEX node_change_repos_rev_path_idx ON node_change (repos,rev,path); -CREATE INDEX node_change_repos_path_rev_idx ON node_change (repos,path,rev); diff --git a/tools/references/trac-wiki-to-markdown.rb b/tools/references/trac-wiki-to-markdown.rb deleted file mode 100644 index f7d41ae..0000000 --- a/tools/references/trac-wiki-to-markdown.rb +++ /dev/null @@ -1,51 +0,0 @@ -# Untested code snippet from https://gist.github.com/somebox/619537 - -class String - def trac_to_markdown! - gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`') - gsub!(/\{\{\{(.+?)\}\}\}/m){|m| m.each_line.map{|x| "\t#{x}".gsub(/[\{\}]{3}/,'')}.join} - gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1') - gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1') - gsub!(/\=\=\s(.+?)\s\=\=/, '# \1') - gsub!(/\=\s(.+?)\s\=[\s\n]*/, '') - gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)') - gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1') - gsub!(/'''(.+)'''/, '*\1*') - gsub!(/''(.+)''/, '_\1_') - gsub!(/^\s\*/, '*') - gsub!(/^\s\d\./, '1.') - - gsub!(/\{\{\{([^\n]+?)\}\}\}/, '`\1`') - gsub!(/'''(.+?)'''/, '**\1**') - gsub!(/''(.+?)''/, '*\1*') - gsub!(/((^\|\|[^\n\r]+\|\|[ \t]*\r?(\n|$))+)/m) do |m| - m = m.each_line.map do |x| - x.gsub(/\t/, ' ') - .gsub(/(\|\|){2,}/){|k| k.gsub(/\|\|/, '|| ')} - .gsub(/ {3,}/, ' ') - end.join - lines = m.each_line.to_a - line1 = lines.shift - line2 = line1.dup.gsub(/[^\n\r\|]/, '-') - lines.unshift(line1, line2) - c = lines.join - c = c.each_line.map do |x| - x.gsub(/\=\s?(.+?)\s?=/, ' \1 ') - .gsub(/\|\|/, '|') - end.join - end - gsub!(/^\{\{\{(.+?)^\}\}\}/m, '```\1```') - gsub!(/\=\=\=\=\s(.+?)\s\=\=\=\=/, '### \1') - gsub!(/\=\=\=\s(.+?)\s\=\=\=/, '## \1') - gsub!(/\=\=\s(.+?)\s\=\=/, '# \1') - gsub!(/\=\s(.+?)\s\=[\s\n]*/, '') - gsub!(/\[(http[^\s\[\]]+)\s([^\[\]]+)\]/, '[\2](\1)') - gsub!(/\!(([A-Z][a-z0-9]+){2,})/, '\1') - gsub!(/^\s\*/, '*') - gsub!(/^\s\d\./, '1.') - end -end - -some_trac = 'my document' - -puts some_trac.trac_to_markdown! 
diff --git a/tools/references/trac2down.py b/tools/references/trac2down.py deleted file mode 100644 index c66a201..0000000 --- a/tools/references/trac2down.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python2 - -# Untested code from https://gist.githubusercontent.com/sgk/1286682/raw/b744dd2e47a68d60373ad39df87cfe8256f517af/trac2down.py - -# vim:set fileencoding=utf-8 sw=2 ai: - -import sqlite3 -import datetime -import re - -SQL = ''' - select - name, version, time, author, text - from - wiki w - where - version = (select max(version) from wiki where name = w.name) -''' - -conn = sqlite3.connect('../trac.db') -result = conn.execute(SQL) -for row in result: - name = row[0] - version = row[1] - time = row[2] - author = row[3] - text = row[4] - - text = re.sub('\r\n', '\n', text) - text = re.sub(r'{{{(.*?)}}}', r'`\1`', text) - def indent4(m): - return '\n ' + m.group(1).replace('\n', '\n ') - text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text) - text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text) - text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text) - text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text) - text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text) - text = re.sub(r'^ * ', r'****', text) - text = re.sub(r'^ * ', r'***', text) - text = re.sub(r'^ * ', r'**', text) - text = re.sub(r'^ * ', r'*', text) - text = re.sub(r'^ \d+. ', r'1.', text) - - a = [] - for line in text.split('\n'): - if not line.startswith(' '): - line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line) - line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](/\1/)', line) - line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line) - line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line) - line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line) - a.append(line) - text = '\n'.join(a) - - fp = file('%s.md' % name, 'w') - print >>fp, '' % name - print >>fp, '' % version - print >>fp, '' % datetime.datetime.fromtimestamp(time).strftime('%Y/%m/%d %H:%M:%S') - print >>fp, '' % author - fp.write(text.encode('utf-8')) - fp.close() diff --git a/tools/trac2md.py b/tools/trac2md.py deleted file mode 100755 index c022899..0000000 --- a/tools/trac2md.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python2 - -# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/ - -# This code mostly taken from patches to pagure_importer by mreynolds - -import sys -import re -import time -import requests -import shutil -import os -from base64 import b64decode -from datetime import datetime - -wikilink_pattern = re.compile('\[http(.*)\]') -wikilink_extract = re.compile('\[(.*)\]') -strikethrough_pattern = re.compile('~~(.*)~~') -camelcase_pattern = re.compile("!(\w+)") -image_pattern = re.compile("\[\[Image\((.*)\)\]\]") - -wikiheading_patterns = tuple((level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level))) - for level in range(1, 7)) - -def to_timestamp(tm): - ''' Convert to timestamp which can be jsonified ''' - - tm = tm.replace('+00:00', '') - date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S') - ts = str(time.mktime(date.timetuple()))[:-2] # Strip the .0 - return ts - - -def strip_wikilink(content): - ''' Need to remove wiki link format from custom fields. They come in a - variety of forms that can be comma or whitespace separated. They can also - include link names which must also be removed. 
- - [https://bugzilla.redhat.com/show_bug.cgi?id=772777] - [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777] - [https://bugzilla.com/6666666 6666666] - ''' - - links = [] - if wikilink_pattern.search(content): - # Looks like we have a link in here - links = [] - mylist = re.findall(r'\[([^]]*)\]', content) - for i in mylist: - links.append(i.split(' ', 1)[0]) - return ', '.join(links) - else: - return content - - -def convert_headers(line): - ''' Convert wikiformat headers - ''' - for level_count, header in wikiheading_patterns: - try: - level = header.search(line).group(1) - if level: - line = "%s %s" % ('#' * level_count, level.rstrip("= \r\t")) - break # No need to check other heading levels - except: - # Try the next heading level - pass - - return line - - -def convert_wikilinks(line): - ''' Convert wikiformat links - ''' - if wikilink_pattern.search(line): - try: - result = wikilink_extract.search(line).group(1) - if result: - parts = result.split(' ', 1) - if len(parts) == 1: - mdlink = '[%s](%s)' % (parts[0], parts[0]) - elif len(parts) == 2: - mdlink = '[%s](%s)' % (parts[1], parts[0]) - line = line.replace('[' + result + ']', mdlink) - except: - # Not a link, not a problem - pass - - return line - - -def convert_strike(line): - ''' Convert wikiformat striked text - ''' - striked_result = strikethrough_pattern.search(line) - if striked_result: - try: - striked_text = striked_result.group(1) - if striked_text: - orig_text = '~~%s~~' % striked_text - new_text = '%s' % striked_text - line = line.replace(orig_text, new_text) - except: - # Not striked - pass - return line - -def convert_image(line): - image_result = image_pattern.search(line) - if image_result: - try: - image_text = image_result.group(1).split(",")[0].strip() - old_text = image_result.group(0) - new_text = "".format(image_text) - line = line.replace(old_text, new_text) - except: - pass - return line - -def convert_linebreak(line): - # Markdown spec says linebreak is , who am I to argue? - if line.endswith("\\\\"): - line = line[:-2] + " " - return line - -def WikiToMD(content): - ''' Convert wiki/RST format to Markdown. Code blocks, bold/italics, - wiki links, lists, striked text, and headers. ''' - - # Line breaks in Markdown must be at end of line, so add newlines as needed - content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n") - - code_block = False - in_list = False - in_table = False - nested_level = 0 - prev_indent = 0 - old_content = content.splitlines() - new_content = [] - - while old_content: - line = old_content.pop(0).rstrip() - tail = ["\n"] - while "{{{" in line or "}}}" in line: - if "{{{" in line: - code_block = True - line = line.replace("{{{", "```") - if "}}}" in line: - code_block = False - line = line.replace("}}}", "```") - if not code_block: - # - # Want to convert tables. References: - # https://github.github.com/gfm/#tables-extension- - # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables - # - # Table start: line containing "||" - # Table end: blank line? - # - # Figuring out whether there's a real header line is fun, - # trac doesn't require one, markdown does. Guess we can - # add a dummy header if no better idea. Markdown requires - # delimiter line, which we add immediately after the - # header, both appear to be mandatory. Trac can have - # label cells anywhere, not just in header, might need to - # add "*" to those or just ignore the issue. 
- # Justification we can sort of figure out from the header, - # if the rows do anything different, ouch, because - # markdown specifies in delimiter line. - # - # Might do something clever with the "=" markers and - # alignment, start with just getting the basic table - # structure to something markdown will believe. - # - if line.strip().startswith("||"): - line = line.replace("=|", "|").replace("|=", "|") - line = line.replace("||", "|") - if not in_table: - tail.append("|---" * (line.count("|") - 1) + "|\n") - in_table = True - elif in_table and not line.strip().startswith("||"): - new_content.append("\n") - in_table = False - - # - # Convert bullet lists. The start and end of a list needs - # an empty line. - # - nested_line = line.lstrip(' ') - if nested_line.startswith('- ') or nested_line.startswith('* '): - if not in_list: - new_content.append("\n") - nested_level = 0 - prev_indent = 0 - in_list = True - indent = len(line) - len(nested_line) - if indent > prev_indent: - nested_level += 1 - elif indent < prev_indent: - nested_level -= 1 - prev_indent = indent - line = ' ' * nested_level + nested_line - elif in_list: - new_content.append("\n") - in_list = False - nested_level = 0 - prev_indent = 0 - - # Convert CamelCase - line = camelcase_pattern.sub("\\1", line) - - # Convert headers - line = convert_headers(line) - - # Convert wiki links - line = convert_wikilinks(line) - - # Convert striked through text - line = convert_strike(line) - - # Convert images - line = convert_image(line) - - # Convert line breaks - line = convert_linebreak(line) - - # Convert bold and italic text (do this last) - line = line.replace("'''", "**") # Convert bold text - line = line.replace("''", "*") # Convert italic text - - new_content.append(line) - new_content.extend(tail) - - return "".join(new_content) diff --git a/trac2md.py b/trac2md.py new file mode 100755 index 0000000..c022899 --- /dev/null +++ b/trac2md.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python2 + +# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/ + +# This code mostly taken from patches to pagure_importer by mreynolds + +import sys +import re +import time +import requests +import shutil +import os +from base64 import b64decode +from datetime import datetime + +wikilink_pattern = re.compile('\[http(.*)\]') +wikilink_extract = re.compile('\[(.*)\]') +strikethrough_pattern = re.compile('~~(.*)~~') +camelcase_pattern = re.compile("!(\w+)") +image_pattern = re.compile("\[\[Image\((.*)\)\]\]") + +wikiheading_patterns = tuple((level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level))) + for level in range(1, 7)) + +def to_timestamp(tm): + ''' Convert to timestamp which can be jsonified ''' + + tm = tm.replace('+00:00', '') + date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S') + ts = str(time.mktime(date.timetuple()))[:-2] # Strip the .0 + return ts + + +def strip_wikilink(content): + ''' Need to remove wiki link format from custom fields. They come in a + variety of forms that can be comma or whitespace separated. They can also + include link names which must also be removed. 
+
+    [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
+    [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
+    [https://bugzilla.com/6666666 6666666]
+    '''
+
+    links = []
+    if wikilink_pattern.search(content):
+        # Looks like we have a link in here
+        links = []
+        mylist = re.findall(r'\[([^]]*)\]', content)
+        for i in mylist:
+            links.append(i.split(' ', 1)[0])
+        return ', '.join(links)
+    else:
+        return content
+
+
+def convert_headers(line):
+    ''' Convert wikiformat headers
+    '''
+    for level_count, header in wikiheading_patterns:
+        try:
+            level = header.search(line).group(1)
+            if level:
+                line = "%s %s" % ('#' * level_count, level.rstrip("= \r\t"))
+                break # No need to check other heading levels
+        except:
+            # Try the next heading level
+            pass
+
+    return line
+
+
+def convert_wikilinks(line):
+    ''' Convert wikiformat links
+    '''
+    if wikilink_pattern.search(line):
+        try:
+            result = wikilink_extract.search(line).group(1)
+            if result:
+                parts = result.split(' ', 1)
+                if len(parts) == 1:
+                    mdlink = '[%s](%s)' % (parts[0], parts[0])
+                elif len(parts) == 2:
+                    mdlink = '[%s](%s)' % (parts[1], parts[0])
+                line = line.replace('[' + result + ']', mdlink)
+        except:
+            # Not a link, not a problem
+            pass
+
+    return line
+
+
+def convert_strike(line):
+    ''' Convert wikiformat striked text
+    '''
+    striked_result = strikethrough_pattern.search(line)
+    if striked_result:
+        try:
+            striked_text = striked_result.group(1)
+            if striked_text:
+                orig_text = '~~%s~~' % striked_text
+                new_text = '<s>%s</s>' % striked_text
+                line = line.replace(orig_text, new_text)
+        except:
+            # Not striked
+            pass
+    return line
+
+def convert_image(line):
+    image_result = image_pattern.search(line)
+    if image_result:
+        try:
+            image_text = image_result.group(1).split(",")[0].strip()
+            old_text = image_result.group(0)
+            new_text = "<img src=\"{}\">".format(image_text)
+            line = line.replace(old_text, new_text)
+        except:
+            pass
+    return line
+
+def convert_linebreak(line):
+    # Markdown spec says linebreak is <SPACE><SPACE><END-OF-LINE>, who am I to argue?
+    if line.endswith("\\\\"):
+        line = line[:-2] + "  "
+    return line
+
+def WikiToMD(content):
+    ''' Convert wiki/RST format to Markdown. Code blocks, bold/italics,
+    wiki links, lists, striked text, and headers. '''
+
+    # Line breaks in Markdown must be at end of line, so add newlines as needed
+    content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n")
+
+    code_block = False
+    in_list = False
+    in_table = False
+    nested_level = 0
+    prev_indent = 0
+    old_content = content.splitlines()
+    new_content = []
+
+    while old_content:
+        line = old_content.pop(0).rstrip()
+        tail = ["\n"]
+        while "{{{" in line or "}}}" in line:
+            if "{{{" in line:
+                code_block = True
+                line = line.replace("{{{", "```")
+            if "}}}" in line:
+                code_block = False
+                line = line.replace("}}}", "```")
+        if not code_block:
+            #
+            # Want to convert tables. References:
+            # https://github.github.com/gfm/#tables-extension-
+            # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
+            #
+            # Table start: line containing "||"
+            # Table end: blank line?
+            #
+            # Figuring out whether there's a real header line is fun,
+            # trac doesn't require one, markdown does. Guess we can
+            # add a dummy header if no better idea. Markdown requires
+            # delimiter line, which we add immediately after the
+            # header, both appear to be mandatory. Trac can have
+            # label cells anywhere, not just in header, might need to
+            # add "*" to those or just ignore the issue.
+            # Justification we can sort of figure out from the header,
+            # if the rows do anything different, ouch, because
+            # markdown specifies in delimiter line.
+            #
+            # Might do something clever with the "=" markers and
+            # alignment, start with just getting the basic table
+            # structure to something markdown will believe.
+            #
+            if line.strip().startswith("||"):
+                line = line.replace("=|", "|").replace("|=", "|")
+                line = line.replace("||", "|")
+                if not in_table:
+                    tail.append("|---" * (line.count("|") - 1) + "|\n")
+                    in_table = True
+            elif in_table and not line.strip().startswith("||"):
+                new_content.append("\n")
+                in_table = False
+
+            #
+            # Convert bullet lists. The start and end of a list needs
+            # an empty line.
+            #
+            nested_line = line.lstrip(' ')
+            if nested_line.startswith('- ') or nested_line.startswith('* '):
+                if not in_list:
+                    new_content.append("\n")
+                    nested_level = 0
+                    prev_indent = 0
+                    in_list = True
+                indent = len(line) - len(nested_line)
+                if indent > prev_indent:
+                    nested_level += 1
+                elif indent < prev_indent:
+                    nested_level -= 1
+                prev_indent = indent
+                line = ' ' * nested_level + nested_line
+            elif in_list:
+                new_content.append("\n")
+                in_list = False
+                nested_level = 0
+                prev_indent = 0
+
+            # Convert CamelCase
+            line = camelcase_pattern.sub("\\1", line)
+
+            # Convert headers
+            line = convert_headers(line)
+
+            # Convert wiki links
+            line = convert_wikilinks(line)
+
+            # Convert striked through text
+            line = convert_strike(line)
+
+            # Convert images
+            line = convert_image(line)
+
+            # Convert line breaks
+            line = convert_linebreak(line)
+
+            # Convert bold and italic text (do this last)
+            line = line.replace("'''", "**") # Convert bold text
+            line = line.replace("''", "*") # Convert italic text
+
+        new_content.append(line)
+        new_content.extend(tail)
+
+    return "".join(new_content)
--
cgit v1.2.3
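
For reference, a minimal sketch of how the trac2md module added by this
commit can be driven, in the spirit of the extract.py call above. The
sample Trac markup is illustrative only (it is not from the repository),
and running the sketch assumes trac2md.py's own imports (including
requests) are installed.

    #!/usr/bin/env python3

    # Usage sketch for trac2md.WikiToMD. Only the module and function
    # name come from the patch above; the sample markup is made up.

    import trac2md

    sample = """= Heading =
    Some '''bold''' and ''italic'' text, plus a [https://cryptech.is link].

     * first item
     * second item

    || col1 || col2 ||
    || a || b ||
    """

    # WikiToMD returns one Markdown string: "=" headings become "#",
    # bold/italic markers become "**" and "*", wiki links become
    # [text](url), and "||" table rows are rewritten with a generated
    # "|---|" delimiter line after the first row.
    print(trac2md.WikiToMD(sample))

The output lands in pelican/content/ when driven by extract.py, so the
quickest sanity check is eyeballing a converted page there.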