From dc6ef096698dcd18d1755a45d79d0b87368a6ce6 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Sun, 14 Feb 2021 00:51:25 +0000 Subject: rsync fresh sql and attachments and work from that, without Trac itself --- .gitignore | 2 + GNUmakefile | 19 ++++++ Makefile | 14 ---- filter.json | 18 +++++ tools/extract.py | 63 ++++++++++++++++++ tools/generate-json.py | 154 ++++++++++++++++++++++++++++++++++++++++++ tools/schema.sql | 177 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 433 insertions(+), 14 deletions(-) create mode 100644 GNUmakefile delete mode 100644 Makefile create mode 100644 filter.json create mode 100644 tools/extract.py create mode 100755 tools/generate-json.py create mode 100644 tools/schema.sql diff --git a/.gitignore b/.gitignore index bea5755..32b9d65 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ TAGS +attachments +trac.db diff --git a/GNUmakefile b/GNUmakefile new file mode 100644 index 0000000..019a783 --- /dev/null +++ b/GNUmakefile @@ -0,0 +1,19 @@ +TRAC := $(shell find raw-wiki-dump -type f -name '*.trac') +OUT := ${TRAC:.trac=.md} TAGS +TOOL := tools/trac2md.py + +all: ${OUT} + +fetch: + rsync -aP --delete bikeshed.cryptech.is:/home/trac/db/trac.db bikeshed.cryptech.is:/home/trac/files/attachments . + +clean: + rm ${OUT} + +TAGS: ${TRAC} + etags -l none $^ + +%.md: %.trac GNUmakefile ${TOOL} + ${TOOL} $< + +.PHONY: all clean fetch diff --git a/Makefile b/Makefile deleted file mode 100644 index 789b822..0000000 --- a/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -TRAC := $(shell find raw-wiki-dump -type f -name '*.trac') -OUT := ${TRAC:.trac=.md} TAGS -TOOL := tools/trac2md.py - -all: ${OUT} - -clean: - rm ${OUT} - -TAGS: ${TRAC} - etags -l none $^ - -%.md: %.trac Makefile ${TOOL} - ${TOOL} $< diff --git a/filter.json b/filter.json new file mode 100644 index 0000000..ebb1456 --- /dev/null +++ b/filter.json @@ -0,0 +1,18 @@ +[ + ["+", "WikiStart"], + ["-", "CamelCase"], + ["-", "GitRepositories*"], + ["-", "InterMapTxt"], + ["-", "InterTrac"], + ["-", "InterWiki"], + ["-", "PageTemplates"], + ["-", "PhotoFolder"], + ["-", "RecentChanges"], + ["-", "SandBox"], + ["-", "ScratchPage"], + ["-", "TicketQuery"], + ["-", "TitleIndex"], + ["-", "Trac*"], + ["-", "Wiki*"], + ["+", "*"] +] diff --git a/tools/extract.py b/tools/extract.py new file mode 100644 index 0000000..d60de84 --- /dev/null +++ b/tools/extract.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import fnmatch +import hashlib +import json +import sqlite3 +import time +import urllib.parse + +wiki_query = ''' + SELECT + name, + version, + time / 1000000 AS time, + text + FROM wiki + ORDER BY + name, version +''' + +attachment_query = ''' + SELECT + id, + filename, + size, + author, + description, + ipnr, + time / 1000000 AS createdtime + FROM + attachment + WHERE + id = ? + AND + type = 'wiki' + ORDER BY + filename, time +''' + +def isotime(t): + return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) + +def hashname(whatever): + return hashlib.sha1(unicode(whatever)).hexdigest() + +with open("filter.json") as f: + filter = json.load(f) + +def keep(name): + for k, v in filter: + assert k in "+-" + if fnmatch.fnmatch(name, v): + return k == "+" + return True + +db = sqlite3.connect("trac.db") +db.row_factory = sqlite3.Row + +for row in db.execute(wiki_query): + if keep(row["name"]): + print(urllib.parse.quote(row["name"], ""), row["version"]) + +db.close() diff --git a/tools/generate-json.py b/tools/generate-json.py new file mode 100755 index 0000000..b8b1f38 --- /dev/null +++ b/tools/generate-json.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +# Generate JSON to import Trac tickets into GitHub issues using the new import API +# described at https://gist.github.com/jonmagic/5282384165e0f86ef105 + +import os +import time +import json +import yaml +import sqlite3 +import hashlib +import argparse +import subprocess + +ticket_query = ''' +SELECT + id, + type, + owner, + reporter, + milestone, + status, + resolution, + summary, + description, + component, + priority, + time / 1000000 AS createdtime, + changetime / 1000000 AS modifiedtime +FROM + ticket +ORDER BY + id +''' + +comment_query = ''' +SELECT + time / 1000000 AS createdtime, + author, + newvalue +FROM + ticket_change +WHERE + ticket = ? +AND + field = 'comment' +AND + newvalue <> '' +ORDER BY + time +''' + +attachment_query = ''' +SELECT + id, + filename, + size, + author, + description, + ipnr, + time / 1000000 AS createdtime +FROM + attachment +WHERE + id = ? +AND + type = 'ticket' +ORDER BY + time, filename +''' + +def isotime(t): + return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t)) + +def hashname(whatever): + return hashlib.sha1(unicode(whatever)).hexdigest() + +def ticket_text(ticket): + d = dict(ticket, createdtime = isotime(ticket["createdtime"]), modifiedtime = isotime(ticket["modifiedtime"])) + return u"{description}\n\n" \ + u"_Trac ticket #{id} component {component} priority {priority}, owner {owner}," \ + u" created by {reporter} on {createdtime}, last modified {modifiedtime}_\n".format(**d) + +def comment_text(comment): + d = dict(comment, createdtime = isotime(comment["createdtime"])) + return u"{newvalue}\n\n_Trac comment by {author} on {createdtime}_\n".format(**d) + +def attachment_text(attachment): + h1 = hashname(attachment["id"]) + h2 = hashname(attachment["filename"]) + fn2 = os.path.splitext(attachment["filename"])[1] + fn = os.path.join(gist_url, h1[:3], h1, h2 + fn2) + url = "{}/raw/{}/ticket.{}.{}{}".format(gist_url.rstrip("/"), gist_commit, h1, h2, fn2) + d = dict(attachment, createdtime = isotime(comment["createdtime"]), url = url) + return u"[{filename}]({url}) {description}\n_Trac attachment by {author} on {createdtime}_\n".format(**d) + +def comment_merge(comments, attachments): + result = [] + while comments and attachments: + result.append(comments.pop(0) if comments[0]["created_at"] <= attachments[0]["created_at"] else attachments.pop(0)) + return result + comments + attachments + +parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-c", "--config", type = argparse.FileType(), + default = "generate-json.yaml", + help = "YAML config mappings") +args = parser.parse_args() + +cfg = yaml.safe_load(args.config) +assignee_map = cfg["assignees"] +type_map = cfg["type_labels"] +resolution_map = cfg["resolution_labels"] + +gist_url = cfg.get("attachment_gist_url") +if gist_url is not None: + gist_commit = subprocess.check_output(("git", "ls-remote", gist_url, "HEAD")).split()[0] + +db = sqlite3.connect(cfg["database"]) +db.row_factory = sqlite3.Row +ticket_cursor = db.cursor() +comment_cursor = db.cursor() +attachment_cursor = db.cursor() + +if not os.path.isdir(cfg["ticket_directory"]): + os.makedirs(cfg["ticket_directory"]) + +for ticket in ticket_cursor.execute(ticket_query): + comments = comment_merge([dict(created_at = isotime(comment["createdtime"]), body = comment_text(comment)) + for comment in comment_cursor.execute(comment_query, (ticket["id"],))], + [] if gist_url is None else + [dict(created_at = isotime(attachment["createdtime"]), body = attachment_text(attachment)) + for attachment in attachment_cursor.execute(attachment_query, (ticket["id"],))]) + issue = dict( + title = ticket["summary"], + body = ticket_text(ticket), + created_at = isotime(ticket["createdtime"]), + updated_at = isotime(ticket["modifiedtime"])) + if ticket["status"] == "closed": + issue["closed"] = True + issue["closed_at"] = isotime(ticket["modifiedtime"]) + comments.append(dict(created_at = isotime(ticket["modifiedtime"]), + body = "_Closed with resolution {resolution}_\n".format(**ticket))) + if ticket["owner"] in assignee_map: + issue["assignee"] = assignee_map[ticket["owner"]] + labels = [type_map.get(ticket["type"]), resolution_map.get(ticket["resolution"])] + while None in labels: + del labels[labels.index(None)] + if labels: + issue["labels"] = labels + issue = dict(issue = issue) + if comments: + issue["comments"] = comments + with open(os.path.join(cfg["ticket_directory"], "ticket_{:03d}.json".format(ticket["id"])), "wb") as f: + json.dump(issue, f, indent = 4, sort_keys = True, separators=(",", ": ")) diff --git a/tools/schema.sql b/tools/schema.sql new file mode 100644 index 0000000..1515dbb --- /dev/null +++ b/tools/schema.sql @@ -0,0 +1,177 @@ +CREATE TABLE system ( + name text PRIMARY KEY, + value text +); +CREATE TABLE permission ( + username text, + action text, + UNIQUE (username,action) +); +CREATE TABLE auth_cookie ( + cookie text, + name text, + ipnr text, + time integer, + UNIQUE (cookie,ipnr,name) +); +CREATE TABLE session ( + sid text, + authenticated integer, + last_visit integer, + UNIQUE (sid,authenticated) +); +CREATE INDEX session_last_visit_idx ON session (last_visit); +CREATE INDEX session_authenticated_idx ON session (authenticated); +CREATE TABLE session_attribute ( + sid text, + authenticated integer, + name text, + value text, + UNIQUE (sid,authenticated,name) +); +CREATE TABLE cache ( + id integer PRIMARY KEY, + generation integer, + key text +); +CREATE TABLE attachment ( + type text, + id text, + filename text, + size integer, + time integer, + description text, + author text, + ipnr text, + UNIQUE (type,id,filename) +); +CREATE TABLE wiki ( + name text, + version integer, + time integer, + author text, + ipnr text, + text text, + comment text, + readonly integer, + UNIQUE (name,version) +); +CREATE INDEX wiki_time_idx ON wiki (time); +CREATE TABLE repository ( + id integer, + name text, + value text, + UNIQUE (id,name) +); +CREATE TABLE revision ( + repos integer, + rev text, + time integer, + author text, + message text, + UNIQUE (repos,rev) +); +CREATE INDEX revision_repos_time_idx ON revision (repos,time); +CREATE TABLE ticket ( + id integer PRIMARY KEY, + type text, + time integer, + changetime integer, + component text, + severity text, + priority text, + owner text, + reporter text, + cc text, + version text, + milestone text, + status text, + resolution text, + summary text, + description text, + keywords text +); +CREATE INDEX ticket_time_idx ON ticket (time); +CREATE INDEX ticket_status_idx ON ticket (status); +CREATE TABLE ticket_change ( + ticket integer, + time integer, + author text, + field text, + oldvalue text, + newvalue text, + UNIQUE (ticket,time,field) +); +CREATE INDEX ticket_change_ticket_idx ON ticket_change (ticket); +CREATE INDEX ticket_change_time_idx ON ticket_change (time); +CREATE TABLE ticket_custom ( + ticket integer, + name text, + value text, + UNIQUE (ticket,name) +); +CREATE TABLE enum ( + type text, + name text, + value text, + UNIQUE (type,name) +); +CREATE TABLE component ( + name text PRIMARY KEY, + owner text, + description text +); +CREATE TABLE milestone ( + name text PRIMARY KEY, + due integer, + completed integer, + description text +); +CREATE TABLE version ( + name text PRIMARY KEY, + time integer, + description text +); +CREATE TABLE report ( + id integer PRIMARY KEY, + author text, + title text, + query text, + description text +); +CREATE TABLE notify_subscription ( + id integer PRIMARY KEY, + time integer, + changetime integer, + class text, + sid text, + authenticated integer, + distributor text, + format text, + priority integer, + adverb text +); +CREATE INDEX notify_subscription_sid_authenticated_idx ON notify_subscription (sid,authenticated); +CREATE INDEX notify_subscription_class_idx ON notify_subscription (class); +CREATE TABLE notify_watch ( + id integer PRIMARY KEY, + sid text, + authenticated integer, + class text, + realm text, + target text +); +CREATE INDEX notify_watch_sid_authenticated_class_idx ON notify_watch (sid,authenticated,class); +CREATE INDEX notify_watch_class_realm_target_idx ON notify_watch (class,realm,target); +CREATE TABLE node_change ( + id integer PRIMARY KEY, + repos integer, + rev text, + path text, + node_type text, + change_type text, + base_path text, + base_rev text +); +CREATE INDEX node_change_repos_rev_path_idx ON node_change (repos,rev,path); +CREATE INDEX node_change_repos_path_rev_idx ON node_change (repos,path,rev); -- cgit v1.2.3