#!/usr/bin/env python3
#
# extract.py -- reconstructed from a whitespace-flattened cgit patch:
#   commit:  68e18ad1f44e9a6fab66adc38e97d027a58de8a4
#   author:  Rob Austein
#   date:    Sun, 14 Feb 2021 16:56:57 +0000
#   subject: Another reorg, and pelican samples

"""Dump wiki pages from ``trac.db`` as raw text and as converted Markdown.

For every wiki page accepted by the filter in ``filter.json`` this script
writes ``wiki/<slug>.trac`` (the stored text) and
``pelican/content/<slug>.md`` (the output of ``trac2md.WikiToMD``).  It then
prints, for each wiki attachment row, the pair of paths computed by
``attachment_link``.
"""

import fnmatch
import hashlib
import json
import os
import shutil
import sqlite3
import sys
import time
import urllib.parse

import trac2md

# Every version of every wiki page, oldest version first within a page, so
# that a later version overwrites the file written for an earlier one.
# NOTE(review): "time / 1000000" presumably converts microseconds to
# seconds -- confirm against the database schema.
wiki_query = '''
  SELECT
    name,
    version,
    time / 1000000 AS time,
    text
  FROM wiki
  ORDER BY
    name, version
'''

# Metadata for attachments whose type is 'wiki'.
attachment_query = '''
  SELECT
    id,
    filename,
    size,
    author,
    description,
    ipnr,
    time / 1000000 AS createdtime
  FROM
    attachment
  WHERE
    type = 'wiki'
  ORDER BY
    filename, time
'''


def isotime(t):
    """Format the timestamp *t* (seconds since the epoch) as a UTC string.

    Returns ``None`` when *t* is 0, otherwise a string shaped like
    ``YYYY-MM-DDTHH:MM:SSZ``.
    """
    return None if t == 0 else time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t))


def _sha1_hex(text):
    """Return the hexadecimal SHA-1 digest of the string *text*."""
    return hashlib.sha1(text.encode()).hexdigest()


def attachment_link(row):
    """Compute the two paths associated with one attachment row.

    *row* must expose ``id`` and ``filename`` as attributes (see ``Row``).

    Returns a 2-tuple:

    * ``attachments/wiki/<first 3 hex digits of sha1(id)>/<sha1(id)>/``
      ``<sha1(filename)><original extension>``
    * ``<quoted id>/<quoted filename>``, with every reserved character
      (including ``/``) percent-encoded in each component.

    NOTE(review): the first path presumably mirrors where the attachment
    lives on disk -- confirm against the source installation.
    """
    h1 = _sha1_hex(row.id)
    h2 = _sha1_hex(row.filename)
    ext = os.path.splitext(row.filename)[1]
    return (
        os.path.join("attachments", "wiki", h1[:3], h1, h2 + ext),
        os.path.join(urllib.parse.quote(row.id, ""),
                     urllib.parse.quote(row.filename, "")),
    )


class Filter:
    """Decide which wiki page names to keep, based on a JSON rule file.

    The file holds a sequence of ``[action, pattern]`` pairs where *action*
    is ``"+"`` (keep) or ``"-"`` (drop) and *pattern* is an ``fnmatch``
    pattern.  Rules are tried in order and the first match wins.
    """

    def __init__(self, filename = "filter.json"):
        """Load and validate the rules in *filename*.

        Exits the program via ``sys.exit`` with a message naming the
        offending action if any rule's action is not ``"+"`` or ``"-"``.
        """
        with open(filename, encoding = "utf-8") as f:
            rules = json.load(f)
        # Check rule by rule so the failing action is in scope for the
        # message; compare against a tuple so only the exact strings "+"
        # and "-" pass (a substring test would also accept "" and "-+").
        for action, pattern in rules:
            if action not in ("-", "+"):
                sys.exit("Bad action \"{}\" in filter".format(action))
        self.filter = tuple((action == "+", pattern) for action, pattern in rules)

    def __call__(self, name):
        """Return True if *name* should be kept, False if it is filtered out.

        Names that match no pattern are kept.
        """
        for action, pattern in self.filter:
            if fnmatch.fnmatch(name, pattern):
                return action
        return True


class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes."""

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails.  Translate the
        # row's IndexError for an unknown column into AttributeError so
        # hasattr() and getattr(row, name, default) behave as expected.
        try:
            return self[name]
        except IndexError:
            raise AttributeError(name) from None


def main():
    """Rebuild the ``wiki`` and ``pelican`` output trees from ``trac.db``."""

    # Load the filter first: if filter.json is missing or invalid we want
    # to stop before any previous output has been deleted.
    keep = Filter()

    for dn in ("wiki", "pelican"):
        # The trees do not exist on a first run; only remove what is there.
        if os.path.exists(dn):
            shutil.rmtree(dn)

    for dn in ("wiki", "pelican/content/images", "pelican/content/pages"):
        os.makedirs(dn)

    #os.link("pelican.conf", "pelican/pelican.conf")

    db = sqlite3.connect("trac.db")
    try:
        db.row_factory = Row

        for row in db.execute(wiki_query):
            if not keep(row.name):
                continue
            # Percent-encode everything (including "/") so the page name
            # is usable as a single file name.
            slug = urllib.parse.quote(row.name, "")
            print(slug, row.version)
            with open("wiki/{}.trac".format(slug), "w", encoding = "utf-8") as f:
                f.write(row.text)
            md = trac2md.WikiToMD(row.text)
            with open("pelican/content/{}.md".format(slug), "w", encoding = "utf-8") as f:
                f.write(md)

        for row in db.execute(attachment_query):
            print("{} => {}".format(*attachment_link(row)))
    finally:
        # Release the database handle even if a page fails to convert.
        db.close()


if __name__ == "__main__":
    main()