summary refs log tree commit diff
path: root/extract.py
diff options
context:
space:
mode:
Diffstat (limited to 'extract.py')
-rwxr-xr-x extract.py 105
1 files changed, 105 insertions, 0 deletions
diff --git a/extract.py b/extract.py
new file mode 100755
index 0000000..7f76fe9
--- /dev/null
+++ b/extract.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+
+import fnmatch
+import hashlib
+import json
+import os
+import shutil
+import sqlite3
+import sys
+import time
+import urllib.parse
+
+import trac2md
+
# SQL for the `wiki` table: returns name, version, time and text for every
# row, sorted by page name and then by ascending version.
# `time / 1000000` presumably converts stored microseconds to whole
# seconds -- TODO confirm against the database schema.
wiki_query = '''
 SELECT
 name,
 version,
 time / 1000000 AS time,
 text
 FROM wiki
 ORDER BY
 name, version
'''
+
# SQL for the `attachment` table: returns the metadata columns of every
# attachment whose type is 'wiki', sorted by filename and then by time.
# `time / 1000000` (exposed as `createdtime`) presumably converts stored
# microseconds to whole seconds -- TODO confirm against the database schema.
attachment_query = '''
 SELECT
 id,
 filename,
 size,
 author,
 description,
 ipnr,
 time / 1000000 AS createdtime
 FROM
 attachment
 WHERE
 type = 'wiki'
 ORDER BY
 filename, time
'''
+
def isotime(t):
    """Format a Unix timestamp as an ISO 8601 UTC string.

    Args:
        t: Seconds since the epoch, or ``0`` / ``None`` for "no timestamp".

    Returns:
        A string such as ``"1970-01-01T00:00:01Z"``, or ``None`` when *t*
        is ``0`` or ``None``.
    """
    # time.gmtime(None) means "now", so a NULL column value would otherwise
    # be rendered as the current time instead of being reported as missing.
    if t is None or t == 0:
        return None
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t))
+
def attachment_link(row):
    """Build the stored path and the quoted link for one attachment row.

    *row* must expose ``id`` and ``filename`` as attributes and also
    support ``row["filename"]`` lookup.

    Returns a 2-tuple of strings:
      * ``attachments/wiki/<first 3 hex chars of sha1(id)>/<sha1(id)>/``
        ``<sha1(filename)><extension of filename>``
        (presumably the layout of the attachment store -- verify);
      * the percent-quoted id joined with the percent-quoted filename,
        with no characters (not even ``/``) left unescaped.
    """
    def sha1_hex(text):
        return hashlib.sha1(text.encode()).hexdigest()

    id_digest = sha1_hex(row.id)
    name_digest = sha1_hex(row.filename)
    extension = os.path.splitext(row["filename"])[1]

    stored_path = os.path.join(
        "attachments", "wiki", id_digest[:3], id_digest, name_digest + extension
    )
    quoted_link = os.path.join(
        urllib.parse.quote(row.id, ""),
        urllib.parse.quote(row.filename, ""),
    )
    return stored_path, quoted_link
+
class Filter:
    """Decide which names to keep, based on a JSON rule file.

    The file holds a list of ``[action, pattern]`` pairs, where *action*
    is ``"+"`` (keep) or ``"-"`` (drop) and *pattern* is an ``fnmatch``
    glob.  Rules are tried in file order and the first matching rule
    wins; a name that matches no rule is kept.
    """

    def __init__(self, filename="filter.json"):
        """Load and validate the rules stored in *filename*.

        Exits the program (``SystemExit``) with a message naming the
        offending value if any action is not exactly ``"+"`` or ``"-"``.
        """
        with open(filename) as f:
            rules = json.load(f)
        for action, pattern in rules:
            # Compare against the two legal values explicitly: a substring
            # test against "-+" would also accept "" and "-+".
            if action not in ("-", "+"):
                sys.exit("Bad action \"{}\" in filter".format(action))
        # Store each action as a boolean so __call__ can return it directly.
        self.filter = tuple((action == "+", pattern) for action, pattern in rules)

    def __call__(self, name):
        """Return True if *name* should be kept, False if it is dropped."""
        for keep, pattern in self.filter:
            if fnmatch.fnmatch(name, pattern):
                return keep
        return True
+
class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes.

    ``row.name`` is equivalent to ``row["name"]``.
    """

    def __getattr__(self, name):
        """Return the column called *name*.

        Raises:
            AttributeError: if the row has no such column.
        """
        # __getattr__ only runs after normal attribute lookup has failed.
        # An unknown column must surface as AttributeError rather than the
        # IndexError sqlite3.Row raises; otherwise hasattr() and
        # getattr(row, name, default) propagate the error.
        try:
            return self[name]
        except IndexError:
            raise AttributeError(name) from None
+
def main():
    """Export wiki pages from ``trac.db`` and list wiki attachments.

    Recreates the ``wiki`` and ``pelican`` output trees in the current
    directory.  For every row of ``wiki_query`` whose page name passes the
    default ``Filter``, writes the raw text to ``wiki/<slug>.trac`` and its
    ``trac2md`` conversion to ``pelican/content/<slug>.md``, where
    ``<slug>`` is the fully percent-quoted page name.  Rows arrive ordered
    by version and files are opened for writing, so a later version of a
    page overwrites an earlier one.  Finally prints one
    ``<path> => <link>`` line per wiki attachment.
    """
    # Start from a clean slate.  The output trees do not exist on a first
    # run, which is fine; any other removal failure still propagates.
    for dn in ("wiki", "pelican"):
        try:
            shutil.rmtree(dn)
        except FileNotFoundError:
            pass

    for dn in ("wiki", "pelican/content/images", "pelican/content/pages"):
        os.makedirs(dn)

    #os.link("pelican.conf", "pelican/pelican.conf")

    keep = Filter()

    db = sqlite3.connect("trac.db")
    try:
        db.row_factory = Row

        for row in db.execute(wiki_query):
            if not keep(row.name):
                continue
            slug = urllib.parse.quote(row.name, "")
            print(slug, row.version)
            # Explicit UTF-8 so non-ASCII page text does not depend on the
            # locale's default encoding.
            with open("wiki/{}.trac".format(slug), "w", encoding="utf-8") as f:
                f.write(row.text)
            md = trac2md.WikiToMD(row.text)
            # NOTE(review): pages land in pelican/content/, not in the
            # pelican/content/pages directory created above -- confirm.
            with open("pelican/content/{}.md".format(slug), "w", encoding="utf-8") as f:
                f.write(md)

        for row in db.execute(attachment_query):
            print("{} => {}".format(*attachment_link(row)))
    finally:
        # Close the connection even when a query, conversion or write fails.
        db.close()
+
# Run the export only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()