summaryrefslogtreecommitdiff
path: root/extract.py
blob: c7e35b9fd600549329f450f80b1b3fa923367480 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python3

import fnmatch
import hashlib
import json
import os
import shutil
import sqlite3
import sys
import time
import urllib.parse

import trac2md

# Selects every stored revision of every wiki page, ordered by page name and
# then by version, so revisions of one page arrive oldest-version first.
# The stored time is divided by 1000000 and returned as "time"; the result is
# later passed to time.gmtime(), i.e. treated as seconds (presumably the
# database stores microseconds -- confirm against the Trac schema).
wiki_query = '''
  SELECT
    name, 
    author,
    version, 
    time / 1000000 AS time, 
    text 
  FROM wiki
  ORDER BY
    name, version
'''

# Selects the attachment rows whose type is 'wiki', ordered by filename and
# then by time.  The stored time is divided by 1000000 and returned as
# "createdtime" (presumably microseconds -> seconds; confirm against the
# Trac schema).
attachment_query = '''
  SELECT
    id,
    filename,
    size,
    author,
    description,
    ipnr,
    time / 1000000 AS createdtime
  FROM
    attachment
  WHERE
    type = 'wiki'
  ORDER BY
    filename, time
'''

def attachment_link(row):
    """Compute the (source, destination) path pair for one attachment row.

    The source path is built from SHA-1 hex digests:
    ``attachments/wiki/<first 3 chars of sha1(id)>/<sha1(id)>/<sha1(filename)><ext>``
    where ``<ext>`` is the extension of the original filename (presumably
    this mirrors where Trac stores attachment files on disk -- confirm).

    The destination path is ``pelican/content/<fully URL-quoted id>/<filename>``.

    ``row`` must expose ``id`` and ``filename`` as attributes and
    ``"filename"`` as a key.
    """
    def sha1_hex(text):
        return hashlib.sha1(text.encode()).hexdigest()

    id_digest = sha1_hex(row.id)
    name_digest = sha1_hex(row.filename)
    _, extension = os.path.splitext(row["filename"])

    source = os.path.join(
        "attachments", "wiki", id_digest[:3], id_digest, name_digest + extension)
    # An empty "safe" set means "/" in the id is quoted as well, so the id
    # becomes a single path component.
    quoted_id = urllib.parse.quote(row.id, "")
    destination = os.path.join("pelican", "content", quoted_id, row.filename)
    return source, destination

class Filter:
    """Ordered keep/drop filter for names, loaded from a JSON file.

    The JSON file must contain a list of ``[action, pattern]`` pairs, where
    ``action`` is ``"+"`` (keep) or ``"-"`` (drop) and ``pattern`` is an
    ``fnmatch``-style pattern.  Calling the filter with a name returns the
    verdict of the first matching pattern; names that match no pattern are
    kept.
    """

    def __init__(self, filename = "filter.json"):
        """Load and validate the rules from ``filename``.

        Exits the program (``SystemExit``) with an explanatory message if
        any rule has an action other than ``"+"`` or ``"-"``.
        """
        with open(filename) as f:
            rules = json.load(f)
        # Validate with an explicit loop so the offending action is in scope
        # for the error message, and compare against the two exact tokens
        # rather than doing a substring test (which would accept "" or "-+").
        for action, pattern in rules:
            if action not in ("-", "+"):
                sys.exit("Bad action \"{}\" in filter".format(action))
        # Store each rule as (keep?, pattern), preserving file order.
        self.filter = tuple((action == "+", pattern) for action, pattern in rules)

    def __call__(self, name):
        """Return True if ``name`` should be kept, False if it should be dropped."""
        for action, pattern in self.filter:
            if fnmatch.fnmatch(name, pattern):
                return action
        # No rule matched: keep by default.
        return True

class Row(sqlite3.Row):
    """``sqlite3.Row`` that also exposes columns as attributes.

    ``row.name`` is equivalent to ``row["name"]`` for any column that is not
    shadowed by a real attribute or method of the class.
    """

    def __getattr__(self, name):
        """Return the value of column ``name``.

        Raises ``AttributeError`` when there is no such column, so that
        ``hasattr()``, ``getattr()`` with a default, and protocol probes
        behave normally.
        """
        try:
            return self[name]
        except IndexError:
            # sqlite3.Row signals an unknown column name with IndexError;
            # attribute lookup must report it as AttributeError instead.
            raise AttributeError(name) from None

    @property
    def isotime(self):
        """The row's ``time`` column formatted as ``YYYY-MM-DD HH:MM`` in UTC.

        ``time`` is passed to ``time.gmtime()``, so it must be in seconds
        since the epoch.
        """
        return time.strftime("%Y-%m-%d %H:%M", time.gmtime(self.time))

def markdown_header(row, first_published):
    """Build the metadata header placed at the top of a generated page.

    ``first_published`` maps page name to the timestamp of the first revision
    seen for that page, and is updated in place: the first time a page name
    is seen its ``row.isotime`` is recorded and used as ``Date``; on later
    calls for the same name the recorded date is kept and the current
    ``row.isotime`` is emitted as an extra ``Modified`` line.

    Returns the header text, terminated by a blank line.
    """
    page = row.name
    if page not in first_published:
        # First revision seen for this page: remember its date, no Modified line.
        first_published[page] = row.isotime
        modified_line = ""
    else:
        modified_line = "Modified: {}\n".format(row.isotime)
    published = first_published[page]
    return "Title: {}\nAuthor: {}\nDate: {}\n{}\n".format(page, row.author, published, modified_line)

def main():
    """Export wiki pages and attachments from ``trac.db`` into fresh output trees.

    Steps, all relative to the current working directory:

    1. Remove any existing ``wiki`` and ``pelican`` directories and recreate
       ``wiki`` and ``pelican/content``.
    2. Hard-link ``pelicanconf.py`` into ``pelican/``.
    3. For every wiki revision accepted by the ``Filter`` loaded from
       ``filter.json``, write the raw text to ``wiki/<slug>.trac`` and the
       converted Markdown (with metadata header) to
       ``pelican/content/<slug>.md``.  Revisions arrive ordered by version,
       so later revisions overwrite earlier ones.
    4. Hard-link every wiki attachment to the destination computed by
       ``attachment_link``.
    """

    # Start from a clean slate.  On a first run the output directories do not
    # exist yet, which is fine.
    for dn in ("wiki", "pelican"):
        try:
            shutil.rmtree(dn)
        except FileNotFoundError:
            pass

    for dn in ("wiki", "pelican/content"):
        os.makedirs(dn)

    os.link("pelicanconf.py", "pelican/pelicanconf.py")

    keep = Filter()

    # Page name -> timestamp of the first revision written for that page.
    first_published = {}

    db = sqlite3.connect("trac.db")
    db.row_factory = Row

    # Make sure the connection is closed even if conversion or I/O fails.
    try:
        for row in db.execute(wiki_query):
            if not keep(row.name):
                continue
            # Quote everything, including "/", so each page is a single file.
            slug = urllib.parse.quote(row.name, "")
            # Write with an explicit encoding: wiki text may contain non-ASCII
            # characters and the locale default is not guaranteed to handle it.
            with open("wiki/{}.trac".format(slug), "w", encoding="utf-8") as f:
                f.write(row.text)
            md = markdown_header(row, first_published) + trac2md.WikiToMD(row.text, slug)
            with open("pelican/content/{}.md".format(slug), "w", encoding="utf-8") as f:
                f.write(md)

        for row in db.execute(attachment_query):
            src, dst = attachment_link(row)
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            os.link(src, dst)
    finally:
        db.close()

# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()