summaryrefslogtreecommitdiff
path: root/tools/extract.py
blob: f323d448f4f151d073bc5df0ef89c6a42bb839a8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python3

import fnmatch
import hashlib
import json
import sqlite3
import time
import urllib.parse

# Selects every row of the `wiki` table (name, version, time, text), ordered
# by page name and then by version.
# NOTE(review): `time / 1000000` presumably converts a microsecond timestamp
# to whole seconds -- confirm against the database schema.
wiki_query = '''
  SELECT
    name, 
    version, 
    time / 1000000 AS time, 
    text 
  FROM wiki
  ORDER BY
    name, version
'''

# Selects the attachments of type 'wiki' whose `id` equals the single bound
# parameter (`?`), ordered by filename and then by time.
# NOTE(review): `time / 1000000` presumably converts a microsecond timestamp
# to whole seconds -- confirm against the database schema.
# NOTE(review): this query is not referenced in the visible part of the file.
attachment_query = '''
  SELECT
    id,
    filename,
    size,
    author,
    description,
    ipnr,
    time / 1000000 AS createdtime
  FROM
    attachment
  WHERE
    id = ?
  AND
    type = 'wiki'
  ORDER BY
    filename, time
'''

def isotime(t):
    """Format a POSIX timestamp (seconds) as an ISO 8601 UTC string.

    Returns None when ``t`` is 0 or None, i.e. when no timestamp is recorded.
    None must be checked explicitly: ``time.gmtime(None)`` means "now", so a
    NULL database value would otherwise be reported as the current time.
    """
    if t is None or t == 0:
        return None
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(t))

def hashname(whatever):
    """Return the hexadecimal SHA-1 digest of the text form of ``whatever``.

    The value is converted with ``str()`` and encoded as UTF-8 before
    hashing, because ``hashlib`` only accepts bytes on Python 3 (and the
    Python 2 ``unicode`` builtin no longer exists).
    """
    return hashlib.sha1(str(whatever).encode("utf-8")).hexdigest()

# Ordered filter rules read by keep(); each entry is unpacked there into an
# action and a glob pattern. JSON text is UTF-8, so decode it explicitly
# instead of relying on the locale's default encoding.
# NOTE: the name shadows the `filter` builtin; it is kept because keep()
# looks it up by this name.
with open("filter.json", encoding="utf-8") as f:
    filter = json.load(f)

def keep(name):
    """Decide whether ``name`` passes the module-level ``filter`` rules.

    Rules are tried in order and the first glob pattern that matches
    ``name`` decides the outcome: action "+" keeps it, anything else drops
    it. A name matched by no rule is kept.
    """
    for action, pattern in filter:
        assert action in "+-"
        if not fnmatch.fnmatch(name, pattern):
            continue
        # First matching rule wins.
        return action == "+"
    return True

class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes."""

    def __getattr__(self, name):
        """Return the value of column ``name``.

        Only called when normal attribute lookup fails. An unknown column
        is reported as AttributeError (not the IndexError raised by
        ``sqlite3.Row`` item access) so that ``hasattr()`` and
        ``getattr(row, name, default)`` behave as expected.
        """
        try:
            return self[name]
        except IndexError:
            raise AttributeError(name) from None

# Print the URL-quoted name and version of every wiki row in "trac.db" that
# passes keep(). The connection is closed even if the query or printing fails.
db = sqlite3.connect("trac.db")
try:
    # Row allows column access by attribute (row.name, row.version).
    db.row_factory = Row

    for row in db.execute(wiki_query):
        if keep(row.name):
            # safe="" makes quote() percent-encode "/" as well.
            print(urllib.parse.quote(row.name, ""), row.version)
finally:
    db.close()