summaryrefslogblamecommitdiff
path: root/tools/extract.py
blob: 8035b20413a5f63810928b43f689e42e6ebedc92 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11




                      
         
             



                   

              






















                                 





                  
                                                                                  
 







                                                                                          










                                    



                                



                                      
                               
                    

                                  
                      

                                               
                                                         

                                       
                                                                  
                       
 


                                                   
          
#!/usr/bin/env python3

import fnmatch
import hashlib
import json
import os
import shutil
import sqlite3
import time
import urllib.parse

import trac2md

# SQL: select name, version, time and text for every row of the "wiki" table,
# ordered by page name and then by version number, so the versions of one
# page arrive consecutively in ascending order.
# The stored "time" column is divided by 1000000 in the query
# (presumably microseconds -> seconds; confirm against the Trac schema).
wiki_query = '''
  SELECT
    name, 
    version, 
    time / 1000000 AS time, 
    text 
  FROM wiki
  ORDER BY
    name, version
'''

# SQL: select the metadata columns of every row of the "attachment" table
# whose type is 'wiki', ordered by filename and then by time.
# "createdtime" is the stored "time" column divided by 1000000
# (presumably microseconds -> seconds; confirm against the Trac schema).
attachment_query = '''
  SELECT
    id,
    filename,
    size,
    author,
    description,
    ipnr,
    time / 1000000 AS createdtime
  FROM
    attachment
  WHERE
    type = 'wiki'
  ORDER BY
    filename, time
'''

def isotime(t):
    """Format the epoch timestamp *t* (seconds) as a UTC ISO-8601 string.

    Returns a string of the form ``YYYY-MM-DDTHH:MM:SSZ``.  A timestamp
    equal to 0 yields ``None`` instead of a string.
    """
    if t == 0:
        return None
    utc_fields = time.gmtime(t)
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_fields)

def attachment_link(row):
    """Return a ``(storage_path, quoted_path)`` pair for an attachment row.

    *row* must expose ``id`` and ``filename`` as attributes, and
    ``filename`` by key lookup as well.

    storage_path:
        ``attachments/wiki/<first 3 hex digits of sha1(id)>/<sha1(id)>/``
        ``<sha1(filename)><original file extension>``, built with
        ``os.path.join``.
    quoted_path:
        the id and the filename, each percent-quoted with no characters
        treated as safe (so "/" is quoted too), joined with ``os.path.join``.
    """
    def sha1_hex(text):
        # Hex SHA-1 digest of the encoded text.
        return hashlib.sha1(text.encode()).hexdigest()

    id_digest = sha1_hex(row.id)
    name_digest = sha1_hex(row.filename)
    _, extension = os.path.splitext(row["filename"])

    storage_path = os.path.join(
        "attachments", "wiki", id_digest[:3], id_digest, name_digest + extension
    )
    quoted_path = os.path.join(
        urllib.parse.quote(row.id, ""),
        urllib.parse.quote(row.filename, ""),
    )
    return storage_path, quoted_path

# Load the page-name filter rules.  keep() iterates this value as
# (sign, pattern) pairs, where sign is "+" or "-" and pattern is an fnmatch
# glob, so filter.json is expected to hold a list of two-element lists.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the locale's default encoding.
# NOTE: the name shadows the builtin filter(); it is kept because keep()
# reads this global by that name.
with open("filter.json", encoding="utf-8") as f:
    filter = json.load(f)

def keep(name):
    """Decide whether the wiki page called *name* should be exported.

    The module-level ``filter`` is walked in order as ``(sign, pattern)``
    pairs.  The first pattern that matches *name* (``fnmatch`` glob
    semantics) decides the outcome: ``"+"`` keeps the page, ``"-"`` drops
    it.  If no pattern matches, the page is kept.

    Returns:
        bool: True to keep the page, False to skip it.

    Raises:
        ValueError: if a rule's sign is not exactly ``"+"`` or ``"-"``.
    """
    for sign, pattern in filter:
        # Compare against a tuple, not the string "+-": a substring test
        # would also accept "" and "+-", which would then silently act as
        # exclude rules.  Raise instead of assert so the check survives -O.
        if sign not in ("+", "-"):
            raise ValueError(
                "filter rule sign must be '+' or '-', got {!r}".format(sign)
            )
        if fnmatch.fnmatch(name, pattern):
            return sign == "+"
    return True

class Row(sqlite3.Row):
    """``sqlite3.Row`` whose columns can also be read as attributes.

    ``row.name`` is equivalent to ``row["name"]``.
    """

    def __getattr__(self, name):
        """Return the value of column *name*.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: if the row has no column called *name*.
        """
        try:
            return self[name]
        except IndexError:
            # sqlite3.Row signals an unknown column with IndexError; the
            # attribute protocol (hasattr, getattr with a default, copy,
            # ...) requires AttributeError instead.
            raise AttributeError(name) from None

# Make sure the output directories exist.  exist_ok=True replaces the
# check-then-create sequence, which could fail if the directory appeared
# between the existence test and the makedirs call.
for dn in ("wiki", "pelican/content"):
    os.makedirs(dn, exist_ok=True)

# Export wiki pages from the Trac database and list attachment locations.
db = sqlite3.connect("trac.db")
try:
    # Row allows both row["col"] and row.col access.
    db.row_factory = Row

    for row in db.execute(wiki_query):
        if not keep(row.name):
            continue
        # Quote every reserved character (including "/") so the page name
        # is usable as a single file name.
        slug = urllib.parse.quote(row.name, "")
        print(slug, row.version)
        # The query orders rows by (name, version), so each later version
        # overwrites the earlier one and the files end up holding the
        # highest version of each page.
        # Write as UTF-8 explicitly: wiki text may contain characters the
        # locale's default encoding cannot represent.
        with open("wiki/{}.trac".format(slug), "w", encoding="utf-8") as f:
            f.write(row.text)
        md = trac2md.WikiToMD(row.text)
        with open("pelican/content/{}.md".format(slug), "w", encoding="utf-8") as f:
            f.write(md)

    for row in db.execute(attachment_query):
        print("{} => {}".format(*attachment_link(row)))
finally:
    # Release the connection even if conversion or file output fails.
    db.close()