path: root/
diff options
authorRob Austein <>2021-02-14 16:56:57 +0000
committerRob Austein <>2021-02-14 16:56:57 +0000
commit68e18ad1f44e9a6fab66adc38e97d027a58de8a4 (patch)
tree22eb915c53760c6b6f0f5254d027de9dee58abd1 /
parent7bf10bb74babd8f16b7a3942607f3d1007aa2324 (diff)
Another reorg, and pelican samples
Diffstat (limited to '')
1 files changed, 234 insertions, 0 deletions
diff --git a/ b/
new file mode 100755
index 0000000..c022899
--- /dev/null
+++ b/
@@ -0,0 +1,234 @@
+#!/usr/bin/env python2
+# Untested code from
+# This code mostly taken from patches to pagure_importer by mreynolds
+import sys
+import re
+import time
+import requests
+import shutil
+import os
+from base64 import b64decode
+from datetime import datetime
# Pre-compiled patterns for the Trac wiki constructs handled in this module.
# They are written as raw strings so that regex escapes such as \[ and \w are
# passed to the regex engine instead of being read as (invalid) Python string
# escapes.
wikilink_pattern = re.compile(r'\[http(.*)\]')       # [http... optional label]
wikilink_extract = re.compile(r'\[(.*)\]')           # text between [ and ]
strikethrough_pattern = re.compile(r'~~(.*)~~')      # ~~struck text~~
camelcase_pattern = re.compile(r"!(\w+)")            # !EscapedWord
image_pattern = re.compile(r"\[\[Image\((.*)\)\]\]")  # [[Image(name, options)]]
# One (level, pattern) pair per heading depth 1-6.  Each pattern matches a
# line starting with exactly `level` "=" characters followed by a space; the
# capture is greedy, so it may include the closing "=" markers.
wikiheading_patterns = tuple(
    (level, re.compile(r"^{} (.*)[ \t]*=*$".format("=" * level)))
    for level in range(1, 7)
)
def to_timestamp(tm):
    ''' Convert to timestamp which can be jsonified.

    tm is a time string in the form "YYYY-MM-DDTHH:MM:SS", optionally
    carrying a "+00:00" suffix, which is dropped before parsing.

    Returns the number of whole seconds since 1970-01-01T00:00:00 as a
    decimal string.  Raises ValueError if the remaining text does not match
    the expected format.
    '''
    parsed = datetime.strptime(tm.replace('+00:00', ''), '%Y-%m-%dT%H:%M:%S')
    # The value is UTC, so measure it against the UTC epoch.  time.mktime()
    # would reinterpret it in the host's local timezone and shift the result
    # by the local UTC offset.
    elapsed = parsed - datetime(1970, 1, 1)
    # Integer arithmetic avoids having to trim a trailing ".0" from a float.
    return str(elapsed.days * 86400 + elapsed.seconds)
def strip_wikilink(content):
    ''' Need to remove wiki link format from custom fields.  They come in a
    variety of forms that can be comma or whitespace separated.  They can also
    include link names which must also be removed.

    Illustrative inputs and results:

        [http://example.com/1]
            -> http://example.com/1
        [http://example.com/1], [http://example.com/2 two]
            -> http://example.com/1, http://example.com/2

    Content that holds no "[http...]" link is returned unchanged.
    '''
    if not wikilink_pattern.search(content):
        return content
    # Looks like we have a link in here: keep only the target of every
    # [...] group, dropping any link name that follows the first space.
    targets = [group.split(' ', 1)[0]
               for group in re.findall(r'\[([^]]*)\]', content)]
    return ', '.join(targets)
def convert_headers(line):
    ''' Convert wikiformat headers.

    A line such as "= Title =" becomes "# Title", "== Title ==" becomes
    "## Title", and so on through the heading levels listed in
    wikiheading_patterns.  A line that is not a heading, or whose heading
    text is empty, is returned unchanged.
    '''
    for level_count, header in wikiheading_patterns:
        match = header.search(line)
        if match is None:
            # Try the next heading level
            continue
        title = match.group(1)
        if title:
            # The capture may still include the closing "=" markers and
            # trailing whitespace, so trim them from the right.
            line = "%s %s" % ('#' * level_count, title.rstrip("= \r\t"))
            break  # No need to check other heading levels
    return line
def convert_wikilinks(line):
    ''' Convert wikiformat links.

    "[http://example.com]" becomes "[http://example.com](http://example.com)"
    and "[http://example.com Example]" becomes "[Example](http://example.com)".
    A line without an "[http...]" link is returned unchanged.

    NOTE: wikilink_extract captures from the first "[" to the last "]" on
    the line, so a line is treated as holding a single link.
    '''
    if not wikilink_pattern.search(line):
        return line
    match = wikilink_extract.search(line)
    if match is None:
        # Not a link, not a problem
        return line
    result = match.group(1)
    if result:
        # The target runs up to the first space; anything after it is the
        # link text.  Without link text the target doubles as the text.
        parts = result.split(' ', 1)
        target = parts[0]
        label = parts[1] if len(parts) == 2 else target
        mdlink = '[%s](%s)' % (label, target)
        line = line.replace('[' + result + ']', mdlink)
    return line
def convert_strike(line):
    ''' Convert wikiformat striked text.

    "~~text~~" becomes "<s>text</s>".  A line without a non-empty
    "~~...~~" span is returned unchanged.
    '''
    striked_result = strikethrough_pattern.search(line)
    if striked_result:
        striked_text = striked_result.group(1)
        if striked_text:
            orig_text = '~~%s~~' % striked_text
            new_text = '<s>%s</s>' % striked_text
            line = line.replace(orig_text, new_text)
    return line
def convert_image(line):
    ''' Convert wikiformat image macros.

    "[[Image(pic.png, 50%)]]" becomes '<img src="pic.png">': only the first
    comma-separated argument of the macro is kept, with surrounding
    whitespace removed.  A line without an Image macro is returned
    unchanged.
    '''
    image_result = image_pattern.search(line)
    if image_result:
        image_text = image_result.group(1).split(",")[0].strip()
        # group(0) is the whole macro text, which is what gets replaced.
        old_text = image_result.group(0)
        new_text = "<img src=\"{}\">".format(image_text)
        line = line.replace(old_text, new_text)
    return line
def convert_linebreak(line):
    ''' Convert a wikiformat line break at the end of a line.

    A line ending in two backslashes has them replaced by two spaces; any
    other line is returned unchanged.
    '''
    # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
    # The caller supplies the <RETURN>; both spaces have to come from here,
    # since a single trailing space is not a hard line break.
    if line.endswith("\\\\"):
        line = line[:-2] + "  "
    return line
def WikiToMD(content):
    ''' Convert wiki/RST format to Markdown.  Code blocks, tables, bullet
    lists, CamelCase escapes, headers, wiki links, striked text, images,
    line breaks and bold/italics.

    content is the whole wiki text.  It is processed line by line; text
    between "{{{" and "}}}" is copied through with only the markers turned
    into ``` fences.

    Returns the converted text, every line terminated by a newline.
    '''
    # Line breaks in Markdown must be at end of line, so add newlines as needed
    content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n")

    code_block = False
    in_list = False
    in_table = False
    nested_level = 0
    prev_indent = 0
    new_content = []

    # Iterate directly: popping index 0 off a list for every line is
    # quadratic in the number of lines.
    for raw_line in content.splitlines():
        line = raw_line.rstrip()
        # Text emitted after this line; a table header also gets its
        # delimiter row appended here.
        tail = ["\n"]

        # "{{{" opens a code block and "}}}" closes one.  Checking in this
        # order leaves code_block False for a block opened and closed on
        # the same line.  str.replace handles every occurrence in one go.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")

        if not code_block:
            #
            # Want to convert tables.  (The reference links that followed
            # this comment are not present in this copy of the file.)
            #
            # Table start: line containing "||"
            # Table end: blank line?
            #
            # Figuring out whether there's a real header line is fun,
            # trac doesn't require one, markdown does.  Guess we can
            # add a dummy header if no better idea.  Markdown requires
            # delimiter line, which we add immediately after the
            # header, both appear to be mandatory.  Trac can have
            # label cells anywhere, not just in header, might need to
            # add "*" to those or just ignore the issue.
            # Justification we can sort of figure out from the header,
            # if the rows do anything different, ouch, because
            # markdown specifies in delimiter line.
            #
            # Might do something clever with the "=" markers and
            # alignment, start with just getting the basic table
            # structure to something markdown will believe.
            #
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    # First row of a table: follow it with a delimiter row
                    # holding one "|---" cell per column.
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                    in_table = True
            elif in_table:
                # First non-table line: close the table with an empty line.
                new_content.append("\n")
                in_table = False

            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.
            #
            nested_line = line.lstrip(' ')
            if nested_line.startswith(('- ', '* ')):
                if not in_list:
                    new_content.append("\n")
                    nested_level = 0
                    prev_indent = 0
                    in_list = True
                # Nesting follows changes in leading-space count relative
                # to the previous item, one level per change.
                indent = len(line) - len(nested_line)
                if indent > prev_indent:
                    nested_level += 1
                elif indent < prev_indent:
                    nested_level -= 1
                prev_indent = indent
                # NOTE(review): one space per nesting level is what this
                # copy of the source shows; confirm the intended indent
                # width against the Markdown renderer in use.
                line = ' ' * nested_level + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                nested_level = 0
                prev_indent = 0

            # Convert CamelCase
            line = camelcase_pattern.sub("\\1", line)
            # Convert headers
            line = convert_headers(line)
            # Convert wiki links
            line = convert_wikilinks(line)
            # Convert striked through text
            line = convert_strike(line)
            # Convert images
            line = convert_image(line)
            # Convert line breaks
            line = convert_linebreak(line)
            # Convert bold and italic text (do this last)
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")  # Convert italic text

        new_content.append(line)
        new_content.extend(tail)

    return "".join(new_content)