#!/usr/bin/env python2
# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
# This code mostly taken from patches to pagure_importer by mreynolds
import sys
import re
import time
import requests
import shutil
import os
from base64 import b64decode
from datetime import datetime
from urllib.parse import quote
# Compiled patterns for the Trac wiki constructs this converter understands.

# Forced line break: "[[br]]" (any letter case) or a double backslash.
content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I)
# Single-bracketed text "[...]" that is not preceded by another "[".
traclink_pattern = re.compile(r"(?<!\[)\[([^][]+)\]")
# Image macro "[[Image(args)]]".  The argument match is lazy so that several
# macros on the same line are matched one by one instead of as a single span
# running from the first "[[Image(" to the last ")]]".
image_pattern = re.compile(r"\[\[Image\((.*?)\)\]\]")
# Double-bracketed link "[[scheme:target|label]]"; scheme and label are optional.
wikilink_pattern = re.compile(r"\[\[(wiki:|attachment:|source:|browser:)?([^]|[]+)(?:[|]([^][]+))?\]\]")
# Struck-through text "~~text~~".
strikethrough_pattern = re.compile(r"~~([^~]+)~~")
# "!" immediately followed by word characters or "#".
bangquote_pattern = re.compile(r"!((?:\w|[#])+)")
# Double backslash at the very end of the string.
linebreak_pattern = re.compile(r"\\\\$")
# Whitespace-delimited CamelCase word.
camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))")
# "[[span(..., text)]]" macro; captures the last comma-separated argument.
span_pattern = re.compile(r"\[\[span\((?:[^][]*,)*([^(),]+)\)\]\]")
# "[[PageOutline]]" macro (any letter case).
delete_pattern = re.compile(r"\[\[PageOutline\]\]", re.I)
# One (level, pattern) pair per heading depth 1-6.  Each pattern requires
# exactly `level` "=" characters followed by a space at the start of the line.
wikiheading_patterns = tuple(
    (level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level)))
    for level in range(1, 7))


def convert_headers(line):
    """Convert a Trac heading line into a Markdown ATX heading.

    A line that starts with one to six ``=`` characters and a space becomes
    the same number of ``#`` characters, a space, and the heading text with
    trailing ``=``, spaces, tabs and carriage returns removed.

    Args:
        line: A single line of wiki text (a ``str``).

    Returns:
        The Markdown heading, or ``line`` unchanged when it is not a heading
        or the heading text is empty.
    """
    for level_count, header in wikiheading_patterns:
        # Test the match explicitly instead of relying on a bare ``except``
        # to absorb the AttributeError raised by ``None.group``; that also
        # hid every unrelated error.
        match = header.search(line)
        if match is None:
            continue
        title = match.group(1)
        if title:
            # Only one heading level can match, so stop at the first hit.
            return "{} {}".format("#" * level_count, title.rstrip("= \r\t"))
    return line
def convert_traclink_to_creolelink(line):
    """Rewrite native Trac links ``[target label]`` as Creole ``[[target|label]]``.

    Converting to the Creole form first means the rest of the converter only
    has to deal with one link syntax, and one that is hard to confuse with
    partially converted Markdown.

    Args:
        line: A single line of wiki text.

    Returns:
        The line with each single-bracketed link rewritten.  Bracketed text
        that has no space, no ":" and is not a CamelCase word is left as is.
    """

    def to_creole(match):
        text = match.group(1).strip()
        if " " in text:
            # The first space separates the link target from its label.
            target, label = text.split(" ", 1)
            return "[[{}|{}]]".format(target, label)
        if ":" in text or camelcase_pattern.match(text):
            return "[[{}]]".format(text)
        return match.group(0)

    # Substitute match by match: a global str.replace() of the matched text
    # would also rewrite identical text elsewhere in the line, including text
    # inside "[[...]]" links that the pattern deliberately does not match.
    return traclink_pattern.sub(to_creole, line)
# TODO: most of the non-wiki scheme tests below should probably become a table
# in an extended JSON config file which maps Trac link targets to URLs, e.g.:
#
# { "source:fee/fie/foe/fum": "https://git.cryptech.is/blarg/blee/blue" }
def convert_wikilinks(line, slug, giturl):
    """Replace every ``[[...]]`` wiki link in *line* with its Markdown form.

    Args:
        line: A single line of wiki text.
        slug: Path component placed after ``{attach}`` for attachment links.
        giturl: Base URL used for ``source:`` and ``browser:`` links.

    Returns:
        The line with each link matched by ``wikilink_pattern`` replaced.
    """

    def strip_quotes(value):
        # Remove one pair of matching quotes wrapped around the value.
        for q in ('"', "'"):
            if value.startswith(q) and value.endswith(q):
                return value[1:-1]
        return value

    def render(scheme, link, text):
        # A bare URL whose label is the URL itself becomes an autolink.
        if text == link and link.startswith("http") and "://" in link:
            return "<{}>".format(link)
        if scheme == "attachment:":
            return "[{}]({{attach}}{}/{})".format(text, slug, link)
        if scheme in ("source:", "browser:"):
            return "[{}]({}/{})".format(text, giturl.rstrip("/"), link.lstrip("/"))
        # Explicit wiki links and scheme-less CamelCase names point at pages.
        if scheme == "wiki:" or (scheme is None and camelcase_pattern.match(link)):
            return "[{}]({{filename}}{}.md)".format(text, link)
        return "[{}]({})".format(text, link)

    result = line
    for found in wikilink_pattern.finditer(line):
        scheme, link, text = [
            part.strip() if part else part for part in found.groups()
        ]
        # A link without an explicit label is labelled with its own target.
        if text is None:
            text = link
        markdown = render(scheme, strip_quotes(link), strip_quotes(text))
        result = result.replace(found.group(0), markdown)
    return result
def convert_image(line, slug):
    """Replace every ``[[Image(...)]]`` macro in *line* with an image reference.

    Args:
        line: A single line of wiki text.
        slug: Path component placed after ``{attach}`` for local images.

    Returns:
        The line with each macro replaced: an HTML ``<img>`` tag when the
        image source contains ``://``, otherwise a Markdown image whose path
        is the percent-encoded source under ``{attach}<slug>/``.
    """
    converted = line
    for macro in image_pattern.finditer(line):
        # Only the first macro argument (the image source) is used.
        source = macro.group(1).partition(",")[0].strip()
        if "://" in source:
            replacement = "<img src=\"{}\">".format(source)
        else:
            encoded = quote(source, "")
            replacement = "![{}]({{attach}}{}/{})".format(source, slug, encoded)
        converted = converted.replace(macro.group(0), replacement)
    return converted
def WikiToMD(content, slug, giturl="https://git.cryptech.is/"):
    """Convert a page of Trac wiki markup to Markdown.

    The text is processed line by line.  Lines inside ``{{{ ... }}}`` blocks
    only have their delimiters turned into Markdown code fences; every other
    line goes through the link, table, list, heading, image and inline
    formatting conversions.

    Args:
        content: The complete Trac wiki text.
        slug: Passed on to the image and link converters, which use it to
            build ``{attach}`` paths.
        giturl: Base URL handed to ``convert_wikilinks`` for ``source:`` and
            ``browser:`` links.  Defaults to the previously hard-coded value.

    Returns:
        The converted text as one string, each output line ending in a
        newline.
    """
    code_block = False
    in_list = False
    in_table = False
    nested_level = 0
    prev_indent = 0
    new_content = []

    # Move every forced line break to the end of its own line so that the
    # per-line pass only has to look for a trailing double backslash.
    lines = content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines()

    # Iterate directly instead of popping from the front of the list, which
    # is linear per pop and quadratic overall.
    for raw_line in lines:
        line = raw_line.rstrip()
        tail = ["\n"]

        # "{{{" opens a preformatted block and "}}}" closes it; both become
        # Markdown code fences.  A line containing both ends up closed.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")

        if not code_block:
            # Convert CamelCase links to explicit links
            line = camelcase_pattern.sub(r"[[\1]]", line)

            # Convert TracLinks to WikiCreole links to simplify remaining processing
            line = convert_traclink_to_creolelink(line)

            # Convert tables.  A table starts at a line beginning with "||"
            # and ends at the first line that does not.  Trac does not
            # require a header row but Markdown does, so the first row is
            # treated as the header and a delimiter row is emitted after it.
            # Cell alignment and "=" header markers are not translated.
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                    in_table = True
            elif in_table:
                new_content.append("\n")
                in_table = False

            # Convert bullet lists.  The start and end of a list needs an
            # empty line.
            nested_line = line.lstrip(' ')
            if nested_line.startswith(('- ', '* ')):
                if not in_list:
                    new_content.append("\n")
                    nested_level = 0
                    prev_indent = 0
                    in_list = True
                indent = len(line) - len(nested_line)
                if indent > prev_indent:
                    nested_level += 1
                elif indent < prev_indent:
                    # Never drop below the top level, otherwise later, deeper
                    # items would be rendered too shallow.
                    nested_level = max(nested_level - 1, 0)
                prev_indent = indent
                # NOTE(review): one space per nesting level may be too little
                # for Markdown renderers to nest sub-lists -- confirm against
                # the target renderer.
                line = ' ' * nested_level + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                nested_level = 0
                prev_indent = 0

            # Convert !x quoting
            line = bangquote_pattern.sub(r"\1", line)

            # Convert (limited subset of) spans
            line = span_pattern.sub(r"\1", line)

            # Convert headers
            line = convert_headers(line)

            # Convert images
            line = convert_image(line, slug)

            # Delete Trac macros that have no useful counterpart
            line = delete_pattern.sub("", line)

            # Convert wiki links
            line = convert_wikilinks(line, slug, giturl)

            # Convert struck-through text
            line = strikethrough_pattern.sub(r"<s>\1</s>", line)

            # Convert line breaks.  A Markdown hard line break is two spaces
            # before the newline; a single trailing space is not a break.
            line = linebreak_pattern.sub("  ", line)

            # Convert bold and italic text (do this last)
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")  # Convert italic text

        new_content.append(line)
        new_content.extend(tail)

    return "".join(new_content)