#!/usr/bin/env python3
# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
# This code mostly taken from patches to pagure_importer by mreynolds
import sys
import re
import time
import requests
import shutil
import os
from base64 import b64decode
from datetime import datetime
from urllib.parse import quote
# Patterns are compiled once at import time. Raw strings keep the regex
# backslashes literal (a plain "\[" is an invalid string escape), and the
# lazy quantifiers stop each match at the first closing delimiter so that
# several constructs on the same line are matched separately.

# External links: [[http://url|label]] or [http://url label]
wikilink_1_pattern = re.compile(r"\[\[(http.*?)\]\]|\[(http.*?)\]")
# Wiki page links / macros: [[Page]], [[wiki:Page]] or [wiki:Page label]
wikilink_2_pattern = re.compile(
    r"\[\[(?:wiki:)?([a-zA-Z0-9_]+)\]\]|\[wiki:(.+?)\]")
# Struck-through text: ~~text~~
strikethrough_pattern = re.compile(r"~~(.*?)~~")
# A word prefixed with "!" (the prefix is dropped by the caller)
camelcase_pattern = re.compile(r"!(\w+)")
# Image macro: [[Image(name, options...)]]
image_pattern = re.compile(r"\[\[Image\((.*?)\)\]\]")
# (level, pattern) pairs for "= Title =" .. "====== Title ======" headings.
# Group 1 may still carry trailing "=" markers; the caller strips them.
wikiheading_patterns = tuple(
    (level, re.compile(r"^{} (.*)[ \t]*=*$".format("=" * level)))
    for level in range(1, 7)
)
def to_timestamp(tm):
    ''' Convert a UTC time string to a timestamp which can be jsonified.

    tm must look like "2017-03-01T12:34:56"; a "+00:00" offset, if present,
    is dropped before parsing.

    Returns the whole number of seconds since the Unix epoch, as a string.
    Raises ValueError (from strptime) if tm does not match the format.
    '''
    tm = tm.replace('+00:00', '')
    date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S')
    # The value is UTC, so measure it against the UTC epoch directly.
    # time.mktime() would interpret it as local time and shift the result
    # by the machine's UTC offset.
    seconds = (date - datetime(1970, 1, 1)).total_seconds()
    return str(int(seconds))
def strip_wikilink(content):
    ''' Need to remove wiki link format from custom fields. They come in a
    variety of forms that can be comma or whitespace separated. They can also
    include link names which must also be removed.

    [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
    [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
    [https://bugzilla.com/6666666 6666666]

    Returns the first word of every bracketed item joined with ", ", or
    content unchanged when it holds no bracketed http(s) link.
    '''
    # Cheap guard: only rewrite fields that contain a bracketed link.
    if '[http' not in content:
        return content
    bracketed = re.findall(r'\[([^]]*)\]', content)
    if not bracketed:
        # Opening bracket without a closing one: keep the field as it is
        # rather than returning an empty string.
        return content
    # Everything after the first space inside the brackets is the link name.
    return ', '.join(item.split(' ', 1)[0] for item in bracketed)
def convert_headers(line):
    ''' Convert wikiformat headers

    Tries each (level, pattern) pair of wikiheading_patterns in order and
    rewrites the line as "#" * level followed by the heading text, with any
    trailing "=" markers and whitespace stripped. Lines that match no
    heading pattern are returned unchanged.
    '''
    for level_count, header in wikiheading_patterns:
        match = header.search(line)
        if match is None:
            # Not a heading of this level, try the next one
            continue
        title = match.group(1)
        if title:
            line = "%s %s" % ('#' * level_count, title.rstrip("= \r\t"))
            break  # No need to check other heading levels
    return line
def convert_wikilinks_1(line):
    ''' Convert wikiformat links

    Every match of wikilink_1_pattern in the line is rewritten as a
    Markdown link "[label](target)". The matched text is split once on "|"
    if it contains one, otherwise once on a space; without either
    separator the target doubles as the label.
    '''
    def to_markdown(match):
        text = match.group(1) or match.group(2)
        for sep in "| ":
            if sep in text:
                target, label = text.split(sep, 1)
                break
        else:
            target = label = text
        return "[{}]({})".format(label, target)

    # sub() with a callable converts all links on the line, not just the
    # first, and inserts the replacement text literally.
    return wikilink_1_pattern.sub(to_markdown, line)
def convert_wikilinks_2(line):
    ''' Convert more wiki links

    Every match of wikilink_2_pattern in the line is rewritten as a
    Markdown link "[label](target)", splitting the matched text once on
    "|" or else once on a space. A match whose text is "pageoutline"
    (in any letter case) is removed instead.
    '''
    def to_markdown(match):
        text = match.group(1) or match.group(2)
        if text.lower() == "pageoutline":
            return ""
        for sep in "| ":
            if sep in text:
                target, label = text.split(sep, 1)
                break
        else:
            target = label = text
        return "[{}]({})".format(label, target)

    # sub() with a callable converts all links on the line, not just the
    # first, and inserts the replacement text literally.
    return wikilink_2_pattern.sub(to_markdown, line)
def convert_strike(line):
    ''' Convert wikiformat striked text

    Each "~~text~~" match of strikethrough_pattern becomes "<s>text</s>".
    A match with nothing between the markers is left as it is.
    '''
    def to_html(match):
        striked_text = match.group(1)
        if not striked_text:
            # Nothing was struck through, keep the markers untouched
            return match.group(0)
        return '<s>%s</s>' % striked_text

    return strikethrough_pattern.sub(to_html, line)
def convert_image(line, slug):
    ''' Convert wikiformat image macros

    For each match of image_pattern, the image name is the text before the
    first comma of the macro arguments, with surrounding whitespace
    removed. A name containing "://" becomes an HTML <img> tag; any other
    name becomes a Markdown image pointing at "{attach}<slug>/<name>",
    with the name fully percent-encoded in the path.
    '''
    def to_markdown(match):
        image_text = match.group(1).split(",")[0].strip()
        if "://" in image_text:
            return "<img src=\"{}\">".format(image_text)
        # safe="" so that "/" in the name is percent-encoded as well
        return "![{}]({{attach}}{}/{})".format(
            image_text, slug, quote(image_text, ""))

    return image_pattern.sub(to_markdown, line)
def convert_linebreak(line):
    ''' Convert a wikiformat line break at the end of a line

    A line ending in two backslashes gets them replaced by two trailing
    spaces; any other line is returned unchanged.
    '''
    # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
    # A single trailing space would not produce a hard break.
    if line.endswith("\\\\"):
        line = line[:-2] + "  "
    return line
def WikiToMD(content, slug):
    ''' Convert wiki/RST format to Markdown. Code blocks, bold/italics,
    wiki links, lists, striked text, and headers.

    content is the wiki text; slug is passed on to convert_image() for
    building attachment paths. Returns the converted text, with every
    input line terminated by a newline.
    '''
    # Line breaks in Markdown must be at end of line, so add newlines as needed
    content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n")
    code_block = False
    in_list = False
    in_table = False
    nested_level = 0
    prev_indent = 0
    new_content = []
    for raw_line in content.splitlines():
        line = raw_line.rstrip()
        # Text emitted after this line: its newline plus any extra rows
        tail = ["\n"]
        # {{{ / }}} fence code blocks. One pass replaces every marker on
        # the line; a line carrying both ends up outside the code block.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")
        if not code_block:
            #
            # Want to convert tables.  References:
            # https://github.github.com/gfm/#tables-extension-
            # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
            #
            # Table start: line containing "||"
            # Table end: blank line?
            #
            # Figuring out whether there's a real header line is fun,
            # trac doesn't require one, markdown does.  Guess we can
            # add a dummy header if no better idea.  Markdown requires
            # delimiter line, which we add immediately after the
            # header, both appear to be mandatory.  Trac can have
            # label cells anywhere, not just in header, might need to
            # add "*" to those or just ignore the issue.
            # Justification we can sort of figure out from the header,
            # if the rows do anything different, ouch, because
            # markdown specifies in delimiter line.
            #
            # Might do something clever with the "=" markers and
            # alignment, start with just getting the basic table
            # structure to something markdown will believe.
            #
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    # First row is treated as the header: follow it with
                    # the delimiter row, one "|---" per cell.
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                    in_table = True
            elif in_table:
                new_content.append("\n")
                in_table = False
            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.
            #
            nested_line = line.lstrip(' ')
            if nested_line.startswith(('- ', '* ')):
                if not in_list:
                    new_content.append("\n")
                    nested_level = 0
                    prev_indent = 0
                    in_list = True
                indent = len(line) - len(nested_line)
                if indent > prev_indent:
                    nested_level += 1
                elif indent < prev_indent:
                    # Never drop below the top level, otherwise later
                    # items would be nested one level too shallow.
                    nested_level = max(nested_level - 1, 0)
                prev_indent = indent
                # NOTE(review): indent width per nesting level is kept as
                # found; confirm it suits the target Markdown renderer.
                line = ' ' * nested_level + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                nested_level = 0
                prev_indent = 0
            # Convert CamelCase
            line = camelcase_pattern.sub("\\1", line)
            # Convert headers
            line = convert_headers(line)
            # Convert wiki links
            line = convert_wikilinks_1(line)
            line = convert_wikilinks_2(line)
            # Convert striked through text
            line = convert_strike(line)
            # Convert images
            line = convert_image(line, slug)
            # Convert line breaks
            line = convert_linebreak(line)
            # Convert bold and italic text (do this last)
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")  # Convert italic text
        new_content.append(line)
        new_content.extend(tail)
    return "".join(new_content)