diff options
Diffstat (limited to 'trac2md.py')
-rwxr-xr-x | trac2md.py | 234 |
1 files changed, 234 insertions, 0 deletions
#!/usr/bin/env python2

# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/

# This code mostly taken from patches to pagure_importer by mreynolds

'''Convert Trac wiki markup to Markdown.

The entry point is WikiToMD(); the convert_* helpers each rewrite one kind
of inline markup on a single line and return the line unchanged when there
is nothing to convert.
'''

import calendar
import sys
import re
import time
import shutil
import os
from base64 import b64decode
from datetime import datetime

try:
    import requests
except ImportError:
    # No function defined in this module uses requests, so a missing
    # package must not prevent the converters from being imported.
    requests = None

# Public patterns (kept under their original names for existing importers).
wikilink_pattern = re.compile(r'\[http(.*)\]')
wikilink_extract = re.compile(r'\[(.*)\]')
# Non-greedy so that several spans on one line are matched separately.
strikethrough_pattern = re.compile(r'~~(.+?)~~')
camelcase_pattern = re.compile(r"!(\w+)")
image_pattern = re.compile(r"\[\[Image\((.*?)\)\]\]")

wikiheading_patterns = tuple((level, re.compile(r"^{} (.*)[ \t]*=*$".format("=" * level)))
                             for level in range(1, 7))

# Any single [...] group, used to pull the individual links out of a field.
_bracketed_pattern = re.compile(r'\[([^]]*)\]')
# One [http... optional label] group; never crosses a closing bracket.
_http_link_pattern = re.compile(r'\[(http[^\]]*)\]')
# A "\\" line-break marker that is not already followed by a newline.
_open_linebreak_pattern = re.compile(r'\\\\(?!\r?\n)')
# Indentation emitted per list nesting level.  Two spaces puts a nested
# "- " / "* " item under the text of its parent item.
_LIST_INDENT = '  '


def to_timestamp(tm):
    ''' Convert to timestamp which can be jsonified

    tm is a string such as "2000-01-01T00:00:00+00:00".  The "+00:00"
    suffix is dropped and the remainder is interpreted as UTC, so the
    result does not depend on the local timezone of the machine.

    Returns the number of seconds since the epoch as a decimal string.
    Raises ValueError if tm does not match %Y-%m-%dT%H:%M:%S.
    '''

    tm = tm.replace('+00:00', '')
    date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S')
    # timegm() treats the tuple as UTC; mktime() would treat it as local time.
    return str(calendar.timegm(date.timetuple()))


def strip_wikilink(content):
    ''' Need to remove wiki link format from custom fields. They come in a
    variety of forms that can be comma or whitespace separated. They can also
    include link names which must also be removed.

    [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
    [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
    [https://bugzilla.com/6666666 6666666]

    Returns the bare link targets joined with ", ", or content unchanged
    when it contains no [http...] link.
    '''

    if not wikilink_pattern.search(content):
        return content

    # Looks like we have a link in here: keep only the part of each
    # bracketed group that precedes the first space (the target).
    return ', '.join(inner.split(' ', 1)[0]
                     for inner in _bracketed_pattern.findall(content))


def convert_headers(line):
    ''' Convert wikiformat headers

    "== Title ==" becomes "## Title" (one "#" per leading "=", levels 1-6).
    Lines that are not headers are returned unchanged.
    '''
    for level_count, header in wikiheading_patterns:
        match = header.search(line)
        if match and match.group(1):
            # The capture is greedy and still carries the closing "=" run.
            return "%s %s" % ('#' * level_count, match.group(1).rstrip("= \r\t"))

    return line


def _wikilink_to_markdown(match):
    ''' Build the Markdown link for one [target label] match. '''
    target, _, label = match.group(1).partition(' ')
    # A link without a label uses its own target as the link text.
    label = label.strip() or target
    return '[%s](%s)' % (label, target)


def convert_wikilinks(line):
    ''' Convert wikiformat links

    Every "[http://target label]" on the line becomes "[label](http://target)";
    "[http://target]" becomes "[http://target](http://target)".  Bracketed
    text that does not start with "http" is left alone.
    '''
    return _http_link_pattern.sub(_wikilink_to_markdown, line)


def convert_strike(line):
    ''' Convert wikiformat striked text

    Every "~~text~~" span on the line becomes "<s>text</s>".
    '''
    return strikethrough_pattern.sub(r'<s>\1</s>', line)


def _image_to_html(match):
    ''' Build the <img> tag for one [[Image(...)]] match. '''
    # Only the first comma-separated argument (the source) is used.
    source = match.group(1).split(",")[0].strip()
    return "<img src=\"{}\">".format(source)


def convert_image(line):
    ''' Convert wikiformat images

    Every "[[Image(source, options...)]]" on the line becomes
    '<img src="source">'; the options are dropped.
    '''
    return image_pattern.sub(_image_to_html, line)


def convert_linebreak(line):
    ''' Convert a trailing "\\\\" line-break marker to a Markdown line break. '''
    # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
    if line.endswith("\\\\"):
        line = line[:-2] + "  "
    return line


def _convert_inline(line):
    ''' Apply every single-line conversion, in the required order. '''
    # Convert CamelCase (drop the "!" that escapes a word)
    line = camelcase_pattern.sub("\\1", line)

    line = convert_headers(line)
    line = convert_wikilinks(line)
    line = convert_strike(line)
    line = convert_image(line)
    line = convert_linebreak(line)

    # Convert bold and italic text (do this last); bold first, because the
    # italic marker is a prefix of the bold marker.
    line = line.replace("'''", "**")
    line = line.replace("''", "*")
    return line


def WikiToMD(content):
    ''' Convert wiki/RST format to Markdown. Code blocks, bold/italics,
    wiki links, lists, striked text, headers, tables, images and line
    breaks.

    content is the complete wiki text.  Returns the converted text; every
    output line is terminated with a newline.  Text between {{{ and }}} is
    copied through without inline conversion.
    '''

    # Line breaks in Markdown must be at end of line, so add newlines as
    # needed.  A marker that already ends its line is left alone, otherwise
    # the extra newline would turn the line break into a paragraph break.
    # NOTE: this runs before code blocks are recognised, so a "\\" inside a
    # code block is split as well.
    content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\")
    content = _open_linebreak_pattern.sub(lambda match: "\\\\\n", content)

    code_block = False
    in_list = False
    in_table = False
    indent_stack = []   # source indents of the currently open list levels
    new_content = []

    for raw_line in content.splitlines():
        line = raw_line.rstrip()
        tail = ["\n"]

        if "{{{" in line or "}}}" in line:
            # The marker that appears last on the line decides whether we
            # are inside a code block afterwards.
            code_block = line.rfind("{{{") > line.rfind("}}}")
            line = line.replace("{{{", "```").replace("}}}", "```")

        if not code_block:
            #
            # Want to convert tables.  References:
            #   https://github.github.com/gfm/#tables-extension-
            #   https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
            #
            # Table start: line containing "||"
            # Table end: blank line?
            #
            # Figuring out whether there's a real header line is fun,
            # trac doesn't require one, markdown does.  Guess we can
            # add a dummy header if no better idea.  Markdown requires
            # delimiter line, which we add immediately after the
            # header, both appear to be mandatory.  Trac can have
            # label cells anywhere, not just in header, might need to
            # add "*" to those or just ignore the issue.
            # Justification we can sort of figure out from the header,
            # if the rows do anything different, ouch, because
            # markdown specifies in delimiter line.
            #
            # Might do something clever with the "=" markers and
            # alignment, start with just getting the basic table
            # structure to something markdown will believe.
            #
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    # The first row is used as the header row; the
                    # delimiter row goes out right after it.
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                    in_table = True
            elif in_table:
                new_content.append("\n")
                in_table = False

            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.
            #
            nested_line = line.lstrip(' ')
            if nested_line.startswith(('- ', '* ')):
                if not in_list:
                    new_content.append("\n")
                    indent_stack = []
                    in_list = True
                indent = len(line) - len(nested_line)
                # Close every level that is indented deeper than this item,
                # so dedenting several levels at once lands on the right one.
                while indent_stack and indent < indent_stack[-1]:
                    indent_stack.pop()
                if not indent_stack or indent > indent_stack[-1]:
                    indent_stack.append(indent)
                line = _LIST_INDENT * (len(indent_stack) - 1) + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                indent_stack = []

            line = _convert_inline(line)

        new_content.append(line)
        new_content.extend(tail)

    return "".join(new_content)