#!/usr/bin/env python3
# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/
# This code mostly taken from patches to pagure_importer by mreynolds
"""Convert a subset of Trac wiki markup to Markdown.

The entry point is :func:`WikiToMD`; the ``convert_*`` helpers each rewrite
one kind of inline markup on a single line of text.
"""

import os
import re
import shutil
import sys
import time
from base64 import b64decode
from datetime import datetime
from typing import Optional
from urllib.parse import quote

try:
    # Third-party and not used by any converter below; kept so the name
    # stays available, but optional so the module imports without it.
    import requests
except ImportError:
    requests = None

# [[Image(target, options...)]]; non-greedy so that several image macros on
# one line are matched separately.
image_pattern = re.compile(r"\[\[Image\((.*?)\)\]\]")
# [[http://... | label]] or [http://... label]; brackets are excluded from
# the body so that two links on one line are not merged into a single match.
wikilink_1_pattern = re.compile(r"\[\[(http[^][]*)\]\]|\[(http[^][]*)\]")
# [[attachment:name | label]] or [attachment:name label]
wikilink_2_pattern = re.compile(r"\[\[attachment:([^][]+)\]\]|\[attachment:([^][]+)\]")
# [[wiki:Page | label]], [[Page | label]] or [wiki:Page label]
wikilink_3_pattern = re.compile(r"\[\[(?:wiki:)?([^][]+)\]\]|\[wiki:([^][]+)\]")
# ~~text~~; non-greedy so that separate runs on one line stay separate.
strikethrough_pattern = re.compile(r"~~(.+?)~~")
# "!Word" -- a leading "!" is how Trac escapes automatic CamelCase links.
camelcase_pattern = re.compile(r"!((?:\w|[#])+)")
span_pattern = re.compile(r"\[\[span\((?:[^][]*,)*\[([^(), ]+)([^(),]+)\]\)\]\]")

# One (level, pattern) pair per heading depth: "= h1 =" through "====== h6".
wikiheading_patterns = tuple(
    (level, re.compile(r"^{} (.*)[ \t]*=*$".format("=" * level)))
    for level in range(1, 7))

# Indentation emitted per nesting level of a bullet list.
_LIST_INDENT = "    "


def convert_headers(line: str) -> str:
    """Rewrite a Trac heading line (``== Title ==``) as ``## Title``.

    Lines that are not headings, or whose heading text is empty, are
    returned unchanged.
    """
    for level_count, header in wikiheading_patterns:
        match = header.search(line)
        if match is None:
            continue  # Try the next heading level
        title = match.group(1)
        if title:
            # The capture is greedy and includes the closing "=" run.
            return "{} {}".format("#" * level_count, title.rstrip("= \r\t"))
    return line


def make_mdlink(text: str, slug: Optional[str], sep: str) -> str:
    """Build a Markdown link from the inside of a Trac link.

    ``text`` is split once on ``sep`` into target and label; without a
    separator the target doubles as the label. A matching pair of
    surrounding quotes is removed from each part. When ``slug`` is given,
    the target is emitted as ``{attach}<slug>/<target>``.
    """
    parts = [p.strip() for p in text.split(sep, 1)]
    for i, part in enumerate(parts):
        if any(part.startswith(q) and part.endswith(q) for q in ('"', "'")):
            parts[i] = part[1:-1]
    if slug is None:
        return "[{}]({})".format(parts[-1], parts[0])
    return "[{}]({{attach}}{}/{})".format(parts[-1], slug, parts[0])


def convert_wikilinks(line: str, pattern, slug: Optional[str] = None) -> str:
    """Replace every link matched by ``pattern`` in ``line`` with Markdown.

    ``pattern`` must define two groups, one per bracket style; whichever
    one matched holds the link text. Double-bracket links separate target
    and label with ``|``, single-bracket links with a space. The
    ``[[PageOutline]]`` macro is removed.
    """
    def _replace(match):
        text = match.group(1) or match.group(2)
        if text.lower() == "pageoutline":
            return ""
        sep = "|" if match.group(0).startswith("[[") else " "
        return make_mdlink(text, slug, sep)

    # sub() rewrites each match exactly where it was found, so a later
    # identical link cannot rewrite text inside an already generated one.
    return pattern.sub(_replace, line)


def convert_strike(line: str) -> str:
    """Rewrite each ``~~text~~`` run in ``line`` as ``<s>text</s>``.

    Core Markdown has no strikethrough syntax, so inline HTML is emitted.
    """
    return strikethrough_pattern.sub(r"<s>\1</s>", line)


def convert_image(line: str, slug: str) -> str:
    """Rewrite each ``[[Image(target, ...)]]`` macro as a Markdown image.

    Only the first comma-separated argument (the target) is used. A target
    containing ``://`` is linked as-is; any other target is treated as an
    attachment of ``slug`` and percent-encoded.
    """
    def _replace(match):
        target = match.group(1).split(",")[0].strip()
        if "://" in target:
            # The file name is used as alt text rather than the URL: a
            # "[http...]" alt text would be picked up again by the
            # wiki-link conversion that runs after this one.
            alt_text = target.rsplit("/", 1)[-1]
            return "![{}]({})".format(alt_text, target)
        return "![{}]({{attach}}{}/{})".format(target, slug, quote(target, ""))

    return image_pattern.sub(_replace, line)


def convert_linebreak(line: str) -> str:
    """Turn a trailing Trac line break (two backslashes) into a Markdown one."""
    # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
    if line.endswith("\\\\"):
        line = line[:-2] + "  "
    return line


def WikiToMD(content: str, slug: str) -> str:
    """Convert Trac wiki text to Markdown.

    ``content`` is the complete wiki text. ``slug`` is used to build
    ``{attach}<slug>/...`` targets for images and attachment links.
    Returns the converted text; every output line ends with a newline.
    Lines inside ``{{{ ... }}}`` blocks are copied through with only the
    delimiters rewritten to code fences.
    """
    # Line breaks in Markdown must be at end of line, so add newlines as needed
    content = (content.replace("[[br]]", "\\\\")
                      .replace("[[BR]]", "\\\\")
                      .replace("\\\\", "\\\\\n"))
    code_block = False
    in_list = False
    in_table = False
    indent_stack = []  # source indents of the currently open list levels
    new_content = []

    for raw_line in content.splitlines():
        line = raw_line.rstrip()
        tail = ["\n"]

        # A line holding both delimiters (inline code) leaves us outside.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")

        if not code_block:
            #
            # Want to convert tables.  References:
            # https://github.github.com/gfm/#tables-extension-
            # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
            #
            # Table start: line containing "||"
            # Table end: blank line?
            #
            # Figuring out whether there's a real header line is fun,
            # trac doesn't require one, markdown does.  Guess we can
            # add a dummy header if no better idea.  Markdown requires
            # delimiter line, which we add immediately after the
            # header, both appear to be mandatory.  Trac can have
            # label cells anywhere, not just in header, might need to
            # add "*" to those or just ignore the issue.
            # Justification we can sort of figure out from the header,
            # if the rows do anything different, ouch, because
            # markdown specifies in delimiter line.
            #
            # Might do something clever with the "=" markers and
            # alignment, start with just getting the basic table
            # structure to something markdown will believe.
            #
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    # The first row acts as header; the delimiter row
                    # is queued to follow it.
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                    in_table = True
            elif in_table:
                new_content.append("\n")
                in_table = False

            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.
            #
            nested_line = line.lstrip(" ")
            if nested_line.startswith(("- ", "* ")):
                indent = len(line) - len(nested_line)
                if not in_list:
                    new_content.append("\n")
                    indent_stack = []
                    in_list = True
                # Close every level deeper than this item, so a dedent
                # across several levels lands on the right depth.
                while indent_stack and indent_stack[-1] > indent:
                    indent_stack.pop()
                if not indent_stack or indent_stack[-1] < indent:
                    indent_stack.append(indent)
                line = _LIST_INDENT * (len(indent_stack) - 1) + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                indent_stack = []

            # Convert CamelCase
            line = camelcase_pattern.sub(r"\1", line)

            # Convert (limited subset of) spans
            line = span_pattern.sub(r"[[\1|\2]]", line)

            # Convert headers
            line = convert_headers(line)

            # Convert images
            line = convert_image(line, slug)

            # Convert wiki links
            line = convert_wikilinks(line, wikilink_1_pattern)
            line = convert_wikilinks(line, wikilink_2_pattern, slug)
            line = convert_wikilinks(line, wikilink_3_pattern)

            # Convert striked through text
            line = convert_strike(line)

            # Convert line breaks
            line = convert_linebreak(line)

            # Convert bold and italic text (do this last)
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")    # Convert italic text

        new_content.append(line)
        new_content.extend(tail)

    return "".join(new_content)