diff options
Diffstat (limited to 'trac2md.py')
-rwxr-xr-x | trac2md.py | 167 |
1 files changed, 78 insertions, 89 deletions
@@ -14,16 +14,23 @@ from base64 import b64decode from datetime import datetime from urllib.parse import quote +content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I) + +traclink_pattern = re.compile(r"(?<!\[)\[([^][]+)\]") + image_pattern = re.compile(r"\[\[Image\((.*)\)\]\]") -wikilink_1_pattern = re.compile(r"\[\[(http.*)\]\]|\[(http.*)\]") -wikilink_2_pattern = re.compile(r"\[\[attachment:([a-zA-Z0-9_/]+)\]\]|\[attachment:([^][]+)\]") -wikilink_3_pattern = re.compile(r"\[\[(?:wiki:)?([^][]+)\]\]|\[wiki:([^][]+)\]") +wikilink_pattern = re.compile(r"\[\[(wiki:|attachment:)?([^]|[]+)(?:[|]([^][]+))?\]\]") + +strikethrough_pattern = re.compile(r"~~([^~]+)~~") +bangquote_pattern = re.compile(r"!((?:\w|[#])+)") +linebreak_pattern = re.compile(r"\\\\$") + +camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))") -strikethrough_pattern = re.compile(r"~~(.*)~~") -camelcase_pattern = re.compile(r"!((?:\w|[#])+)") +span_pattern = re.compile(r"\[\[span\((?:[^][]*,)*([^(),]+)\)\]\]") -span_pattern = re.compile(r"\[\[span\((?:[^][]*,)*\[([^(), ]+)([^(),]+)\]\)\]\]") +delete_pattern = re.compile(r"\[\[PageOutline\]\]", re.I) wikiheading_patterns = tuple( (level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level))) @@ -36,57 +43,50 @@ def convert_headers(line): level = header.search(line).group(1) if level: line = "%s %s" % ('#' * level_count, level.rstrip("= \r\t")) - break # No need to check other heading levels + break # No need to check other heading levels except: - # Try the next heading level - pass - + pass # Try the next heading level return line -def make_mdlink(text, slug, sep): - if sep in text: - parts = text.split(sep, 1) - else: - parts = [text] - parts = [p.strip() for p in parts] - for i, part in enumerate(parts): - if any(part.startswith(q) and part.endswith(q) for q in ('"', "'")): - parts[i] = part[1:-1] - if slug is None: - return "[{}]({})".format(parts[-1], parts[0]) - else: - return "[{}]({{attach}}{}/{})".format(parts[-1], slug, parts[0]) - - -def convert_wikilinks(line, pattern, slug = None): - pos = 0 - while True: - m = pattern.search(line, pos) - if not m: - break - text = m.group(1) or m.group(2) - if text.lower() == "pageoutline": - mdlink = "" - else: - mdlink = make_mdlink(text, slug, "|" if m.group(0).startswith("[[") else " ") - line = line.replace(m.group(0), mdlink) - pos = m.start() + len(mdlink) +def convert_traclink_to_creolelink(line): + # Convert Trac's native link form to Creole's, so that rest of the code only has to deal with one format. + # Creole's is easier to parse and harder to confuse with partially converted Markdown. + + for m in traclink_pattern.finditer(line): + text = m.group(1).strip() + if " " in text: + line = line.replace(m.group(0), "[[{0[0]}|{0[1]}]]".format(text.split(" ", 1))) + elif any(text.startswith(scheme) for scheme in ("wiki:", "attachment:")) or camelcase_pattern.match(text): + line = line.replace(m.group(0), "[[{}]]".format(text)) return line -def convert_strike(line): - striked_result = strikethrough_pattern.search(line) - if striked_result: - try: - striked_text = striked_result.group(1) - if striked_text: - orig_text = '~~%s~~' % striked_text - new_text = '<s>%s</s>' % striked_text - line = line.replace(orig_text, new_text) - except: - # Not striked - pass +def convert_wikilinks(line, slug): + for m in wikilink_pattern.finditer(line): + scheme, link, text = [p.strip() if p else p for p in m.groups()] + if text is None: + text = link + if any(link.startswith(q) and link.endswith(q) for q in ('"', "'")): + link = link[1:-1] + if any(text.startswith(q) and text.endswith(q) for q in ('"', "'")): + text = text[1:-1] + if scheme == "attachment:": + mdlink = "[{}]({{attach}}{}/{})".format(text, slug, link) + elif scheme == "wiki:" or (scheme is None and camelcase_pattern.match(link)): + mdlink = "[{}]({}.md)".format(text, link) + else: + mdlink = "[{}]({})".format(text, link) + # + #whine = "/user/sra/build-tools/https-sync-repos" in line + whine = False + if whine: + print("Old:", line) + line = line.replace(m.group(0), mdlink) + if whine: + print("New:", line) + whine = False + # return line @@ -106,24 +106,14 @@ def convert_image(line, slug): return line -def convert_linebreak(line): - # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue? - if line.endswith("\\\\"): - line = line[:-2] + " " - return line - - def WikiToMD(content, slug): - # Line breaks in Markdown must be at end of line, so add newlines as needed - content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n") - code_block = False in_list = False in_table = False nested_level = 0 prev_indent = 0 - old_content = content.splitlines() + old_content = content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines() new_content = [] while old_content: @@ -137,29 +127,27 @@ def WikiToMD(content, slug): code_block = False line = line.replace("}}}", "```") if not code_block: - # - # Want to convert tables. References: + + # Convert CamelCase links to explicit links + line = camelcase_pattern.sub(r"[[\1]]", line) + + # Convert TracLinks to WikiCreole links to simplify remaining processing + line = convert_traclink_to_creolelink(line) + + # Convert tables. References: # https://github.github.com/gfm/#tables-extension- # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables + # Table start: line containing "||"; table end: blank line? # - # Table start: line containing "||" - # Table end: blank line? - # - # Figuring out whether there's a real header line is fun, - # trac doesn't require one, markdown does. Guess we can - # add a dummy header if no better idea. Markdown requires - # delimiter line, which we add immediately after the - # header, both appear to be mandatory. Trac can have - # label cells anywhere, not just in header, might need to - # add "*" to those or just ignore the issue. - # Justification we can sort of figure out from the header, - # if the rows do anything different, ouch, because - # markdown specifies in delimiter line. + # Figuring out whether there's a real header line is fun, trac doesn't require one, markdown does. Guess we can + # add a dummy header if no better idea. Markdown requires delimiter line, which we add immediately after the + # header, both appear to be mandatory. Trac can have label cells anywhere, not just in header, might need to + # add "*" to those or just ignore the issue. Justification we can sort of figure out from the header, + # if the rows do anything different, ouch, because markdown specifies in delimiter line. # - # Might do something clever with the "=" markers and - # alignment, start with just getting the basic table + # Might do something clever with the "=" markers and alignment, start with just getting the basic table # structure to something markdown will believe. - # + if line.strip().startswith("||"): line = line.replace("=|", "|").replace("|=", "|") line = line.replace("||", "|") @@ -171,8 +159,7 @@ def WikiToMD(content, slug): in_table = False # - # Convert bullet lists. The start and end of a list needs - # an empty line. + # Convert bullet lists. The start and end of a list needs an empty line. # nested_line = line.lstrip(' ') if nested_line.startswith('- ') or nested_line.startswith('* '): @@ -194,11 +181,11 @@ def WikiToMD(content, slug): nested_level = 0 prev_indent = 0 - # Convert CamelCase - line = camelcase_pattern.sub(r"\1", line) + # Convert !x quoting + line = bangquote_pattern.sub(r"\1", line) # Convert (limited subset of) spans - line = span_pattern.sub(r"[[\1|\2]]", line) + line = span_pattern.sub(r"\1", line) # Convert headers line = convert_headers(line) @@ -206,16 +193,18 @@ def WikiToMD(content, slug): # Convert images line = convert_image(line, slug) + # Delete Trac macros that have no useful counterpart + line = delete_pattern.sub("", line) + # Convert wiki links - line = convert_wikilinks(line, wikilink_1_pattern) - line = convert_wikilinks(line, wikilink_2_pattern, slug) - line = convert_wikilinks(line, wikilink_3_pattern) + line = convert_wikilinks(line, slug) # Convert striked through text - line = convert_strike(line) + line = strikethrough_pattern.sub(r"<s>\1</s>", line) # Convert line breaks - line = convert_linebreak(line) + # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue? + line = linebreak_pattern.sub(" ", line) # Convert bold and italic text (do this last) line = line.replace("'''", "**") # Convert bold text |