From 18fb1695b84248fc75ceb3569ff03cbeca51a620 Mon Sep 17 00:00:00 2001 From: Rob Austein Date: Mon, 15 Feb 2021 22:29:56 +0000 Subject: Seriously rework link processing --- trac2md.py | 167 +++++++++++++++++++++++++++++-------------------------------- 1 file changed, 78 insertions(+), 89 deletions(-) (limited to 'trac2md.py') diff --git a/trac2md.py b/trac2md.py index a8a632b..7e17c5c 100755 --- a/trac2md.py +++ b/trac2md.py @@ -14,16 +14,23 @@ from base64 import b64decode from datetime import datetime from urllib.parse import quote +content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I) + +traclink_pattern = re.compile(r"(?%s' % striked_text - line = line.replace(orig_text, new_text) - except: - # Not striked - pass +def convert_wikilinks(line, slug): + for m in wikilink_pattern.finditer(line): + scheme, link, text = [p.strip() if p else p for p in m.groups()] + if text is None: + text = link + if any(link.startswith(q) and link.endswith(q) for q in ('"', "'")): + link = link[1:-1] + if any(text.startswith(q) and text.endswith(q) for q in ('"', "'")): + text = text[1:-1] + if scheme == "attachment:": + mdlink = "[{}]({{attach}}{}/{})".format(text, slug, link) + elif scheme == "wiki:" or (scheme is None and camelcase_pattern.match(link)): + mdlink = "[{}]({}.md)".format(text, link) + else: + mdlink = "[{}]({})".format(text, link) + # + #whine = "/user/sra/build-tools/https-sync-repos" in line + whine = False + if whine: + print("Old:", line) + line = line.replace(m.group(0), mdlink) + if whine: + print("New:", line) + whine = False + # return line @@ -106,24 +106,14 @@ def convert_image(line, slug): return line -def convert_linebreak(line): - # Markdown spec says linebreak is , who am I to argue? - if line.endswith("\\\\"): - line = line[:-2] + " " - return line - - def WikiToMD(content, slug): - # Line breaks in Markdown must be at end of line, so add newlines as needed - content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n") - code_block = False in_list = False in_table = False nested_level = 0 prev_indent = 0 - old_content = content.splitlines() + old_content = content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines() new_content = [] while old_content: @@ -137,29 +127,27 @@ def WikiToMD(content, slug): code_block = False line = line.replace("}}}", "```") if not code_block: - # - # Want to convert tables. References: + + # Convert CamelCase links to explicit links + line = camelcase_pattern.sub(r"[[\1]]", line) + + # Convert TracLinks to WikiCreole links to simplify remaining processing + line = convert_traclink_to_creolelink(line) + + # Convert tables. References: # https://github.github.com/gfm/#tables-extension- # https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables + # Table start: line containing "||"; table end: blank line? # - # Table start: line containing "||" - # Table end: blank line? - # - # Figuring out whether there's a real header line is fun, - # trac doesn't require one, markdown does. Guess we can - # add a dummy header if no better idea. Markdown requires - # delimiter line, which we add immediately after the - # header, both appear to be mandatory. Trac can have - # label cells anywhere, not just in header, might need to - # add "*" to those or just ignore the issue. - # Justification we can sort of figure out from the header, - # if the rows do anything different, ouch, because - # markdown specifies in delimiter line. + # Figuring out whether there's a real header line is fun, trac doesn't require one, markdown does. Guess we can + # add a dummy header if no better idea. Markdown requires delimiter line, which we add immediately after the + # header, both appear to be mandatory. Trac can have label cells anywhere, not just in header, might need to + # add "*" to those or just ignore the issue. Justification we can sort of figure out from the header, + # if the rows do anything different, ouch, because markdown specifies in delimiter line. # - # Might do something clever with the "=" markers and - # alignment, start with just getting the basic table + # Might do something clever with the "=" markers and alignment, start with just getting the basic table # structure to something markdown will believe. - # + if line.strip().startswith("||"): line = line.replace("=|", "|").replace("|=", "|") line = line.replace("||", "|") @@ -171,8 +159,7 @@ def WikiToMD(content, slug): in_table = False # - # Convert bullet lists. The start and end of a list needs - # an empty line. + # Convert bullet lists. The start and end of a list needs an empty line. # nested_line = line.lstrip(' ') if nested_line.startswith('- ') or nested_line.startswith('* '): @@ -194,11 +181,11 @@ def WikiToMD(content, slug): nested_level = 0 prev_indent = 0 - # Convert CamelCase - line = camelcase_pattern.sub(r"\1", line) + # Convert !x quoting + line = bangquote_pattern.sub(r"\1", line) # Convert (limited subset of) spans - line = span_pattern.sub(r"[[\1|\2]]", line) + line = span_pattern.sub(r"\1", line) # Convert headers line = convert_headers(line) @@ -206,16 +193,18 @@ def WikiToMD(content, slug): # Convert images line = convert_image(line, slug) + # Delete Trac macros that have no useful counterpart + line = delete_pattern.sub("", line) + # Convert wiki links - line = convert_wikilinks(line, wikilink_1_pattern) - line = convert_wikilinks(line, wikilink_2_pattern, slug) - line = convert_wikilinks(line, wikilink_3_pattern) + line = convert_wikilinks(line, slug) # Convert striked through text - line = convert_strike(line) + line = strikethrough_pattern.sub(r"\1", line) # Convert line breaks - line = convert_linebreak(line) + # Markdown spec says linebreak is , who am I to argue? + line = linebreak_pattern.sub(" ", line) # Convert bold and italic text (do this last) line = line.replace("'''", "**") # Convert bold text -- cgit v1.2.3