summaryrefslogtreecommitdiff
path: root/trac2md.py
diff options
context:
space:
mode:
Diffstat (limited to 'trac2md.py')
-rwxr-xr-xtrac2md.py167
1 files changed, 78 insertions, 89 deletions
diff --git a/trac2md.py b/trac2md.py
index a8a632b..7e17c5c 100755
--- a/trac2md.py
+++ b/trac2md.py
@@ -14,16 +14,23 @@ from base64 import b64decode
from datetime import datetime
from urllib.parse import quote
+content_linebreak_pattern = re.compile(r"\[\[br\]\]|\\\\", re.I)
+
+traclink_pattern = re.compile(r"(?<!\[)\[([^][]+)\]")
+
image_pattern = re.compile(r"\[\[Image\((.*)\)\]\]")
-wikilink_1_pattern = re.compile(r"\[\[(http.*)\]\]|\[(http.*)\]")
-wikilink_2_pattern = re.compile(r"\[\[attachment:([a-zA-Z0-9_/]+)\]\]|\[attachment:([^][]+)\]")
-wikilink_3_pattern = re.compile(r"\[\[(?:wiki:)?([^][]+)\]\]|\[wiki:([^][]+)\]")
+wikilink_pattern = re.compile(r"\[\[(wiki:|attachment:)?([^]|[]+)(?:[|]([^][]+))?\]\]")
+
+strikethrough_pattern = re.compile(r"~~([^~]+)~~")
+bangquote_pattern = re.compile(r"!((?:\w|[#])+)")
+linebreak_pattern = re.compile(r"\\\\$")
+
+camelcase_pattern = re.compile(r"(?:^|(?<=\s))([A-Z][a-z]+[A-Z][a-z][A-Za-z]*)(?:$|(?=\s))")
-strikethrough_pattern = re.compile(r"~~(.*)~~")
-camelcase_pattern = re.compile(r"!((?:\w|[#])+)")
+span_pattern = re.compile(r"\[\[span\((?:[^][]*,)*([^(),]+)\)\]\]")
-span_pattern = re.compile(r"\[\[span\((?:[^][]*,)*\[([^(), ]+)([^(),]+)\]\)\]\]")
+delete_pattern = re.compile(r"\[\[PageOutline\]\]", re.I)
wikiheading_patterns = tuple(
(level, re.compile("^{} (.*)[ \t]*=*$".format("=" * level)))
@@ -36,57 +43,50 @@ def convert_headers(line):
level = header.search(line).group(1)
if level:
line = "%s %s" % ('#' * level_count, level.rstrip("= \r\t"))
- break # No need to check other heading levels
+ break # No need to check other heading levels
except:
- # Try the next heading level
- pass
-
+ pass # Try the next heading level
return line
-def make_mdlink(text, slug, sep):
- if sep in text:
- parts = text.split(sep, 1)
- else:
- parts = [text]
- parts = [p.strip() for p in parts]
- for i, part in enumerate(parts):
- if any(part.startswith(q) and part.endswith(q) for q in ('"', "'")):
- parts[i] = part[1:-1]
- if slug is None:
- return "[{}]({})".format(parts[-1], parts[0])
- else:
- return "[{}]({{attach}}{}/{})".format(parts[-1], slug, parts[0])
-
-
-def convert_wikilinks(line, pattern, slug = None):
- pos = 0
- while True:
- m = pattern.search(line, pos)
- if not m:
- break
- text = m.group(1) or m.group(2)
- if text.lower() == "pageoutline":
- mdlink = ""
- else:
- mdlink = make_mdlink(text, slug, "|" if m.group(0).startswith("[[") else " ")
- line = line.replace(m.group(0), mdlink)
- pos = m.start() + len(mdlink)
+def convert_traclink_to_creolelink(line):
+ # Convert Trac's native link form to Creole's, so that the rest of the code only has to deal with one format.
+ # Creole's is easier to parse and harder to confuse with partially converted Markdown.
+
+ for m in traclink_pattern.finditer(line):
+ text = m.group(1).strip()
+ if " " in text:
+ line = line.replace(m.group(0), "[[{0[0]}|{0[1]}]]".format(text.split(" ", 1)))
+ elif any(text.startswith(scheme) for scheme in ("wiki:", "attachment:")) or camelcase_pattern.match(text):
+ line = line.replace(m.group(0), "[[{}]]".format(text))
return line
-def convert_strike(line):
- striked_result = strikethrough_pattern.search(line)
- if striked_result:
- try:
- striked_text = striked_result.group(1)
- if striked_text:
- orig_text = '~~%s~~' % striked_text
- new_text = '<s>%s</s>' % striked_text
- line = line.replace(orig_text, new_text)
- except:
- # Not striked
- pass
+def convert_wikilinks(line, slug):
+ for m in wikilink_pattern.finditer(line):
+ scheme, link, text = [p.strip() if p else p for p in m.groups()]
+ if text is None:
+ text = link
+ if any(link.startswith(q) and link.endswith(q) for q in ('"', "'")):
+ link = link[1:-1]
+ if any(text.startswith(q) and text.endswith(q) for q in ('"', "'")):
+ text = text[1:-1]
+ if scheme == "attachment:":
+ mdlink = "[{}]({{attach}}{}/{})".format(text, slug, link)
+ elif scheme == "wiki:" or (scheme is None and camelcase_pattern.match(link)):
+ mdlink = "[{}]({}.md)".format(text, link)
+ else:
+ mdlink = "[{}]({})".format(text, link)
+ #
+ #whine = "/user/sra/build-tools/https-sync-repos" in line
+ whine = False
+ if whine:
+ print("Old:", line)
+ line = line.replace(m.group(0), mdlink)
+ if whine:
+ print("New:", line)
+ whine = False
+ #
return line
@@ -106,24 +106,14 @@ def convert_image(line, slug):
return line
-def convert_linebreak(line):
- # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
- if line.endswith("\\\\"):
- line = line[:-2] + " "
- return line
-
-
def WikiToMD(content, slug):
- # Line breaks in Markdown must be at end of line, so add newlines as needed
- content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n")
-
code_block = False
in_list = False
in_table = False
nested_level = 0
prev_indent = 0
- old_content = content.splitlines()
+ old_content = content_linebreak_pattern.sub("\\\\\\\\\n", content).splitlines()
new_content = []
while old_content:
@@ -137,29 +127,27 @@ def WikiToMD(content, slug):
code_block = False
line = line.replace("}}}", "```")
if not code_block:
- #
- # Want to convert tables. References:
+
+ # Convert CamelCase links to explicit links
+ line = camelcase_pattern.sub(r"[[\1]]", line)
+
+ # Convert TracLinks to WikiCreole links to simplify remaining processing
+ line = convert_traclink_to_creolelink(line)
+
+ # Convert tables. References:
# https://github.github.com/gfm/#tables-extension-
# https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
+ # Table start: line containing "||"; table end: blank line?
#
- # Table start: line containing "||"
- # Table end: blank line?
- #
- # Figuring out whether there's a real header line is fun,
- # trac doesn't require one, markdown does. Guess we can
- # add a dummy header if no better idea. Markdown requires
- # delimiter line, which we add immediately after the
- # header, both appear to be mandatory. Trac can have
- # label cells anywhere, not just in header, might need to
- # add "*" to those or just ignore the issue.
- # Justification we can sort of figure out from the header,
- # if the rows do anything different, ouch, because
- # markdown specifies in delimiter line.
+ # Figuring out whether there's a real header line is fun: Trac doesn't require one, Markdown does. We can
+ # add a dummy header if there's no better idea. Markdown requires a delimiter line, which we add immediately after
+ # the header; both appear to be mandatory. Trac can have label cells anywhere, not just in the header, so we might
+ # need to add "*" to those or just ignore the issue. Justification we can sort of figure out from the header;
+ # if the rows do anything different, ouch, because Markdown specifies it in the delimiter line.
#
- # Might do something clever with the "=" markers and
- # alignment, start with just getting the basic table
+ # Might do something clever with the "=" markers and alignment, start with just getting the basic table
# structure to something markdown will believe.
- #
+
if line.strip().startswith("||"):
line = line.replace("=|", "|").replace("|=", "|")
line = line.replace("||", "|")
@@ -171,8 +159,7 @@ def WikiToMD(content, slug):
in_table = False
#
- # Convert bullet lists. The start and end of a list needs
- # an empty line.
+ # Convert bullet lists. The start and end of a list needs an empty line.
#
nested_line = line.lstrip(' ')
if nested_line.startswith('- ') or nested_line.startswith('* '):
@@ -194,11 +181,11 @@ def WikiToMD(content, slug):
nested_level = 0
prev_indent = 0
- # Convert CamelCase
- line = camelcase_pattern.sub(r"\1", line)
+ # Convert !x quoting
+ line = bangquote_pattern.sub(r"\1", line)
# Convert (limited subset of) spans
- line = span_pattern.sub(r"[[\1|\2]]", line)
+ line = span_pattern.sub(r"\1", line)
# Convert headers
line = convert_headers(line)
@@ -206,16 +193,18 @@ def WikiToMD(content, slug):
# Convert images
line = convert_image(line, slug)
+ # Delete Trac macros that have no useful counterpart
+ line = delete_pattern.sub("", line)
+
# Convert wiki links
- line = convert_wikilinks(line, wikilink_1_pattern)
- line = convert_wikilinks(line, wikilink_2_pattern, slug)
- line = convert_wikilinks(line, wikilink_3_pattern)
+ line = convert_wikilinks(line, slug)
# Convert striked through text
- line = convert_strike(line)
+ line = strikethrough_pattern.sub(r"<s>\1</s>", line)
# Convert line breaks
- line = convert_linebreak(line)
+ # Markdown spec says linebreak is <SPACE><SPACE><RETURN>, who am I to argue?
+ line = linebreak_pattern.sub(" ", line)
# Convert bold and italic text (do this last)
line = line.replace("'''", "**") # Convert bold text