path: root/tools/trac2md.py

                      

#!/usr/bin/env python2

# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/

# This code mostly taken from patches to pagure_importer by mreynolds

import sys
import re
import time
import requests
import shutil
import os
from base64 import b64decode
from datetime import datetime

# Pre-compiled patterns for the Trac wiki constructs converted in this module.
# Raw strings are used so that regex escapes such as \[ and \w are not
# interpreted as (invalid) Python string escapes.
wikilink_pattern = re.compile(r'\[http(.*)\]')        # "[http..." external link
wikilink_extract = re.compile(r'\[(.*)\]')            # any "[...]" span (greedy)
strikethrough_pattern = re.compile(r'~~(.*)~~')       # "~~text~~"
camelcase_pattern = re.compile(r"!(\w+)")             # "!" directly before a word
image_pattern = re.compile(r"\[\[Image\((.*)\)\]\]")  # "[[Image(...)]]" macro

# (level, pattern) pairs for heading levels 1 through 6; each pattern expects
# exactly `level` "=" characters followed by a space at the start of the line.
wikiheading_patterns = tuple((level, re.compile(r"^{} (.*)[ \t]*=*$".format("=" * level)))
                             for level in range(1, 7))

def to_timestamp(tm):
    ''' Convert a UTC ISO-8601 time string to a timestamp which can be jsonified

    tm must look like "2017-01-31T12:34:56", optionally followed by the
    "+00:00" UTC offset (which is dropped before parsing).

    Returns the whole number of seconds since the Unix epoch, as a decimal
    string.  Raises ValueError if tm does not match the expected format.
    '''
    # Local import so this function does not depend on the module's import list.
    import calendar

    tm = tm.replace('+00:00', '')
    date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S')
    # The value is UTC, so use timegm(); time.mktime() would interpret the
    # fields as local time and shift the result by the host's UTC offset.
    # timegm() also returns an int, so there is no ".0" suffix to strip.
    return str(calendar.timegm(date.timetuple()))


def strip_wikilink(content):
    ''' Reduce wiki-formatted links in a custom field to the bare targets.

    The field may hold several bracketed links, comma or whitespace
    separated, each optionally followed by a link name, e.g.:

        [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
        [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
        [https://bugzilla.com/6666666 6666666]

    When the content holds at least one "[http..." link, the first word of
    every bracketed span is returned, joined with ", ".  Otherwise the
    content is returned unchanged.
    '''

    if not wikilink_pattern.search(content):
        # Nothing that looks like a link, leave the field alone
        return content

    bracketed = re.findall(r'\[([^]]*)\]', content)
    # Keep only the target, i.e. whatever precedes the first space
    return ', '.join(span.split(' ', 1)[0] for span in bracketed)


def convert_headers(line):
    ''' Convert wikiformat headers

    Tries each heading level in wikiheading_patterns in order.  On the first
    level whose pattern matches with a non-empty title, the line is replaced
    by that many "#" characters, a space, and the title with trailing "=",
    spaces, carriage returns and tabs removed.  Lines that match no level
    are returned unchanged.
    '''
    for level_count, header in wikiheading_patterns:
        match = header.search(line)
        if match is None:
            # Not a heading of this level, try the next one
            continue
        title = match.group(1)
        if title:
            line = "%s %s" % ('#' * level_count, title.rstrip("= \r\t"))
            break  # No need to check other heading levels

    return line


# One external wiki link: "[http...url]" or "[http...url label]".  The URL is
# everything up to the first space; the label is the rest up to the "]".
_WIKILINK_RE = re.compile(r'\[(http[^\] ]*)(?: ([^\]]*))?\]')


def convert_wikilinks(line):
    ''' Convert wikiformat links

    Every "[http...]" link in the line becomes a Markdown "[label](url)"
    link.  When the link has no label (or an empty one) the URL is used as
    the label.  Each link is matched on its own, so several links on one
    line, or other bracketed text before a link, are handled correctly.
    Text without such links is returned unchanged.
    '''
    def to_markdown(match):
        url, label = match.group(1), match.group(2)
        return '[%s](%s)' % (label or url, url)

    return _WIKILINK_RE.sub(to_markdown, line)


# "~~text~~" with a non-empty body; non-greedy so that each striked span on a
# line is matched separately.
_STRIKE_RE = re.compile(r'~~(.+?)~~')


def convert_strike(line):
    ''' Convert wikiformat striked text

    Every "~~text~~" span in the line becomes "<s>text</s>".  Lines without
    such a span are returned unchanged.
    '''
    return _STRIKE_RE.sub(r'<s>\1</s>', line)

# "[[Image(args)]]" macro; non-greedy so that each image on a line is matched
# separately.
_IMAGE_RE = re.compile(r'\[\[Image\((.*?)\)\]\]')


def convert_image(line):
    ''' Convert wikiformat image macros

    Every "[[Image(source, options...)]]" in the line becomes an HTML
    "<img src=...>" tag.  Only the first comma-separated argument (with
    surrounding whitespace removed) is used as the source; any further
    arguments are dropped.  Lines without the macro are returned unchanged.
    '''
    def to_img(match):
        source = match.group(1).split(",")[0].strip()
        return "<img src=\"{}\">".format(source)

    return _IMAGE_RE.sub(to_img, line)

def convert_linebreak(line):
    ''' Convert a trailing wikiformat line break

    A line ending in two backslashes has them replaced by two spaces, which
    is how Markdown spells a hard line break.  Other lines are returned
    unchanged.
    '''
    marker = "\\\\"
    if not line.endswith(marker):
        return line
    return line[:-len(marker)] + "  "

def WikiToMD(content):
    ''' Convert wiki format to Markdown.  Code blocks, tables, bullet
    lists, bold/italics, wiki links, images, line breaks, striked text,
    and headers.

    content is the complete wiki text.  It is processed line by line;
    lines from an opening "{{{" up to (but not including) the line with
    the closing "}}}" are passed through without the inline conversions.

    Returns the converted text as one string in which every output line
    is terminated by a newline.
    '''

    # Line breaks in Markdown must be at end of line, so add newlines as needed
    content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n")

    code_block = False
    in_list = False
    in_table = False
    # Indentation (in spaces) of each list level currently open, outermost
    # first.  Column 0 is always the base, so the nesting depth of an item
    # is len(indent_stack) - 1.
    indent_stack = [0]
    new_content = []

    for line in content.splitlines():
        line = line.rstrip()
        tail = ["\n"]
        # A line holding both markers opens and closes the block at once,
        # leaving code_block False.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")
        if not code_block:
            #
            # Want to convert tables.  References:
            #   https://github.github.com/gfm/#tables-extension-
            #   https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
            #
            # Table start: line containing "||"
            # Table end: blank line?
            #
            # Figuring out whether there's a real header line is fun,
            # trac doesn't require one, markdown does.  Guess we can
            # add a dummy header if no better idea.  Markdown requires
            # delimiter line, which we add immediately after the
            # header, both appear to be mandatory.  Trac can have
            # label cells anywhere, not just in header, might need to
            # add "*" to those or just ignore the issue.
            # Justification we can sort of figure out from the header,
            # if the rows do anything different, ouch, because
            # markdown specifies in delimiter line.
            #
            # Might do something clever with the "=" markers and
            # alignment, start with just getting the basic table
            # structure to something markdown will believe.
            #
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    # First row is treated as the header: follow it with
                    # a delimiter row that has one cell per column.
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                in_table = True
            elif in_table:
                new_content.append("\n")
                in_table = False

            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.
            #
            nested_line = line.lstrip(' ')
            if nested_line.startswith(('- ', '* ')):
                if not in_list:
                    new_content.append("\n")
                    indent_stack = [0]
                    in_list = True
                indent = len(line) - len(nested_line)
                # Close every level deeper than this item, so dedenting
                # several levels at once lands on the right depth and the
                # depth can never go negative.
                while indent < indent_stack[-1]:
                    indent_stack.pop()
                if indent > indent_stack[-1]:
                    indent_stack.append(indent)
                line = '    ' * (len(indent_stack) - 1) + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                indent_stack = [0]

            # Convert CamelCase (drop the "!" in front of a word)
            line = camelcase_pattern.sub("\\1", line)

            # Convert headers
            line = convert_headers(line)

            # Convert wiki links
            line = convert_wikilinks(line)

            # Convert striked through text
            line = convert_strike(line)

            # Convert images
            line = convert_image(line)

            # Convert line breaks
            line = convert_linebreak(line)

            # Convert bold and italic text (do this last)
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")  # Convert italic text

        new_content.append(line)
        new_content.extend(tail)

    return "".join(new_content)
#!/usr/bin/env python2

# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/

# This code mostly taken from patches to pagure_importer by mreynolds

import sys
import re
import time
import requests
import shutil
import os
from base64 import b64decode
from datetime import datetime

# Pre-compiled patterns for the Trac wiki constructs converted in this module.
# Raw strings are used so that regex escapes such as \[ and \w are not
# interpreted as (invalid) Python string escapes.
wikilink_pattern = re.compile(r'\[http(.*)\]')        # "[http..." external link
wikilink_extract = re.compile(r'\[(.*)\]')            # any "[...]" span (greedy)
strikethrough_pattern = re.compile(r'~~(.*)~~')       # "~~text~~"
camelcase_pattern = re.compile(r"!(\w+)")             # "!" directly before a word
image_pattern = re.compile(r"\[\[Image\((.*)\)\]\]")  # "[[Image(...)]]" macro

# (level, pattern) pairs for heading levels 1 through 6; each pattern expects
# exactly `level` "=" characters followed by a space at the start of the line.
wikiheading_patterns = tuple((level, re.compile(r"^{} (.*)[ \t]*=*$".format("=" * level)))
                             for level in range(1, 7))

def to_timestamp(tm):
    ''' Convert a UTC ISO-8601 time string to a timestamp which can be jsonified

    tm must look like "2017-01-31T12:34:56", optionally followed by the
    "+00:00" UTC offset (which is dropped before parsing).

    Returns the whole number of seconds since the Unix epoch, as a decimal
    string.  Raises ValueError if tm does not match the expected format.
    '''
    # Local import so this function does not depend on the module's import list.
    import calendar

    tm = tm.replace('+00:00', '')
    date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S')
    # The value is UTC, so use timegm(); time.mktime() would interpret the
    # fields as local time and shift the result by the host's UTC offset.
    # timegm() also returns an int, so there is no ".0" suffix to strip.
    return str(calendar.timegm(date.timetuple()))


def strip_wikilink(content):
    ''' Reduce wiki-formatted links in a custom field to the bare targets.

    The field may hold several bracketed links, comma or whitespace
    separated, each optionally followed by a link name, e.g.:

        [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
        [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
        [https://bugzilla.com/6666666 6666666]

    When the content holds at least one "[http..." link, the first word of
    every bracketed span is returned, joined with ", ".  Otherwise the
    content is returned unchanged.
    '''

    if not wikilink_pattern.search(content):
        # Nothing that looks like a link, leave the field alone
        return content

    bracketed = re.findall(r'\[([^]]*)\]', content)
    # Keep only the target, i.e. whatever precedes the first space
    return ', '.join(span.split(' ', 1)[0] for span in bracketed)


def convert_headers(line):
    ''' Convert wikiformat headers

    Tries each heading level in wikiheading_patterns in order.  On the first
    level whose pattern matches with a non-empty title, the line is replaced
    by that many "#" characters, a space, and the title with trailing "=",
    spaces, carriage returns and tabs removed.  Lines that match no level
    are returned unchanged.
    '''
    for level_count, header in wikiheading_patterns:
        match = header.search(line)
        if match is None:
            # Not a heading of this level, try the next one
            continue
        title = match.group(1)
        if title:
            line = "%s %s" % ('#' * level_count, title.rstrip("= \r\t"))
            break  # No need to check other heading levels

    return line


# One external wiki link: "[http...url]" or "[http...url label]".  The URL is
# everything up to the first space; the label is the rest up to the "]".
_WIKILINK_RE = re.compile(r'\[(http[^\] ]*)(?: ([^\]]*))?\]')


def convert_wikilinks(line):
    ''' Convert wikiformat links

    Every "[http...]" link in the line becomes a Markdown "[label](url)"
    link.  When the link has no label (or an empty one) the URL is used as
    the label.  Each link is matched on its own, so several links on one
    line, or other bracketed text before a link, are handled correctly.
    Text without such links is returned unchanged.
    '''
    def to_markdown(match):
        url, label = match.group(1), match.group(2)
        return '[%s](%s)' % (label or url, url)

    return _WIKILINK_RE.sub(to_markdown, line)


# "~~text~~" with a non-empty body; non-greedy so that each striked span on a
# line is matched separately.
_STRIKE_RE = re.compile(r'~~(.+?)~~')


def convert_strike(line):
    ''' Convert wikiformat striked text

    Every "~~text~~" span in the line becomes "<s>text</s>".  Lines without
    such a span are returned unchanged.
    '''
    return _STRIKE_RE.sub(r'<s>\1</s>', line)

# "[[Image(args)]]" macro; non-greedy so that each image on a line is matched
# separately.
_IMAGE_RE = re.compile(r'\[\[Image\((.*?)\)\]\]')


def convert_image(line):
    ''' Convert wikiformat image macros

    Every "[[Image(source, options...)]]" in the line becomes an HTML
    "<img src=...>" tag.  Only the first comma-separated argument (with
    surrounding whitespace removed) is used as the source; any further
    arguments are dropped.  Lines without the macro are returned unchanged.
    '''
    def to_img(match):
        source = match.group(1).split(",")[0].strip()
        return "<img src=\"{}\">".format(source)

    return _IMAGE_RE.sub(to_img, line)

def convert_linebreak(line):
    ''' Convert a trailing wikiformat line break

    A line ending in two backslashes has them replaced by two spaces, which
    is how Markdown spells a hard line break.  Other lines are returned
    unchanged.
    '''
    marker = "\\\\"
    if not line.endswith(marker):
        return line
    return line[:-len(marker)] + "  "

def WikiToMD(content):
    ''' Convert wiki format to Markdown.  Code blocks, tables, bullet
    lists, bold/italics, wiki links, images, line breaks, striked text,
    and headers.

    content is the complete wiki text.  It is processed line by line;
    lines from an opening "{{{" up to (but not including) the line with
    the closing "}}}" are passed through without the inline conversions.

    Returns the converted text as one string in which every output line
    is terminated by a newline.
    '''

    # Line breaks in Markdown must be at end of line, so add newlines as needed
    content = content.replace("[[br]]", "\\\\").replace("[[BR]]", "\\\\").replace("\\\\", "\\\\\n")

    code_block = False
    in_list = False
    in_table = False
    # Indentation (in spaces) of each list level currently open, outermost
    # first.  Column 0 is always the base, so the nesting depth of an item
    # is len(indent_stack) - 1.
    indent_stack = [0]
    new_content = []

    for line in content.splitlines():
        line = line.rstrip()
        tail = ["\n"]
        # A line holding both markers opens and closes the block at once,
        # leaving code_block False.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")
        if not code_block:
            #
            # Want to convert tables.  References:
            #   https://github.github.com/gfm/#tables-extension-
            #   https://permatrac.noc.ietf.org/wiki/WikiFormatting#Tables
            #
            # Table start: line containing "||"
            # Table end: blank line?
            #
            # Figuring out whether there's a real header line is fun,
            # trac doesn't require one, markdown does.  Guess we can
            # add a dummy header if no better idea.  Markdown requires
            # delimiter line, which we add immediately after the
            # header, both appear to be mandatory.  Trac can have
            # label cells anywhere, not just in header, might need to
            # add "*" to those or just ignore the issue.
            # Justification we can sort of figure out from the header,
            # if the rows do anything different, ouch, because
            # markdown specifies in delimiter line.
            #
            # Might do something clever with the "=" markers and
            # alignment, start with just getting the basic table
            # structure to something markdown will believe.
            #
            if line.strip().startswith("||"):
                line = line.replace("=|", "|").replace("|=", "|")
                line = line.replace("||", "|")
                if not in_table:
                    # First row is treated as the header: follow it with
                    # a delimiter row that has one cell per column.
                    tail.append("|---" * (line.count("|") - 1) + "|\n")
                in_table = True
            elif in_table:
                new_content.append("\n")
                in_table = False

            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.
            #
            nested_line = line.lstrip(' ')
            if nested_line.startswith(('- ', '* ')):
                if not in_list:
                    new_content.append("\n")
                    indent_stack = [0]
                    in_list = True
                indent = len(line) - len(nested_line)
                # Close every level deeper than this item, so dedenting
                # several levels at once lands on the right depth and the
                # depth can never go negative.
                while indent < indent_stack[-1]:
                    indent_stack.pop()
                if indent > indent_stack[-1]:
                    indent_stack.append(indent)
                line = '    ' * (len(indent_stack) - 1) + nested_line
            elif in_list:
                new_content.append("\n")
                in_list = False
                indent_stack = [0]

            # Convert CamelCase (drop the "!" in front of a word)
            line = camelcase_pattern.sub("\\1", line)

            # Convert headers
            line = convert_headers(line)

            # Convert wiki links
            line = convert_wikilinks(line)

            # Convert striked through text
            line = convert_strike(line)

            # Convert images
            line = convert_image(line)

            # Convert line breaks
            line = convert_linebreak(line)

            # Convert bold and italic text (do this last)
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")  # Convert italic text

        new_content.append(line)
        new_content.extend(tail)

    return "".join(new_content)