summaryrefslogblamecommitdiff
path: root/tools/trac2md.py
blob: ab0bf795b743c977e83c6c40e45aeedde68469a6 (plain) (tree)




















































































































                                                                                               
                                      
                    
 

                                                   






                                                 







                                                                      
                                            





                                                                   
                                            



















                                                             
                                            
















                                                                 

                                
 
                               







                                     
#!/usr/bin/python

# Untested code from https://www.snip2code.com/Snippet/1704331/Convert-trac-markup-to-Markdown/

# This code mostly taken from patches to pagure_importer by mreynolds

import sys
import re
import time
import requests
import shutil
import os
from base64 import b64decode
from datetime import datetime

# Pre-compiled patterns for the Trac wiki constructs handled in this file.
# Raw strings are used so that regex escapes such as ``\[`` reach the regex
# engine untouched instead of being parsed as (invalid) Python string escapes.
wikilink_pattern = re.compile(r'\[http(.*)\]')  # a bracketed http(s) link
wikilink_extract = re.compile(r'\[(.*)\]')  # everything between [ and ]
wikiheading1_pattern = re.compile(r'^= (.*) =$')
wikiheading2_pattern = re.compile(r'^== (.*) ==$')
wikiheading3_pattern = re.compile(r'^=== (.*) ===$')
strikethrough_pattern = re.compile(r'~~(.*)~~')

def to_timestamp(tm):
    ''' Convert to timestamp which can be jsonified

    tm is a time string of the form YYYY-MM-DDTHH:MM:SS, optionally
    carrying a '+00:00' (UTC) suffix.  Returns the number of whole seconds
    since the Unix epoch as a string.  Raises ValueError if tm does not
    match that format.
    '''

    tm = tm.replace('+00:00', '')
    date = datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S')
    # The value is UTC (the only offset stripped above is +00:00), so measure
    # it against the UTC epoch directly.  time.mktime() would interpret the
    # fields as local time and make the result depend on the host's timezone.
    seconds = (date - datetime(1970, 1, 1)).total_seconds()
    return str(int(seconds))


def strip_wikilink(content):
    ''' Reduce wiki-formatted links in a custom field to their bare targets.

    The links may be separated by commas or whitespace and may carry a link
    name after the target; the name is dropped as well.  For example:

        [https://bugzilla.redhat.com/show_bug.cgi?id=772777]
        [https://bugzilla.com/123456789], [http://bugzilla.com/7777777 7777777]
        [https://bugzilla.com/6666666 6666666]

    If content holds no bracketed http link it is returned unchanged;
    otherwise the first word of every bracketed item is returned, joined
    with ', '.
    '''

    if not wikilink_pattern.search(content):
        # Nothing that looks like a link, hand the text back untouched
        return content

    bracketed = re.findall(r'\[([^]]*)\]', content)
    # Only the part before the first space is the target, the rest is a name
    return ', '.join(item.split(' ', 1)[0] for item in bracketed)


# A heading is one to six '=' characters, a space, the title, a space and the
# same number of '=' again (the back-reference enforces matching markers).
_HEADING_RE = re.compile(r'^(={1,6}) (.*) \1$')


def convert_headers(line):
    ''' Convert wikiformat headers

    A line of the form '== Title ==' becomes '## Title', with one '#' per
    '=' in the marker (levels one through six).  Lines which are not
    headings, or whose title is empty, are returned unchanged.
    '''
    match = _HEADING_RE.search(line)
    if match is None:
        return line

    marker, title = match.groups()
    if not title:
        # An empty title is left alone rather than turned into a bare '#'
        return line

    return "%s %s" % ('#' * len(marker), title)


# A bracketed external link: '[http...]' with no ']' inside, so that several
# links on one line are matched one at a time.
_EXTERNAL_LINK_RE = re.compile(r'\[(http[^\]]*)\]')


def _wikilink_to_md(match):
    ''' Build the Markdown link for one matched '[target label]' wiki link. '''
    target, _, label = match.group(1).partition(' ')
    # Without a label the target doubles as the link text
    return '[%s](%s)' % (label or target, target)


def convert_wikilinks(line):
    ''' Convert wikiformat links

    Every '[http://target optional label]' in line becomes
    '[optional label](http://target)'; a link without a label uses the
    target as its text.  Other bracketed text and lines without links are
    returned unchanged.
    '''
    return _EXTERNAL_LINK_RE.sub(_wikilink_to_md, line)


# Non-greedy so that each '~~...~~' pair on a line is matched on its own;
# '.+?' requires at least one character between the markers.
_STRIKE_RE = re.compile(r'~~(.+?)~~')


def convert_strike(line):
    ''' Convert wikiformat striked text

    Each '~~text~~' span in line becomes '<s>text</s>'.  Lines without a
    complete, non-empty span are returned unchanged.
    '''
    return _STRIKE_RE.sub(r'<s>\1</s>', line)

def WikiToMD(content):
    ''' Convert wiki format to Markdown.  Code blocks, bold/italics,
    wiki links, lists, striked text, and headers.

    content is the complete wiki text.  It is processed line by line and
    the converted text is returned as one string in which every line ends
    with a newline.  Lines inside a {{{ ... }}} code block only have their
    block markers rewritten; no other conversion is applied to them.
    '''

    code_block = False
    in_list = False
    nested_level = 0
    prev_indent = 0
    new_content = []

    # Plain iteration; popping from the front of the list would make the
    # loop quadratic in the number of lines.
    for raw_line in content.splitlines():
        line = raw_line.replace("\r", "")

        # Code block markers become Markdown fences.  A line carrying both
        # markers opens and closes the block on the spot.
        if "{{{" in line:
            code_block = True
            line = line.replace("{{{", "```")
        if "}}}" in line:
            code_block = False
            line = line.replace("}}}", "```")

        if not code_block:
            #
            # Convert bullet lists.  The start and end of a list needs
            # an empty line.  wikiformat uses both '*' and '-' for its
            # lists.  However, markdown only supports '-'.
            #
            if line.startswith('* '):
                if not in_list:
                    new_content.append("\n")
                in_list = True
                line = '-%s' % (line[1:])
            elif line.startswith('- '):
                # No need to modify the line, just add the new line
                if not in_list:
                    new_content.append("\n")
                in_list = True
            elif line.startswith(' '):
                # Check for nested lists
                nested_line = line.lstrip(' ')
                if nested_line.startswith(('* ', '- ')):
                    # Adjust the nested list level as needed
                    # NOTE(review): the level moves by one step per line, so
                    # dedenting across several levels at once only undoes
                    # one of them -- confirm whether that is intended.
                    indent = len(line) - len(nested_line)
                    if indent > prev_indent:
                        nested_level += 1
                    elif indent < prev_indent:
                        nested_level -= 1
                    prev_indent = indent

                    # Set the proper indentation for markdown
                    line = ('%s-%s' % ('    ' * nested_level,
                                       nested_line[1:]))
            else:
                if in_list:
                    # Add the closing empty line
                    new_content.append("\n")
                in_list = False
                nested_level = 0
                prev_indent = 0

            # Convert headers
            line = convert_headers(line)

            # Convert wiki links
            line = convert_wikilinks(line)

            # Convert striked through text
            line = convert_strike(line)

            # Convert bold and italic text (do this last).  Bold must go
            # first: its ''' marker contains the italic '' marker.
            line = line.replace("'''", "**")  # Convert bold text
            line = line.replace("''", "*")  # Convert italic text

        new_content.append(line)
        new_content.append("\n")

    return "".join(new_content)

# Convert every file named on the command line and write the Markdown next to
# it: 'page.trac' becomes 'page.md'.
for f in sys.argv[1:]:
    # Close the input before writing anything
    with open(f, "r") as fp:
        d = WikiToMD(fp.read())

    # Only swap a trailing '.trac'.  Any other name gets '.md' appended, so
    # the output can never land on top of the input file.
    if f.endswith(".trac"):
        newf = f[:-len(".trac")] + ".md"
    else:
        newf = f + ".md"

    with open(newf, "w") as fp:
        fp.write(d)