diff options
author | jwansek <eddie.atten.ea29@gmail.com> | 2021-03-15 16:14:47 +0000 |
---|---|---|
committer | jwansek <eddie.atten.ea29@gmail.com> | 2021-03-15 16:14:47 +0000 |
commit | 5e528a1484533e64815db07608dc7fb1613fa36f (patch) | |
tree | b389cd15f58351055c75b29507ea844dcc9e7f94 /parser.py | |
parent | 2278b8331c6c84eea1d860c89adc7ae4eed24d27 (diff) | |
download | eda.gay-5e528a1484533e64815db07608dc7fb1613fa36f.tar.gz eda.gay-5e528a1484533e64815db07608dc7fb1613fa36f.zip |
switched markdown parser, added greentexts
Diffstat (limited to 'parser.py')
-rwxr-xr-x | parser.py | 159 |
1 files changed, 28 insertions, 131 deletions
@@ -1,26 +1,40 @@
 #!/usr/bin/env python3
 
 from urllib.parse import urlparse
+from pygments import highlight
+from pygments.formatters import HtmlFormatter, ClassNotFound
+from pygments.lexers import get_lexer_by_name
 import webbrowser
 import database
 import argparse
 import getpass
+import houdini
+import misaka
 import app
 import sys
 import re
 import os
 
-# DISCLAIMER
-# There is almost certainly a python package to
-# do this better. I wanted to do it myself as a challenge.
-
-# TODO:
-# - Add table formatting
-# - Fix <br>s with newlines
-# - Fix nested markdown elements
-
-HEADER_INCREMENTER = 1
-IMAGE_TYPES = [".png", ".jpg"]
+class HighlighterRenderer(misaka.SaferHtmlRenderer):
+    def blockcode(self, text, lang):
+        try:
+            lexer = get_lexer_by_name(lang, stripall=True)
+        except ClassNotFound:
+            lexer = None
+
+        if lexer:
+            formatter = HtmlFormatter()
+            return highlight(text, lexer, formatter)
+        # default
+        return '\n<pre><code>{}</code></pre>\n'.format(houdini.escape_html(text.strip()))
+
+    def blockquote(self, content):
+        content = content[3:-5] # idk why this is required...
+        out = '\n<blockquote>'
+        for line in houdini.escape_html(content.strip()).split("\n"):
+            out += '\n<span class="quote">{}</span><br>'.format(line)
+        print(out)
+        return out + '\n</blockquote>'
 
 def get_thought_from_id(db, id_):
     category_name, title, dt, markdown = db.get_thought(id_)
@@ -33,127 +47,10 @@ def parse_file(path):
     return parse_text(unformatted)
 
 def parse_text(unformatted):
-    formatted = parse_headers(unformatted)
-    formatted = parse_asteriscs(formatted)
-    formatted = parse_links(formatted)
-    formatted = parse_code(formatted)
-    formatted = parse_lists(formatted)
-    formatted = add_linebreaks(formatted)
-
-    return formatted
-
-def parse_headers(test_str):
-    regex = r"^#{1,5}\s\w.*$"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        # work out if its h2, h3 etc. from the number of #s
-        headerNo = len(match.group().split(" ")[0]) + HEADER_INCREMENTER
-
-        replacement = "<h%i>%s</h%i>" % (headerNo, " ".join(match.group().split(" ")[1:]), headerNo)
-
-        #don't use .replace() in the unlikely case the the regex hit appears in a block
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        #replacing the hits fucks up the indexes, accommodate for this
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
-
-def parse_asteriscs(test_str):
-    regex = r"(?<!\\)\*{1,3}.*?\*{1,3}"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        if len(re.findall(r"\*{1,3}.*?\\\*{1,3}", match.group())) == 0: #need to find a way of doing this with regexes
-            if match.group().startswith(re.findall(r"\w\*{1,3}", match.group())[0][1:]): #this too
-                if match.group().startswith("***"):
-                    replacement = "<b><i>%s</i></b>" % (match.group()[3:-3])
-                elif match.group().startswith("**"):
-                    replacement = "<b>%s</b>" % (match.group()[2:-2])
-                else:
-                    replacement = "<i>%s</i>" % (match.group()[1:-1])
-
-                test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-                offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
-
-def parse_links(test_str):
-    regex = r"(?<!\\)\[.*?\]\(.*?\)"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        s = match.group().split("(")
-        label = s[0][1:-1]
-        url = s[1][:-1]
-
-        if os.path.splitext(urlparse(url).path)[1] in IMAGE_TYPES:
-            replacement = "<img alt='%s' src=%s>" % (label, url)
-        else:
-            replacement = "<a href=%s>%s</a>" % (url, label)
-
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
-
-def parse_code(test_str):
-    regex = r"(?<!\\)`\w{1,}?`"
-    # this only matches single words, but escaping is less complicated
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-
-    for match in matches:
-        replacement = "<em class=inlineCode style='font-family: monospace;font-style: normal;'>%s</em>" % match.group()[1:-1]
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    out = ""
-    inBlock = 0
-    for line in test_str.split("\n"):
-        if line == "```":
-            if inBlock % 2 == 0:
-                out += "<p class=codeBlock style='font-family: monospace;font-style: normal;white-space: pre-wrap;'>\n"
-            else:
-                out += "</p>\n"
-            inBlock += 1
-        else:
-            out += line + "\n"
-
-    return out
-
-def parse_lists(test_str):
-    regex = r"^[1-9][.)] .*$|- .*$"
-    matches = re.finditer(regex, test_str, re.MULTILINE)
-    offset = 0
-    theFirstOne = True
-
-    for match in matches:
-        if theFirstOne:
-            if match.group()[0].isdigit():
-                listType = "ol"
-                cutoff = 3
-            else:
-                listType = "ul"
-                cutoff = 2
-            replacement = "<%s>\n<li>%s</li>" % (listType, match.group()[cutoff:])
-            theFirstOne = False
-        else:
-            if re.match(regex, [i for i in test_str[match.end()+offset:].split("\n") if i != ''][0]) is None:
-                theFirstOne = True
-                replacement = "<li>%s</li>\n</%s>" % (match.group()[cutoff:], listType)
-            else:
-                replacement = "<li>%s</li>" % match.group()[cutoff:]
-        test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
-        offset += (len(replacement) - (match.end() - match.start()))
-
-    return test_str
+    renderer = HighlighterRenderer()
+    md = misaka.Markdown(renderer, extensions=('fenced-code', 'quote'))
 
-def add_linebreaks(test_str):
-    return re.sub(r"^$", "<br><br>", test_str, 0, re.MULTILINE)
+    return md(unformatted)
 
 def preview_markdown(path, title, category):
     def startBrowser():