From 5e528a1484533e64815db07608dc7fb1613fa36f Mon Sep 17 00:00:00 2001 From: jwansek Date: Mon, 15 Mar 2021 16:14:47 +0000 Subject: switched markdown parser, added greentexts --- parser.py | 159 +++++++++++--------------------------------------------------- 1 file changed, 28 insertions(+), 131 deletions(-) (limited to 'parser.py') diff --git a/parser.py b/parser.py index 8fb7408..1ba94cd 100755 --- a/parser.py +++ b/parser.py @@ -1,26 +1,40 @@ #!/usr/bin/env python3 from urllib.parse import urlparse +from pygments import highlight +from pygments.formatters import HtmlFormatter, ClassNotFound +from pygments.lexers import get_lexer_by_name import webbrowser import database import argparse import getpass +import houdini +import misaka import app import sys import re import os -# DISCLAIMER -# There is almost certainly a python package to -# do this better. I wanted to do it myself as a challenge. - -# TODO: -# - Add table formatting -# - Fix
s with newlines -# - Fix nested markdown elements - -HEADER_INCREMENTER = 1 -IMAGE_TYPES = [".png", ".jpg"] +class HighlighterRenderer(misaka.SaferHtmlRenderer): + def blockcode(self, text, lang): + try: + lexer = get_lexer_by_name(lang, stripall=True) + except ClassNotFound: + lexer = None + + if lexer: + formatter = HtmlFormatter() + return highlight(text, lexer, formatter) + # default + return '\n
{}
\n'.format(houdini.escape_html(text.strip())) + + def blockquote(self, content): + content = content[3:-5] # idk why this is required... + out = '\n
' + for line in houdini.escape_html(content.strip()).split("\n"): + out += '\n{}
'.format(line) + print(out) + return out + '\n
' def get_thought_from_id(db, id_): category_name, title, dt, markdown = db.get_thought(id_) @@ -33,127 +47,10 @@ def parse_file(path): return parse_text(unformatted) def parse_text(unformatted): - formatted = parse_headers(unformatted) - formatted = parse_asteriscs(formatted) - formatted = parse_links(formatted) - formatted = parse_code(formatted) - formatted = parse_lists(formatted) - formatted = add_linebreaks(formatted) - - return formatted - -def parse_headers(test_str): - regex = r"^#{1,5}\s\w.*$" - matches = re.finditer(regex, test_str, re.MULTILINE) - offset = 0 - - for match in matches: - # work out if its h2, h3 etc. from the number of #s - headerNo = len(match.group().split(" ")[0]) + HEADER_INCREMENTER - - replacement = "%s" % (headerNo, " ".join(match.group().split(" ")[1:]), headerNo) - - #don't use .replace() in the unlikely case the the regex hit appears in a block - test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] - #replacing the hits fucks up the indexes, accommodate for this - offset += (len(replacement) - (match.end() - match.start())) - - return test_str - -def parse_asteriscs(test_str): - regex = r"(?%s" % (match.group()[3:-3]) - elif match.group().startswith("**"): - replacement = "%s" % (match.group()[2:-2]) - else: - replacement = "%s" % (match.group()[1:-1]) - - test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] - offset += (len(replacement) - (match.end() - match.start())) - - return test_str - -def parse_links(test_str): - regex = r"(?" % (label, url) - else: - replacement = "%s" % (url, label) - - test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] - offset += (len(replacement) - (match.end() - match.start())) - - return test_str - -def parse_code(test_str): - regex = r"(?%s" % match.group()[1:-1] - test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] - offset += (len(replacement) - (match.end() - match.start())) - - out = "" - inBlock = 0 - for line in test_str.split("\n"): - if line == "```": - if inBlock % 2 == 0: - out += "

\n" - else: - out += "

\n" - inBlock += 1 - else: - out += line + "\n" - - return out - -def parse_lists(test_str): - regex = r"^[1-9][.)] .*$|- .*$" - matches = re.finditer(regex, test_str, re.MULTILINE) - offset = 0 - theFirstOne = True - - for match in matches: - if theFirstOne: - if match.group()[0].isdigit(): - listType = "ol" - cutoff = 3 - else: - listType = "ul" - cutoff = 2 - replacement = "<%s>\n
  • %s
  • " % (listType, match.group()[cutoff:]) - theFirstOne = False - else: - if re.match(regex, [i for i in test_str[match.end()+offset:].split("\n") if i != ''][0]) is None: - theFirstOne = True - replacement = "
  • %s
  • \n" % (match.group()[cutoff:], listType) - else: - replacement = "
  • %s
  • " % match.group()[cutoff:] - test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] - offset += (len(replacement) - (match.end() - match.start())) - - return test_str + renderer = HighlighterRenderer() + md = misaka.Markdown(renderer, extensions=('fenced-code', 'quote')) -def add_linebreaks(test_str): - return re.sub(r"^$", "

    ", test_str, 0, re.MULTILINE) + return md(unformatted) def preview_markdown(path, title, category): def startBrowser(): -- cgit v1.2.3