From c7fea1d2c78e77654f098f9ac7409f97ad171f44 Mon Sep 17 00:00:00 2001 From: jwansek Date: Sat, 12 Mar 2022 19:50:03 +0000 Subject: switched to mistune (over misaka) for markdown parsing, added table of contents --- parser.py | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) (limited to 'parser.py') diff --git a/parser.py b/parser.py index d79d400..4044fb8 100755 --- a/parser.py +++ b/parser.py @@ -6,17 +6,19 @@ from pygments.formatters import HtmlFormatter, ClassNotFound from pygments.lexers import get_lexer_by_name import urllib.parse import webbrowser +import lxml.etree +import lxml.html import database import argparse import getpass import houdini -import misaka +import mistune import app import sys import re import os -class HighlighterRenderer(misaka.SaferHtmlRenderer): +class EdawebRenderer(mistune.HTMLRenderer): def blockcode(self, text, lang): try: lexer = get_lexer_by_name(lang, stripall=True) @@ -29,42 +31,55 @@ class HighlighterRenderer(misaka.SaferHtmlRenderer): # default return '\n
{}
\n'.format(houdini.escape_html(text.strip())) - def blockquote(self, content): + def block_quote(self, content): content = content[3:-5] # idk why this is required... out = '\n
' for line in houdini.escape_html(content.strip()).split("\n"): out += '\n{}
'.format(line) return out + '\n
' - def image(self, link, title, alt): + def image(self, link, text, title): return "%s" % ( - urlparse(link)._replace(query='').geturl(), alt, link + urlparse(link)._replace(query='').geturl(), text, link ) - def header(self, content, level): - # if level > 1: - hash_ = urllib.parse.quote_plus(content) + def heading(self, text, level): + hash_ = urllib.parse.quote_plus(text) return "%s [#]" % ( - level, hash_, content, hash_, level + level, hash_, text, hash_, level ) - # else: - # return "

%s

" % content def get_thought_from_id(db, id_): category_name, title, dt, markdown = db.get_thought(id_) - return category_name, title, dt, parse_text(markdown) + html, headers = parse_text(markdown) + return category_name, title, dt, html, headers def parse_file(path): with open(path, "r") as f: unformatted = f.read() - return parse_text(unformatted) + return parse_text(unformatted)[0] def parse_text(unformatted): - renderer = HighlighterRenderer() - md = misaka.Markdown(renderer, extensions=('fenced-code', 'quote')) + md = mistune.create_markdown( + renderer = EdawebRenderer(), + plugins = ["strikethrough", "table", "url", "task_lists"] + ) + html = md(unformatted) + root = lxml.html.fromstring(html) + + headers = [] + for node in root.xpath('//h1|//h2|//h3|//h4|//h5//h6'): + headers.append(( + # lxml.etree.tostring(node), + # "

%s

" % urllib.parse.unquote_plus(node.attrib["id"]), # possibly insecure? + urllib.parse.unquote_plus(node.attrib["id"]), + int(node.tag[-1]), # -horrible hack + "#%s" % node.attrib["id"]) + ) + # print(headers) - return md(unformatted) + return html, headers def preview_markdown(path, title, category): def startBrowser(): -- cgit v1.2.3