switched to mistune (over misaka) for markdown parsing, added table of contents

author: jwansek <eddie.atten.ea29@gmail.com> 2022-03-12 19:50:03 +0000
committer: jwansek <eddie.atten.ea29@gmail.com> 2022-03-12 19:50:03 +0000
commit: c7fea1d2c78e77654f098f9ac7409f97ad171f44 (patch)
tree: c637935478b24bd3d89490148eb8528ec1e39d2a /parser.py
parent: ae079307387d1e97a7ae425dc9c110cafd873116 (diff)
download: boymoder.blog-c7fea1d2c78e77654f098f9ac7409f97ad171f44.tar.gz boymoder.blog-c7fea1d2c78e77654f098f9ac7409f97ad171f44.zip
1 file changed, 31 insertions, 16 deletions
diff --git a/parser.py b/parser.py
index d79d400..4044fb8 100755
--- a/parser.py
+++ b/parser.py
@@ -6,17 +6,19 @@ from pygments.formatters import HtmlFormatter, ClassNotFound
 from pygments.lexers import get_lexer_by_name
 import urllib.parse
 import webbrowser
+import lxml.etree
+import lxml.html
 import database
 import argparse
 import getpass
 import houdini
-import misaka
+import mistune
 import app
 import sys
 import re
 import os
 
-class HighlighterRenderer(misaka.SaferHtmlRenderer):
+class EdawebRenderer(mistune.HTMLRenderer):
     def blockcode(self, text, lang):
         try:
             lexer = get_lexer_by_name(lang, stripall=True)
@@ -29,42 +31,55 @@ class HighlighterRenderer(misaka.SaferHtmlRenderer):
         # default
         return '\n<pre><code>{}</code></pre>\n'.format(houdini.escape_html(text.strip()))
 
-    def blockquote(self, content):
+    def block_quote(self, content):
         content = content[3:-5] # idk why this is required...
         out = '\n<blockquote>'
         for line in houdini.escape_html(content.strip()).split("\n"):
             out += '\n<span class="quote">{}</span><br>'.format(line)
         return out + '\n</blockquote>'
 
-    def image(self, link, title, alt):
+    def image(self, link, text, title):
         return "<a href='%s' target='_blank'><img alt='%s' src='%s'></a>" % (
-            urlparse(link)._replace(query='').geturl(), alt, link
+            urlparse(link)._replace(query='').geturl(), text, link
         )
 
-    def header(self, content, level):
-        # if level > 1:
-        hash_ = urllib.parse.quote_plus(content)
+    def heading(self, text, level):
+        hash_ = urllib.parse.quote_plus(text)
         return "<h%d id='%s'>%s <a class='header_linker' href='#%s'>[#]</a></h%d>" % (
-            level, hash_, content, hash_, level
+            level, hash_, text, hash_, level
         )
-        # else:
-        #     return "<h1>%s</h1>" % content
 
 def get_thought_from_id(db, id_):
     category_name, title, dt, markdown = db.get_thought(id_)
-    return category_name, title, dt, parse_text(markdown)
+    html, headers = parse_text(markdown)
+    return category_name, title, dt, html, headers
 
 def parse_file(path):
     with open(path, "r") as f:
         unformatted = f.read()
 
-    return parse_text(unformatted)    
+    return parse_text(unformatted)[0]    
 
 def parse_text(unformatted):
-    renderer = HighlighterRenderer()
-    md = misaka.Markdown(renderer, extensions=('fenced-code', 'quote'))
+    md = mistune.create_markdown(
+        renderer = EdawebRenderer(), 
+        plugins = ["strikethrough", "table", "url", "task_lists"]
+    )
+    html = md(unformatted)
+    root = lxml.html.fromstring(html)
+
+    headers = []
+    for node in root.xpath('//h1|//h2|//h3|//h4|//h5//h6'):
+        headers.append((
+            # lxml.etree.tostring(node),
+            # "<p>%s</p>" % urllib.parse.unquote_plus(node.attrib["id"]),     # possibly insecure?
+            urllib.parse.unquote_plus(node.attrib["id"]),
+            int(node.tag[-1]),                                              #   -horrible hack
+            "#%s" % node.attrib["id"])
+        )
+    # print(headers)
 
-    return md(unformatted)
+    return html, headers
 
 def preview_markdown(path, title, category):
     def startBrowser():
author	jwansek <eddie.atten.ea29@gmail.com>	2022-03-12 19:50:03 +0000
committer	jwansek <eddie.atten.ea29@gmail.com>	2022-03-12 19:50:03 +0000
commit	c7fea1d2c78e77654f098f9ac7409f97ad171f44 (patch)
tree	c637935478b24bd3d89490148eb8528ec1e39d2a /parser.py
parent	ae079307387d1e97a7ae425dc9c110cafd873116 (diff)
download	boymoder.blog-c7fea1d2c78e77654f098f9ac7409f97ad171f44.tar.gz boymoder.blog-c7fea1d2c78e77654f098f9ac7409f97ad171f44.zip