about | summary | refs | log | tree | commit | diff | stats
path: root/parser.py
diff options
context:
space:
mode:
author: jwansek <eddie.atten.ea29@gmail.com> 2021-03-15 16:14:47 +0000
committer: jwansek <eddie.atten.ea29@gmail.com> 2021-03-15 16:14:47 +0000
commit: 5e528a1484533e64815db07608dc7fb1613fa36f (patch)
tree: b389cd15f58351055c75b29507ea844dcc9e7f94 /parser.py
parent: 2278b8331c6c84eea1d860c89adc7ae4eed24d27 (diff)
download: eda.gay-5e528a1484533e64815db07608dc7fb1613fa36f.tar.gz
download: eda.gay-5e528a1484533e64815db07608dc7fb1613fa36f.zip
switched markdown parser, added greentexts
Diffstat (limited to 'parser.py')
-rwxr-xr-x parser.py 159
1 files changed, 28 insertions, 131 deletions
diff --git a/parser.py b/parser.py
index 8fb7408..1ba94cd 100755
--- a/parser.py
+++ b/parser.py
@@ -1,26 +1,40 @@
#!/usr/bin/env python3
from urllib.parse import urlparse
+from pygments import highlight
+from pygments.formatters import HtmlFormatter, ClassNotFound
+from pygments.lexers import get_lexer_by_name
import webbrowser
import database
import argparse
import getpass
+import houdini
+import misaka
import app
import sys
import re
import os
-# DISCLAIMER
-# There is almost certainly a python package to
-# do this better. I wanted to do it myself as a challenge.
-
-# TODO:
-# - Add table formatting
-# - Fix <br>s with newlines
-# - Fix nested markdown elements
-
-HEADER_INCREMENTER = 1
-IMAGE_TYPES = [".png", ".jpg"]
+class HighlighterRenderer(misaka.SaferHtmlRenderer):
+    """Misaka renderer with Pygments-highlighted code blocks and greentext quotes."""
+
+    def blockcode(self, text, lang):
+        """Render a code block, highlighted when Pygments knows ``lang``.
+
+        Falls back to an HTML-escaped ``<pre><code>`` block when
+        ``get_lexer_by_name`` raises ``ClassNotFound`` for ``lang``.
+        """
+        try:
+            lexer = get_lexer_by_name(lang, stripall=True)
+        except ClassNotFound:
+            lexer = None
+
+        if lexer:
+            formatter = HtmlFormatter()
+            return highlight(text, lexer, formatter)
+        # default
+        return '\n<pre><code>{}</code></pre>\n'.format(houdini.escape_html(text.strip()))
+
+    def blockquote(self, content):
+        """Render a block quote as one ``<span class="quote">`` per line.
+
+        Each line of the quote is HTML-escaped and followed by ``<br>``.
+        """
+        # NOTE(review): the slice presumably drops a leading "<p>" and a
+        # trailing "</p>\n" that misaka wraps around the quote body; confirm
+        # against misaka's output for multi-paragraph quotes.
+        content = content[3:-5]
+        lines = houdini.escape_html(content.strip()).split("\n")
+        # The debug print of the rendered HTML was removed: it wrote to
+        # stdout every time a quote was rendered.
+        spans = ''.join(
+            '\n<span class="quote">{}</span><br>'.format(line) for line in lines
+        )
+        return '\n<blockquote>' + spans + '\n</blockquote>'
def get_thought_from_id(db, id_):
category_name, title, dt, markdown = db.get_thought(id_)
@@ -33,127 +47,10 @@ def parse_file(path):
return parse_text(unformatted)
def parse_text(unformatted):
- formatted = parse_headers(unformatted)
- formatted = parse_asteriscs(formatted)
- formatted = parse_links(formatted)
- formatted = parse_code(formatted)
- formatted = parse_lists(formatted)
- formatted = add_linebreaks(formatted)
-
- return formatted
-
-def parse_headers(test_str):
- regex = r"^#{1,5}\s\w.*$"
- matches = re.finditer(regex, test_str, re.MULTILINE)
- offset = 0
-
- for match in matches:
- # work out if its h2, h3 etc. from the number of #s
- headerNo = len(match.group().split(" ")[0]) + HEADER_INCREMENTER
-
- replacement = "<h%i>%s</h%i>" % (headerNo, " ".join(match.group().split(" ")[1:]), headerNo)
-
- #don't use .replace() in the unlikely case the the regex hit appears in a block
- test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
- #replacing the hits fucks up the indexes, accommodate for this
- offset += (len(replacement) - (match.end() - match.start()))
-
- return test_str
-
-def parse_asteriscs(test_str):
- regex = r"(?<!\\)\*{1,3}.*?\*{1,3}"
- matches = re.finditer(regex, test_str, re.MULTILINE)
- offset = 0
-
- for match in matches:
- if len(re.findall(r"\*{1,3}.*?\\\*{1,3}", match.group())) == 0: #need to find a way of doing this with regexes
- if match.group().startswith(re.findall(r"\w\*{1,3}", match.group())[0][1:]): #this too
- if match.group().startswith("***"):
- replacement = "<b><i>%s</i></b>" % (match.group()[3:-3])
- elif match.group().startswith("**"):
- replacement = "<b>%s</b>" % (match.group()[2:-2])
- else:
- replacement = "<i>%s</i>" % (match.group()[1:-1])
-
- test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
- offset += (len(replacement) - (match.end() - match.start()))
-
- return test_str
-
-def parse_links(test_str):
- regex = r"(?<!\\)\[.*?\]\(.*?\)"
- matches = re.finditer(regex, test_str, re.MULTILINE)
- offset = 0
-
- for match in matches:
- s = match.group().split("(")
- label = s[0][1:-1]
- url = s[1][:-1]
-
- if os.path.splitext(urlparse(url).path)[1] in IMAGE_TYPES:
- replacement = "<img alt='%s' src=%s>" % (label, url)
- else:
- replacement = "<a href=%s>%s</a>" % (url, label)
-
- test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
- offset += (len(replacement) - (match.end() - match.start()))
-
- return test_str
-
-def parse_code(test_str):
- regex = r"(?<!\\)`\w{1,}?`"
- # this only matches single words, but escaping is less complicated
- matches = re.finditer(regex, test_str, re.MULTILINE)
- offset = 0
-
- for match in matches:
- replacement = "<em class=inlineCode style='font-family: monospace;font-style: normal;'>%s</em>" % match.group()[1:-1]
- test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
- offset += (len(replacement) - (match.end() - match.start()))
-
- out = ""
- inBlock = 0
- for line in test_str.split("\n"):
- if line == "```":
- if inBlock % 2 == 0:
- out += "<p class=codeBlock style='font-family: monospace;font-style: normal;white-space: pre-wrap;'>\n"
- else:
- out += "</p>\n"
- inBlock += 1
- else:
- out += line + "\n"
-
- return out
-
-def parse_lists(test_str):
- regex = r"^[1-9][.)] .*$|- .*$"
- matches = re.finditer(regex, test_str, re.MULTILINE)
- offset = 0
- theFirstOne = True
-
- for match in matches:
- if theFirstOne:
- if match.group()[0].isdigit():
- listType = "ol"
- cutoff = 3
- else:
- listType = "ul"
- cutoff = 2
- replacement = "<%s>\n<li>%s</li>" % (listType, match.group()[cutoff:])
- theFirstOne = False
- else:
- if re.match(regex, [i for i in test_str[match.end()+offset:].split("\n") if i != ''][0]) is None:
- theFirstOne = True
- replacement = "<li>%s</li>\n</%s>" % (match.group()[cutoff:], listType)
- else:
- replacement = "<li>%s</li>" % match.group()[cutoff:]
- test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:]
- offset += (len(replacement) - (match.end() - match.start()))
-
- return test_str
+ renderer = HighlighterRenderer()
+ md = misaka.Markdown(renderer, extensions=('fenced-code', 'quote'))
-def add_linebreaks(test_str):
- return re.sub(r"^$", "<br><br>", test_str, 0, re.MULTILINE)
+ return md(unformatted)
def preview_markdown(path, title, category):
def startBrowser():