diff options
author | jwansek <eddie.atten.ea29@gmail.com> | 2021-02-05 16:25:06 +0000 |
---|---|---|
committer | jwansek <eddie.atten.ea29@gmail.com> | 2021-02-05 16:25:06 +0000 |
commit | 58fecf855d151ef0459a43ab9af271a3a4922ad5 (patch) | |
tree | abdde5f0ddc7b9f4d9765e7aaceb56c3d7ddfee3 /parser.py | |
parent | c4b59d34db126a4d3471162ff34a04661ff41c99 (diff) | |
download | boymoder.blog-58fecf855d151ef0459a43ab9af271a3a4922ad5.tar.gz boymoder.blog-58fecf855d151ef0459a43ab9af271a3a4922ad5.zip |
finished start formatting, added preview script
Diffstat (limited to 'parser.py')
-rw-r--r-- | parser.py | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/parser.py b/parser.py new file mode 100644 index 0000000..8677342 --- /dev/null +++ b/parser.py @@ -0,0 +1,123 @@ +import argparse +from urllib.parse import urlparse +import webbrowser +import app +import re +import os + +HEADER_INCREMENTER = 1 +IMAGE_TYPES = [".png", ".jpg"] + +def parse_file(path): + with open(path, "r") as f: + unformatted = f.read() + + formatted = parse_headers(unformatted) + formatted = parse_asteriscs(formatted) + formatted = parse_links(formatted) + formatted = add_linebreaks(formatted) + + return '{% extends "template.html" %}\n{% block content %}\n' + formatted + '\n{% endblock %}' + +def parse_headers(test_str): + regex = r"^#{1,5}\s\w.*$" + matches = re.finditer(regex, test_str, re.MULTILINE) + offset = 0 + + for match in matches: + # work out if its h2, h3 etc. from the number of #s + headerNo = len(match.group().split(" ")[0]) + HEADER_INCREMENTER + + replacement = "<h%i>%s</h%i>" % (headerNo, " ".join(match.group().split(" ")[1:]), headerNo) + + #don't use .replace() in the unlikely case the the regex hit appears in a block + test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] + #replacing the hits fucks up the indexes, accommodate for this + offset += (len(replacement) - (match.end() - match.start())) + + return test_str + +def parse_asteriscs(test_str): + regex = r"(?<!\\)\*{1,3}.*?\*{1,3}" + matches = re.finditer(regex, test_str, re.MULTILINE) + offset = 0 + + for match in matches: + if len(re.findall(r"\*{1,3}.*?\\\*{1,3}", match.group())) == 0: #need to find a way of doing this with regexes + if match.group().startswith(re.findall(r"\w\*{1,3}", match.group())[0][1:]): #this too + if match.group().startswith("***"): + replacement = "<b><i>%s</i></b>" % (match.group()[3:-3]) + elif match.group().startswith("**"): + replacement = "<b>%s</b>" % (match.group()[2:-2]) + else: + replacement = "<i>%s</i>" % (match.group()[1:-1]) + + test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] + offset += (len(replacement) - (match.end() - match.start())) + + return test_str + +def parse_links(test_str): + regex = r"(?<!\\)\[.*?\]\(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\)" + matches = re.finditer(regex, test_str, re.MULTILINE) + offset = 0 + + for match in matches: + s = match.group().split("(") + label = s[0][1:-1] + url = s[1][:-1] + + if os.path.splitext(urlparse(url).path)[1] in IMAGE_TYPES: + replacement = "<img alt='%s' src=%s>" % (label, url) + else: + replacement = "<a href=%s>%s</a>" % (url, label) + + test_str = test_str[:match.start()+offset] + replacement + test_str[match.end()+offset:] + offset += (len(replacement) - (match.end() - match.start())) + + return test_str + +def add_linebreaks(test_str): + return re.sub(r"^$", "<br><br>", test_str, 0, re.MULTILINE) + +def preview_markdown(path, title): + def startBrowser(): + webbrowser.get("firefox").open("http://localhost:5000/preview") + del os.environ["PREVIEW"] + del os.environ["PREVIEW_TITLE"] + + os.environ["PREVIEW"] = parse_file(path) + os.environ["PREVIEW_TITLE"] = title + + import threading + threading.Timer(1.25, startBrowser ).start() + + app.app.run(host = "0.0.0.0", debug = True) + +def main(): + p = argparse.ArgumentParser() + p.add_argument( + "-m", "--markdown", + help = "Path to a markdown file", + required = True, + type = str + ) + p.add_argument( + "-t", "--title", + help = "Article title", + required = True, + type = str + ) + p.add_argument( + "-p", "--preview", + help = "Preview markdown rendering", + action='store_true' + ) + args = vars(p.parse_args()) + if args["preview"]: + preview_markdown(args["markdown"], args["title"]) + else: + print(parse_file(args["markdown"])) + +if __name__ == "__main__": + main()
\ No newline at end of file |