diff options
| field | value | date |
|---|---|---|
| author | jwansek <eddie.atten.ea29@gmail.com> | 2025-04-08 17:42:04 +0100 |
| committer | jwansek <eddie.atten.ea29@gmail.com> | 2025-04-08 17:42:04 +0100 |
| commit | 4909153556a84d0b968005cc8b43df528f2dde1d (patch) | |
| tree | 8b82e9dd20a260ab82ee66fa69ce2a1fadd6d473 | |
| parent | f422d957177563a89a4b7792fe395588bb157e10 (diff) | |
| download | boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.tar.gz boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.zip | |
Added scraping whispa for Q&As
| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | app.py | 1 |
| -rw-r--r-- | database.py | 19 |
| -rwxr-xr-x | services.py | 29 |
| -rwxr-xr-x | static/style.css | 4 |
| -rw-r--r-- | templates/questions.html.j2 | 10 |

5 files changed, 52 insertions, 11 deletions
@@ -225,6 +225,7 @@ def serve_questions(): return flask.render_template( "questions.html.j2", **get_template_items("questions and answers", db), + qnas_link = CONFIG.get("qnas", "url"), qnas = db.get_qnas() ) diff --git a/database.py b/database.py index d5327e4..cd15cec 100644 --- a/database.py +++ b/database.py @@ -224,15 +224,20 @@ class Database: cursor.execute("SELECT curiouscat_id FROM qnas WHERE curiouscat_id = %s;", (qna["id"], )) if cursor.fetchone() is None: - cursor.execute("INSERT INTO `qnas` VALUES (%s, %s, %s, %s, %s);", ( - qna["id"], qna["link"], qna["datetime"], qna["question"], qna["answer"] + cursor.execute("INSERT INTO `qnas` VALUES (%s, %s, %s, %s, %s, %s);", ( + qna["id"], qna["link"], qna["datetime"], qna["question"], qna["answer"], qna["host"] )) - print("Appended question with timestamp %s" % datetime.datetime.fromtimestamp(qna["id"]).isoformat()) + print("Appended question with timestamp %s" % qna["datetime"].isoformat()) else: - print("Skipped question with timestamp %s" % datetime.datetime.fromtimestamp(qna["id"]).isoformat()) + print("Skipped question with timestamp %s" % qna["datetime"].isoformat()) self.__connection.commit() + def get_oldest_qna(self): + with self.__connection.cursor() as cursor: + cursor.execute("SELECT MAX(timestamp) FROM qnas;") + return cursor.fetchone()[0] + def get_qnas(self): with self.__connection.cursor() as cursor: cursor.execute("SELECT * FROM qnas;") @@ -243,12 +248,12 @@ def update_cache(): with Database() as db: db.update_commit_cache(services.request_recent_commits(since = db.get_last_commit_time())) print("Finished adding github commits...") + db.append_qnas(services.scrape_whispa(db.config.get("qnas", "url"), since = db.get_oldest_qna())) + print("Finished parsing Q&As...") if __name__ == "__main__": update_cache() - import json - with Database() as db: - print(db.get_cached_commits()[0]) + diff --git a/services.py b/services.py index 075d533..b56a07d 100755 --- a/services.py +++ b/services.py @@ -6,6 
+6,7 @@ import multiprocessing import pihole as ph import qbittorrent import configparser +import math as maths import requests import datetime import urllib @@ -242,6 +243,30 @@ def parse_tweet(tweet_url): return dt, replying_to, text, images +def scrape_whispa(whispa_url, since): + tree = html.fromstring(requests.get(whispa_url).content.decode()) + qnas = [] + # we're not doing proper HTML scraping here really... since the site uses client side rendering + # we rather parse the JS scripts to get the JSON payload of useful information... sadly this looks horrible + for i, script in enumerate(tree.xpath("/html/body/script"), 0): + js = str(script.text) + if "receivedFeedback" in js: + # my god this is horrible... + for j in json.loads(json.loads(js[19:-1])[1][2:])[0][3]["loadedUser"]["receivedFeedback"]: + dt = datetime.datetime.fromisoformat(j["childFeedback"][0]["createdAt"][:-1]) + + qnas.append({ + # "id": int(str(maths.modf(maths.log(int(j["id"], 16)))[0])[2:]), + "id": int(dt.timestamp()), + "link": None, + "datetime": dt, + "question": j["content"], + "answer": j["childFeedback"][0]["content"], + "host": "whispa.sh" + }) + return qnas + + if __name__ == "__main__": # print(get_trans_stats()) @@ -250,4 +275,6 @@ if __name__ == "__main__": # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m")) - print(request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=30))) + # print(request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=30))) + + print(scrape_whispa(CONFIG.get("qnas", "url"), datetime.datetime.fromtimestamp(0.0))) diff --git a/static/style.css b/static/style.css index 4a47ea7..6069ebf 100755 --- a/static/style.css +++ b/static/style.css @@ -147,6 +147,10 @@ aside { margin-top: 10px; } +.qnaheader { + font-weight: bold; +} + blockquote span { color: #789922; } diff --git a/templates/questions.html.j2 b/templates/questions.html.j2 index 97edd38..eb58380 100644 --- 
a/templates/questions.html.j2 +++ b/templates/questions.html.j2 @@ -1,9 +1,13 @@ {% extends "template.html.j2" %} {% block content %} - <h4><a href="https://whispa.sh/@boymoderology">ask a question!</a></h4> + <h4><a href="{{ qnas_link }}">ask a question!</a></h4> <dl> - {% for id_, link, dt, question, answer in qnas %} - <dt><a href="{{ link }}">{{ dt.isoformat() }}</a></dt> + {% for id_, link, dt, question, answer, host in qnas %} + {% if host == "curiouscat" %} + <dt class="qnaheader"><a href="{{ link }}">{{ dt.isoformat() }}</a> - {{ host }}</dt> + {% else %} + <dt class="qnaheader">{{ dt.isoformat() }} - {{ host }}</dt> + {% endif %} <dd> <dt class="question"><p>{{ question }}</p></dt> <dd class="answer"><p>{{ answer }}</p></dd> |