Added scraping whispa for Q&As

author: jwansek <eddie.atten.ea29@gmail.com> 2025-04-08 17:42:04 +0100
committer: jwansek <eddie.atten.ea29@gmail.com> 2025-04-08 17:42:04 +0100
commit: 4909153556a84d0b968005cc8b43df528f2dde1d (patch)
tree: 8b82e9dd20a260ab82ee66fa69ce2a1fadd6d473 /services.py
parent: f422d957177563a89a4b7792fe395588bb157e10 (diff)
download: boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.tar.gz
boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.zip
1 files changed, 28 insertions, 1 deletions
diff --git a/services.py b/services.py
index 075d533..b56a07d 100755
--- a/services.py
+++ b/services.py
@@ -6,6 +6,7 @@ import multiprocessing
 import pihole as ph
 import qbittorrent
 import configparser
+import math as maths
 import requests
 import datetime
 import urllib
@@ -242,6 +243,30 @@ def parse_tweet(tweet_url):
 
     return dt, replying_to, text, images
 
+def scrape_whispa(whispa_url, since):
+    tree = html.fromstring(requests.get(whispa_url).content.decode())
+    qnas = []
+    # This isn't really proper HTML scraping: since the site uses client-side rendering,
+    # we instead parse the inline JS scripts to get the JSON payload of useful information... sadly this looks horrible
+    for i, script in enumerate(tree.xpath("/html/body/script"), 0):
+        js = str(script.text)
+        if "receivedFeedback" in js:
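+            # my god this is horrible... drop the first 19 chars and the last char of the JS, parse what is left as JSON, then parse the string at index 1 (minus its first 2 chars) as JSON again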
+            for j in json.loads(json.loads(js[19:-1])[1][2:])[0][3]["loadedUser"]["receivedFeedback"]:
+                dt = datetime.datetime.fromisoformat(j["childFeedback"][0]["createdAt"][:-1])
+            
+                qnas.append({
+                    # "id": int(str(maths.modf(maths.log(int(j["id"], 16)))[0])[2:]),
+                    "id": int(dt.timestamp()),
+                    "link": None,
+                    "datetime": dt,
+                    "question": j["content"],
+                    "answer": j["childFeedback"][0]["content"],
+                    "host": "whispa.sh"
+                })
+    return qnas
+
+
 if __name__ == "__main__":
     # print(get_trans_stats())
 
@@ -250,4 +275,6 @@ if __name__ == "__main__":
 
     # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
 
-    print(request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=30)))
+    # print(request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=30)))
+
+    print(scrape_whispa(CONFIG.get("qnas", "url"), datetime.datetime.fromtimestamp(0.0)))
author	jwansek <eddie.atten.ea29@gmail.com>	2025-04-08 17:42:04 +0100
committer	jwansek <eddie.atten.ea29@gmail.com>	2025-04-08 17:42:04 +0100
commit	4909153556a84d0b968005cc8b43df528f2dde1d (patch)
tree	8b82e9dd20a260ab82ee66fa69ce2a1fadd6d473 /services.py
parent	f422d957177563a89a4b7792fe395588bb157e10 (diff)
download	boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.tar.gz boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.zip