aboutsummaryrefslogtreecommitdiffstats
path: root/services.py
diff options
context:
space:
mode:
authorjwansek <eddie.atten.ea29@gmail.com>2025-04-08 17:42:04 +0100
committerjwansek <eddie.atten.ea29@gmail.com>2025-04-08 17:42:04 +0100
commit4909153556a84d0b968005cc8b43df528f2dde1d (patch)
tree8b82e9dd20a260ab82ee66fa69ce2a1fadd6d473 /services.py
parentf422d957177563a89a4b7792fe395588bb157e10 (diff)
downloadboymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.tar.gz
boymoder.blog-4909153556a84d0b968005cc8b43df528f2dde1d.zip
Added scraping whispa for Q&As
Diffstat (limited to 'services.py')
-rwxr-xr-xservices.py29
1 files changed, 28 insertions, 1 deletions
diff --git a/services.py b/services.py
index 075d533..b56a07d 100755
--- a/services.py
+++ b/services.py
@@ -6,6 +6,7 @@ import multiprocessing
import pihole as ph
import qbittorrent
import configparser
+import math as maths
import requests
import datetime
import urllib
@@ -242,6 +243,30 @@ def parse_tweet(tweet_url):
return dt, replying_to, text, images
+def scrape_whispa(whispa_url, since):
+    """Scrape the question/answer pairs embedded in a whispa page.
+
+    The site uses client side rendering, so rather than walking the DOM this
+    pulls the JSON payload out of the inline ``<script>`` tags in the body.
+
+    Args:
+        whispa_url: URL of the whispa page to fetch.
+        since: Accepted for interface compatibility but not used here; every
+            answered question found on the page is returned.
+            NOTE(review): presumably intended as a lower bound on the answer
+            time -- confirm with the callers before filtering on it.
+
+    Returns:
+        A list of dicts with the keys ``id``, ``link``, ``datetime``,
+        ``question``, ``answer`` and ``host``, in the order the entries
+        appear in the payload. Entries without any reply are skipped.
+    """
+    tree = html.fromstring(requests.get(whispa_url).content.decode())
+    qnas = []
+    for script in tree.xpath("/html/body/script"):
+        js = str(script.text)
+        if "receivedFeedback" not in js:
+            continue
+
+        # js[19:-1] presumably strips a `self.__next_f.push(` wrapper (19
+        # characters) and its closing bracket, leaving a JSON array whose
+        # second item is a string; past its first two characters that string
+        # is itself JSON -- TODO confirm against a live page.
+        pushed = json.loads(js[19:-1])
+        payload = json.loads(pushed[1][2:])
+        for feedback in payload[0][3]["loadedUser"]["receivedFeedback"]:
+            replies = feedback["childFeedback"]
+            if not replies:
+                # Nothing to pair the question with; skip it rather than
+                # letting an IndexError abort the whole scrape.
+                continue
+            answer = replies[0]
+
+            # The last character (presumably a trailing "Z") is dropped
+            # before parsing, so the result is a naive datetime.
+            dt = datetime.datetime.fromisoformat(answer["createdAt"][:-1])
+
+            qnas.append({
+                # The id is derived from the reply time. dt is naive, so
+                # timestamp() interprets it in the machine's local timezone.
+                "id": int(dt.timestamp()),
+                "link": None,
+                "datetime": dt,
+                "question": feedback["content"],
+                "answer": answer["content"],
+                "host": "whispa.sh"
+            })
+    return qnas
+
+
if __name__ == "__main__":
# print(get_trans_stats())
@@ -250,4 +275,6 @@ if __name__ == "__main__":
# print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
- print(request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=30)))
+ # print(request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=30)))
+
+ print(scrape_whispa(CONFIG.get("qnas", "url"), datetime.datetime.fromtimestamp(0.0)))