diff options
author | jwansek <eddie.atten.ea29@gmail.com> | 2025-05-05 15:11:32 +0100 |
---|---|---|
committer | jwansek <eddie.atten.ea29@gmail.com> | 2025-05-05 15:11:32 +0100 |
commit | 7842068dea7aa97fa08e297b2b8bdce700b2fa75 (patch) | |
tree | 59b9dabd9a31bebb7812012c1347c30a2dffc1d8 /edaweb/services.py | |
parent | b246221f81da40c4e19e699138cbb6b6fa44c1ae (diff) | |
download | eda.gay-7842068dea7aa97fa08e297b2b8bdce700b2fa75.tar.gz eda.gay-7842068dea7aa97fa08e297b2b8bdce700b2fa75.zip |
Fixed bug with whispa scraping
Diffstat (limited to 'edaweb/services.py')
-rw-r--r-- | edaweb/services.py | 12 |
1 file changed, 8 insertions, 4 deletions
diff --git a/edaweb/services.py b/edaweb/services.py index 87af050..14d7d23 100644 --- a/edaweb/services.py +++ b/edaweb/services.py @@ -247,7 +247,7 @@ def parse_tweet(tweet_url): return dt, replying_to, text, images -def scrape_whispa(whispa_url, since): +def scrape_whispa(whispa_url): tree = html.fromstring(requests.get(whispa_url).content.decode()) qnas = [] # we're not doing proper HTML scraping here really... since the site uses client side rendering @@ -257,6 +257,9 @@ def scrape_whispa(whispa_url, since): if "receivedFeedback" in js: # my god this is horrible... for j in json.loads(json.loads(js[19:-1])[1][2:])[0][3]["loadedUser"]["receivedFeedback"]: + if j["childFeedback"] == []: + continue + dt = datetime.datetime.fromisoformat(j["childFeedback"][0]["createdAt"][:-1]) qnas.append({ @@ -359,7 +362,8 @@ def get_recent_commits(db, max_per_repo = 3): return sorted(out, key = lambda a: a["datetime"], reverse = True) if __name__ == "__main__": - import database + print(scrape_whispa(CONFIG.get("qnas", "url"))) + # import database - with database.Database() as db: - print(json.dumps(get_recent_commits(db), indent=4)) + # with database.Database() as db: + # print(json.dumps(get_recent_commits(db), indent=4)) |