diff options
| author | jwansek <eddie.atten.ea29@gmail.com> | 2026-02-16 17:31:15 +0000 |
|---|---|---|
| committer | jwansek <eddie.atten.ea29@gmail.com> | 2026-02-16 17:31:15 +0000 |
| commit | fc287020b0968832932327997a3a8c84b8333117 (patch) | |
| tree | 47294b4164dee007252e2b61fc454ca9917ff9e7 | |
| parent | 13666206dc6007537ef2cfc7f599e76434a4c7f1 (diff) | |
| download | boymoder.blog-fc287020b0968832932327997a3a8c84b8333117.tar.gz boymoder.blog-fc287020b0968832932327997a3a8c84b8333117.zip | |
| -rw-r--r-- | edaweb/services.py | 18 |
1 files changed, 16 insertions, 2 deletions
```diff
diff --git a/edaweb/services.py b/edaweb/services.py
index bb70d2a..50eed45 100644
--- a/edaweb/services.py
+++ b/edaweb/services.py
@@ -251,6 +251,16 @@ def parse_tweet(tweet_url):
     return dt, replying_to, text, images
 
 def scrape_whispa(whispa_url, since = None):
+    def query_answer(answer_url, max_retries = 10):
+        for i in range(max_retries):
+            try:
+                return requests.get(answer_url)
+            except requests.exceptions.ConnectionError:
+                s = 5.05 * (i + 1)
+                print("Connection timed out, retrying in %.2fs" % s)
+                time.sleep(s)
+                continue
+
     # add a bit of wiggle room in case i don't answer the questions in order (i often do this)
     if since is None:
         stop_at = datetime.datetime(year = 2001, month = 8, day = 12)
@@ -279,8 +289,11 @@ def scrape_whispa(whispa_url, since = None):
                 continue
 
             answer_url = "https://apiv4.whispa.sh/feedbacks/%s/children/public" % j["id"]
-            req = requests.get(answer_url)
-            firstanswer = req.json()["data"][0]
+            req = query_answer(answer_url)
+            try:
+                firstanswer = req.json()["data"][0]
+            except IndexError:
+                continue
             dt = datetime.datetime.fromisoformat(firstanswer["createdAt"][:-1])
 
             qna = {
@@ -294,6 +307,7 @@ def scrape_whispa(whispa_url, since = None):
             }
             print(qna)
             qnas.append(qna)
+            time.sleep(2.03)
             if dt <= stop_at:
                 print("Met the threshold for oldest Q&A, so stopped looking.")
                 break
```
