about summary refs log tree commit diff stats
path: root/services.py
diff options
context:
space:
mode:
Diffstat (limited to 'services.py')
-rwxr-xr-x services.py 16
1 files changed, 7 insertions, 9 deletions
diff --git a/services.py b/services.py
index 221d36e..ee98677 100755
--- a/services.py
+++ b/services.py
@@ -277,15 +277,14 @@ def scrape_nitter(username, get_until:int):
try:
dt, replying_to, text, images = parse_tweet(tweet_link)
new_tweets.append((id_, dt, replying_to, text, username, images))
- print(dt, text)
+ print(dt, "'%s'" % text)
except IndexError:
print("Couldn't get any more tweets")
scrape_new_pages = False
break
except ConnectionError:
print("Rate limited, try again later")
- scrape_new_pages = False
- break
+ return []
try:
@@ -312,7 +311,9 @@ def parse_tweet(tweet_url):
dt_str = main_tweet_elem.xpath('//*[@class="tweet-published"]')[0].text
dt = datetime.datetime.strptime(dt_str.replace("Â", ""), "%b %d, %Y · %I:%M %p UTC")
- text = tree.xpath('//*[@class="main-tweet"]/div/div/div[2]')[0].text
+ text = tree.xpath('//*[@class="main-tweet"]/div/div/div[2]')[0].text_content()
+ if text == "":
+ text = "[Image only]"
replying_to_elems = tree.xpath('//*[@class="before-tweet thread-line"]/div/a')
if replying_to_elems != []:
replying_to = int(urllib.parse.urlparse(replying_to_elems[-1].get("href")).path.split("/")[-1])
@@ -326,13 +327,10 @@ def parse_tweet(tweet_url):
return dt, replying_to, text, images
-
-
-
-
if __name__ == "__main__":
# print(get_trans_stats())
print(scrape_nitter(CONFIG.get("twitter", "diary_account"), 1697430888617840909))
+ print(scrape_nitter("estrogenizedboy", 1698107440489734640))
- # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
+ # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
\ No newline at end of file