path: root/database.py
author    jwansek <eddie.atten.ea29@gmail.com>    2023-09-03 20:57:35 +0100
committer jwansek <eddie.atten.ea29@gmail.com>    2023-09-03 20:57:35 +0100
commit    e484a4ecd182d806d004a0b5b9116683bc07217e (patch)
tree      edc225a17aaf7dc93000b9046c92656009a3de57 /database.py
parent    c4ab716c20729a62f7b78b60029c27e0f166f41c (diff)
Added local Nitter instance, fixed Twitter caching using the new method
Diffstat (limited to 'database.py')
-rwxr-xr-x    database.py    156
1 file changed, 49 insertions(+), 107 deletions(-)
diff --git a/database.py b/database.py
index 1877ab7..ea3835c 100755
--- a/database.py
+++ b/database.py
@@ -1,10 +1,10 @@
from urllib.parse import urlparse
from dataclasses import dataclass
-from github import Github
from lxml import html
import configparser
import curiouscat
import threading
+import services
import operator
import datetime
import requests
@@ -40,6 +40,7 @@ class Database:
return self
def __exit__(self, type, value, traceback):
+ self.__connection.commit()
self.__connection.close()
def get_header_links(self):
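
A minimal sketch of the context-manager flow after this hunk, assuming Database wraps a pymysql-style connection (the constructor is outside this diff):

with Database() as db:
    links = db.get_header_links()
# __exit__ now commits before closing, so writes made inside the
# with-block are persisted even when the individual method does not
# commit itself.
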
@@ -132,24 +133,18 @@ class Database:
""")
return cursor.fetchall()
- def get_cached_tweets(self, numToGet = None, recurse = True):
+ def get_cached_tweets(self, numToGet = None):
with self.__connection.cursor() as cursor:
+ sql = "SELECT tweet, tweet_id, account FROM diary WHERE account = %s ORDER BY tweeted_at"
+ args = (self.config.get("twitter", "main_account"), )
if numToGet is not None:
- cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC LIMIT %s;", (numToGet, ))
+ sql += " LIMIT %s;"
+ args = (self.config.get("twitter", "main_account"), numToGet)
else:
- cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC;")
- if recurse:
- threading.Thread(target = update_cache).start()
- return list(cursor.fetchall())
+ sql += ";"
+ cursor.execute(sql, args)
- def update_twitter_cache(self, requested):
- with self.__connection.cursor() as cursor:
- cursor.execute("SELECT DISTINCT url FROM twitterCache;")
- urls = [i[0] for i in cursor.fetchall()]
- for url, text in requested:
- if url not in urls:
- cursor.execute("INSERT INTO twitterCache (text, url) VALUES (%s, %s);", (text, url))
- self.__connection.commit()
+ return [(i[0], "https://%s/%s/status/%d" % (self.config.get("nitter", "outsideurl"), i[2], i[1])) for i in cursor.fetchall()]
def get_cached_commits(self, since = None, recurse = True):
with self.__connection.cursor() as cursor:
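
For reference, the rewritten get_cached_tweets() assembles its query in two shapes; a sketch with illustrative values (the account name and Nitter host are invented, not from the repo config):

sql = "SELECT tweet, tweet_id, account FROM diary WHERE account = %s ORDER BY tweeted_at"
args = ("exampleaccount", )
numToGet = 5
if numToGet is not None:
    sql += " LIMIT %s;"
    args = ("exampleaccount", numToGet)
else:
    sql += ";"
# After cursor.execute(sql, args), each (tweet, tweet_id, account) row
# becomes a (text, url) pair pointing at the configured Nitter instance:
row = ("some tweet", 1234567890, "exampleaccount")
url = "https://%s/%s/status/%d" % ("nitter.example.com", row[2], row[1])
# -> 'https://nitter.example.com/exampleaccount/status/1234567890'
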
@@ -214,29 +209,38 @@ class Database:
self.__connection.commit()
return id_
- def append_diary(self, tweet_id, tweeted_at, replying_to, tweet):
+ def append_diary(self, tweet_id, tweeted_at, replying_to, tweet, account):
+ if tweet is None:
+ tweet = "(Image only)"
with self.__connection.cursor() as cursor:
- cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet))
- print("Appended diary with tweet '%s'" % tweet)
+ cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet, account))
+ self.__connection.commit()
+
+ print("Appended diary with tweet " + tweet + " @ " + str(tweeted_at))
def append_diary_images(self, tweet_id, imurl):
with self.__connection.cursor() as cursor:
cursor.execute("INSERT INTO diaryimages (tweet_id, link) VALUES (%s, %s);", (tweet_id, imurl))
+ self.__connection.commit()
+
- def get_diary(self):
+ def get_diary(self, account = None):
threading.Thread(target = update_cache).start()
out = {}
+ if account is None:
+ account = self.get_my_diary_twitter()
+
with self.__connection.cursor() as cursor:
# cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL ORDER BY tweeted_at DESC;")
# attempt to ignore curiouscat automatic tweets by comparing with the q&a table
- cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) ORDER BY tweeted_at DESC;")
+ cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) AND account = %s ORDER BY tweeted_at DESC;", (account, ))
for tweet_id, tweeted_at, tweet_text in cursor.fetchall():
# print(tweet_id, tweeted_at, tweet_text)
out[tweeted_at] = [{
"text": tweet_text,
"images": self.get_diary_image(tweet_id),
"link": "https://%s/%s/status/%d" % (
- self.config.get("nitter", "domain"),
+ self.config.get("nitter", "outsideurl"),
self.get_my_diary_twitter(),
tweet_id
)
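
The dictionary get_diary() returns is keyed by tweet timestamp; an illustrative shape (values invented). Note the link is still built from get_my_diary_twitter() rather than the new account argument:

# {
#     datetime.datetime(2023, 9, 3, 20, 57, 35): [{
#         "text": "tweet body",
#         "images": [...],  # from get_diary_image(tweet_id)
#         "link": "https://nitter.example.com/exampleaccount/status/1234567890",
#     }],
#     ...
# }
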
@@ -268,46 +272,17 @@ class Database:
"text": out[1],
"images": self.get_diary_image(id_),
"link": "https://%s/%s/status/%d" % (
- self.config.get("nitter", "domain"), self.get_my_diary_twitter(), id_
+ self.config.get("nitter", "outsideurl"), self.get_my_diary_twitter(), id_
)
}, id_
- def get_newest_diary_tweet_id(self):
+ def get_newest_diary_tweet_id(self, account = None):
+ if account is None:
+ account = self.get_my_diary_twitter()
with self.__connection.cursor() as cursor:
- cursor.execute("SELECT MAX(tweet_id) FROM diary;")
+ cursor.execute("SELECT MAX(tweet_id) FROM diary WHERE account = %s;", (account, ))
return cursor.fetchone()[0]
- def fetch_diary(self):
- twitteracc = self.get_my_diary_twitter()
-
- twitter = twython.Twython(
- self.config.get("twitter", "app_key"),
- access_token = self.config.get("twitter", "oauth_2_token")
- )
-
- for tweet in twitter.search(
- q = "(from:%s)" % twitteracc, since_id = self.get_newest_diary_tweet_id(),
- tweet_mode = 'extended'
- )["statuses"]:
-
- tweet_id = tweet["id"]
- tweeted_at = datetime.datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
- replying_to = tweet["in_reply_to_status_id"]
- tweet_text = re.sub(r"https://t\.co/\w{10}", "", tweet["full_text"], 0, re.MULTILINE)
-
- if tweet["in_reply_to_screen_name"] == twitteracc or tweet["in_reply_to_screen_name"] is None:
- self.append_diary(tweet_id, tweeted_at, replying_to, tweet_text)
-
- if "media" in tweet["entities"].keys():
- associated_images = [
- i["media_url_https"].replace("pbs.twimg.com", self.config.get("nitter", "domain") + "/pic")
- for i in tweet["entities"]["media"]
- ]
- for im in associated_images:
- self.append_diary_images(tweet_id, im)
-
- self.__connection.commit()
-
def get_curiouscat_username(self):
with self.__connection.cursor() as cursor:
cursor.execute("SELECT link FROM headerLinks WHERE name = 'curiouscat';")
@@ -339,7 +314,7 @@ class Database:
return sorted(cursor.fetchall(), key = operator.itemgetter(2), reverse = True)
def update_cache():
- # print("updating cache...")
+ print("Updating cache...")
with Database() as db:
db.append_curiouscat_qnas(
curiouscat.get_all_curiouscat_qnas_before(
@@ -347,58 +322,25 @@ def update_cache():
db.get_biggest_curiouscat_timestamp()
)
)
- db.fetch_diary()
- db.update_twitter_cache(request_recent_tweets(10000))
- # print("Done updating twitter cache...")
- db.update_commit_cache(request_recent_commits(since = db.get_last_commit_time()))
- # print("Done updating commit cache...")
-
-CONFIG = configparser.ConfigParser()
-CONFIG.read("edaweb.conf")
-
-def request_recent_tweets(numToGet):
- tweets = []
- domain = "http://" + CONFIG.get("nitter", "domain")
- with Database() as db:
- for title, url in db.get_header_links():
- if title == "twitter":
- break
- tree = html.fromstring(requests.get(url).content)
- for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
- if i > 0:
- tweets.append((
- domain + tweetUrlElement.get("href"),
- tweetUrlElement.getparent().find_class("tweet-content media-body")[0].text
- ))
- if len(tweets) >= numToGet:
- break
- return tweets
-
-def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
- g = Github(CONFIG.get("github", "access_code"))
- out = []
- for repo in g.get_user().get_repos():
- # print(repo.name, list(repo.get_branches()))
- try:
- for commit in repo.get_commits(since = since):
- out.append({
- "repo": repo.name,
- "message": commit.commit.message,
- "url": commit.html_url,
- "datetime": commit.commit.author.date,
- "stats": {
- "additions": commit.stats.additions,
- "deletions": commit.stats.deletions,
- "total": commit.stats.total
- }
- })
- except Exception as e:
- print(e)
-
- return sorted(out, key = lambda a: a["datetime"], reverse = True)
+ print("Finished adding curiouscat...")
+ db.update_commit_cache(services.request_recent_commits(since = db.get_last_commit_time()))
+ print("Finished adding github commits...")
+ for id_, dt, replying_to, text, username, images in services.scrape_nitter(db.get_my_diary_twitter(), db.get_newest_diary_tweet_id()):
+ db.append_diary(id_, dt, replying_to, text, username)
+ for image in images:
+ db.append_diary_images(id_, image)
+ print("Finished getting diary tweets...")
+ for id_, dt, replying_to, text, username, images in services.scrape_nitter(
+ db.config.get("twitter", "main_account"), db.get_newest_diary_tweet_id(db.config.get("twitter", "main_account"))
+ ):
+ db.append_diary(id_, dt, replying_to, text, username)
+ for image in images:
+ db.append_diary_images(id_, image)
+ print("Done updating commit cache...")
if __name__ == "__main__":
- # print(request_recent_commits())
with Database() as db:
- print(db.get_curiouscat_qnas())
+ print(db.get_cached_tweets())
+
+ # update_cache()
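
Run directly, the module now prints the cached tweet list rather than the curiouscat Q&As; expected output shape (values illustrative):

#   $ python3 database.py
#   [('some tweet', 'https://nitter.example.com/exampleaccount/status/1234567890'), ...]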