author    jwansek <eddie.atten.ea29@gmail.com>  2023-09-03 20:57:35 +0100
committer jwansek <eddie.atten.ea29@gmail.com>  2023-09-03 20:57:35 +0100
commit    e484a4ecd182d806d004a0b5b9116683bc07217e
tree      edc225a17aaf7dc93000b9046c92656009a3de57
parent    c4ab716c20729a62f7b78b60029c27e0f166f41c
Added local nitter instance, fixed twitter caching using new method
-rwxr-xr-x  .gitignore                 2
-rw-r--r--  .gitmodules                4
-rwxr-xr-x  app.py                     5
-rwxr-xr-x  database.py              156
-rwxr-xr-x  docker-compose.yml        44
m---------  nitter/nitter              0
-rwxr-xr-x  services.py              115
-rwxr-xr-x  static/index.md            4
-rwxr-xr-x  templates/diary.html.j2    2

9 files changed, 211 insertions(+), 121 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5393c5f..2c678d3 100755
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,8 @@ static/images/random.jpg
static/zips/*.zip
.nfs*
static/images/Thumbs.db
+nitter/nitter.conf
+nitter/guest_accounts.json
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..1b086ba
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "nitter/nitter"]
+ path = nitter/nitter
+ url = git@github.com:zedeus/nitter.git
+ branch = guest_accounts
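The new submodule pins zedeus/nitter to its guest_accounts branch over an SSH remote, so a plain anonymous clone will not fetch it (git@github.com: URLs need a GitHub key; rewriting the URL to https://github.com/zedeus/nitter.git is the usual workaround). A minimal post-clone sketch, run from inside a checkout:

    import subprocess

    # Fetch and check out the pinned nitter submodule recorded in .gitmodules.
    subprocess.run(["git", "submodule", "update", "--init", "--recursive"], check=True)
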
diff --git a/app.py b/app.py
index 81cfad9..1be6e76 100755
--- a/app.py
+++ b/app.py
@@ -71,7 +71,7 @@ def index():
markdown = parser.parse_text(f.read())[0],
featured_thoughts = db.get_featured_thoughts(),
tweets = db.get_cached_tweets(7) + [("view all tweets...", db.get_my_twitter())],
- commits = db.get_cached_commits(since = datetime.datetime.now() - datetime.timedelta(days = 7)),
+ commits = db.get_cached_commits()[:7],
sidebar_img = get_sidebar_img(db)
)
@@ -85,7 +85,8 @@ def diary():
return flask.render_template(
"diary.html.j2",
**get_template_items("diary", db),
- diary = db.get_diary()
+ diary = db.get_diary(),
+ diary_account = db.config.get("twitter", "diary_account")
)
@app.route("/discord")
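Both app.py changes replace hardcoded behaviour: the index page now takes the seven newest cached commits rather than a seven-day window, and the diary template gets its account name from edaweb.conf instead of a baked-in handle. A stand-in sketch of the config read, assuming a [twitter] section with a diary_account key as the diff does (the value here is hypothetical):

    import configparser

    # edaweb.conf is parsed the same way services.py parses it below.
    config = configparser.ConfigParser(interpolation=None)
    config.read_string("[twitter]\ndiary_account = exampleuser\n")
    print(config.get("twitter", "diary_account"))  # -> exampleuser
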
diff --git a/database.py b/database.py
index 1877ab7..ea3835c 100755
--- a/database.py
+++ b/database.py
@@ -1,10 +1,10 @@
from urllib.parse import urlparse
from dataclasses import dataclass
-from github import Github
from lxml import html
import configparser
import curiouscat
import threading
+import services
import operator
import datetime
import requests
@@ -40,6 +40,7 @@ class Database:
return self
def __exit__(self, type, value, traceback):
+ self.__connection.commit()
self.__connection.close()
def get_header_links(self):
@@ -132,24 +133,18 @@ class Database:
""")
return cursor.fetchall()
- def get_cached_tweets(self, numToGet = None, recurse = True):
+ def get_cached_tweets(self, numToGet = None):
with self.__connection.cursor() as cursor:
+            sql = "SELECT tweet, tweet_id, account FROM diary WHERE account = %s ORDER BY tweeted_at DESC"
+ args = (self.config.get("twitter", "main_account"), )
if numToGet is not None:
- cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC LIMIT %s;", (numToGet, ))
+ sql += " LIMIT %s;"
+ args = (self.config.get("twitter", "main_account"), numToGet)
else:
- cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC;")
- if recurse:
- threading.Thread(target = update_cache).start()
- return list(cursor.fetchall())
+ sql += ";"
+ cursor.execute(sql, args)
- def update_twitter_cache(self, requested):
- with self.__connection.cursor() as cursor:
- cursor.execute("SELECT DISTINCT url FROM twitterCache;")
- urls = [i[0] for i in cursor.fetchall()]
- for url, text in requested:
- if url not in urls:
- cursor.execute("INSERT INTO twitterCache (text, url) VALUES (%s, %s);", (text, url))
- self.__connection.commit()
+ return [(i[0], "https://%s/%s/status/%d" % (self.config.get("nitter", "outsideurl"), i[2], i[1])) for i in cursor.fetchall()]
def get_cached_commits(self, since = None, recurse = True):
with self.__connection.cursor() as cursor:
@@ -214,29 +209,38 @@ class Database:
self.__connection.commit()
return id_
- def append_diary(self, tweet_id, tweeted_at, replying_to, tweet):
+ def append_diary(self, tweet_id, tweeted_at, replying_to, tweet, account):
+ if tweet is None:
+ tweet = "(Image only)"
with self.__connection.cursor() as cursor:
- cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet))
- print("Appended diary with tweet '%s'" % tweet)
+ cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet, account))
+ self.__connection.commit()
+
+ print("Appended diary with tweet " + tweet + " @ " + str(tweeted_at))
def append_diary_images(self, tweet_id, imurl):
with self.__connection.cursor() as cursor:
cursor.execute("INSERT INTO diaryimages (tweet_id, link) VALUES (%s, %s);", (tweet_id, imurl))
+ self.__connection.commit()
+
- def get_diary(self):
+ def get_diary(self, account = None):
threading.Thread(target = update_cache).start()
out = {}
+ if account is None:
+ account = self.get_my_diary_twitter()
+
with self.__connection.cursor() as cursor:
# cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL ORDER BY tweeted_at DESC;")
# attempt to ignore curiouscat automatic tweets by comparing with the q&a table
- cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) ORDER BY tweeted_at DESC;")
+ cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) AND account = %s ORDER BY tweeted_at DESC;", (account, ))
for tweet_id, tweeted_at, tweet_text in cursor.fetchall():
# print(tweet_id, tweeted_at, tweet_text)
out[tweeted_at] = [{
"text": tweet_text,
"images": self.get_diary_image(tweet_id),
"link": "https://%s/%s/status/%d" % (
- self.config.get("nitter", "domain"),
+ self.config.get("nitter", "outsideurl"),
self.get_my_diary_twitter(),
tweet_id
)
@@ -268,46 +272,17 @@ class Database:
"text": out[1],
"images": self.get_diary_image(id_),
"link": "https://%s/%s/status/%d" % (
- self.config.get("nitter", "domain"), self.get_my_diary_twitter(), id_
+ self.config.get("nitter", "outsideurl"), self.get_my_diary_twitter(), id_
)
}, id_
- def get_newest_diary_tweet_id(self):
+ def get_newest_diary_tweet_id(self, account = None):
+ if account is None:
+ account = self.get_my_diary_twitter()
with self.__connection.cursor() as cursor:
- cursor.execute("SELECT MAX(tweet_id) FROM diary;")
+ cursor.execute("SELECT MAX(tweet_id) FROM diary WHERE account = %s;", (account, ))
return cursor.fetchone()[0]
- def fetch_diary(self):
- twitteracc = self.get_my_diary_twitter()
-
- twitter = twython.Twython(
- self.config.get("twitter", "app_key"),
- access_token = self.config.get("twitter", "oauth_2_token")
- )
-
- for tweet in twitter.search(
- q = "(from:%s)" % twitteracc, since_id = self.get_newest_diary_tweet_id(),
- tweet_mode = 'extended'
- )["statuses"]:
-
- tweet_id = tweet["id"]
- tweeted_at = datetime.datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
- replying_to = tweet["in_reply_to_status_id"]
- tweet_text = re.sub(r"https://t\.co/\w{10}", "", tweet["full_text"], 0, re.MULTILINE)
-
- if tweet["in_reply_to_screen_name"] == twitteracc or tweet["in_reply_to_screen_name"] is None:
- self.append_diary(tweet_id, tweeted_at, replying_to, tweet_text)
-
- if "media" in tweet["entities"].keys():
- associated_images = [
- i["media_url_https"].replace("pbs.twimg.com", self.config.get("nitter", "domain") + "/pic")
- for i in tweet["entities"]["media"]
- ]
- for im in associated_images:
- self.append_diary_images(tweet_id, im)
-
- self.__connection.commit()
-
def get_curiouscat_username(self):
with self.__connection.cursor() as cursor:
cursor.execute("SELECT link FROM headerLinks WHERE name = 'curiouscat';")
@@ -339,7 +314,7 @@ class Database:
return sorted(cursor.fetchall(), key = operator.itemgetter(2), reverse = True)
def update_cache():
- # print("updating cache...")
+ print("Updating cache...")
with Database() as db:
db.append_curiouscat_qnas(
curiouscat.get_all_curiouscat_qnas_before(
@@ -347,58 +322,25 @@ def update_cache():
db.get_biggest_curiouscat_timestamp()
)
)
- db.fetch_diary()
- db.update_twitter_cache(request_recent_tweets(10000))
- # print("Done updating twitter cache...")
- db.update_commit_cache(request_recent_commits(since = db.get_last_commit_time()))
- # print("Done updating commit cache...")
-
-CONFIG = configparser.ConfigParser()
-CONFIG.read("edaweb.conf")
-
-def request_recent_tweets(numToGet):
- tweets = []
- domain = "http://" + CONFIG.get("nitter", "domain")
- with Database() as db:
- for title, url in db.get_header_links():
- if title == "twitter":
- break
- tree = html.fromstring(requests.get(url).content)
- for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
- if i > 0:
- tweets.append((
- domain + tweetUrlElement.get("href"),
- tweetUrlElement.getparent().find_class("tweet-content media-body")[0].text
- ))
- if len(tweets) >= numToGet:
- break
- return tweets
-
-def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
- g = Github(CONFIG.get("github", "access_code"))
- out = []
- for repo in g.get_user().get_repos():
- # print(repo.name, list(repo.get_branches()))
- try:
- for commit in repo.get_commits(since = since):
- out.append({
- "repo": repo.name,
- "message": commit.commit.message,
- "url": commit.html_url,
- "datetime": commit.commit.author.date,
- "stats": {
- "additions": commit.stats.additions,
- "deletions": commit.stats.deletions,
- "total": commit.stats.total
- }
- })
- except Exception as e:
- print(e)
-
- return sorted(out, key = lambda a: a["datetime"], reverse = True)
+ print("Finished adding curiouscat...")
+ db.update_commit_cache(services.request_recent_commits(since = db.get_last_commit_time()))
+ print("Finished adding github commits...")
+ for id_, dt, replying_to, text, username, images in services.scrape_nitter(db.get_my_diary_twitter(), db.get_newest_diary_tweet_id()):
+ db.append_diary(id_, dt, replying_to, text, username)
+ for image in images:
+ db.append_diary_images(id_, image)
+ print("Finished getting diary tweets...")
+ for id_, dt, replying_to, text, username, images in services.scrape_nitter(
+ db.config.get("twitter", "main_account"), db.get_newest_diary_tweet_id(db.config.get("twitter", "main_account"))
+ ):
+ db.append_diary(id_, dt, replying_to, text, username)
+ for image in images:
+ db.append_diary_images(id_, image)
+        print("Finished getting main account tweets...")
if __name__ == "__main__":
- # print(request_recent_commits())
with Database() as db:
- print(db.get_curiouscat_qnas())
+ print(db.get_cached_tweets())
+
+ # update_cache()
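The rewritten cache drops the old twitterCache table entirely: tweets are stored in the diary table keyed by account, and public links are rebuilt on the fly from the numeric tweet id and the [nitter] outsideurl setting. A minimal sketch of that URL construction, mirroring the format string in get_cached_tweets() (the host is the instance linked from index.md; the account name is a stand-in):

    # "exampleuser" is hypothetical; the tweet id is the one used in services.py's test code.
    def tweet_url(outside_url, account, tweet_id):
        return "https://%s/%s/status/%d" % (outside_url, account, tweet_id)

    print(tweet_url("nitter.eda.gay", "exampleuser", 1694624180405260291))
    # -> https://nitter.eda.gay/exampleuser/status/1694624180405260291
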
diff --git a/docker-compose.yml b/docker-compose.yml
index c8a70c1..2a78e22 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -37,6 +37,47 @@ services:
- mariadb:mysql
restart: unless-stopped
+ nitter:
+ build:
+ context: ./nitter/nitter
+ dockerfile: Dockerfile
+ image: jwansek/nitter:latest
+ ports:
+      - "127.0.0.1:7777:7777" # Replace with "7777:7777" if you don't use a reverse proxy
+ volumes:
+ - ./nitter/nitter.conf:/src/nitter.conf:Z,ro
+ - ./nitter/guest_accounts.json:/src/guest_accounts.json:Z,ro
+ depends_on:
+ - nitter-redis
+ healthcheck:
+      test: wget -nv --tries=1 --spider http://127.0.0.1:7777/Jack/status/20 || exit 1
+ interval: 30s
+ timeout: 5s
+ retries: 2
+ user: "998:998"
+ read_only: true
+ security_opt:
+ - no-new-privileges:true
+ cap_drop:
+ - ALL
+
+ nitter-redis:
+ image: redis:6-alpine
+ command: redis-server --save 60 1 --loglevel warning
+ volumes:
+ - nitter-redis:/data
+ healthcheck:
+ test: redis-cli ping
+ interval: 30s
+ timeout: 5s
+ retries: 2
+ user: "999:1000"
+ read_only: true
+ security_opt:
+ - no-new-privileges:true
+ cap_drop:
+ - ALL
+
networks:
edaweb-net:
external:
@@ -45,3 +86,6 @@ networks:
db-network:
external:
name: mariadb
+
+volumes:
+ nitter-redis:
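The nitter container binds only to localhost for a reverse proxy to front, and its healthcheck spiders a known status page rather than just pinging the port. A host-side probe of the same endpoint, assuming the 127.0.0.1:7777 binding above:

    import requests

    # Same check as the compose healthcheck, but run from the host side.
    r = requests.get("http://127.0.0.1:7777/Jack/status/20", timeout=5)
    print("nitter ok" if r.ok else "nitter unhealthy: HTTP %d" % r.status_code)
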
diff --git a/nitter/nitter b/nitter/nitter
new file mode 160000
+Subproject commit fcd74e8048362fcf8284871ee067099e8de28a8
diff --git a/services.py b/services.py
index 77361fa..0a10d34 100755
--- a/services.py
+++ b/services.py
@@ -1,9 +1,11 @@
from dataclasses import dataclass
from io import StringIO
from lxml import html, etree
+from github import Github
import multiprocessing
import pihole as ph
import qbittorrent
+import configparser
import requests
import datetime
import urllib
@@ -13,10 +15,11 @@ import random
import queue
import json
import time
-import app
import os
theLastId = 0
+CONFIG = configparser.ConfigParser(interpolation = None)
+CONFIG.read("edaweb.conf")
def humanbytes(B):
'Return the given bytes as a human friendly KB, MB, GB, or TB string'
@@ -51,7 +54,7 @@ def timeout(func):
t = multiprocessing.Process(target = runFunc, args = (returnVan, func))
t.start()
- t.join(timeout = app.CONFIG["servicetimeout"].getint("seconds"))
+ t.join(timeout = CONFIG["servicetimeout"].getint("seconds"))
# print("Request took:", time.time() - ti)
try:
@@ -64,7 +67,7 @@ def timeout(func):
@timeout
def get_docker_stats():
- client = docker.DockerClient(base_url = "tcp://%s:%s" % (app.CONFIG["docker"]["url"], app.CONFIG["docker"]["port"]))
+ client = docker.DockerClient(base_url = "tcp://%s:%s" % (CONFIG["docker"]["url"], CONFIG["docker"]["port"]))
return {
container.name: container.status
for container in client.containers.list(all = True)
@@ -76,8 +79,8 @@ def get_qbit_stats():
numtorrents = 0
bytes_dl = 0
bytes_up = 0
- qb = qbittorrent.Client('http://%s:%s/' % (app.CONFIG["qbittorrent"]["url"], app.CONFIG["qbittorrent"]["port"]))
- qb.login(username = app.CONFIG["qbittorrent"]["user"], password = app.CONFIG["qbittorrent"]["passwd"])
+ qb = qbittorrent.Client('http://%s:%s/' % (CONFIG["qbittorrent"]["url"], CONFIG["qbittorrent"]["port"]))
+ qb.login(username = CONFIG["qbittorrent"]["user"], password = CONFIG["qbittorrent"]["passwd"])
for torrent in qb.torrents():
numtorrents += 1
@@ -94,9 +97,9 @@ def get_qbit_stats():
@timeout
def get_trans_stats():
client = clutch.client.Client(
- address = "http://%s:%s/transmission/rpc" % (app.CONFIG["transmission"]["url"], app.CONFIG["transmission"]["port"]),
- # username = app.CONFIG["transmission"]["username"],
- # password = app.CONFIG["transmission"]["password"]
+ address = "http://%s:%s/transmission/rpc" % (CONFIG["transmission"]["url"], CONFIG["transmission"]["port"]),
+ # username = CONFIG["transmission"]["username"],
+ # password = CONFIG["transmission"]["password"]
)
stats = json.loads(client.session.stats().json())
active_for = datetime.timedelta(seconds = stats["arguments"]["cumulative_stats"]["seconds_active"])
@@ -110,7 +113,7 @@ def get_trans_stats():
@timeout
def get_pihole_stats():
- pihole = ph.PiHole(app.CONFIG["pihole"]["url"])
+ pihole = ph.PiHole(CONFIG["pihole"]["url"])
return {
"status": pihole.status,
"queries": pihole.total_queries,
@@ -230,5 +233,97 @@ def link_deleted(url):
text = requests.get(url).text
return text[text.find("<title>") + 7 : text.find("</title>")] in ["Error | nitter", "イラストコミュニケーションサービス[pixiv]"]
+def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
+ g = Github(CONFIG.get("github", "access_code"))
+ out = []
+ for repo in g.get_user().get_repos():
+ # print(repo.name, list(repo.get_branches()))
+ try:
+ for commit in repo.get_commits(since = since):
+ out.append({
+ "repo": repo.name,
+ "message": commit.commit.message,
+ "url": commit.html_url,
+ "datetime": commit.commit.author.date,
+ "stats": {
+ "additions": commit.stats.additions,
+ "deletions": commit.stats.deletions,
+ "total": commit.stats.total
+ }
+ })
+ except Exception as e:
+ print(e)
+
+ return sorted(out, key = lambda a: a["datetime"], reverse = True)
+
+def scrape_nitter(username, get_until:int):
+ new_tweets = []
+ nitter_url = CONFIG.get("nitter", "internalurl")
+ nitter_port = CONFIG.getint("nitter", "internalport")
+ scrape_new_pages = True
+ url = "http://%s:%d/%s" % (nitter_url, nitter_port, username)
+
+ while scrape_new_pages:
+ tree = html.fromstring(requests.get(url).content)
+ for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
+ if i > 0 and tweetUrlElement.get("href").split("/")[1] == username:
+ id_ = int(urllib.parse.urlparse(tweetUrlElement.get("href")).path.split("/")[-1])
+ tweet_link = "http://%s:%d%s" % (nitter_url, nitter_port, tweetUrlElement.get("href"))
+
+ if id_ == get_until:
+ scrape_new_pages = False
+ break
+
+ try:
+ dt, replying_to, text, images = parse_tweet(tweet_link)
+ new_tweets.append((id_, dt, replying_to, text, username, images))
+ print(dt, text)
+ except IndexError:
+ print("Couldn't get any more tweets")
+ scrape_new_pages = False
+ break
+
+
+ try:
+ cursor = tree.xpath('//*[@class="show-more"]/a')[0].get("href")
+ except IndexError:
+ # no more elements
+ break
+ url = "http://%s:%d/%s%s" % (nitter_url, nitter_port, username, cursor)
+
+ return new_tweets
+
+def parse_tweet(tweet_url):
+ # print(tweet_url)
+ tree = html.fromstring(requests.get(tweet_url).content)
+ # with open("2images.html", "r") as f:
+ # tree = html.fromstring(f.read())
+
+ main_tweet_elem = tree.xpath('//*[@class="main-tweet"]')[0]
+
+ dt_str = main_tweet_elem.xpath('//*[@class="tweet-published"]')[0].text
+ dt = datetime.datetime.strptime(dt_str.replace("Â", ""), "%b %d, %Y · %I:%M %p UTC")
+ text = tree.xpath('//*[@class="main-tweet"]/div/div/div[2]')[0].text
+ replying_to_elems = tree.xpath('//*[@class="before-tweet thread-line"]/div/a')
+ if replying_to_elems != []:
+ replying_to = int(urllib.parse.urlparse(replying_to_elems[-1].get("href")).path.split("/")[-1])
+ else:
+ replying_to = None
+
+ images = []
+ images_elems = tree.xpath('//*[@class="main-tweet"]/div/div/div[3]/div/div/a/img')
+ for image_elem in images_elems:
+ images.append("https://" + CONFIG.get("nitter", "outsideurl") + urllib.parse.urlparse(image_elem.get("src")).path)
+
+ return dt, replying_to, text, images
+
+
+
+
+
if __name__ == "__main__":
- print(get_trans_stats())
+ # print(get_trans_stats())
+
+ print(scrape_nitter(CONFIG.get("twitter", "diary_account"), 1694624180405260291))
+
+ # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
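scrape_nitter() pages through a user's timeline on the local instance, following each page's show-more cursor until it either reaches the get_until id or runs out of pages; parse_tweet() then pulls the timestamp, reply target, text, and image links out of a single status page. A sketch of a caller, assuming the repo's edaweb.conf is present and using a hypothetical account name (get_until=0 never matches a real id, so this walks the whole timeline):

    import services

    for id_, dt, replying_to, text, username, images in services.scrape_nitter("exampleuser", 0):
        # text can be None for image-only tweets; database.append_diary() substitutes a placeholder.
        print(id_, dt, replying_to, len(images), (text or "")[:40])
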
diff --git a/static/index.md b/static/index.md
index 6e8a1db..535f8c8 100755
--- a/static/index.md
+++ b/static/index.md
@@ -1,7 +1,7 @@
![(you)s](https://eda.gay/img/braindamage.png)
## haiiiiiii
-my name is eden and im a 21yo (boymoder) computer science undergraduate. i made my own website to encourage others to do so too.
+my name is eden and im a 22yo (boymoder) computer science student. i made my own website to encourage others to do so too.
i'll post my thoughts on here sometimes, and use this site to link to other stuff i host [more about me](/thought?id=2).
[click here for a random image of lio fotia](https://eda.gay/random?tags=lio_fotia)
@@ -12,6 +12,8 @@ i'll post my thoughts on here sometimes, and use this site to link to other stuf
## FOSS alternative services
+(some are currently down as i am temporarily hosting on a VPS instead of my [normal server](https://wiki.eda.gay))
+
- [nextcloud - dropbox (+ much more!) alternative](https://nc.eda.gay)
- [invidious - youtube alternative](https://invidious.eda.gay)
- [nitter - alternative twitter frontend](https://nitter.eda.gay)
diff --git a/templates/diary.html.j2 b/templates/diary.html.j2
index d7c363b..f6604f7 100755
--- a/templates/diary.html.j2
+++ b/templates/diary.html.j2
@@ -1,7 +1,7 @@
{% extends "template.html.j2" %}
{% block content %}
<h4>this page might not be up-to-date if my diary account is search banned</h4>
- <p>if in doubt check my <a href="https://twitter.com/FORMER_SHOTA">diary account</a> <br> <s>serves me right for using the official API instead of HTML scraping like i did with the <a href="/">recent tweets</a> thing</s> <br> <a href="https://shadowban.yuzurisa.com/FORMER_SHOTA">check if i'm currently search banned</a></p>
+ <p>if in doubt check my <a href="https://twitter.com/{{ diary_account }}">diary account</a> <br> <a href="https://shadowban.yuzurisa.com/{{ diary_account }}">check if i'm currently search banned</a></p>
<dl>
{% for dt, entries in diary.items() %}
<dt><a href="{{ entries[0]['link'] }}">{{ dt }}</a></dt>