-rwxr-xr-x   .gitignore                 2
-rw-r--r--   .gitmodules                4
-rwxr-xr-x   app.py                     5
-rwxr-xr-x   database.py              156
-rwxr-xr-x   docker-compose.yml        44
m---------   nitter/nitter              0
-rwxr-xr-x   services.py              115
-rwxr-xr-x   templates/diary.html.j2    2
8 files changed, 208 insertions, 120 deletions
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,8 @@
 static/images/random.jpg
 static/zips/*.zip
 .nfs*
 static/images/Thumbs.db
+nitter/nitter.conf
+nitter/guest_accounts.json
 # Byte-compiled / optimized / DLL files
 __pycache__/

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..1b086ba
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "nitter/nitter"]
+	path = nitter/nitter
+	url = git@github.com:zedeus/nitter.git
+	branch = guest_accounts

diff --git a/app.py b/app.py
@@ -71,7 +71,7 @@ def index():
         markdown = parser.parse_text(f.read())[0],
         featured_thoughts = db.get_featured_thoughts(),
         tweets = db.get_cached_tweets(7) + [("view all tweets...", db.get_my_twitter())],
-        commits = db.get_cached_commits(since = datetime.datetime.now() - datetime.timedelta(days = 7)),
+        commits = db.get_cached_commits()[:7],
         sidebar_img = get_sidebar_img(db)
     )
 
@@ -85,7 +85,8 @@ def diary():
     return flask.render_template(
         "diary.html.j2",
         **get_template_items("diary", db),
-        diary = db.get_diary()
+        diary = db.get_diary(),
+        diary_account = db.config.get("twitter", "diary_account")
     )
 
 @app.route("/discord")

diff --git a/database.py b/database.py
index 1877ab7..ea3835c 100755
--- a/database.py
+++ b/database.py
@@ -1,10 +1,10 @@
 from urllib.parse import urlparse
 from dataclasses import dataclass
-from github import Github
 from lxml import html
 import configparser
 import curiouscat
 import threading
+import services
 import operator
 import datetime
 import requests
@@ -40,6 +40,7 @@ class Database:
         return self
 
     def __exit__(self, type, value, traceback):
+        self.__connection.commit()
         self.__connection.close()
 
     def get_header_links(self):
@@ -132,24 +133,18 @@ class Database:
         """)
         return cursor.fetchall()
 
-    def get_cached_tweets(self, numToGet = None, recurse = True):
+    def get_cached_tweets(self, numToGet = None):
         with self.__connection.cursor() as cursor:
+            sql = "SELECT tweet, tweet_id, account FROM diary WHERE account = %s ORDER BY tweeted_at"
+            args = (self.config.get("twitter", "main_account"), )
             if numToGet is not None:
-                cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC LIMIT %s;", (numToGet, ))
+                sql += " LIMIT %s;"
+                args = (self.config.get("twitter", "main_account"), numToGet)
             else:
-                cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC;")
-            if recurse:
-                threading.Thread(target = update_cache).start()
-            return list(cursor.fetchall())
+                sql += ";"
+            cursor.execute(sql, args)
 
-    def update_twitter_cache(self, requested):
-        with self.__connection.cursor() as cursor:
-            cursor.execute("SELECT DISTINCT url FROM twitterCache;")
-            urls = [i[0] for i in cursor.fetchall()]
-            for url, text in requested:
-                if url not in urls:
-                    cursor.execute("INSERT INTO twitterCache (text, url) VALUES (%s, %s);", (text, url))
-            self.__connection.commit()
+            return [(i[0], "https://%s/%s/status/%d" % (self.config.get("nitter", "outsideurl"), i[2], i[1])) for i in cursor.fetchall()]
 
     def get_cached_commits(self, since = None, recurse = True):
         with self.__connection.cursor() as cursor:
@@ -214,29 +209,38 @@ class Database:
         self.__connection.commit()
         return id_
 
-    def append_diary(self, tweet_id, tweeted_at, replying_to, tweet):
+    def append_diary(self, tweet_id, tweeted_at, replying_to, tweet, account):
+        if tweet is None:
+            tweet = "(Image only)"
         with self.__connection.cursor() as cursor:
-            cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet))
-            print("Appended diary with tweet '%s'" % tweet)
+            cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet, account))
+            self.__connection.commit()
+
+        print("Appended diary with tweet " + tweet + " @ " + str(tweeted_at))
 
     def append_diary_images(self, tweet_id, imurl):
         with self.__connection.cursor() as cursor:
             cursor.execute("INSERT INTO diaryimages (tweet_id, link) VALUES (%s, %s);", (tweet_id, imurl))
+            self.__connection.commit()
 
-    def get_diary(self):
+    def get_diary(self, account = None):
         threading.Thread(target = update_cache).start()
         out = {}
+        if account is None:
+            account = self.get_my_diary_twitter()
+
         with self.__connection.cursor() as cursor:
             # cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL ORDER BY tweeted_at DESC;")
             # attempt to ignore curiouscat automatic tweets by comparing with the q&a table
-            cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) ORDER BY tweeted_at DESC;")
+            cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) AND account = %s ORDER BY tweeted_at DESC;", (account, ))
             for tweet_id, tweeted_at, tweet_text in cursor.fetchall():
                 # print(tweet_id, tweeted_at, tweet_text)
                 out[tweeted_at] = [{
                     "text": tweet_text,
                     "images": self.get_diary_image(tweet_id),
                     "link": "https://%s/%s/status/%d" % (
-                        self.config.get("nitter", "domain"),
+                        self.config.get("nitter", "outsideurl"),
                         self.get_my_diary_twitter(),
                         tweet_id
                     )
@@ -268,46 +272,17 @@ class Database:
                 "text": out[1],
                 "images": self.get_diary_image(id_),
                 "link": "https://%s/%s/status/%d" % (
-                    self.config.get("nitter", "domain"), self.get_my_diary_twitter(), id_
+                    self.config.get("nitter", "outsideurl"), self.get_my_diary_twitter(), id_
                 )
             }, id_
 
-    def get_newest_diary_tweet_id(self):
+    def get_newest_diary_tweet_id(self, account = None):
+        if account is None:
+            account = self.get_my_diary_twitter()
         with self.__connection.cursor() as cursor:
-            cursor.execute("SELECT MAX(tweet_id) FROM diary;")
+            cursor.execute("SELECT MAX(tweet_id) FROM diary WHERE account = %s;", (account, ))
             return cursor.fetchone()[0]
 
-    def fetch_diary(self):
-        twitteracc = self.get_my_diary_twitter()
-
-        twitter = twython.Twython(
-            self.config.get("twitter", "app_key"),
-            access_token = self.config.get("twitter", "oauth_2_token")
-        )
-
-        for tweet in twitter.search(
-            q = "(from:%s)" % twitteracc, since_id = self.get_newest_diary_tweet_id(),
-            tweet_mode = 'extended'
-        )["statuses"]:
-
-            tweet_id = tweet["id"]
-            tweeted_at = datetime.datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
-            replying_to = tweet["in_reply_to_status_id"]
-            tweet_text = re.sub(r"https://t\.co/\w{10}", "", tweet["full_text"], 0, re.MULTILINE)
-
-            if tweet["in_reply_to_screen_name"] == twitteracc or tweet["in_reply_to_screen_name"] is None:
-                self.append_diary(tweet_id, tweeted_at, replying_to, tweet_text)
-
-                if "media" in tweet["entities"].keys():
-                    associated_images = [
-                        i["media_url_https"].replace("pbs.twimg.com", self.config.get("nitter", "domain") + "/pic")
-                        for i in tweet["entities"]["media"]
-                    ]
-                    for im in associated_images:
-                        self.append_diary_images(tweet_id, im)
-
-        self.__connection.commit()
-
     def get_curiouscat_username(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT link FROM headerLinks WHERE name = 'curiouscat';")
@@ -339,7 +314,7 @@ class Database:
         return sorted(cursor.fetchall(), key = operator.itemgetter(2), reverse = True)
 
 def update_cache():
-    # print("updating cache...")
+    print("Updating cache...")
     with Database() as db:
         db.append_curiouscat_qnas(
             curiouscat.get_all_curiouscat_qnas_before(
@@ -347,58 +322,25 @@ def update_cache():
                 db.get_biggest_curiouscat_timestamp()
             )
         )
-        db.fetch_diary()
-        db.update_twitter_cache(request_recent_tweets(10000))
-        # print("Done updating twitter cache...")
-        db.update_commit_cache(request_recent_commits(since = db.get_last_commit_time()))
-        # print("Done updating commit cache...")
-
-CONFIG = configparser.ConfigParser()
-CONFIG.read("edaweb.conf")
-
-def request_recent_tweets(numToGet):
-    tweets = []
-    domain = "http://" + CONFIG.get("nitter", "domain")
-    with Database() as db:
-        for title, url in db.get_header_links():
-            if title == "twitter":
-                break
-    tree = html.fromstring(requests.get(url).content)
-    for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
-        if i > 0:
-            tweets.append((
-                domain + tweetUrlElement.get("href"),
-                tweetUrlElement.getparent().find_class("tweet-content media-body")[0].text
-            ))
-        if len(tweets) >= numToGet:
-            break
-    return tweets
-
-def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
-    g = Github(CONFIG.get("github", "access_code"))
-    out = []
-    for repo in g.get_user().get_repos():
-        # print(repo.name, list(repo.get_branches()))
-        try:
-            for commit in repo.get_commits(since = since):
-                out.append({
-                    "repo": repo.name,
-                    "message": commit.commit.message,
-                    "url": commit.html_url,
-                    "datetime": commit.commit.author.date,
-                    "stats": {
-                        "additions": commit.stats.additions,
-                        "deletions": commit.stats.deletions,
-                        "total": commit.stats.total
-                    }
-                })
-        except Exception as e:
-            print(e)
-
-    return sorted(out, key = lambda a: a["datetime"], reverse = True)
+        print("Finished adding curiouscat...")
+
+        db.update_commit_cache(services.request_recent_commits(since = db.get_last_commit_time()))
+        print("Finished adding github commits...")
+
+        for id_, dt, replying_to, text, username, images in services.scrape_nitter(db.get_my_diary_twitter(), db.get_newest_diary_tweet_id()):
+            db.append_diary(id_, dt, replying_to, text, username)
+            for image in images:
+                db.append_diary_images(id_, image)
+        print("Finished getting diary tweets...")
+
+        for id_, dt, replying_to, text, username, images in services.scrape_nitter(
+            db.config.get("twitter", "main_account"), db.get_newest_diary_tweet_id(db.config.get("twitter", "main_account"))
+        ):
+            db.append_diary(id_, dt, replying_to, text, username)
+            for image in images:
+                db.append_diary_images(id_, image)
+        print("Done updating commit cache...")
 
 if __name__ == "__main__":
-    # print(request_recent_commits())
     with Database() as db:
-        print(db.get_curiouscat_qnas())
+        print(db.get_cached_tweets())
+
+    # update_cache()
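Note that append_diary() now inserts five values and every diary query filters on account, so this change implicitly adds an account column to the diary table. The migration itself is not in the diff; a minimal sketch of the assumed shape, for orientation only:

# Assumed shape of the diary table after this change -- the real DDL (exact
# types, keys, and the diaryimages table) is not shown anywhere in the diff.
DIARY_DDL = """
CREATE TABLE diary (
    tweet_id    BIGINT UNSIGNED PRIMARY KEY,
    tweeted_at  DATETIME,
    replying_to BIGINT UNSIGNED NULL,
    tweet       TEXT,
    account     VARCHAR(64)  -- new: which twitter account the row belongs to
);
"""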
diff --git a/docker-compose.yml b/docker-compose.yml
index c8a70c1..2a78e22 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -37,6 +37,47 @@ services:
       - mariadb:mysql
     restart: unless-stopped
 
+  nitter:
+    build:
+      context: ./nitter/nitter
+      dockerfile: Dockerfile
+    image: jwansek/nitter:latest
+    ports:
+      - "127.0.0.1:7777:7777" # Replace with "8080:8080" if you don't use a reverse proxy
+    volumes:
+      - ./nitter/nitter.conf:/src/nitter.conf:Z,ro
+      - ./nitter/guest_accounts.json:/src/guest_accounts.json:Z,ro
+    depends_on:
+      - nitter-redis
+    healthcheck:
+      test: wget -nv --tries=1 --spider http://127.0.0.1:8080/Jack/status/20 || exit 1
+      interval: 30s
+      timeout: 5s
+      retries: 2
+    user: "998:998"
+    read_only: true
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+
+  nitter-redis:
+    image: redis:6-alpine
+    command: redis-server --save 60 1 --loglevel warning
+    volumes:
+      - nitter-redis:/data
+    healthcheck:
+      test: redis-cli ping
+      interval: 30s
+      timeout: 5s
+      retries: 2
+    user: "999:1000"
+    read_only: true
+    security_opt:
+      - no-new-privileges:true
+    cap_drop:
+      - ALL
+
 networks:
   edaweb-net:
     external:
@@ -45,3 +86,6 @@ networks:
   db-network:
     external:
       name: mariadb
+
+volumes:
+  nitter-redis:

diff --git a/nitter/nitter b/nitter/nitter
new file mode 160000
+Subproject commit fcd74e8048362fcf8284871ee067099e8de28a8
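With the nitter and nitter-redis services above, the scraper reaches nitter on the host-mapped port (note the container healthcheck still probes port 8080 in-container, which depends on the untracked nitter.conf, while the mapping exposes 7777). A quick sanity check before running update_cache(), assuming the 127.0.0.1:7777 mapping; the /Jack/status/20 path mirrors the healthcheck:

import requests

# Assumes the "127.0.0.1:7777:7777" mapping from docker-compose.yml above.
resp = requests.get("http://127.0.0.1:7777/Jack/status/20", timeout = 5)
print(resp.status_code)  # expect 200 once nitter and its guest accounts work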
diff --git a/services.py b/services.py
index 77361fa..0a10d34 100755
--- a/services.py
+++ b/services.py
@@ -1,9 +1,11 @@
 from dataclasses import dataclass
 from io import StringIO
 from lxml import html, etree
+from github import Github
 import multiprocessing
 import pihole as ph
 import qbittorrent
+import configparser
 import requests
 import datetime
 import urllib
@@ -13,10 +15,11 @@
 import random
 import queue
 import json
 import time
-import app
 import os
 
 theLastId = 0
+CONFIG = configparser.ConfigParser(interpolation = None)
+CONFIG.read("edaweb.conf")
 
 def humanbytes(B):
     'Return the given bytes as a human friendly KB, MB, GB, or TB string'
@@ -51,7 +54,7 @@ def timeout(func):
 
         t = multiprocessing.Process(target = runFunc, args = (returnVan, func))
         t.start()
-        t.join(timeout = app.CONFIG["servicetimeout"].getint("seconds"))
+        t.join(timeout = CONFIG["servicetimeout"].getint("seconds"))
         # print("Request took:", time.time() - ti)
 
         try:
@@ -64,7 +67,7 @@
 
 @timeout
 def get_docker_stats():
-    client = docker.DockerClient(base_url = "tcp://%s:%s" % (app.CONFIG["docker"]["url"], app.CONFIG["docker"]["port"]))
+    client = docker.DockerClient(base_url = "tcp://%s:%s" % (CONFIG["docker"]["url"], CONFIG["docker"]["port"]))
     return {
         container.name: container.status
         for container in client.containers.list(all = True)
@@ -76,8 +79,8 @@ def get_qbit_stats():
     numtorrents = 0
     bytes_dl = 0
     bytes_up = 0
-    qb = qbittorrent.Client('http://%s:%s/' % (app.CONFIG["qbittorrent"]["url"], app.CONFIG["qbittorrent"]["port"]))
-    qb.login(username = app.CONFIG["qbittorrent"]["user"], password = app.CONFIG["qbittorrent"]["passwd"])
+    qb = qbittorrent.Client('http://%s:%s/' % (CONFIG["qbittorrent"]["url"], CONFIG["qbittorrent"]["port"]))
+    qb.login(username = CONFIG["qbittorrent"]["user"], password = CONFIG["qbittorrent"]["passwd"])
 
     for torrent in qb.torrents():
         numtorrents += 1
@@ -94,9 +97,9 @@ def get_qbit_stats():
 @timeout
 def get_trans_stats():
     client = clutch.client.Client(
-        address = "http://%s:%s/transmission/rpc" % (app.CONFIG["transmission"]["url"], app.CONFIG["transmission"]["port"]),
-        # username = app.CONFIG["transmission"]["username"],
-        # password = app.CONFIG["transmission"]["password"]
+        address = "http://%s:%s/transmission/rpc" % (CONFIG["transmission"]["url"], CONFIG["transmission"]["port"]),
+        # username = CONFIG["transmission"]["username"],
+        # password = CONFIG["transmission"]["password"]
     )
     stats = json.loads(client.session.stats().json())
     active_for = datetime.timedelta(seconds = stats["arguments"]["cumulative_stats"]["seconds_active"])
@@ -110,7 +113,7 @@
 
 @timeout
 def get_pihole_stats():
-    pihole = ph.PiHole(app.CONFIG["pihole"]["url"])
+    pihole = ph.PiHole(CONFIG["pihole"]["url"])
     return {
         "status": pihole.status,
         "queries": pihole.total_queries,
@@ -230,5 +233,97 @@ def link_deleted(url):
     text = requests.get(url).text
     return text[text.find("<title>") + 7 : text.find("</title>")] in ["Error | nitter", "イラストコミュニケーションサービス[pixiv]"]
 
+def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
+    g = Github(CONFIG.get("github", "access_code"))
+    out = []
+    for repo in g.get_user().get_repos():
+        # print(repo.name, list(repo.get_branches()))
+        try:
+            for commit in repo.get_commits(since = since):
+                out.append({
+                    "repo": repo.name,
+                    "message": commit.commit.message,
+                    "url": commit.html_url,
+                    "datetime": commit.commit.author.date,
+                    "stats": {
+                        "additions": commit.stats.additions,
+                        "deletions": commit.stats.deletions,
+                        "total": commit.stats.total
+                    }
+                })
+        except Exception as e:
+            print(e)
+
+    return sorted(out, key = lambda a: a["datetime"], reverse = True)
+
+def scrape_nitter(username, get_until:int):
+    new_tweets = []
+    nitter_url = CONFIG.get("nitter", "internalurl")
+    nitter_port = CONFIG.getint("nitter", "internalport")
+    scrape_new_pages = True
+    url = "http://%s:%d/%s" % (nitter_url, nitter_port, username)
+
+    while scrape_new_pages:
+        tree = html.fromstring(requests.get(url).content)
+        for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
+            if i > 0 and tweetUrlElement.get("href").split("/")[1] == username:
+                id_ = int(urllib.parse.urlparse(tweetUrlElement.get("href")).path.split("/")[-1])
+                tweet_link = "http://%s:%d%s" % (nitter_url, nitter_port, tweetUrlElement.get("href"))
+
+                if id_ == get_until:
+                    scrape_new_pages = False
+                    break
+
+                try:
+                    dt, replying_to, text, images = parse_tweet(tweet_link)
+                    new_tweets.append((id_, dt, replying_to, text, username, images))
+                    print(dt, text)
+                except IndexError:
+                    print("Couldn't get any more tweets")
+                    scrape_new_pages = False
+                    break
+
+        try:
+            cursor = tree.xpath('//*[@class="show-more"]/a')[0].get("href")
+        except IndexError:
+            # no more elements
+            break
+        url = "http://%s:%d/%s%s" % (nitter_url, nitter_port, username, cursor)
+
+    return new_tweets
+
+def parse_tweet(tweet_url):
+    # print(tweet_url)
+    tree = html.fromstring(requests.get(tweet_url).content)
+    # with open("2images.html", "r") as f:
+    #     tree = html.fromstring(f.read())
+
+    main_tweet_elem = tree.xpath('//*[@class="main-tweet"]')[0]
+
+    dt_str = main_tweet_elem.xpath('//*[@class="tweet-published"]')[0].text
+    dt = datetime.datetime.strptime(dt_str.replace("Â", ""), "%b %d, %Y · %I:%M %p UTC")
+    text = tree.xpath('//*[@class="main-tweet"]/div/div/div[2]')[0].text
+    replying_to_elems = tree.xpath('//*[@class="before-tweet thread-line"]/div/a')
+    if replying_to_elems != []:
+        replying_to = int(urllib.parse.urlparse(replying_to_elems[-1].get("href")).path.split("/")[-1])
+    else:
+        replying_to = None
+
+    images = []
+    images_elems = tree.xpath('//*[@class="main-tweet"]/div/div/div[3]/div/div/a/img')
+    for image_elem in images_elems:
+        images.append("https://" + CONFIG.get("nitter", "outsideurl") + urllib.parse.urlparse(image_elem.get("src")).path)
+
+    return dt, replying_to, text, images
+
+
+
+
 if __name__ == "__main__":
-    print(get_trans_stats())
+    # print(get_trans_stats())
+
+    print(scrape_nitter(CONFIG.get("twitter", "diary_account"), 1694624180405260291))
+
+    # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
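The dt_str.replace("Â", "") in parse_tweet() works around a charset artefact: nitter renders timestamps like "Sep 2, 2023 · 4:30 PM UTC", and when the page bytes are decoded with the wrong charset the UTF-8 bytes before the "·" surface as a stray "Â". A small illustration with a made-up timestamp:

import datetime

# Hypothetical mojibake'd value as parse_tweet() might receive it.
raw = "Sep 2, 2023 Â· 4:30 PM UTC"
dt = datetime.datetime.strptime(raw.replace("Â", ""), "%b %d, %Y · %I:%M %p UTC")
print(dt)  # 2023-09-02 16:30:00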
diff --git a/templates/diary.html.j2 b/templates/diary.html.j2
index d7c363b..f6604f7 100755
--- a/templates/diary.html.j2
+++ b/templates/diary.html.j2
@@ -1,7 +1,7 @@
 {% extends "template.html.j2" %}
 {% block content %}
     <h4>this page might not be up-to-date if my diary account is search banned</h4>
-    <p>if in doubt check my <a href="https://twitter.com/FORMER_SHOTA">diary account</a> <br> <s>serves me right for using the official API instead of HTML scraping like i did with the <a href="/">recent tweets</a> thing</s> <br> <a href="https://shadowban.yuzurisa.com/FORMER_SHOTA">check if i'm currently search banned</a></p>
+    <p>if in doubt check my <a href="https://twitter.com/{{ diary_account }}">diary account</a> <br> <a href="https://shadowban.yuzurisa.com/{{ diary_account }}">check if i'm currently search banned</a></p>
     <dl>
     {% for dt, entries in diary.items() %}
         <dt><a href="{{ entries[0]['link'] }}">{{ dt }}</a></dt>
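Taken together, the change reads several previously unused config keys ([nitter] internalurl, internalport and outsideurl; [twitter] main_account and diary_account; plus the existing [github] access_code). They live in the untracked edaweb.conf, so a fresh checkout can fail at runtime; a small sanity check under those assumptions:

import configparser

# (section, option) pairs read by the code in this diff; the values are
# deployment-specific and deliberately not part of the repository.
EXPECTED = [
    ("nitter", "internalurl"),   # host the scraper uses to reach nitter
    ("nitter", "internalport"),  # e.g. 7777, matching docker-compose.yml
    ("nitter", "outsideurl"),    # public nitter domain used in status links
    ("twitter", "main_account"),
    ("twitter", "diary_account"),
    ("github", "access_code"),
]

config = configparser.ConfigParser(interpolation = None)
config.read("edaweb.conf")
missing = [pair for pair in EXPECTED if not config.has_option(*pair)]
if missing:
    raise SystemExit("edaweb.conf is missing: %r" % missing)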