| author | jwansek <eddie.atten.ea29@gmail.com> | 2023-09-03 20:57:35 +0100 |
|---|---|---|
| committer | jwansek <eddie.atten.ea29@gmail.com> | 2023-09-03 20:57:35 +0100 |
| commit | e484a4ecd182d806d004a0b5b9116683bc07217e (patch) | |
| tree | edc225a17aaf7dc93000b9046c92656009a3de57 | |
| parent | c4ab716c20729a62f7b78b60029c27e0f166f41c (diff) | |
| download | boymoder.blog-e484a4ecd182d806d004a0b5b9116683bc07217e.tar.gz<br>boymoder.blog-e484a4ecd182d806d004a0b5b9116683bc07217e.zip | |
Added local nitter instance, fixed twitter caching using new method
| -rwxr-xr-x | .gitignore | 2 |
|---|---|---|
| -rw-r--r-- | .gitmodules | 4 |
| -rwxr-xr-x | app.py | 5 |
| -rwxr-xr-x | database.py | 156 |
| -rwxr-xr-x | docker-compose.yml | 44 |
| m--------- | nitter/nitter | 0 |
| -rwxr-xr-x | services.py | 115 |
| -rwxr-xr-x | static/index.md | 4 |
| -rwxr-xr-x | templates/diary.html.j2 | 2 |
9 files changed, 211 insertions(+), 121 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,8 @@ static/images/random.jpg
 static/zips/*.zip
 .nfs*
 static/images/Thumbs.db
+nitter/nitter.conf
+nitter/guest_accounts.json
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..1b086ba
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "nitter/nitter"]
+	path = nitter/nitter
+	url = git@github.com:zedeus/nitter.git
+	branch = guest_accounts
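(The new submodule pins zedeus/nitter at its `guest_accounts` branch; after cloning, it presumably needs a `git submodule update --init` before the docker-compose build context added below exists.)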
diff --git a/app.py b/app.py
@@ -71,7 +71,7 @@ def index():
                 markdown = parser.parse_text(f.read())[0],
                 featured_thoughts = db.get_featured_thoughts(),
                 tweets = db.get_cached_tweets(7) + [("view all tweets...", db.get_my_twitter())],
-                commits = db.get_cached_commits(since = datetime.datetime.now() - datetime.timedelta(days = 7)),
+                commits = db.get_cached_commits()[:7],
                 sidebar_img = get_sidebar_img(db)
             )
 
@@ -85,7 +85,8 @@ def diary():
         return flask.render_template(
             "diary.html.j2",
             **get_template_items("diary", db),
-            diary = db.get_diary()
+            diary = db.get_diary(),
+            diary_account = db.config.get("twitter", "diary_account")
         )
 
 @app.route("/discord")
diff --git a/database.py b/database.py
index 1877ab7..ea3835c 100755
--- a/database.py
+++ b/database.py
@@ -1,10 +1,10 @@
 from urllib.parse import urlparse
 from dataclasses import dataclass
-from github import Github
 from lxml import html
 import configparser
 import curiouscat
 import threading
+import services
 import operator
 import datetime
 import requests
@@ -40,6 +40,7 @@ class Database:
         return self
 
     def __exit__(self, type, value, traceback):
+        self.__connection.commit()
         self.__connection.close()
 
     def get_header_links(self):
@@ -132,24 +133,18 @@ class Database:
             """)
             return cursor.fetchall()
 
-    def get_cached_tweets(self, numToGet = None, recurse = True):
+    def get_cached_tweets(self, numToGet = None):
         with self.__connection.cursor() as cursor:
+            sql = "SELECT tweet, tweet_id, account FROM diary WHERE account = %s ORDER BY tweeted_at"
+            args = (self.config.get("twitter", "main_account"), )
             if numToGet is not None:
-                cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC LIMIT %s;", (numToGet, ))
+                sql += " LIMIT %s;"
+                args = (self.config.get("twitter", "main_account"), numToGet)
             else:
-                cursor.execute("SELECT text, url FROM twitterCache ORDER BY appended DESC;")
-            if recurse:
-                threading.Thread(target = update_cache).start()
-            return list(cursor.fetchall())
+                sql += ";"
+            cursor.execute(sql, args)
 
-    def update_twitter_cache(self, requested):
-        with self.__connection.cursor() as cursor:
-            cursor.execute("SELECT DISTINCT url FROM twitterCache;")
-            urls = [i[0] for i in cursor.fetchall()]
-            for url, text in requested:
-                if url not in urls:
-                    cursor.execute("INSERT INTO twitterCache (text, url) VALUES (%s, %s);", (text, url))
-        self.__connection.commit()
+            return [(i[0], "https://%s/%s/status/%d" % (self.config.get("nitter", "outsideurl"), i[2], i[1])) for i in cursor.fetchall()]
 
     def get_cached_commits(self, since = None, recurse = True):
         with self.__connection.cursor() as cursor:
@@ -214,29 +209,38 @@ class Database:
         self.__connection.commit()
         return id_
 
-    def append_diary(self, tweet_id, tweeted_at, replying_to, tweet):
+    def append_diary(self, tweet_id, tweeted_at, replying_to, tweet, account):
+        if tweet is None:
+            tweet = "(Image only)"
         with self.__connection.cursor() as cursor:
-            cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet))
-        print("Appended diary with tweet '%s'" % tweet)
+            cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet, account))
+        self.__connection.commit()
+
+        print("Appended diary with tweet " + tweet + " @ " + str(tweeted_at))
 
     def append_diary_images(self, tweet_id, imurl):
         with self.__connection.cursor() as cursor:
             cursor.execute("INSERT INTO diaryimages (tweet_id, link) VALUES (%s, %s);", (tweet_id, imurl))
+        self.__connection.commit()
+
-    def get_diary(self):
+    def get_diary(self, account = None):
         threading.Thread(target = update_cache).start()
         out = {}
+        if account is None:
+            account = self.get_my_diary_twitter()
+
         with self.__connection.cursor() as cursor:
             # cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL ORDER BY tweeted_at DESC;")
             # attempt to ignore curiouscat automatic tweets by comparing with the q&a table
-            cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) ORDER BY tweeted_at DESC;")
+            cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) AND account = %s ORDER BY tweeted_at DESC;", (account, ))
             for tweet_id, tweeted_at, tweet_text in cursor.fetchall():
                 # print(tweet_id, tweeted_at, tweet_text)
                 out[tweeted_at] = [{
                     "text": tweet_text, 
                     "images": self.get_diary_image(tweet_id), 
                     "link": "https://%s/%s/status/%d" % (
-                        self.config.get("nitter", "domain"), 
+                        self.config.get("nitter", "outsideurl"), 
                         self.get_my_diary_twitter(), 
                         tweet_id
                     )
@@ -268,46 +272,17 @@ class Database:
             "text": out[1], 
             "images": self.get_diary_image(id_), 
             "link": "https://%s/%s/status/%d" % (
-                self.config.get("nitter", "domain"), self.get_my_diary_twitter(), id_
+                self.config.get("nitter", "outsideurl"), self.get_my_diary_twitter(), id_
             )
         }, id_
 
-    def get_newest_diary_tweet_id(self):
+    def get_newest_diary_tweet_id(self, account = None):
+        if account is None:
+            account = self.get_my_diary_twitter()
         with self.__connection.cursor() as cursor:
-            cursor.execute("SELECT MAX(tweet_id) FROM diary;")
+            cursor.execute("SELECT MAX(tweet_id) FROM diary WHERE account = %s;", (account, ))
             return cursor.fetchone()[0]
 
-    def fetch_diary(self):
-        twitteracc = self.get_my_diary_twitter()
-
-        twitter = twython.Twython(
-            self.config.get("twitter", "app_key"), 
-            access_token = self.config.get("twitter", "oauth_2_token")
-        )
-        
-        for tweet in twitter.search(
-                q = "(from:%s)" % twitteracc, since_id = self.get_newest_diary_tweet_id(),
-                tweet_mode = 'extended'
-            )["statuses"]:
-
-            tweet_id = tweet["id"]
-            tweeted_at = datetime.datetime.strptime(tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
-            replying_to = tweet["in_reply_to_status_id"]
-            tweet_text = re.sub(r"https://t\.co/\w{10}", "", tweet["full_text"], 0, re.MULTILINE)
-            
-            if tweet["in_reply_to_screen_name"] == twitteracc or tweet["in_reply_to_screen_name"] is None:
-                self.append_diary(tweet_id, tweeted_at, replying_to, tweet_text)
-
-            if "media" in tweet["entities"].keys():
-                associated_images = [
-                    i["media_url_https"].replace("pbs.twimg.com", self.config.get("nitter", "domain") + "/pic") 
-                    for i in tweet["entities"]["media"]
-                ]
-                for im in associated_images:
-                    self.append_diary_images(tweet_id, im)
-
-        self.__connection.commit()
-
     def get_curiouscat_username(self):
         with self.__connection.cursor() as cursor:
             cursor.execute("SELECT link FROM headerLinks WHERE name = 'curiouscat';")
@@ -339,7 +314,7 @@ class Database:
             return sorted(cursor.fetchall(), key = operator.itemgetter(2), reverse = True)
 
 def update_cache():
-    # print("updating cache...")
+    print("Updating cache...")
     with Database() as db:
         db.append_curiouscat_qnas(
             curiouscat.get_all_curiouscat_qnas_before(
@@ -347,58 +322,25 @@ def update_cache():
                 db.get_biggest_curiouscat_timestamp()
             )
         )
-        db.fetch_diary()
-        db.update_twitter_cache(request_recent_tweets(10000))
-        # print("Done updating twitter cache...")
-        db.update_commit_cache(request_recent_commits(since = db.get_last_commit_time()))
-        # print("Done updating commit cache...")
-
-CONFIG = configparser.ConfigParser()
-CONFIG.read("edaweb.conf")
-
-def request_recent_tweets(numToGet):
-    tweets = []
-    domain = "http://" + CONFIG.get("nitter", "domain")
-    with Database() as db:
-        for title, url in db.get_header_links():
-            if title == "twitter":
-                break
-    tree = html.fromstring(requests.get(url).content)
-    for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
-        if i > 0:
-            tweets.append((
-                domain + tweetUrlElement.get("href"),
-                tweetUrlElement.getparent().find_class("tweet-content media-body")[0].text
-            ))
-        if len(tweets) >= numToGet:
-            break
-    return tweets
-            
-def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
-    g = Github(CONFIG.get("github", "access_code"))
-    out = []
-    for repo in g.get_user().get_repos():
-        # print(repo.name, list(repo.get_branches()))
-        try:
-            for commit in repo.get_commits(since = since):
-                out.append({
-                    "repo": repo.name,
-                    "message": commit.commit.message,
-                    "url": commit.html_url,
-                    "datetime": commit.commit.author.date,
-                    "stats": {
-                        "additions": commit.stats.additions,
-                        "deletions": commit.stats.deletions,
-                        "total": commit.stats.total
-                    }
-                })
-        except Exception as e:
-            print(e)
-
-    return sorted(out, key = lambda a: a["datetime"], reverse = True)
+        print("Finished adding curiouscat...")
+        db.update_commit_cache(services.request_recent_commits(since = db.get_last_commit_time()))
+        print("Finished adding github commits...")
+        for id_, dt, replying_to, text, username, images in services.scrape_nitter(db.get_my_diary_twitter(), db.get_newest_diary_tweet_id()):
+            db.append_diary(id_, dt, replying_to, text, username)
+            for image in images:
+                db.append_diary_images(id_, image)
+        print("Finished getting diary tweets...")
+        for id_, dt, replying_to, text, username, images in services.scrape_nitter(
+            db.config.get("twitter", "main_account"), db.get_newest_diary_tweet_id(db.config.get("twitter", "main_account"))
+            ):
+            db.append_diary(id_, dt, replying_to, text, username)
+            for image in images:
+                db.append_diary_images(id_, image)
+    print("Done updating commit cache...")
 
 if __name__ == "__main__":
-    # print(request_recent_commits())
     with Database() as db:
-        print(db.get_curiouscat_qnas())
+        print(db.get_cached_tweets())
+
+    # update_cache()
diff --git a/docker-compose.yml b/docker-compose.yml
index c8a70c1..2a78e22 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -37,6 +37,47 @@ services:
             - mariadb:mysql
         restart: unless-stopped
 
+    nitter:
+        build:
+            context: ./nitter/nitter
+            dockerfile: Dockerfile
+        image: jwansek/nitter:latest
+        ports:
+            - "127.0.0.1:7777:7777" # Replace with "8080:8080" if you don't use a reverse proxy
+        volumes:
+            - ./nitter/nitter.conf:/src/nitter.conf:Z,ro
+            - ./nitter/guest_accounts.json:/src/guest_accounts.json:Z,ro
+        depends_on:
+            - nitter-redis
+        healthcheck:
+            test: wget -nv --tries=1 --spider http://127.0.0.1:8080/Jack/status/20 || exit 1
+            interval: 30s
+            timeout: 5s
+            retries: 2
+        user: "998:998"
+        read_only: true
+        security_opt:
+            - no-new-privileges:true
+        cap_drop:
+            - ALL
+
+    nitter-redis:
+        image: redis:6-alpine
+        command: redis-server --save 60 1 --loglevel warning
+        volumes:
+            - nitter-redis:/data
+        healthcheck:
+            test: redis-cli ping
+            interval: 30s
+            timeout: 5s
+            retries: 2
+        user: "999:1000"
+        read_only: true
+        security_opt:
+            - no-new-privileges:true
+        cap_drop:
+            - ALL
+
 networks:
     edaweb-net:
         external:
@@ -45,3 +86,6 @@ networks:
     db-network:
         external:
             name: mariadb
+
+volumes:
+    nitter-redis:
diff --git a/nitter/nitter b/nitter/nitter
new file mode 160000
+Subproject fcd74e8048362fcf8284871ee067099e8de28a8
diff --git a/services.py b/services.py
index 77361fa..0a10d34 100755
--- a/services.py
+++ b/services.py
@@ -1,9 +1,11 @@
 from dataclasses import dataclass
 from io import StringIO
 from lxml import html, etree
+from github import Github
 import multiprocessing
 import pihole as ph
 import qbittorrent
+import configparser
 import requests
 import datetime
 import urllib
@@ -13,10 +15,11 @@ import random
 import queue
 import json
 import time
-import app
 import os
 
 theLastId = 0
+CONFIG = configparser.ConfigParser(interpolation = None)
+CONFIG.read("edaweb.conf")
 
 def humanbytes(B):
    'Return the given bytes as a human friendly KB, MB, GB, or TB string'
@@ -51,7 +54,7 @@ def timeout(func):
         t = multiprocessing.Process(target = runFunc, args = (returnVan, func))
         t.start()
 
-        t.join(timeout = app.CONFIG["servicetimeout"].getint("seconds"))
+        t.join(timeout = CONFIG["servicetimeout"].getint("seconds"))
         # print("Request took:", time.time() - ti)
 
         try:
@@ -64,7 +67,7 @@ def timeout(func):
 
 @timeout
 def get_docker_stats():
-    client = docker.DockerClient(base_url = "tcp://%s:%s" % (app.CONFIG["docker"]["url"], app.CONFIG["docker"]["port"]))
+    client = docker.DockerClient(base_url = "tcp://%s:%s" % (CONFIG["docker"]["url"], CONFIG["docker"]["port"]))
     return {
         container.name: container.status
         for container in client.containers.list(all = True)
@@ -76,8 +79,8 @@ def get_qbit_stats():
     numtorrents = 0
     bytes_dl = 0
     bytes_up = 0
-    qb = qbittorrent.Client('http://%s:%s/' % (app.CONFIG["qbittorrent"]["url"], app.CONFIG["qbittorrent"]["port"]))
-    qb.login(username = app.CONFIG["qbittorrent"]["user"], password = app.CONFIG["qbittorrent"]["passwd"])
+    qb = qbittorrent.Client('http://%s:%s/' % (CONFIG["qbittorrent"]["url"], CONFIG["qbittorrent"]["port"]))
+    qb.login(username = CONFIG["qbittorrent"]["user"], password = CONFIG["qbittorrent"]["passwd"])
 
     for torrent in qb.torrents():
         numtorrents += 1
@@ -94,9 +97,9 @@
 @timeout
 def get_trans_stats():
     client = clutch.client.Client(
-        address = "http://%s:%s/transmission/rpc" % (app.CONFIG["transmission"]["url"], app.CONFIG["transmission"]["port"]),
-        # username = app.CONFIG["transmission"]["username"],
-        # password = app.CONFIG["transmission"]["password"]
+        address = "http://%s:%s/transmission/rpc" % (CONFIG["transmission"]["url"], CONFIG["transmission"]["port"]),
+        # username = CONFIG["transmission"]["username"],
+        # password = CONFIG["transmission"]["password"]
     )
     stats = json.loads(client.session.stats().json())
     active_for = datetime.timedelta(seconds = stats["arguments"]["cumulative_stats"]["seconds_active"])
@@ -110,7 +113,7 @@ def get_trans_stats():
 
 @timeout
 def get_pihole_stats():
-    pihole = ph.PiHole(app.CONFIG["pihole"]["url"])
+    pihole = ph.PiHole(CONFIG["pihole"]["url"])
     return {
         "status": pihole.status,
         "queries": pihole.total_queries,
@@ -230,5 +233,97 @@ def link_deleted(url):
     text = requests.get(url).text
     return text[text.find("<title>") + 7 : text.find("</title>")] in ["Error | nitter", "イラストコミュニケーションサービス[pixiv]"]
 
+def request_recent_commits(since = datetime.datetime.now() - datetime.timedelta(days=7)):
+    g = Github(CONFIG.get("github", "access_code"))
+    out = []
+    for repo in g.get_user().get_repos():
+        # print(repo.name, list(repo.get_branches()))
+        try:
+            for commit in repo.get_commits(since = since):
+                out.append({
+                    "repo": repo.name,
+                    "message": commit.commit.message,
+                    "url": commit.html_url,
+                    "datetime": commit.commit.author.date,
+                    "stats": {
+                        "additions": commit.stats.additions,
+                        "deletions": commit.stats.deletions,
+                        "total": commit.stats.total
+                    }
+                })
+        except Exception as e:
+            print(e)
+
+    return sorted(out, key = lambda a: a["datetime"], reverse = True)
+
+def scrape_nitter(username, get_until:int):
+    new_tweets = []
+    nitter_url = CONFIG.get("nitter", "internalurl")
+    nitter_port = CONFIG.getint("nitter", "internalport")
+    scrape_new_pages = True
+    url = "http://%s:%d/%s" % (nitter_url, nitter_port, username)
+
+    while scrape_new_pages:
+        tree = html.fromstring(requests.get(url).content)
+        for i, tweetUrlElement in enumerate(tree.xpath('//*[@class="tweet-link"]'), 0):
+            if i > 0 and tweetUrlElement.get("href").split("/")[1] == username:
+                id_ = int(urllib.parse.urlparse(tweetUrlElement.get("href")).path.split("/")[-1])
+                tweet_link = "http://%s:%d%s" % (nitter_url, nitter_port, tweetUrlElement.get("href"))
+
+                if id_ == get_until:
+                    scrape_new_pages = False
+                    break
+
+                try:
+                    dt, replying_to, text, images = parse_tweet(tweet_link)
+                    new_tweets.append((id_, dt, replying_to, text, username, images))
+                    print(dt, text)
+                except IndexError:
+                    print("Couldn't get any more tweets")
+                    scrape_new_pages = False
+                    break
+
+        try:
+            cursor = tree.xpath('//*[@class="show-more"]/a')[0].get("href")
+        except IndexError:
+            # no more elements
+            break
+        url = "http://%s:%d/%s%s" % (nitter_url, nitter_port, username, cursor)
+
+    return new_tweets
+
+def parse_tweet(tweet_url):
+    # print(tweet_url)
+    tree = html.fromstring(requests.get(tweet_url).content)
+    # with open("2images.html", "r") as f:
+    #     tree = html.fromstring(f.read())
+
+    main_tweet_elem = tree.xpath('//*[@class="main-tweet"]')[0]
+
+    dt_str = main_tweet_elem.xpath('//*[@class="tweet-published"]')[0].text
+    dt = datetime.datetime.strptime(dt_str.replace("Â", ""), "%b %d, %Y · %I:%M %p UTC")
+    text = tree.xpath('//*[@class="main-tweet"]/div/div/div[2]')[0].text
+    replying_to_elems = tree.xpath('//*[@class="before-tweet thread-line"]/div/a')
+    if replying_to_elems != []:
+        replying_to = int(urllib.parse.urlparse(replying_to_elems[-1].get("href")).path.split("/")[-1])
+    else:
+        replying_to = None
+
+    images = []
+    images_elems = tree.xpath('//*[@class="main-tweet"]/div/div/div[3]/div/div/a/img')
+    for image_elem in images_elems:
+        images.append("https://" + CONFIG.get("nitter", "outsideurl") + urllib.parse.urlparse(image_elem.get("src")).path)
+
+    return dt, replying_to, text, images
+
+
 if __name__ == "__main__":
-    print(get_trans_stats())
+    # print(get_trans_stats())
+
+    print(scrape_nitter(CONFIG.get("twitter", "diary_account"), 1694624180405260291))
+
+    # print(parse_tweet("https://nitter.net/HONMISGENDERER/status/1694231618443981161#m"))
diff --git a/static/index.md b/static/index.md
index 6e8a1db..535f8c8 100755
--- a/static/index.md
+++ b/static/index.md
@@ -1,7 +1,7 @@
 
 ## haiiiiiii
 
-my name is eden and im a 21yo (boymoder) computer science undergraduate. i made my own website to encourage others to do so too.
+my name is eden and im a 22yo (boymoder) computer science student. i made my own website to encourage others to do so too.
 
 i'll post my thoughts on here sometimes, and use this site to link to other stuff i host [more about me](/thought?id=2).
 
 [click here for a random image of lio fotia](https://eda.gay/random?tags=lio_fotia)
@@ -12,6 +12,8 @@ i'll post my thoughts on here sometimes, and use this site to link to other stuf
 
 ## FOSS alternative services
 
+(some are currently down as i am temporarily hosting on a VPS instead of my [normal server](https://wiki.eda.gay))
+
 - [nextcloud - dropbox (+ much more!) alternative](https://nc.eda.gay)
 - [invidious - youtube alternative](https://invidious.eda.gay)
 - [nitter - alternative twitter frontend](https://nitter.eda.gay)
diff --git a/templates/diary.html.j2 b/templates/diary.html.j2
index d7c363b..f6604f7 100755
--- a/templates/diary.html.j2
+++ b/templates/diary.html.j2
@@ -1,7 +1,7 @@
 {% extends "template.html.j2" %}
 {% block content %}
     <h4>this page might not be up-to-date if my diary account is search banned</h4>
-    <p>if in doubt check my <a href="https://twitter.com/FORMER_SHOTA">diary account</a> <br> <s>serves me right for using the official API instead of HTML scraping like i did with the <a href="/">recent tweets</a> thing</s> <br> <a href="https://shadowban.yuzurisa.com/FORMER_SHOTA">check if i'm currently search banned</a></p>
+    <p>if in doubt check my <a href="https://twitter.com/{{ diary_account }}">diary account</a> <br> <a href="https://shadowban.yuzurisa.com/{{ diary_account }}">check if i'm currently search banned</a></p>
     <dl>
         {% for dt, entries in diary.items() %}
            <dt><a href="{{ entries[0]['link'] }}">{{ dt }}</a></dt>
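For reference, the commit reads these `edaweb.conf` keys through `configparser` (in `database.py` and `services.py`). The section and key names are taken from the diff; every value below is an invented placeholder, not the real deployment's setting:

```python
import configparser

# Hypothetical edaweb.conf fragment; only the section/key names are real.
EXAMPLE_CONF = """
[nitter]
# docker-compose service name and port, scraped over plain HTTP
internalurl = nitter
internalport = 8080
# public instance used when rendering tweet and image links
outsideurl = nitter.example.com

[twitter]
main_account = example_main
diary_account = example_diary

[github]
access_code = ghp_exampleexampleexample
"""

config = configparser.ConfigParser(interpolation = None)  # matches services.py
config.read_string(EXAMPLE_CONF)
print(config.getint("nitter", "internalport"))  # 8080
```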
