diff options
author | jwansek <eddie.atten.ea29@gmail.com> | 2021-02-20 15:11:14 +0000 |
---|---|---|
committer | jwansek <eddie.atten.ea29@gmail.com> | 2021-02-20 15:11:14 +0000 |
commit | 4b22e646bf7afbf6292b02a908477007c02c9b53 (patch) | |
tree | d84164da3bb55796052686fad6b58bbc4f56101b /get_images.py | |
parent | a101cc450c1fcb71a50244e1be4e06cb0bd82552 (diff) | |
download | yaoi-communism-4b22e646bf7afbf6292b02a908477007c02c9b53.tar.gz yaoi-communism-4b22e646bf7afbf6292b02a908477007c02c9b53.zip |
changed the getting sauces system
Diffstat (limited to 'get_images.py')
-rwxr-xr-x[-rw-r--r--] | get_images.py | 22 |
1 file changed, 19 insertions, 3 deletions
```diff
diff --git a/get_images.py b/get_images.py
index 817b9c1..9d25599 100644..100755
--- a/get_images.py
+++ b/get_images.py
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from PIL import Image, ImageDraw
+from lxml.html import parse
 from io import StringIO
 from lxml import etree
 import requests
@@ -85,10 +86,21 @@ def get_num_pages(tags):
     else:
         return int(int(urllib.parse.parse_qs(page_element.get("href"))["pid"][0]) / (5*8))
 
+def check_pixiv_404(url):
+    text = requests.get(url).text
+    return text[text.find("<title>") + 7 : text.find("</title>")] == "イラストコミュニケーションサービス[pixiv]"
+
 def fix_source_url(url):
-    if "pixiv.net" in url or "pximg.net" in url:
-        return "https://www.pixiv.net/en/artworks/%s" % url.split("/")[-1][:8]
-    return url
+    parsed = urllib.parse.urlparse(url)
+    if parsed.netloc == "www.pixiv.net":
+        return "https://www.pixiv.net/en/artworks/" + urllib.parse.parse_qs(parsed.query)["illust_id"][0]
+    elif parsed.netloc in ["bishie.booru.org", "www.secchan.net"]:
+        return ConnectionError("Couldn't get source")
+    elif "pximg.net" in parsed.netloc or "pixiv.net" in parsed.netloc:
+        return "https://www.pixiv.net/en/artworks/" + parsed.path.split("/")[-1][:8]
+    elif parsed.netloc == "twitter.com":
+        return url.replace("twitter.com", "nitter.eda.gay")
+    return url
 
 def append_blacklisted(id_):
     with open(CONFIG["blacklist"], "a") as f:
@@ -127,6 +139,10 @@ def main(draw_faces = False):
         logging.info("Retried, already posted image...")
         return main()
 
+    if check_pixiv_404(fix_source_url(simg.source)):
+        logging.warning("Skipping since pixiv linked 404'd")
+        return main()
+
     append_blacklisted(simg.id)
 
     with DownloadedImage(simg.imurl) as impath:
```