Commit dc2ec1ec2e by 格雷, 3 years ago (parent commit)

+ 8 - 0
.idea/.gitignore

@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

+ 8 - 0
.idea/DAGASI.iml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 13 - 0
.idea/dataSources.xml

@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="DataSourceManagerImpl" format="xml" multifile-model="true">
+    <data-source source="LOCAL" name="sql_localtest@192.168.1.106" uuid="0a827fe0-eba4-4248-9686-6961b165e72b">
+      <driver-ref>mysql.8</driver-ref>
+      <synchronize>true</synchronize>
+      <remarks>测试数据库</remarks>
+      <jdbc-driver>com.mysql.cj.jdbc.Driver</jdbc-driver>
+      <jdbc-url>jdbc:mysql://192.168.1.106:3306/sql_localtest</jdbc-url>
+      <working-dir>$ProjectFileDir$</working-dir>
+    </data-source>
+  </component>
+</project>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 4 - 0
.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/DAGASI.iml" filepath="$PROJECT_DIR$/.idea/DAGASI.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
.idea/sqldialects.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="SqlDialectMappings">
+    <file url="PROJECT" dialect="MySQL" />
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>

+ 47 - 0
fanbox.py

@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+""" 
+---------------------------------------
+ # @Project    : DAGASI
+ # @File       : fanbox.py
+ # @Author     : GrayZhao
+ # @Date       : 2023/2/20 16:25
+ # @Version    : 
+ # @Description : 
+---------------------------------------
+"""
+import requests
+
+URL = "https://api.fanbox.cc/post.listTagged?tag=高画質MP4&userId=1549213"
+
+HEADER = {
+    "origin": "https://dagasi.fanbox.cc",
+    "referer": "https://dagasi.fanbox.cc/",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
+}
+
+
class FanboxID:
    """Enumerate the author's post IDs on fanbox.cc.

    Walks the ``post.listTagged`` API page by page (via ``body.nextUrl``)
    and yields each post's ID.
    """

    @classmethod
    def __get_fanbox_json(cls, url: str) -> dict:
        """Fetch one API page and return the parsed JSON body.

        A ``timeout`` is set so a stalled connection cannot hang forever,
        and ``raise_for_status`` surfaces HTTP errors as ``requests.HTTPError``
        instead of an opaque JSON decode failure on an error page.
        """
        response = requests.get(url=url, headers=HEADER, timeout=30)
        response.raise_for_status()
        return response.json()

    @classmethod
    def iterator(cls):
        """Yield every post ID, following pagination.

        The API sets ``body.nextUrl`` to ``None`` on the last page, which
        terminates the loop.
        """
        next_url = URL
        while next_url:
            page = cls.__get_fanbox_json(next_url)
            next_url = page["body"]["nextUrl"]
            for item in page["body"]["items"]:
                yield item["id"]
+
+
if __name__ == '__main__':
    # Smoke test: print a running index next to each discovered post ID.
    index = 0
    for post_id in FanboxID.iterator():
        print(index, post_id)
        index += 1

+ 151 - 0
kemono.py

@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+""" 
+---------------------------------------
+ # @Project    : DAGASI
+ # @File       : kemono.py
+ # @Author     : GrayZhao
+ # @Date       : 2023/2/20 17:26
+ # @Version    : 
+ # @Description : 
+---------------------------------------
+"""
+import gevent
+import requests
+from requests.exceptions import ConnectionError, ChunkedEncodingError
+import re
+import os
+from queue import Queue
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+
+
+HEADER = {
+    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
+}
+
+URL_LOGIN = "https://kemono.party/account/login"
+HEADER_LOGIN = {
+    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
+    "origin": "https://kemono.party",
+    "referer": "https://kemono.party/account/login",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
+}
+
+"""
+URL = "https://kemono.party/fanbox/user/1549213/post/5285772"
+response = requests.get(URL, headers=HEADER)
+soup = BeautifulSoup(response.text, features="lxml")
+timestamp = soup.find("div", class_="post__published").findChild("time")["datetime"].split(" ")[0]
+print(timestamp)
+downloads = soup.find_all("a", class_="post__attachment-link", text=re.compile(r"高画質"))
+for download in downloads:
+    dw_url = download["href"]
+    name = download.text.replace("\n", "").strip().replace("Download ", "")
+    print(name, dw_url)
+"""
+
+
class Kemono:
    """Scrape kemono.party post pages and download their 高画質 attachments.

    Producer/consumer design: ``produce`` turns post IDs into download jobs
    on ``__data_queue``; ``consume`` drains the queue with resumable,
    Range-based downloads into ``output/``. Failures are retried up to 3
    times, then recorded in ``__error_ids``.
    """

    # Entries "<post_id> <name>" for downloads that failed after 3 retries.
    __error_ids = list()
    # Download jobs produced by __create_data and consumed by __download.
    __data_queue = Queue()
    # One shared session so the login cookie is reused by every request.
    __session = requests.session()

    @classmethod
    def login(cls, username: str, passwd: str):
        """Log into kemono.party; the auth cookie persists on the session."""
        _data = {"username": username, "password": passwd}
        cls.__session.post(url=URL_LOGIN, headers=HEADER_LOGIN, data=_data)

    @classmethod
    def produce(cls, id_q: Queue):
        """Spawn 4 scraper greenlets over *id_q*, then block until every
        job they enqueued has been fully consumed (queue.join)."""
        tasks = [gevent.spawn(cls.__create_data, id_q) for _ in range(4)]
        gevent.joinall(tasks)
        cls.__data_queue.join()

    @classmethod
    def __create_data(cls, id_q: Queue):
        """Worker: scrape one post page per ID and enqueue its attachments."""
        while not id_q.empty():
            post_id = id_q.get_nowait()
            url = f"https://kemono.party/fanbox/user/1549213/post/{post_id}"
            response = cls.__session.get(url, headers=HEADER)
            soup = BeautifulSoup(response.text, features="lxml")
            # Publication date, e.g. "2023-02-20", used to prefix file names.
            timestamp = soup.find("div", class_="post__published").findChild("time")["datetime"].split(" ")[0]
            # Only attachments whose link text mentions 高画質 (high quality).
            downloads = soup.find_all("a", class_="post__attachment-link", text=re.compile(r"高画質"))
            for download in downloads:
                dw_url = download["href"]
                all_name = download.text.replace("\n", "").strip().replace("Download ", "")
                name = os.path.splitext(all_name)[0]
                save_name = f"[{timestamp}] {all_name}"
                cls.__data_queue.put_nowait({
                    "postID": post_id,
                    "name": name,
                    "saveName": save_name,
                    "url": dw_url,
                    "error_count": 0,  # retries consumed so far (max 3)
                })

    @classmethod
    def consume(cls):
        """Spawn 4 download greenlets. Never returns on its own — the
        workers loop forever — so run it on a daemon thread."""
        tasks = [gevent.spawn(cls.__download) for _ in range(4)]
        gevent.joinall(tasks)

    @classmethod
    def __retry_or_fail(cls, data: dict):
        """Re-queue *data* for another attempt, or record it as failed
        once its 3 retries are exhausted."""
        if data["error_count"] >= 3:
            cls.__error_ids.append(f"{data['postID']} {data['name']}")
        else:
            data["error_count"] += 1
            cls.__data_queue.put(data)

    @classmethod
    def __download(cls):
        """Worker: download jobs forever, resuming partial files via a
        Range header. Connection errors go through __retry_or_fail."""
        while True:
            data = cls.__data_queue.get()
            post_id = data["postID"]
            name = data["name"]
            save_name = "output/" + data["saveName"]
            url = data["url"]
            try:
                # Probe only the headers for the total size; stream=True
                # keeps the body unread.
                probe = cls.__session.get(url=url, headers=HEADER, stream=True)
            except (ConnectionError, ChunkedEncodingError):
                cls.__retry_or_fail(data)
                cls.__data_queue.task_done()
                continue
            total_size = int(probe.headers.get("content-length", 0))
            # Release the streamed connection — the body is never read,
            # so without close() the socket would stay checked out.
            probe.close()
            now_range = os.path.getsize(save_name) if os.path.exists(save_name) else 0
            # total_size == 0 means the server sent no content-length; we
            # cannot tell the file is complete, so only skip when a real
            # size is known and already reached.
            if total_size and now_range >= total_size:
                print(f"[{post_id}] {name} 已下载完成...")
                cls.__data_queue.task_done()
                continue
            header = HEADER.copy()
            header["Range"] = f"bytes={now_range}-"

            block_size = 1024
            # unit="B": the progress counter advances in bytes.
            pbar = tqdm(desc=f"{name}", total=total_size, initial=now_range, unit="B", unit_scale=True, leave=False)
            try:
                response = cls.__session.get(url=url, headers=header, stream=True)
                with open(save_name, "ab") as file:
                    for chunk in response.iter_content(block_size):
                        file.write(chunk)
                        pbar.update(len(chunk))
                        pbar.set_postfix(info=f"剩余个数:{cls.__data_queue.qsize()}")
            except (ConnectionError, ChunkedEncodingError) as e:
                cls.__retry_or_fail(data)
                pbar.write(f"出现错误:\n{e}")
            finally:
                pbar.close()
                cls.__data_queue.task_done()

    @classmethod
    def is_error_ids(cls):
        """Return a sorted list of failed "<id> <name>" entries, or False
        when everything succeeded.

        The original returned ``list.sort()``, which is always ``None``,
        so callers could never see the failures.
        """
        if cls.__error_ids:
            return sorted(cls.__error_ids)
        return False

+ 52 - 0
main.py

@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+""" 
+---------------------------------------
+ # @Project    : DAGASI
+ # @File       : main.py
+ # @Author     : GrayZhao
+ # @Date       : 2023/2/22 0:37
+ # @Version    : 
+ # @Description : 
+---------------------------------------
+"""
+from gevent import monkey;monkey.patch_all()
+from threading import Thread
+from queue import Queue
+from fanbox import FanboxID
+from kemono import Kemono
+
if __name__ == '__main__':
    import os
    from getpass import getpass

    id_queue = Queue()
    # Selection menu
    print("1. 下载全部作品\n2. 下载指定作品")
    while True:
        opt = input("请输入序号 [1]:")
        if opt == "1" or not opt:
            for _id in FanboxID.iterator():
                id_queue.put(_id)
            break
        elif opt == "2":
            while True:
                # Strip whitespace and drop empty fragments: the original
                # used "".split(",") == [""], which is truthy, so the
                # empty-input check could never fire.
                _ids = [s.strip() for s in input("请输入投稿ID [多个ID请使用‘,’分隔]:").split(",") if s.strip()]
                if _ids:
                    for _id in _ids:
                        id_queue.put(_id)
                    break
                print("<Error> ID不能为空!")
            break
        print("<Error> 请输入正确序号!")
    # Download
    print(f"共有{id_queue.qsize()}篇投稿需要下载,请耐心等待...")
    # Credentials come from the environment, falling back to an interactive
    # prompt — never hard-code account secrets in version-controlled source.
    username = os.environ.get("KEMONO_USERNAME") or input("Kemono username:")
    password = os.environ.get("KEMONO_PASSWORD") or getpass("Kemono password:")
    Kemono.login(username=username, passwd=password)
    produce = Thread(target=Kemono.produce, args=(id_queue,))
    consume = Thread(target=Kemono.consume)
    # Thread.setDaemon() is deprecated since Python 3.10; assign the attribute.
    consume.daemon = True
    produce.start()
    consume.start()
    produce.join()
    # Report any posts that never finished downloading.
    failed = Kemono.is_error_ids()
    if failed:
        for item in failed:
            print(item)

+ 0 - 0
output/video