|
|
@@ -0,0 +1,151 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+
|
|
|
+"""
|
|
|
+---------------------------------------
|
|
|
+ # @Project : DAGASI
|
|
|
+ # @File : kemono.py
|
|
|
+ # @Author : GrayZhao
|
|
|
+ # @Date : 2023/2/20 17:26
|
|
|
+ # @Version :
|
|
|
+ # @Description :
|
|
|
+---------------------------------------
|
|
|
+"""
|
|
|
import os
import re
from queue import Empty, Queue

import gevent
import requests
from bs4 import BeautifulSoup
from requests.exceptions import ChunkedEncodingError, ConnectionError
from tqdm import tqdm
|
|
|
+
|
|
|
+
|
|
|
+HEADER = {
|
|
|
+ "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
|
|
|
+}
|
|
|
+
|
|
|
+URL_LOGIN = "https://kemono.party/account/login"
|
|
|
+HEADER_LOGIN = {
|
|
|
+ "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
|
|
|
+ "origin": "https://kemono.party",
|
|
|
+ "referer": "https://kemono.party/account/login",
|
|
|
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
|
|
|
+}
|
|
|
+
|
|
|
+"""
|
|
|
+URL = "https://kemono.party/fanbox/user/1549213/post/5285772"
|
|
|
+response = requests.get(URL, headers=HEADER)
|
|
|
+soup = BeautifulSoup(response.text, features="lxml")
|
|
|
+timestamp = soup.find("div", class_="post__published").findChild("time")["datetime"].split(" ")[0]
|
|
|
+print(timestamp)
|
|
|
+downloads = soup.find_all("a", class_="post__attachment-link", text=re.compile(r"高画質"))
|
|
|
+for download in downloads:
|
|
|
+ dw_url = download["href"]
|
|
|
+ name = download.text.replace("\n", "").strip().replace("Download ", "")
|
|
|
+ print(name, dw_url)
|
|
|
+"""
|
|
|
+
|
|
|
+
|
|
|
+class Kemono:
|
|
|
+ __error_ids = list()
|
|
|
+ __data_queue = Queue()
|
|
|
+ __session = requests.session()
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def login(cls, username: str, passwd: str):
|
|
|
+ _data = {"username": username, "password": passwd}
|
|
|
+ cls.__session.post(url=URL_LOGIN, headers=HEADER_LOGIN, data=_data)
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def produce(cls, id_q: Queue):
|
|
|
+ tasks = [gevent.spawn(cls.__create_data, id_q) for _ in range(4)]
|
|
|
+ gevent.joinall(tasks)
|
|
|
+ cls.__data_queue.join()
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def __create_data(cls, id_q: Queue):
|
|
|
+ while not id_q.empty():
|
|
|
+ post_id = id_q.get_nowait()
|
|
|
+ url = f"https://kemono.party/fanbox/user/1549213/post/{post_id}"
|
|
|
+ response = cls.__session.get(url, headers=HEADER)
|
|
|
+ soup = BeautifulSoup(response.text, features="lxml")
|
|
|
+ timestamp = soup.find("div", class_="post__published").findChild("time")["datetime"].split(" ")[0]
|
|
|
+ downloads = soup.find_all("a", class_="post__attachment-link", text=re.compile(r"高画質"))
|
|
|
+ for download in downloads:
|
|
|
+ dw_url = download["href"]
|
|
|
+ all_name = download.text.replace("\n", "").strip().replace("Download ", "")
|
|
|
+ name = os.path.splitext(all_name)[0]
|
|
|
+ save_name = f"[{timestamp}] {all_name}"
|
|
|
+ q_data = {
|
|
|
+ "postID": post_id,
|
|
|
+ "name": name,
|
|
|
+ "saveName": save_name,
|
|
|
+ "url": dw_url,
|
|
|
+ "error_count": 0
|
|
|
+ }
|
|
|
+ cls.__data_queue.put_nowait(q_data)
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def consume(cls):
|
|
|
+ tasks = [gevent.spawn(cls.__download) for _ in range(4)]
|
|
|
+ gevent.joinall(tasks)
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def __download(cls):
|
|
|
+ while True:
|
|
|
+ data = cls.__data_queue.get()
|
|
|
+ post_id = data["postID"]
|
|
|
+ name = data["name"]
|
|
|
+ save_name = "output/" + data["saveName"]
|
|
|
+ url = data["url"]
|
|
|
+ try:
|
|
|
+ response = cls.__session.get(url=url, headers=HEADER, stream=True)
|
|
|
+ except (ConnectionError, ChunkedEncodingError):
|
|
|
+ if data["error_count"] >= 3:
|
|
|
+ cls.__error_ids.append(f"{post_id} {name}")
|
|
|
+ else:
|
|
|
+ data["error_count"] += 1
|
|
|
+ cls.__data_queue.put(data)
|
|
|
+ cls.__data_queue.task_done()
|
|
|
+ continue
|
|
|
+ total_size = int(response.headers.get("content-length", 0))
|
|
|
+ if os.path.exists(save_name):
|
|
|
+ now_range = os.path.getsize(save_name)
|
|
|
+ else:
|
|
|
+ now_range = 0
|
|
|
+ if now_range >= total_size:
|
|
|
+ print(f"[{post_id}] {name} 已下载完成...")
|
|
|
+ cls.__data_queue.task_done()
|
|
|
+ continue
|
|
|
+ header = HEADER.copy()
|
|
|
+ header.update(Range=f"bytes={now_range}-")
|
|
|
+
|
|
|
+ block_size = 1024
|
|
|
+ pbar = tqdm(desc=f"{name}", total=total_size, initial=now_range, unit="KB", unit_scale=True, leave=False)
|
|
|
+ try:
|
|
|
+ response = cls.__session.get(url=url, headers=header, stream=True)
|
|
|
+ with open(save_name, "ab") as file:
|
|
|
+ for chuck in response.iter_content(block_size):
|
|
|
+ file.write(chuck)
|
|
|
+ pbar.update(len(chuck))
|
|
|
+ pbar.set_postfix(info=f"剩余个数:{cls.__data_queue.qsize()}")
|
|
|
+ except (ConnectionError, ChunkedEncodingError) as e:
|
|
|
+ if data["error_count"] >= 3:
|
|
|
+ cls.__error_ids.append(f"{post_id} {name}")
|
|
|
+ else:
|
|
|
+ data["error_count"] += 1
|
|
|
+ cls.__data_queue.put(data)
|
|
|
+ pbar.write(f"出现错误:\n{e}")
|
|
|
+ finally:
|
|
|
+ pbar.close()
|
|
|
+ cls.__data_queue.task_done()
|
|
|
+ continue
|
|
|
+
|
|
|
+ @classmethod
|
|
|
+ def is_error_ids(cls):
|
|
|
+ if cls.__error_ids:
|
|
|
+ return cls.__error_ids.sort()
|
|
|
+ else:
|
|
|
+ return False
|