#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
---------------------------------------
# @Project : DAGASI
# @File : kemono.py
# @Author : GrayZhao
# @Date : 2023/2/20 17:26
# @Version :
# @Description :
---------------------------------------
"""
  13. import gevent
  14. import requests
  15. from requests.exceptions import ConnectionError, ChunkedEncodingError
  16. import re
  17. import os
  18. from queue import Queue
  19. from bs4 import BeautifulSoup
  20. from tqdm import tqdm
  21. HEADER = {
  22. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
  23. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
  24. }
  25. URL_LOGIN = "https://kemono.party/account/login"
  26. HEADER_LOGIN = {
  27. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
  28. "origin": "https://kemono.party",
  29. "referer": "https://kemono.party/account/login",
  30. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
  31. }
  32. """
  33. URL = "https://kemono.party/fanbox/user/1549213/post/5285772"
  34. response = requests.get(URL, headers=HEADER)
  35. soup = BeautifulSoup(response.text, features="lxml")
  36. timestamp = soup.find("div", class_="post__published").findChild("time")["datetime"].split(" ")[0]
  37. print(timestamp)
  38. downloads = soup.find_all("a", class_="post__attachment-link", text=re.compile(r"高画質"))
  39. for download in downloads:
  40. dw_url = download["href"]
  41. name = download.text.replace("\n", "").strip().replace("Download ", "")
  42. print(name, dw_url)
  43. """
  44. class Kemono:
  45. __error_ids = list()
  46. __data_queue = Queue()
  47. __session = requests.session()
  48. @classmethod
  49. def login(cls, username: str, passwd: str):
  50. _data = {"username": username, "password": passwd}
  51. cls.__session.post(url=URL_LOGIN, headers=HEADER_LOGIN, data=_data)
  52. @classmethod
  53. def produce(cls, id_q: Queue):
  54. tasks = [gevent.spawn(cls.__create_data, id_q) for _ in range(4)]
  55. gevent.joinall(tasks)
  56. cls.__data_queue.join()
  57. @classmethod
  58. def __create_data(cls, id_q: Queue):
  59. while not id_q.empty():
  60. post_id = id_q.get_nowait()
  61. url = f"https://kemono.party/fanbox/user/1549213/post/{post_id}"
  62. response = cls.__session.get(url, headers=HEADER)
  63. soup = BeautifulSoup(response.text, features="lxml")
  64. timestamp = soup.find("div", class_="post__published").findChild("time")["datetime"].split(" ")[0]
  65. downloads = soup.find_all("a", class_="post__attachment-link", text=re.compile(r"高画質"))
  66. for download in downloads:
  67. dw_url = download["href"]
  68. all_name = download.text.replace("\n", "").strip().replace("Download ", "")
  69. name = os.path.splitext(all_name)[0]
  70. save_name = f"[{timestamp}] {all_name}"
  71. q_data = {
  72. "postID": post_id,
  73. "name": name,
  74. "saveName": save_name,
  75. "url": dw_url,
  76. "error_count": 0
  77. }
  78. cls.__data_queue.put_nowait(q_data)
  79. @classmethod
  80. def consume(cls):
  81. tasks = [gevent.spawn(cls.__download) for _ in range(4)]
  82. gevent.joinall(tasks)
  83. @classmethod
  84. def __download(cls):
  85. while True:
  86. data = cls.__data_queue.get()
  87. post_id = data["postID"]
  88. name = data["name"]
  89. save_name = "output/" + data["saveName"]
  90. url = data["url"]
  91. try:
  92. response = cls.__session.get(url=url, headers=HEADER, stream=True)
  93. except (ConnectionError, ChunkedEncodingError):
  94. if data["error_count"] >= 3:
  95. cls.__error_ids.append(f"{post_id} {name}")
  96. else:
  97. data["error_count"] += 1
  98. cls.__data_queue.put(data)
  99. cls.__data_queue.task_done()
  100. continue
  101. total_size = int(response.headers.get("content-length", 0))
  102. if os.path.exists(save_name):
  103. now_range = os.path.getsize(save_name)
  104. else:
  105. now_range = 0
  106. if now_range >= total_size:
  107. print(f"[{post_id}] {name} 已下载完成...")
  108. cls.__data_queue.task_done()
  109. continue
  110. header = HEADER.copy()
  111. header.update(Range=f"bytes={now_range}-")
  112. block_size = 1024
  113. pbar = tqdm(desc=f"{name}", total=total_size, initial=now_range, unit="KB", unit_scale=True, leave=False)
  114. try:
  115. response = cls.__session.get(url=url, headers=header, stream=True)
  116. with open(save_name, "ab") as file:
  117. for chuck in response.iter_content(block_size):
  118. file.write(chuck)
  119. pbar.update(len(chuck))
  120. pbar.set_postfix(info=f"剩余个数:{cls.__data_queue.qsize()}")
  121. except (ConnectionError, ChunkedEncodingError) as e:
  122. if data["error_count"] >= 3:
  123. cls.__error_ids.append(f"{post_id} {name}")
  124. else:
  125. data["error_count"] += 1
  126. cls.__data_queue.put(data)
  127. pbar.write(f"出现错误:\n{e}")
  128. finally:
  129. pbar.close()
  130. cls.__data_queue.task_done()
  131. continue
  132. @classmethod
  133. def is_error_ids(cls):
  134. if cls.__error_ids:
  135. return cls.__error_ids.sort()
  136. else:
  137. return False