Refactor the code to support resumable downloads, proxies, and multi-threading #251

Open · wants to merge 2 commits into master
8 changes: 8 additions & 0 deletions README.md
100644 → 100755
@@ -7,12 +7,20 @@

## Download the game files


Run the Python 3 script from the repository root:

``` bash
pip install retrying requests[socks]
python download_data.py
```

Setting the `DOWNLOAD_PROXY` environment variable enables a proxy; alternatively, point `BASE_URL` at a reverse-proxy address (a sketch follows the example below).

``` bash
env DOWNLOAD_PROXY="socks5://127.0.0.1:1080" python download_data.py
```
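
If you go the reverse-proxy route instead, the change lives in `download_data.py`; a minimal sketch of the edit, assuming a placeholder mirror address (replace it with your own):

``` python
# download_data.py (excerpt) — point BASE_URL at your own reverse proxy.
# "https://your-mirror.example.com" is a placeholder, not a real mirror.
class Config:
    BUF_SIZE = 65536
    THREADS_NUM = 10
    BASE_URL: str = "https://your-mirror.example.com"  # was "https://dos-bin.zczc.cz"
    ...
```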

If downloads fail, see [Issue #26](https://github.com/rwv/chinese-dos-games/issues/26).

## Game list
195 changes: 151 additions & 44 deletions download_data.py
100644 → 100755
@@ -1,61 +1,168 @@
-import hashlib
-import inspect
 import os
 import json
-import urllib.request
+import hashlib
+import shutil
+import requests

+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from urllib.parse import quote
+from retrying import retry


+class Config:
+    BUF_SIZE = 65536  # read buffer size in bytes
+    THREADS_NUM = 10  # number of worker threads for concurrent download tasks
+
+    BASE_URL: str = "https://dos-bin.zczc.cz"  # base URL of the game file source
+
+    DESTINATION_PATH: Path = (
+        Path(__file__).resolve().parent / "bin"
+    )  # destination path for the downloaded game files
+
+    GAMES_JSON_PATH: Path = (
+        Path(__file__).resolve().parent / "games.json"
+    )  # JSON data file containing information about the games
+
+    PROXY: str = os.getenv(
+        "DOWNLOAD_PROXY"
+    )  # proxy server address read from the environment


+@retry(stop_max_attempt_number=3)
+def make_request(
+    session: requests.Session, url_encoded: str, proxies=None, headers=None
+):
+    return session.get(url_encoded, stream=True, proxies=proxies, headers=headers)


+class GameDownloader:
+    """Game file downloader with support for resuming partial downloads."""
+
+    @staticmethod
+    def get_downloaded_size(file_path: Path) -> int:
+        # Size of the partially downloaded file, or 0 if it does not exist yet.
+        return file_path.stat().st_size if file_path.exists() else 0
+
+    @staticmethod
+    def calculate_sha256(file_path: Path) -> str:
+        """Calculate the SHA-256 hash of the given file."""
+        # Check that the file exists before trying to open it
+        if not file_path or not file_path.exists():
+            return None
+
+        sha256_hasher = hashlib.sha256()
+
+        with file_path.open("rb") as f:
+            for data in iter(lambda: f.read(Config.BUF_SIZE), b""):
+                sha256_hasher.update(data)
+
+        return sha256_hasher.hexdigest()
+
+    @staticmethod
+    def download_file(
+        session: requests.Session, identifier: str, url_encoded: str, destination: str
+    ):
+        print(f"Downloading {identifier} game file")
+
+        proxies = None
+
+        if Config.PROXY and Config.PROXY.startswith(
+            ("socks5h://", "socks5://", "http://", "https://")
+        ):
+            proxies = {
+                "http": Config.PROXY,
+                "https": Config.PROXY,
+            }
+
+        # Find out how many bytes have already been downloaded
+        downloaded_bytes = GameDownloader.get_downloaded_size(Path(destination))
+
+        # Add a Range header to the HTTP request to resume the download
+        headers = {"Range": f"bytes={downloaded_bytes}-"}
+
+        try:
+            with make_request(
+                session, url_encoded, proxies=proxies, headers=headers
+            ) as r:
+                r.raise_for_status()
+
+                with open(destination, "ab") as f:
+                    shutil.copyfileobj(r.raw, f)
+
+        except Exception as e:
+            print(f"Error downloading {identifier}: {e}")
+
+            # Delete the file only if the download failed
+            if Path(destination).exists():
+                Path(destination).unlink()
+
+    @staticmethod
+    def load_game_info(path: Path) -> dict:
+        """Load game information from the given JSON file."""
+        with open(path, "r") as f:
+            return json.load(f)["games"]

-from concurrent.futures import ThreadPoolExecutor, wait
+    @classmethod
+    def download_games(
+        cls, base_url=Config.BASE_URL, destination_path=Config.DESTINATION_PATH
+    ):

-root = os.path.dirname(os.path.abspath(
-    inspect.getfile(inspect.currentframe())))
+        """Download and save the game files using multiple threads."""

-PREFIX = "https://dos-bin.zczc.cz/"
-DESTINATION = os.path.join(root, 'bin')
-BUF_SIZE = 65536
-THREAD_SIZE = 10
+        destination_path.mkdir(
+            parents=True, exist_ok=True
+        )  # create the directory if it does not exist

-# read game infos from games.json
-with open(os.path.join(root, 'games.json'), encoding='utf8') as f:
-    game_infos = json.load(f)
+        game_infos = cls.load_game_info(Config.GAMES_JSON_PATH)

+        tasks = []

-def generate_sha256(file):
-    sha256 = hashlib.sha256()
-    with open(file, 'rb') as f:
-        while True:
-            data = f.read(BUF_SIZE)
-            if not data:
-                break
-            sha256.update(data)
-    return sha256.hexdigest()
+        with ThreadPoolExecutor(max_workers=Config.THREADS_NUM) as executor:

+            hash_futures = {
+                executor.submit(
+                    cls.calculate_sha256,
+                    Path(f"{destination_path}/{identifier}.zip")
+                    if Path(f"{destination_path}/{identifier}.zip").exists()
+                    else None,
+                ): identifier
+                for identifier in game_infos.keys()
+            }

-def download(identifier, url, file):
-    print(f'Downloading {identifier} game file')
-    urllib.request.urlretrieve(url, file)
+            json_futures = {
+                executor.submit(
+                    cls.load_game_info, Config.GAMES_JSON_PATH
+                ): "game_infos"
+            }

+            for future in as_completed(hash_futures):
+                identifier = hash_futures[future]
+                try:
+                    sha_hash_of_file = (
+                        future.result() if future.result() is not None else ""
+                    )
+                except Exception as exc:
+                    print(f"{identifier} generated an exception: {exc}")
+                else:
+                    file_path = destination_path / f"{identifier}.zip"
+                    url_encoded = quote(f"{base_url}/{identifier}.zip", safe="/:")

-def main(prefix=PREFIX, destination=DESTINATION):
-    # create folder
-    os.makedirs(destination, exist_ok=True)
+                    if file_path.exists() and sha_hash_of_file == game_infos.get(
+                        identifier
+                    ).get("sha256"):
+                        print(f"Skipping {identifier}")
+                        continue

-    executor = ThreadPoolExecutor(max_workers=THREAD_SIZE)
-    all_task = list()
+                    tasks.append((identifier, url_encoded, str(file_path)))

-    downloaded = list()
-    for identifier in game_infos['games'].keys():
-        file = os.path.normcase(os.path.join(destination, identifier + '.zip'))
-        url = prefix + urllib.parse.quote(identifier) + '.zip'
-        if os.path.isfile(file) and generate_sha256(file) == game_infos['games'][identifier]['sha256']:
-            print(f'skip {identifier}')
-        else:
-            downloaded.append(identifier)
-            task = executor.submit(download, identifier, url, file)
-            all_task.append(task)
+            with requests.Session() as session:

-    wait(all_task)
-    return downloaded
+                list(
+                    executor.map(lambda args: cls.download_file(session, *args), tasks)
+                )


-if __name__ == '__main__':
-    main()
+if __name__ == "__main__":
+    GameDownloader.download_games()
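
For reference, the refactored entry point also accepts overrides, so a caller can pick the source and destination without editing `Config`. A minimal, illustrative sketch (the mirror URL and target directory below are placeholders):

``` python
from pathlib import Path

from download_data import GameDownloader

# Both keyword arguments are optional; the defaults come from Config.
GameDownloader.download_games(
    base_url="https://your-mirror.example.com",  # placeholder reverse proxy
    destination_path=Path("bin"),                # placeholder download directory
)
```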