From 1f2138a908d9a5a50cb5d779c17b0dd29b8c3f16 Mon Sep 17 00:00:00 2001 From: daiaji Date: Wed, 21 Feb 2024 00:54:34 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E4=BB=A5=E6=94=AF=E6=8C=81=E6=96=AD=E7=82=B9=E7=BB=AD=E4=BC=A0?= =?UTF-8?q?=E3=80=81=E4=BB=A3=E7=90=86=E3=80=81=E5=A4=9A=E7=BA=BF=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 8 ++ download_data.py | 195 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 159 insertions(+), 44 deletions(-) mode change 100644 => 100755 README.md mode change 100644 => 100755 download_data.py diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 75ed03f2..14d98f42 --- a/README.md +++ b/README.md @@ -7,12 +7,20 @@ ## 下载游戏文件 + 在根目录下运行 Python 3 脚本 ``` python +pip install retrying requests[socks] python download_data.py ``` +为`DOWNLOAD_PROXY`环境变量赋值可以启用代理,替代方法是修改`BASE_URL`为反代地址。 + +``` python +env DOWNLOAD_PROXY="socks5://127.0.0.1:1080" python download_data.py +``` + 若下载出错请参见 [Issue #26](https://github.com/rwv/chinese-dos-games/issues/26) ## 游戏列表 diff --git a/download_data.py b/download_data.py old mode 100644 new mode 100755 index 006a43c5..d28cff74 --- a/download_data.py +++ b/download_data.py @@ -1,61 +1,168 @@ -import hashlib -import inspect import os import json -import urllib.request +import hashlib +import shutil +import requests + +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from urllib.parse import quote +from retrying import retry + + +class Config: + BUF_SIZE = 65536 # 读取缓冲区大小(字节) + THREADS_NUM = 10 # 用于同时下载任务的线程数 + + BASE_URL: str = "https://dos-bin.zczc.cz" # 游戏文件源的基本URL + + DESTINATION_PATH: Path = ( + Path(__file__).resolve().parent / "bin" + ) # 保存下载的游戏文件的目标路径。 + + GAMES_JSON_PATH: Path = ( + Path(__file__).resolve().parent / "games.json" + ) #J SON数据文件,包含有关游戏的信息。 + + PROXY: str = os.getenv( + "DOWNLOAD_PROXY" + ) # 从环境变量中提取的代理服务器地址。 + + +@retry(stop_max_attempt_number=3) +def make_request( + session: requests.Session, url_encoded: str, proxies=None, headers=None +): + return session.get(url_encoded, stream=True, proxies=proxies, headers=headers) + + +class GameDownloader: + """ 创建新方法用于获得已下载文件的字节数 """ + @staticmethod + def get_downloaded_size(file_path: Path) -> int: + # 获取已部分下载的文件的大小。 + return file_path.stat().st_size if file_path.exists() else 0 + + @staticmethod + def calculate_sha256(file_path: Path) -> str: + """ 计算给定文件的SHA-256哈希。""" + # 在尝试打开文件之前,请检查该文件是否存在 + if not file_path or not file_path.exists(): + return None + + sha256_hasher = hashlib.sha256() + + with file_path.open("rb") as f: + for data in iter(lambda: f.read(Config.BUF_SIZE), b""): + sha256_hasher.update(data) + + return sha256_hasher.hexdigest() + + @staticmethod + def download_file( + session: requests.Session, identifier: str, url_encoded: str, destination: str + ): + print(f"Downloading {identifier} game file") + + proxies = None + + if Config.PROXY and Config.PROXY.startswith( + ("socks5h://", "socks5://", "http://", "https://") + ): + proxies = { + "http": Config.PROXY, + "https": Config.PROXY, + } + + # 获取已经下载了多少字节 + downloaded_bytes = GameDownloader.get_downloaded_size(Path(destination)) + + # 添加 Range header 到 HTTP 请求中以实现断点续传功能 + headers = {"Range": f"bytes={downloaded_bytes}-"} + + try: + with make_request( + session, url_encoded, proxies=proxies, headers=headers + ) as r: + r.raise_for_status() + + with open(destination, "ab") as f: + shutil.copyfileobj(r.raw, f) + + except Exception as e: + print(f"Error downloading {identifier}: {e}") + + # 仅当下载过程出错时才删除文件 + if Path(destination).exists(): + Path(destination).unlink() + + @staticmethod + def load_game_info(path: Path) -> dict: + + """从指定json文件加载游戏信息""" + + with open(path, "r") as f: + return json.load(f)["games"] -from concurrent.futures import ThreadPoolExecutor, wait + @classmethod + def download_games( + cls, base_url=Config.BASE_URL, destination_path=Config.DESTINATION_PATH + ): -root = os.path.dirname(os.path.abspath( - inspect.getfile(inspect.currentframe()))) + """此功能使用多线程处理游戏文件的下载和保存。""" -PREFIX = "https://dos-bin.zczc.cz/" -DESTINATION = os.path.join(root, 'bin') -BUF_SIZE = 65536 -THREAD_SIZE = 10 + destination_path.mkdir( + parents=True, exist_ok=True + ) # 创建目录(如果不存在) -# read game infos from games.json -with open(os.path.join(root, 'games.json'), encoding='utf8') as f: - game_infos = json.load(f) + game_infos = cls.load_game_info(Config.GAMES_JSON_PATH) + tasks = [] -def generate_sha256(file): - sha256 = hashlib.sha256() - with open(file, 'rb') as f: - while True: - data = f.read(BUF_SIZE) - if not data: - break - sha256.update(data) - return sha256.hexdigest() + with ThreadPoolExecutor(max_workers=Config.THREADS_NUM) as executor: + hash_futures = { + executor.submit( + cls.calculate_sha256, + Path(f"{destination_path}/{identifier}.zip") + if (Path(f"{destination_path}/{identifier}.zip").exists()) + else None, + ): identifier + for identifier in game_infos.keys() + } -def download(identifier, url, file): - print(f'Downloading {identifier} game file') - urllib.request.urlretrieve(url, file) + json_futures = { + executor.submit( + cls.load_game_info, Config.GAMES_JSON_PATH + ): "game_infos" + } + for future in as_completed(hash_futures): + identifier = hash_futures[future] + try: + sha_hash_of_file = ( + future.result() if future.result() is not None else "" + ) + except Exception as exc: + print(f"{identifier} generated an exception: {exc}") + else: + file_path = destination_path / f"{identifier}.zip" + url_encoded = quote(f"{base_url}/{identifier}.zip", safe="/:") -def main(prefix=PREFIX, destination=DESTINATION): - # create folder - os.makedirs(destination, exist_ok=True) + if file_path.exists() and sha_hash_of_file == game_infos.get( + identifier + ).get("sha256"): + print(f"Skipping {identifier}") + continue - executor = ThreadPoolExecutor(max_workers=THREAD_SIZE) - all_task = list() + tasks.append((identifier, url_encoded, str(file_path))) - downloaded = list() - for identifier in game_infos['games'].keys(): - file = os.path.normcase(os.path.join(destination, identifier + '.zip')) - url = prefix + urllib.parse.quote(identifier) + '.zip' - if os.path.isfile(file) and generate_sha256(file) == game_infos['games'][identifier]['sha256']: - print(f'skip {identifier}') - else: - downloaded.append(identifier) - task = executor.submit(download, identifier, url, file) - all_task.append(task) + with requests.Session() as session: - wait(all_task) - return downloaded + list( + executor.map(lambda args: cls.download_file(session, *args), tasks) + ) -if __name__ == '__main__': - main() +if __name__ == "__main__": + GameDownloader.download_games() From e142162c977e708d2cbf5445983e3b00d0e435f8 Mon Sep 17 00:00:00 2001 From: daiaji Date: Wed, 21 Feb 2024 01:02:36 +0800 Subject: [PATCH 2/2] fix --- download_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/download_data.py b/download_data.py index d28cff74..700e5b69 100755 --- a/download_data.py +++ b/download_data.py @@ -22,7 +22,7 @@ class Config: GAMES_JSON_PATH: Path = ( Path(__file__).resolve().parent / "games.json" - ) #J SON数据文件,包含有关游戏的信息。 + ) # JSON数据文件,包含有关游戏的信息。 PROXY: str = os.getenv( "DOWNLOAD_PROXY"