From 30a2ad7a638c84d6b79e77e5162e8d2f92a1f1e4 Mon Sep 17 00:00:00 2001 From: Jesse Bannon Date: Sun, 2 Jun 2024 10:53:19 -0700 Subject: [PATCH] [BUGFIX] Prevent corrupt writes to download archive (#983) Attempts to make writes to the download archive safer (https://github.com/jmbannon/ytdl-sub/issues/982). `ytdl-sub` will now *copy* the download archive from the working directory to the output directory with a temp name, then perform a *move* to store it with its final expected name. This will drastically lower the window of time where the process could die mid-write and corrupt it on the next read. --- .../subscriptions/subscription_download.py | 2 +- .../subscription_ytdl_options.py | 4 +++- src/ytdl_sub/utils/file_handler.py | 6 +++++- .../enhanced_download_archive.py | 20 +++++++++++++++---- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/ytdl_sub/subscriptions/subscription_download.py b/src/ytdl_sub/subscriptions/subscription_download.py index 1655a6333..95108ecee 100644 --- a/src/ytdl_sub/subscriptions/subscription_download.py +++ b/src/ytdl_sub/subscriptions/subscription_download.py @@ -161,7 +161,7 @@ def _maintain_archive_file(self): ) self.download_archive.save_download_mappings() - FileHandler.delete(self.download_archive.working_file_path) + FileHandler.delete(self.download_archive.working_ytdl_file_path) @contextlib.contextmanager def _remove_empty_directories_in_output_directory(self): diff --git a/src/ytdl_sub/subscriptions/subscription_ytdl_options.py b/src/ytdl_sub/subscriptions/subscription_ytdl_options.py index 230134db3..1cc6f3174 100644 --- a/src/ytdl_sub/subscriptions/subscription_ytdl_options.py +++ b/src/ytdl_sub/subscriptions/subscription_ytdl_options.py @@ -89,7 +89,9 @@ def _output_options(self) -> Dict: ytdl_options = {} if self._preset.output_options.maintain_download_archive: - ytdl_options["download_archive"] = self._enhanced_download_archive.working_file_path + ytdl_options["download_archive"] = ( + self._enhanced_download_archive.working_ytdl_file_path + ) if self._preset.output_options.keep_max_files: keep_max_files = int( self._overrides.apply_formatter(self._preset.output_options.keep_max_files) diff --git a/src/ytdl_sub/utils/file_handler.py b/src/ytdl_sub/utils/file_handler.py index 646d44e17..faa7d2dcd 100644 --- a/src/ytdl_sub/utils/file_handler.py +++ b/src/ytdl_sub/utils/file_handler.py @@ -385,7 +385,11 @@ def copy(cls, src_file_path: Union[str, Path], dst_file_path: Union[str, Path]): dst_file_path Destination file """ - shutil.copyfile(src=src_file_path, dst=dst_file_path) + # Perform the copy by first writing to a temp file, then moving it. + # This tries to prevent corrupted writes if the processed dies mid-write, + atomic_dst = f"{dst_file_path}-ytdl-sub-incomplete" + shutil.copyfile(src=src_file_path, dst=atomic_dst) + shutil.move(src=atomic_dst, dst=dst_file_path) @classmethod def move(cls, src_file_path: Union[str, Path], dst_file_path: Union[str, Path]): diff --git a/src/ytdl_sub/ytdl_additions/enhanced_download_archive.py b/src/ytdl_sub/ytdl_additions/enhanced_download_archive.py index 616fe2f95..ec155245b 100644 --- a/src/ytdl_sub/ytdl_additions/enhanced_download_archive.py +++ b/src/ytdl_sub/ytdl_additions/enhanced_download_archive.py @@ -506,10 +506,19 @@ def working_file_path(self) -> str: """ Returns ------- - The download mapping's file path in the working directory. + The download mapping's file path in the working directory for ytdl usage """ return str(Path(self.working_directory) / self.file_name) + @property + def working_ytdl_file_path(self) -> str: + """ + Returns + ------- + The download mapping's file path in the working directory for ytdl usage + """ + return f"{self.working_file_path}-ytdl-archive" + @property def mapping(self) -> DownloadMappings: """ @@ -541,7 +550,7 @@ def prepare_download_archive(self) -> "EnhancedDownloadArchive": return self # Otherwise, create a ytdl download archive file in the working directory. - self.mapping.to_download_archive().to_file(self.working_file_path) + self.mapping.to_download_archive().to_file(self.working_ytdl_file_path) return self @@ -603,15 +612,18 @@ def save_download_mappings(self) -> "EnhancedDownloadArchive": if self._migrated_file_name: self._download_mapping.to_file(output_json_file=self.working_file_path) self.save_file_to_output_directory( - file_name=self.file_name, output_file_name=self._migrated_file_name + file_name=self.file_name, output_file_name=self._migrated_file_name, copy_file=True ) + FileHandler.delete(file_path=self.working_file_path) + # and delete the old one if the name differs if self._file_name != self._migrated_file_name: self.delete_file_from_output_directory(file_name=self.file_name) # Otherwise, only save if there are changes to the transaction log elif not self.get_file_handler_transaction_log().is_empty: self._download_mapping.to_file(output_json_file=self.working_file_path) - self.save_file_to_output_directory(file_name=self.file_name) + self.save_file_to_output_directory(file_name=self.file_name, copy_file=True) + FileHandler.delete(file_path=self.working_file_path) return self def delete_file_from_output_directory(self, file_name: str):