Skip to content

Commit

Permalink
[BUGFIX] Prevent corrupt writes to download archive (#983)
Browse files Browse the repository at this point in the history
Attempts to make writes to the download archive safer (#982).

`ytdl-sub` will now *copy* the download archive from the working directory to the output directory with a temp name, then perform a *move* to store it with its final expected name. This will drastically lower the window of time where  the process could die mid-write and corrupt it on the next read.
  • Loading branch information
jmbannon authored Jun 2, 2024
1 parent 74625c2 commit 30a2ad7
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/ytdl_sub/subscriptions/subscription_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _maintain_archive_file(self):
)

self.download_archive.save_download_mappings()
FileHandler.delete(self.download_archive.working_file_path)
FileHandler.delete(self.download_archive.working_ytdl_file_path)

@contextlib.contextmanager
def _remove_empty_directories_in_output_directory(self):
Expand Down
4 changes: 3 additions & 1 deletion src/ytdl_sub/subscriptions/subscription_ytdl_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def _output_options(self) -> Dict:
ytdl_options = {}

if self._preset.output_options.maintain_download_archive:
ytdl_options["download_archive"] = self._enhanced_download_archive.working_file_path
ytdl_options["download_archive"] = (
self._enhanced_download_archive.working_ytdl_file_path
)
if self._preset.output_options.keep_max_files:
keep_max_files = int(
self._overrides.apply_formatter(self._preset.output_options.keep_max_files)
Expand Down
6 changes: 5 additions & 1 deletion src/ytdl_sub/utils/file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,11 @@ def copy(cls, src_file_path: Union[str, Path], dst_file_path: Union[str, Path]):
dst_file_path
Destination file
"""
shutil.copyfile(src=src_file_path, dst=dst_file_path)
# Perform the copy by first writing to a temp file, then moving it.
# This tries to prevent corrupted writes if the processed dies mid-write,
atomic_dst = f"{dst_file_path}-ytdl-sub-incomplete"
shutil.copyfile(src=src_file_path, dst=atomic_dst)
shutil.move(src=atomic_dst, dst=dst_file_path)

@classmethod
def move(cls, src_file_path: Union[str, Path], dst_file_path: Union[str, Path]):
Expand Down
20 changes: 16 additions & 4 deletions src/ytdl_sub/ytdl_additions/enhanced_download_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,10 +506,19 @@ def working_file_path(self) -> str:
"""
Returns
-------
The download mapping's file path in the working directory.
The download mapping's file path in the working directory for ytdl usage
"""
return str(Path(self.working_directory) / self.file_name)

@property
def working_ytdl_file_path(self) -> str:
"""
Returns
-------
The download mapping's file path in the working directory for ytdl usage
"""
return f"{self.working_file_path}-ytdl-archive"

@property
def mapping(self) -> DownloadMappings:
"""
Expand Down Expand Up @@ -541,7 +550,7 @@ def prepare_download_archive(self) -> "EnhancedDownloadArchive":
return self

# Otherwise, create a ytdl download archive file in the working directory.
self.mapping.to_download_archive().to_file(self.working_file_path)
self.mapping.to_download_archive().to_file(self.working_ytdl_file_path)

return self

Expand Down Expand Up @@ -603,15 +612,18 @@ def save_download_mappings(self) -> "EnhancedDownloadArchive":
if self._migrated_file_name:
self._download_mapping.to_file(output_json_file=self.working_file_path)
self.save_file_to_output_directory(
file_name=self.file_name, output_file_name=self._migrated_file_name
file_name=self.file_name, output_file_name=self._migrated_file_name, copy_file=True
)
FileHandler.delete(file_path=self.working_file_path)

# and delete the old one if the name differs
if self._file_name != self._migrated_file_name:
self.delete_file_from_output_directory(file_name=self.file_name)
# Otherwise, only save if there are changes to the transaction log
elif not self.get_file_handler_transaction_log().is_empty:
self._download_mapping.to_file(output_json_file=self.working_file_path)
self.save_file_to_output_directory(file_name=self.file_name)
self.save_file_to_output_directory(file_name=self.file_name, copy_file=True)
FileHandler.delete(file_path=self.working_file_path)
return self

def delete_file_from_output_directory(self, file_name: str):
Expand Down

0 comments on commit 30a2ad7

Please sign in to comment.