From dd06254e9babd595befa5e07b139552319a45d61 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 22 Nov 2024 17:09:48 -0500 Subject: [PATCH 1/9] [ENH] Allow bids files to be searched with datman file names --- datman/scan.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/datman/scan.py b/datman/scan.py index a991e6b4..ecc17912 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -123,6 +123,7 @@ def __init__(self, subject_id, config, bids_root=None): self.bids_root = bids_root self.bids_path = self.__get_bids() + self._bids_inventory = self._make_bids_inventory() # This one lists all existing resource folders for the timepoint. self.resources = self._get_resources(config) @@ -149,14 +150,71 @@ def _get_ident(self, subid): return ident def find_files(self, file_stem, format="nii"): + """Find files belonging to the session matching a given file name. + + Args: + file_stem (:obj:`str`): A valid datman-style file name, with or + without the extension and preceding path. + format (:obj:`str`): The configured datman folder path to search + through. Default: 'nii' + + Returns: + :obj:`list`: a list of full paths to matching files, if any. Or + an empty list if none are found. + + Raises: + datman.scanid.ParseException: If an invalid datman file name + is given. + """ + if format == 'bids': + return self._find_bids_files(file_stem) + try: base_path = getattr(self, f"{format}_path") except AttributeError: return [] + if not os.path.exists(base_path): return [] + return glob.glob(os.path.join(base_path, file_stem + "*")) + def _find_bids_files(self, file_stem): + ident, _, series, _ = datman.scanid.parse_filename(file_stem) + if ident.session != self.session: + return [] + if int(series) in self._bids_inventory: + return self._bids_inventory[int(series)] + return [] + + def _make_bids_inventory(self): + if not self.bids_path: + return {} + + inventory = {} + for path, _, files in os.walk(self.bids_path): + for item in files: + if not item.endswith(".json"): + continue + + json_path = os.path.join(path, item) + contents = datman.utils.read_json(json_path) + + repeat = contents['Repeat'] if 'Repeat' in contents else '01' + if repeat != self.session: + continue + + series = int(contents['SeriesNumber']) + if series in inventory: + raise Exception( + f"More than one series {series} found for " + f"{self.ident}") + + base_fname = os.path.splitext(json_path)[0] + inventory[series] = glob.glob(base_fname + "*") + + return inventory + def get_tagged_nii(self, tag): try: matched_niftis = self.__nii_dict[tag] From 79f499ea1bbb696fff8913db3a884e58f8668ec2 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 22 Nov 2024 19:18:06 -0500 Subject: [PATCH 2/9] [ENH] Allow blacklisted files to be saved (separately). --- bin/dm_blacklist_rm.py | 132 +++++++++++++++++++++++++++-------------- 1 file changed, 87 insertions(+), 45 deletions(-) diff --git a/bin/dm_blacklist_rm.py b/bin/dm_blacklist_rm.py index 48f9384f..c041dcec 100755 --- a/bin/dm_blacklist_rm.py +++ b/bin/dm_blacklist_rm.py @@ -1,6 +1,7 @@ #!/usr/bin/env python """ -Searches the file system for any data on the blacklist and deletes them. +Searches the file system for any data on the blacklist and removes them +from subject data folders to avoid pipeline failures etc. Usage: dm_blacklist_rm.py [options] [--path=KEY]... @@ -9,6 +10,8 @@ The name of a datman managed project Options: + --keep If provided, the blacklisted scans will be moved to + a 'blacklisted' subdir instead of being deleted. --path KEY If provided overrides the 'BlacklistDel' setting from the config files, which defines the directories to delete blacklisted items from. 'KEY' may be the name @@ -22,15 +25,14 @@ """ import os -import glob import logging +import shutil from docopt import docopt import datman.config import datman.scan import datman.utils -from datman.scanid import ParseException logging.basicConfig(level=logging.WARN, format="[%(name)s] %(levelname)s: %(message)s") @@ -43,6 +45,7 @@ def main(): global DRYRUN arguments = docopt(__doc__) project = arguments[''] + keep = arguments['--keep'] override_paths = arguments['--path'] verbose = arguments['--verbose'] debug = arguments['--debug'] @@ -58,13 +61,30 @@ def main(): config = datman.config.config(study=project) metadata = datman.utils.get_subject_metadata(config, allow_partial=True) - base_paths = get_base_paths(config, override_paths) + search_paths = get_search_paths(config, override_paths) - remove_blacklisted_items(metadata, base_paths) + for sub in metadata: + if not metadata[sub]: + continue + + logger.debug(f"Working on {sub}") + session = datman.scan.Scan(sub, config) + handle_blacklisted_scans( + session, metadata[sub], search_paths, keep=keep + ) -def get_base_paths(config, user_paths): - """Get the full path to each base directory to search for blacklisted data. +def get_search_paths(config, user_paths=None): + """Get path types to search for blacklisted files in. + + Args: + config (:obj:`datman.config.config`): A datman config object for the + current study. + user_paths (:obj:`list`): A list of path types to search through. + Optional. Causes the configuration file setting to be ignored. + + Returns: + list: A list of path types that will be searched. """ if user_paths: path_keys = user_paths @@ -74,51 +94,73 @@ def get_base_paths(config, user_paths): except datman.config.UndefinedSetting: # Fall back to the default path_keys = ['nii', 'mnc', 'nrrd', 'resources'] + return path_keys + + +def handle_blacklisted_scans(session, bl_scans, search_paths, keep=False): + """Move or delete all blacklisted scans for the given path types. + + Args: + session (:obj:`datman.scan.Scan`): A datman scan object for the + current session. + bl_scans (:obj:`list`): A list of strings each representing a + blacklisted scan. + search_paths (:obj:`list`): A list of path types to move/delete + blacklisted scans from. Each path type must exist in the + datman config files. + keep (bool): Whether to move files instead of deleting them. Optional, + default False. + """ + for scan in bl_scans: + for path_type in search_paths: + found = session.find_files(scan, format=path_type) - base_paths = [] - for key in path_keys: - try: - found = config.get_path(key) - except datman.config.UndefinedSetting: - logger.warning(f"Given undefined path type - {key}. Ignoring.") - continue - - if os.path.exists(found): - base_paths.append(found) + if not found: + continue - return base_paths + logger.debug(f"Files found for removal: {found}") + if DRYRUN: + logger.info("DRYRUN - Leaving files in place.") + continue -def remove_blacklisted_items(metadata, base_paths): - for sub in metadata: - blacklist_entries = metadata[sub] - if not blacklist_entries: - continue + for item in found: + if keep: + path = getattr(session, f"{path_type}_path") + logger.info( + f"Moving blacklisted files to {path}/blacklisted" + ) + move_file(path, item) + else: + delete_file(item) - logger.debug(f"Working on {sub}") - for path in base_paths: - for sub_dir in glob.glob(os.path.join(path, sub + "*")): - for entry in blacklist_entries: - remove_matches(sub_dir, entry) - - -def remove_matches(path, fname): - matches = find_files(path, fname) - if matches: - logger.info(f"Files found for deletion: {matches}") - if DRYRUN: - return - for item in matches: - try: - os.remove(item) - except FileNotFoundError: - pass - except (PermissionError, IsADirectoryError): - logger.error(f"Failed to delete blacklisted item {item}.") +def move_file(path, item): + """Move a file to a 'blacklisted' subdir inside the given path. -def find_files(path, fname): - return glob.glob(os.path.join(path, fname + "*")) + Args: + path (:obj:`str`): The path to move put the 'blacklisted' folder. + item (:obj:`str`): The full path to a blacklisted file to move. + """ + bl_dir = os.path.join(path, "blacklisted") + try: + os.mkdir(bl_dir) + except FileExistsError: + pass + + try: + shutil.move(item, bl_dir) + except shutil.Error as e: + logger.error(f"Failed to move blacklisted file {item} - {e}") + + +def delete_file(file_path): + try: + os.remove(file_path) + except FileNotFoundError: + pass + except (PermissionError, IsADirectoryError): + logger.error(f"Failed to delete blacklisted item {file_path}.") if __name__ == "__main__": From b905994d141a678dc528fe95fec4829e17d03d26 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Fri, 22 Nov 2024 20:03:16 -0500 Subject: [PATCH 3/9] [FIX] Allow datman/scan.py to handle split series in bids folder --- datman/scan.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/datman/scan.py b/datman/scan.py index ecc17912..f4b3bef8 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -205,13 +205,11 @@ def _make_bids_inventory(self): continue series = int(contents['SeriesNumber']) - if series in inventory: - raise Exception( - f"More than one series {series} found for " - f"{self.ident}") - base_fname = os.path.splitext(json_path)[0] - inventory[series] = glob.glob(base_fname + "*") + + inventory.setdefault(series, []).extend( + glob.glob(base_fname + "*") + ) return inventory From 13215ed2b4a872aa1bd2fc1803ac803f9527a077 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 27 Jan 2025 18:58:55 -0500 Subject: [PATCH 4/9] [FIX] Skip malformed sidecar files --- datman/scan.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datman/scan.py b/datman/scan.py index f4b3bef8..35ec7acc 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -204,7 +204,11 @@ def _make_bids_inventory(self): if repeat != self.session: continue - series = int(contents['SeriesNumber']) + try: + series = int(contents['SeriesNumber']) + except KeyError: + # Ignore sidecars missing a series number field. + continue base_fname = os.path.splitext(json_path)[0] inventory.setdefault(series, []).extend( From c89280ec04489391f0a57519f25739144b715287 Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 27 Jan 2025 19:36:49 -0500 Subject: [PATCH 5/9] [FIX] Stop blacklisted files from being found on later runs --- datman/scan.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datman/scan.py b/datman/scan.py index 35ec7acc..88929611 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -197,6 +197,9 @@ def _make_bids_inventory(self): if not item.endswith(".json"): continue + if 'blacklisted' in item: + continue + json_path = os.path.join(path, item) contents = datman.utils.read_json(json_path) From b01adea716e206258e7540e6fa8f97aef08585ea Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Mon, 27 Jan 2025 19:42:47 -0500 Subject: [PATCH 6/9] [FIX] More correctly avoid blacklisted items! --- datman/scan.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datman/scan.py b/datman/scan.py index 88929611..bc8812bd 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -193,13 +193,13 @@ def _make_bids_inventory(self): inventory = {} for path, _, files in os.walk(self.bids_path): + if path.endswith("blacklisted"): + continue + for item in files: if not item.endswith(".json"): continue - if 'blacklisted' in item: - continue - json_path = os.path.join(path, item) contents = datman.utils.read_json(json_path) From 1b2e187a2950946cd93e1b03372693dc55156b9f Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 28 Jan 2025 20:16:18 -0500 Subject: [PATCH 7/9] [FIX] Stop unnecessary errors for split files --- bin/dm_blacklist_rm.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bin/dm_blacklist_rm.py b/bin/dm_blacklist_rm.py index c041dcec..a8a32582 100755 --- a/bin/dm_blacklist_rm.py +++ b/bin/dm_blacklist_rm.py @@ -118,6 +118,9 @@ def handle_blacklisted_scans(session, bl_scans, search_paths, keep=False): if not found: continue + if is_already_handled(found): + continue + logger.debug(f"Files found for removal: {found}") if DRYRUN: @@ -135,6 +138,23 @@ def handle_blacklisted_scans(session, bl_scans, search_paths, keep=False): delete_file(item) +def is_already_handled(found_files): + """Checks if the found files have already been moved or removed. + + Split series scans get 'found' for each blacklist entry the first time + the script encounters the blacklist entries. This checks if all 'found' + items have already been removed so unneccessary errors messages can be + avoided! + + Args: + found_files (:obj:`list`): A list of files to check. + + Returns: + bool + """ + return all([not os.path.exists(item) for item in found_files]) + + def move_file(path, item): """Move a file to a 'blacklisted' subdir inside the given path. From 343aa7b33a9688ade08f8a246138415516d7d40f Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Tue, 28 Jan 2025 20:49:20 -0500 Subject: [PATCH 8/9] [FIX] Stop outputs being marked as missing when blacklisted items exist --- datman/exporters.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datman/exporters.py b/datman/exporters.py index d8ddb834..a78efee3 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -972,15 +972,14 @@ def get_error_file(self, dm_file): def outputs_exist(self): for dm_name in self.name_map: + if read_blacklist(scan=dm_name, config=self.config): + continue + if self.name_map[dm_name] == "missing": if not os.path.exists(self.get_error_file(dm_name)): return False continue - bl_entry = read_blacklist(scan=dm_name, config=self.config) - if bl_entry: - continue - full_path = os.path.join(self.output_dir, dm_name + self.ext) if not os.path.exists(full_path): return False From a505222eea4b0a9275682a96c7cebeafe04434ba Mon Sep 17 00:00:00 2001 From: Dawn Smith Date: Wed, 29 Jan 2025 14:26:59 -0500 Subject: [PATCH 9/9] [FIX] Update default folders dm_blacklist_rm works on --- assets/config_templates/main_config.yml | 4 ++-- bin/dm_blacklist_rm.py | 2 +- docs/datman_conf.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/assets/config_templates/main_config.yml b/assets/config_templates/main_config.yml index f67ed9fe..6172fd03 100644 --- a/assets/config_templates/main_config.yml +++ b/assets/config_templates/main_config.yml @@ -226,7 +226,7 @@ IgnoreHeaderFields: # Dicom Header fields to exclude from gold # study's resources folder. (Optional). # Default: 'behav|\.edat2' -# BlacklistDel: [nii, mnc, nrrd, resources] # Indicate which directories to +# BlacklistDel: [nii, bids, resources] # Indicate which directories to # delete blacklisted data from. - # Default: [nii, mnc, nrrd, + # Default: [nii, bids, # resources] diff --git a/bin/dm_blacklist_rm.py b/bin/dm_blacklist_rm.py index a8a32582..593dfc04 100755 --- a/bin/dm_blacklist_rm.py +++ b/bin/dm_blacklist_rm.py @@ -93,7 +93,7 @@ def get_search_paths(config, user_paths=None): path_keys = config.get_key("BlacklistDel") except datman.config.UndefinedSetting: # Fall back to the default - path_keys = ['nii', 'mnc', 'nrrd', 'resources'] + path_keys = ['nii', 'bids', 'resources'] return path_keys diff --git a/docs/datman_conf.rst b/docs/datman_conf.rst index f7e5a476..b6444079 100644 --- a/docs/datman_conf.rst +++ b/docs/datman_conf.rst @@ -102,7 +102,7 @@ Optional * Accepted values: A list of path names, where each path name has already been defined in `Paths`_. * Default value: If omitted ``dm_blacklist_rm`` will delete blacklisted - scans from ``nii``, ``mnc``, ``nrrd``, and ``resources``, if these + scans from ``nii``, ``bids``, and ``resources``, if these directories exist. Example