diff --git a/assets/config_templates/main_config.yml b/assets/config_templates/main_config.yml index f67ed9fe..6172fd03 100644 --- a/assets/config_templates/main_config.yml +++ b/assets/config_templates/main_config.yml @@ -226,7 +226,7 @@ IgnoreHeaderFields: # Dicom Header fields to exclude from gold # study's resources folder. (Optional). # Default: 'behav|\.edat2' -# BlacklistDel: [nii, mnc, nrrd, resources] # Indicate which directories to +# BlacklistDel: [nii, bids, resources] # Indicate which directories to # delete blacklisted data from. - # Default: [nii, mnc, nrrd, + # Default: [nii, bids, # resources] diff --git a/bin/dm_blacklist_rm.py b/bin/dm_blacklist_rm.py index 48f9384f..593dfc04 100755 --- a/bin/dm_blacklist_rm.py +++ b/bin/dm_blacklist_rm.py @@ -1,6 +1,7 @@ #!/usr/bin/env python """ -Searches the file system for any data on the blacklist and deletes them. +Searches the file system for any data on the blacklist and removes them +from subject data folders to avoid pipeline failures etc. Usage: dm_blacklist_rm.py [options] [--path=KEY]... @@ -9,6 +10,8 @@ The name of a datman managed project Options: + --keep If provided, the blacklisted scans will be moved to + a 'blacklisted' subdir instead of being deleted. --path KEY If provided overrides the 'BlacklistDel' setting from the config files, which defines the directories to delete blacklisted items from. 'KEY' may be the name @@ -22,15 +25,14 @@ """ import os -import glob import logging +import shutil from docopt import docopt import datman.config import datman.scan import datman.utils -from datman.scanid import ParseException logging.basicConfig(level=logging.WARN, format="[%(name)s] %(levelname)s: %(message)s") @@ -43,6 +45,7 @@ def main(): global DRYRUN arguments = docopt(__doc__) project = arguments[''] + keep = arguments['--keep'] override_paths = arguments['--path'] verbose = arguments['--verbose'] debug = arguments['--debug'] @@ -58,13 +61,30 @@ def main(): config = datman.config.config(study=project) metadata = datman.utils.get_subject_metadata(config, allow_partial=True) - base_paths = get_base_paths(config, override_paths) + search_paths = get_search_paths(config, override_paths) - remove_blacklisted_items(metadata, base_paths) + for sub in metadata: + if not metadata[sub]: + continue + + logger.debug(f"Working on {sub}") + session = datman.scan.Scan(sub, config) + handle_blacklisted_scans( + session, metadata[sub], search_paths, keep=keep + ) + + +def get_search_paths(config, user_paths=None): + """Get path types to search for blacklisted files in. + Args: + config (:obj:`datman.config.config`): A datman config object for the + current study. + user_paths (:obj:`list`): A list of path types to search through. + Optional. Causes the configuration file setting to be ignored. -def get_base_paths(config, user_paths): - """Get the full path to each base directory to search for blacklisted data. + Returns: + list: A list of path types that will be searched. """ if user_paths: path_keys = user_paths @@ -73,52 +93,94 @@ def get_base_paths(config, user_paths): path_keys = config.get_key("BlacklistDel") except datman.config.UndefinedSetting: # Fall back to the default - path_keys = ['nii', 'mnc', 'nrrd', 'resources'] + path_keys = ['nii', 'bids', 'resources'] + return path_keys + + +def handle_blacklisted_scans(session, bl_scans, search_paths, keep=False): + """Move or delete all blacklisted scans for the given path types. + + Args: + session (:obj:`datman.scan.Scan`): A datman scan object for the + current session. + bl_scans (:obj:`list`): A list of strings each representing a + blacklisted scan. + search_paths (:obj:`list`): A list of path types to move/delete + blacklisted scans from. Each path type must exist in the + datman config files. + keep (bool): Whether to move files instead of deleting them. Optional, + default False. + """ + for scan in bl_scans: + for path_type in search_paths: + found = session.find_files(scan, format=path_type) - base_paths = [] - for key in path_keys: - try: - found = config.get_path(key) - except datman.config.UndefinedSetting: - logger.warning(f"Given undefined path type - {key}. Ignoring.") - continue + if not found: + continue - if os.path.exists(found): - base_paths.append(found) + if is_already_handled(found): + continue - return base_paths + logger.debug(f"Files found for removal: {found}") + if DRYRUN: + logger.info("DRYRUN - Leaving files in place.") + continue -def remove_blacklisted_items(metadata, base_paths): - for sub in metadata: - blacklist_entries = metadata[sub] - if not blacklist_entries: - continue + for item in found: + if keep: + path = getattr(session, f"{path_type}_path") + logger.info( + f"Moving blacklisted files to {path}/blacklisted" + ) + move_file(path, item) + else: + delete_file(item) - logger.debug(f"Working on {sub}") - for path in base_paths: - for sub_dir in glob.glob(os.path.join(path, sub + "*")): - for entry in blacklist_entries: - remove_matches(sub_dir, entry) - - -def remove_matches(path, fname): - matches = find_files(path, fname) - if matches: - logger.info(f"Files found for deletion: {matches}") - if DRYRUN: - return - for item in matches: - try: - os.remove(item) - except FileNotFoundError: - pass - except (PermissionError, IsADirectoryError): - logger.error(f"Failed to delete blacklisted item {item}.") +def is_already_handled(found_files): + """Checks if the found files have already been moved or removed. + + Split series scans get 'found' for each blacklist entry the first time + the script encounters the blacklist entries. This checks if all 'found' + items have already been removed so unneccessary errors messages can be + avoided! + + Args: + found_files (:obj:`list`): A list of files to check. -def find_files(path, fname): - return glob.glob(os.path.join(path, fname + "*")) + Returns: + bool + """ + return all([not os.path.exists(item) for item in found_files]) + + +def move_file(path, item): + """Move a file to a 'blacklisted' subdir inside the given path. + + Args: + path (:obj:`str`): The path to move put the 'blacklisted' folder. + item (:obj:`str`): The full path to a blacklisted file to move. + """ + bl_dir = os.path.join(path, "blacklisted") + try: + os.mkdir(bl_dir) + except FileExistsError: + pass + + try: + shutil.move(item, bl_dir) + except shutil.Error as e: + logger.error(f"Failed to move blacklisted file {item} - {e}") + + +def delete_file(file_path): + try: + os.remove(file_path) + except FileNotFoundError: + pass + except (PermissionError, IsADirectoryError): + logger.error(f"Failed to delete blacklisted item {file_path}.") if __name__ == "__main__": diff --git a/datman/exporters.py b/datman/exporters.py index d8ddb834..a78efee3 100644 --- a/datman/exporters.py +++ b/datman/exporters.py @@ -972,15 +972,14 @@ def get_error_file(self, dm_file): def outputs_exist(self): for dm_name in self.name_map: + if read_blacklist(scan=dm_name, config=self.config): + continue + if self.name_map[dm_name] == "missing": if not os.path.exists(self.get_error_file(dm_name)): return False continue - bl_entry = read_blacklist(scan=dm_name, config=self.config) - if bl_entry: - continue - full_path = os.path.join(self.output_dir, dm_name + self.ext) if not os.path.exists(full_path): return False diff --git a/datman/scan.py b/datman/scan.py index a991e6b4..bc8812bd 100644 --- a/datman/scan.py +++ b/datman/scan.py @@ -123,6 +123,7 @@ def __init__(self, subject_id, config, bids_root=None): self.bids_root = bids_root self.bids_path = self.__get_bids() + self._bids_inventory = self._make_bids_inventory() # This one lists all existing resource folders for the timepoint. self.resources = self._get_resources(config) @@ -149,14 +150,76 @@ def _get_ident(self, subid): return ident def find_files(self, file_stem, format="nii"): + """Find files belonging to the session matching a given file name. + + Args: + file_stem (:obj:`str`): A valid datman-style file name, with or + without the extension and preceding path. + format (:obj:`str`): The configured datman folder path to search + through. Default: 'nii' + + Returns: + :obj:`list`: a list of full paths to matching files, if any. Or + an empty list if none are found. + + Raises: + datman.scanid.ParseException: If an invalid datman file name + is given. + """ + if format == 'bids': + return self._find_bids_files(file_stem) + try: base_path = getattr(self, f"{format}_path") except AttributeError: return [] + if not os.path.exists(base_path): return [] + return glob.glob(os.path.join(base_path, file_stem + "*")) + def _find_bids_files(self, file_stem): + ident, _, series, _ = datman.scanid.parse_filename(file_stem) + if ident.session != self.session: + return [] + if int(series) in self._bids_inventory: + return self._bids_inventory[int(series)] + return [] + + def _make_bids_inventory(self): + if not self.bids_path: + return {} + + inventory = {} + for path, _, files in os.walk(self.bids_path): + if path.endswith("blacklisted"): + continue + + for item in files: + if not item.endswith(".json"): + continue + + json_path = os.path.join(path, item) + contents = datman.utils.read_json(json_path) + + repeat = contents['Repeat'] if 'Repeat' in contents else '01' + if repeat != self.session: + continue + + try: + series = int(contents['SeriesNumber']) + except KeyError: + # Ignore sidecars missing a series number field. + continue + base_fname = os.path.splitext(json_path)[0] + + inventory.setdefault(series, []).extend( + glob.glob(base_fname + "*") + ) + + return inventory + def get_tagged_nii(self, tag): try: matched_niftis = self.__nii_dict[tag] diff --git a/docs/datman_conf.rst b/docs/datman_conf.rst index f7e5a476..b6444079 100644 --- a/docs/datman_conf.rst +++ b/docs/datman_conf.rst @@ -102,7 +102,7 @@ Optional * Accepted values: A list of path names, where each path name has already been defined in `Paths`_. * Default value: If omitted ``dm_blacklist_rm`` will delete blacklisted - scans from ``nii``, ``mnc``, ``nrrd``, and ``resources``, if these + scans from ``nii``, ``bids``, and ``resources``, if these directories exist. Example