From 0019d7b1ab20c44c89a6b2bd2df8f17353c11ab4 Mon Sep 17 00:00:00 2001
From: Miguel Caballer Fernandez
Date: Wed, 26 Jun 2024 17:31:50 +0200
Subject: [PATCH] Add support for providers in OSF (#77)

Closes #69
---
 datahugger/services.py     | 25 +++++++++++++++++++++++--
 tests/test_repositories.py |  1 +
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/datahugger/services.py b/datahugger/services.py
index 05403f5..37b856a 100644
--- a/datahugger/services.py
+++ b/datahugger/services.py
@@ -324,10 +324,10 @@ class OSFDataset(DatasetDownloader):
     REGEXP_ID = r"osf\.io\/(?P<record_id>.*)/"
 
     # the base entry point of the REST API
-    API_URL = "https://api.osf.io/v2/registrations/"
+    API_URL = "https://api.osf.io/v2/nodes/"
 
     # the files and metadata about the dataset
-    API_URL_META = "{api_url}{record_id}/files/osfstorage/?format=jsonapi"
+    API_URL_META = "{api_url}{record_id}/files/"
     META_FILES_JSONPATH = "data[*]"
 
     PAGINATION_JSONPATH = "links.next"
@@ -343,6 +343,27 @@ class OSFDataset(DatasetDownloader):
     ATTR_HASH_JSONPATH = "attributes.extra.hashes.sha256"
     ATTR_HASH_TYPE_VALUE = "sha256"
 
+    def _get_node_providers(self):
+        """Return the set of provider names listed under the node's files."""
+        record_id = self._params["record_id"]
+        # API_URL already ends with "/"; a second one yields ".../nodes//<id>/"
+        res = requests.get(f"{self.API_URL}{record_id}/files/")
+        return {prov["attributes"]["provider"] for prov in res.json()["data"]}
+
+    def _get_files_recursive(self, url, folder_name=None, base_url=None):
+        """List files under ``url``; at the top level, visit every provider."""
+        files = []
+        # In case of the top-level folder, we need to get first the providers
+        if folder_name is None:
+            for provider in self._get_node_providers():
+                # and then the files of each provider
+                files.extend(
+                    super()._get_files_recursive(f"{url}{provider}/", None, base_url)
+                )
+        else:
+            files = super()._get_files_recursive(url, folder_name, base_url)
+        return files
+
 
 class ZenodoDataset(DatasetDownloader):
     """Downloader for Zenodo repository.
diff --git a/tests/test_repositories.py b/tests/test_repositories.py index 304aa87..eea2c65 100644 --- a/tests/test_repositories.py +++ b/tests/test_repositories.py @@ -36,6 +36,7 @@ ), ("https://doi.org/10.5061/dryad.31zcrjdm5", "ReadmeFile.txt"), # OSF + ("https://osf.io/ews27/", "Cross-comparison/amarlt1"), ("https://osf.io/kq573/", "nest_area_data.xlsx"), ("https://doi.org/10.17605/OSF.IO/KQ573", "nest_area_data.xlsx"), ("https://doi.org/10.17605/OSF.IO/9X6CA", "jobs.sh"),