From 9af49de3c9751acaf0eb15bb44dd32ed1531e099 Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Wed, 25 Sep 2024 10:42:44 +0200 Subject: [PATCH 1/3] Revert changes in OSF --- datahugger/services.py | 23 ++--------------------- tests/test_repositories.toml | 4 ---- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/datahugger/services.py b/datahugger/services.py index 9c4baa1..022adf5 100644 --- a/datahugger/services.py +++ b/datahugger/services.py @@ -320,13 +320,13 @@ class MendeleyDataset(DatasetDownloader): class OSFDataset(DatasetDownloader): """Downloader for OSF repository.""" - REGEXP_ID = r"osf\.io\/(?P<record_id>[^\/]*)\/{0,1}" + REGEXP_ID = r"osf\.io\/(?P<record_id>.*)/" # the base entry point of the REST API API_URL = "https://api.osf.io/v2/nodes/" # the files and metadata about the dataset - API_URL_META = "{api_url}{record_id}/files/" + API_URL_META = "{api_url}{record_id}/files/osfstorage/?format=jsonapi" META_FILES_JSONPATH = "data[*]" PAGINATION_JSONPATH = "links.next" @@ -342,25 +342,6 @@ class OSFDataset(DatasetDownloader): ATTR_HASH_JSONPATH = "attributes.extra.hashes.sha256" ATTR_HASH_TYPE_VALUE = "sha256" - def _get_node_providers(self): - """Get the providers of a node.""" - record_id = self._params["record_id"] - res = requests.get(f"{self.API_URL}/{record_id}/files/") - return set([prov["attributes"]["provider"] for prov in res.json()["data"]]) - - def _get_files_recursive(self, url, folder_name=None, base_url=None): - files = [] - # In case of the top-level folder, we need to get first the providers - if folder_name is None: - for provider in self._get_node_providers(): - # and then the files of each provider - files.extend( - super()._get_files_recursive(f"{url}{provider}/", None, base_url) - ) - else: - files = super()._get_files_recursive(url, folder_name, base_url) - return files - class ZenodoDataset(DatasetDownloader): """Downloader for Zenodo repository. 
diff --git a/tests/test_repositories.toml b/tests/test_repositories.toml index 04dc97b..bf81923 100644 --- a/tests/test_repositories.toml +++ b/tests/test_repositories.toml @@ -62,10 +62,6 @@ files = "ReadmeFile.txt" location = "https://doi.org/10.5061/dryad.31zcrjdm5" files = "ReadmeFile.txt" -[[osf]] -location = "https://osf.io/ews27/" -files = "Cross-comparison/amarlt1" - [[osf]] location = "https://osf.io/kq573/" files = "nest_area_data.xlsx" From 5d3c3d3f5ba4878bacbb6bbd67605f40eea518c6 Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Wed, 25 Sep 2024 10:49:25 +0200 Subject: [PATCH 2/3] Enable to miss the last / --- datahugger/services.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datahugger/services.py b/datahugger/services.py index 022adf5..2941464 100644 --- a/datahugger/services.py +++ b/datahugger/services.py @@ -320,7 +320,7 @@ class MendeleyDataset(DatasetDownloader): class OSFDataset(DatasetDownloader): """Downloader for OSF repository.""" - REGEXP_ID = r"osf\.io\/(?P<record_id>.*)/" + REGEXP_ID = r"osf\.io\/(?P<record_id>[^\/]*)\/{0,1}" # the base entry point of the REST API API_URL = "https://api.osf.io/v2/nodes/" From b28d4c1d537e8553d94b8d8fa425de68fa6dd782 Mon Sep 17 00:00:00 2001 From: Miguel Caballer Date: Wed, 25 Sep 2024 11:00:55 +0200 Subject: [PATCH 3/3] Fix figshare test --- tests/test_repositories.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_repositories.toml b/tests/test_repositories.toml index bf81923..9254425 100644 --- a/tests/test_repositories.toml +++ b/tests/test_repositories.toml @@ -47,8 +47,8 @@ location = "https://figshare.com/articles/dataset/Long-term_behavioral_repeatabi files = "cross_year_data2.csv" [[figshare]] -location = "https://doi.org/10.15131/shef.data.22010159.v2" -files = "ScHARR QUIT evaluation statistical and health economic analysis plan.pdf" +location = "https://doi.org/10.15131/shef.data.26977237.v1" +files = "README.txt" [[djehuty]] location = 
"https://doi.org/10.4121/21989216.v1"