From 6ca501b0e0397cbbf583e0eba05dfcc790544e1c Mon Sep 17 00:00:00 2001
From: Miguel Caballer Fernandez
Date: Sun, 29 Sep 2024 09:42:15 +0200
Subject: [PATCH] Add support to SeaNoe (#85)

---
 datahugger/config.py         |  2 ++
 datahugger/services.py       | 20 ++++++++++++++++++++
 tests/test_repositories.toml |  4 ++++
 3 files changed, 26 insertions(+)

diff --git a/datahugger/config.py b/datahugger/config.py
index 3176c87..2c5e90f 100644
--- a/datahugger/config.py
+++ b/datahugger/config.py
@@ -10,6 +10,7 @@
 from datahugger.services import MendeleyDataset
 from datahugger.services import OSFDataset
 from datahugger.services import PangaeaDataset
+from datahugger.services import SeaNoeDataset
 from datahugger.services import ZenodoDataset
 
 # fast lookup
@@ -112,6 +113,7 @@
     "researchdata.ntu.edu.sg": DataverseDataset,
     "rin.lipi.go.id": DataverseDataset,
     "ssri.is": DataverseDataset,
+    "www.seanoe.org": SeaNoeDataset,
     "trolling.uit.no": DataverseDataset,
     "www.sodha.be": DataverseDataset,
     "www.uni-hildesheim.de": DataverseDataset,
diff --git a/datahugger/services.py b/datahugger/services.py
index a0075e7..3a0ce60 100644
--- a/datahugger/services.py
+++ b/datahugger/services.py
@@ -388,3 +388,23 @@ def _get_attr_hash(self, record):
 
     def _get_attr_hash_type(self, record):
         return self._get_attr_attr(record, self.ATTR_HASH_JSONPATH).split(":")[0]
+
+
+class SeaNoeDataset(DatasetDownloader):
+    """Downloader for SeaNoe publication."""
+
+    REGEXP_ID = r"https://www.seanoe\.org/data/[0-9]+/(?P<record_id>.*)/"
+
+    # the base entry point of the REST API
+    API_URL = "https://www.seanoe.org/api/"
+
+    # the files and metadata about the dataset
+    API_URL_META = "{api_url}find-by-id/{record_id}"
+    META_FILES_JSONPATH = "files[*]"
+
+    # paths to file attributes
+    ATTR_NAME_JSONPATH = "fileName"
+    ATTR_FILE_LINK_JSONPATH = "fileUrl"
+    ATTR_SIZE_JSONPATH = "size"
+    ATTR_HASH_JSONPATH = "checksum"
+    ATTR_HASH_TYPE_VALUE = "sha256"
diff --git a/tests/test_repositories.toml b/tests/test_repositories.toml
index 1758b46..ce22f08 100644
--- a/tests/test_repositories.toml
+++ b/tests/test_repositories.toml
@@ -109,3 +109,7 @@ files = "AA_age.tab"
 [[github]]
 location = "https://github.com/j535d165/cbsodata"
 files = "cbsodata-main/README.md"
+
+[[seanoe]]
+location = "https://doi.org/10.17882/101042"
+files = "111609.xlsx"