diff --git a/datahugger/config.py b/datahugger/config.py index 45899dc..3ed877b 100644 --- a/datahugger/config.py +++ b/datahugger/config.py @@ -1,4 +1,5 @@ from datahugger.services import ArXivDataset +from datahugger.services import B2shareDataset from datahugger.services import DataDryadDataset from datahugger.services import DataEuropaDataset from datahugger.services import DataOneDataset @@ -118,6 +119,7 @@ "trolling.uit.no": DataverseDataset, "www.sodha.be": DataverseDataset, "www.uni-hildesheim.de": DataverseDataset, + "b2share.eudat.eu": B2shareDataset, "data.europa.eu": DataEuropaDataset, } diff --git a/datahugger/services.py b/datahugger/services.py index 60fc9f2..82ff49f 100644 --- a/datahugger/services.py +++ b/datahugger/services.py @@ -320,7 +320,7 @@ class MendeleyDataset(DatasetDownloader): class OSFDataset(DatasetDownloader): """Downloader for OSF repository.""" - REGEXP_ID = r"osf\.io\/(?P<record_id>.*)/" + REGEXP_ID = r"osf\.io\/(?P<record_id>[^\/]*)\/{0,1}" # the base entry point of the REST API API_URL = "https://api.osf.io/v2/nodes/" @@ -425,3 +425,23 @@ class SeaNoeDataset(DatasetDownloader): ATTR_SIZE_JSONPATH = "size" ATTR_HASH_JSONPATH = "checksum" ATTR_HASH_TYPE_VALUE = "sha256" + + +class B2shareDataset(DatasetDownloader): + """Downloader for B2Share repository.""" + + REGEXP_ID = r"b2share\.eudat\.eu\/records\/(?P<record_id>[0-9a-z]+)" + + # the base entry point of the REST API + API_URL = "https://b2share.eudat.eu/api/" + + # the files and metadata about the dataset + API_URL_META = "{api_url}records/{record_id}" + META_FILES_JSONPATH = "files[*]" + + # paths to file attributes + ATTR_NAME_JSONPATH = "key" + ATTR_FILE_LINK_JSONPATH = "ePIC_PID" + ATTR_SIZE_JSONPATH = "size" + ATTR_HASH_JSONPATH = "checksum" + ATTR_HASH_TYPE_VALUE = "md5" diff --git a/tests/test_repositories.toml b/tests/test_repositories.toml index 467bab1..775786a 100644 --- a/tests/test_repositories.toml +++ b/tests/test_repositories.toml @@ -114,6 +114,10 @@ files = "AA_age.tab" location = 
"https://github.com/j535d165/cbsodata" files = "cbsodata-main/README.md" +[[b2share]] +location = "https://b2share.eudat.eu/records/db2ef5890fa44c7a85af366a50de73b9" +files = "2024-02-13.sav" + [[dataeuropa]] location = "https://data.europa.eu/data/datasets/65e092e4009f18f050b14216" files = "consolidation-wattzhub-schema-irve-statique-20240220-152202.csv"