Skip to content

Commit

Permalink
Merge pull request #4 from micafer/b2share
Browse files Browse the repository at this point in the history
Add B2Share support J535D165#88
  • Loading branch information
micafer authored Sep 24, 2024
2 parents 62698ef + 9be54fd commit b4a9c04
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 7 deletions.
8 changes: 2 additions & 6 deletions datahugger/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,8 @@ def download_file(
if self.progress:
print(f"{_format_filename(file_name)}: SKIPPED")
return

if (
self.filter_files and
not re.match(self.filter_files, file_name)
):


if self.filter_files and not re.match(self.filter_files, file_name):
logging.info(f"Skipping file by filter {file_link}")
if self.progress:
print(f"{_format_filename(file_name)}: SKIPPED")
Expand Down
4 changes: 3 additions & 1 deletion datahugger/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from datahugger.services import ArXivDataset
from datahugger.services import B2shareDataset
from datahugger.services import DataDryadDataset
from datahugger.services import DataEuropaDataset
from datahugger.services import DataOneDataset
from datahugger.services import DataverseDataset
from datahugger.services import DjehutyDataset
from datahugger.services import DSpaceDataset
from datahugger.services import DataEuropaDataset
from datahugger.services import FigShareDataset
from datahugger.services import GitHubDataset
from datahugger.services import HuggingFaceDataset
Expand Down Expand Up @@ -118,6 +119,7 @@
"trolling.uit.no": DataverseDataset,
"www.sodha.be": DataverseDataset,
"www.uni-hildesheim.de": DataverseDataset,
"b2share.eudat.eu": B2shareDataset,
"data.europa.eu": DataEuropaDataset,
}

Expand Down
22 changes: 22 additions & 0 deletions datahugger/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,28 @@ def _get_attr_hash_type(self, record):
return self._get_attr_attr(record, self.ATTR_HASH_JSONPATH).split(":")[0]


class B2shareDataset(DatasetDownloader):
    """Dataset downloader for records hosted on B2Share (b2share.eudat.eu).

    The class is configured declaratively: the constants below tell the
    base downloader how to recognise a record URL, where the record
    metadata lives, and which keys of a file entry hold its attributes.
    Only the construction of the per-file download link is customised.
    """

    # Matches a record URL and captures the record identifier
    # (lower-case letters and digits) as the ``record_id`` group.
    REGEXP_ID = r"b2share\.eudat\.eu\/records\/(?P<record_id>[0-9a-z]+)"

    # Root of the REST API (note the trailing slash).
    API_URL = "https://b2share.eudat.eu/api/"

    # Template for the record metadata endpoint and the JSONPath that
    # selects the file entries inside the metadata document.
    API_URL_META = "{api_url}records/{record_id}"
    META_FILES_JSONPATH = "files[*]"

    # Where to find each attribute inside a single file entry.
    ATTR_NAME_JSONPATH = "key"
    ATTR_SIZE_JSONPATH = "size"
    ATTR_HASH_JSONPATH = "checksum"
    ATTR_HASH_TYPE_VALUE = "md5"

    def _get_attr_link(self, record, base_url=None):
        """Return the download link for a single file entry.

        The link has the shape ``{base_url}/files/{record_id}/{key}``,
        where ``record_id`` is read from ``self._params`` and ``key`` from
        the file entry itself.

        Parameters
        ----------
        record:
            File entry; must provide a ``"key"`` item.
        base_url:
            Prefix of the link. It is interpolated as-is, so with the
            default ``None`` the text ``None`` ends up in the result;
            presumably callers always supply it -- verify against caller.

        Raises
        ------
        KeyError
            If ``record_id`` is missing from ``self._params`` or ``key``
            is missing from ``record``.
        """
        record_id = self._params["record_id"]
        file_key = record["key"]
        return f"{base_url}/files/{record_id}/{file_key}"


class SeaNoeDataset(DatasetDownloader):
"""Downloader for SeaNoe publication."""

Expand Down
4 changes: 4 additions & 0 deletions tests/test_repositories.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ files = "AA_age.tab"
location = "https://github.com/j535d165/cbsodata"
files = "cbsodata-main/README.md"

[[b2share]]
location = "https://b2share.eudat.eu/records/db2ef5890fa44c7a85af366a50de73b9"
files = "2024-02-13.sav"

[[seanoe]]
location = "https://doi.org/10.17882/101042"
files = "111609.xlsx"
Expand Down

0 comments on commit b4a9c04

Please sign in to comment.