Skip to content

Commit

Permalink
Change GitHub Actions to run workflow job per service (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
J535D165 authored Jun 27, 2024
1 parent eb9f98f commit a203837
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 96 deletions.
40 changes: 34 additions & 6 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,44 @@
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Test repositories
on: [push, pull_request]
on:
push:
branches:
- main
pull_request:
branches:
- main

jobs:
test-api:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.12"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[test]"
- name: Test with pytest
run: |
pytest -n 4 --ignore=tests/test_repositories_plus.py --ignore=tests/test_repositories.py
test-repositories:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
service: ["zenodo", "dataverse", "figshare", "djehuty", "dryad", "osf", "mendeley", "dataone", "dspace", "pangaea", "github"]
python-version: ["3.8", "3.12"]

steps:
Expand All @@ -22,11 +51,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .
python -m pip install pytest pytest-xdist
python -m pip install ".[test]"
- name: Test with pytest
run: |
pytest -n 4 --ignore=tests/test_repositories_plus.py
pytest tests/test_repositories.py --service ${{ matrix.service }}
test-repositories-plus:

Expand All @@ -45,8 +73,8 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[all]
python -m pip install pytest pytest-xdist
python -m pip install ".[all]"
python -m pip install ".[test]"
- name: Test with pytest
run: |
pytest -n 4 tests/test_repositories_plus.py
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ datahugger = "datahugger.__main__:main"
all = ["datasets"]
benchmark = ["pandas", "tabulate"]
lint = ["ruff"]
test = ["pytest"]
test = ["pytest", "tomli; python_version < '3.11'", "pytest-xdist"]
docs = ["mkdocs-material"]

[build-system]
Expand Down
42 changes: 42 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from pathlib import Path

try:
import tomllib
except ImportError:
import tomli as tomllib


def _parse_repositories(test_config):
return (
test_config["location"],
test_config.get("files", []),
test_config.get("ignored_files", []),
test_config.get("options", {}),
)


def pytest_addoption(parser):
    """Register the ``--service`` command-line flag.

    The flag restricts the repository tests to a single service (e.g.
    ``--service zenodo``); it defaults to ``None``, meaning all services.
    """
    parser.addoption(
        "--service",
        action="store",
        default=None,
        help="Service to test",
    )


def pytest_generate_tests(metafunc):
    """Parametrize repository tests from ``tests/test_repositories.toml``.

    For every test that requests the ``location`` fixture, generate one
    parameter set ``(location, files, ignored_files, dh_kwargs)`` per entry in
    the TOML file. When ``--service`` is given, only that service's entries
    are used; otherwise entries for all services are flattened together.

    Raises
    ------
    ValueError
        If ``--service`` names a service not present in the TOML file.
    """
    # Only tests using the repository fixtures need the TOML file at all.
    if "location" not in metafunc.fixturenames:
        return

    with open(Path("tests", "test_repositories.toml"), "rb") as f:
        test_repos = tomllib.load(f)

    if service_value := metafunc.config.option.service:
        # Validate BEFORE indexing: otherwise a typo'd service name surfaces
        # as an opaque KeyError instead of this intended error message.
        if service_value not in test_repos:
            raise ValueError(f"Unknown service {service_value}")
        selected = test_repos[service_value]
    else:
        # No service filter: flatten every service's entry list.
        selected = [case for cases in test_repos.values() for case in cases]

    metafunc.parametrize(
        "location,files,ignored_files,dh_kwargs",
        map(_parse_repositories, selected),
    )
100 changes: 11 additions & 89 deletions tests/test_repositories.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,22 @@
from pathlib import Path
from pathlib import PosixPath

import pytest

import datahugger
from datahugger.utils import _is_url

TESTS_URLS = [
# Zenodo
("10.5281/zenodo.6625880", "README.md"),
# Dataverse
(
"https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/KBHLOD",
"tutorial1.py",
),
("https://doi.org/10.7910/DVN/KBHLOD", "tutorial1.py"),
("https://hdl.handle.net/10622/NHJZUD", "ERRHS_7_01_data_1795.tab"),
# Dataverse single file
("10.7910/DVN/HZBYG7/RQ26H2", "Table 2.do"),
# Figshare
("https://doi.org/10.6084/m9.figshare.8851784.v1", "cross_year_data2.csv"),
(
"https://figshare.com/articles/dataset/Long-term_behavioral_repeatability_in_wild_adult_and_captive_juvenile_turtles_implications_for_personality_development/8851784",
"cross_year_data2.csv",
),
(
"https://doi.org/10.15131/shef.data.22010159.v2",
"ScHARR QUIT evaluation statistical and health economic analysis plan.pdf",
),
# Djehuty
("https://doi.org/10.4121/21989216.v1", "README.txt"),
# Dryad
(
"https://datadryad.org/stash/dataset/doi:10.5061/dryad.31zcrjdm5",
"ReadmeFile.txt",
),
("https://doi.org/10.5061/dryad.31zcrjdm5", "ReadmeFile.txt"),
# OSF
("https://osf.io/ews27/", "Cross-comparison/amarlt1"),
("https://osf.io/kq573/", "nest_area_data.xlsx"),
("https://doi.org/10.17605/OSF.IO/KQ573", "nest_area_data.xlsx"),
("https://doi.org/10.17605/OSF.IO/9X6CA", "jobs.sh"),
# Mendeley
(
"https://doi.org/10.17632/p6wmtv6t5g.2",
"READMI Stranding Sea Turtle records.pdf",
),
("https://doi.org/10.17632/p6wmtv6t5g", "READMI Stranding Sea Turtle records.pdf"),
# Dataone
("https://doi.org/10.18739/A2KH0DZ42", "2012F_Temperature_Data.csv"),
# DSpace
("https://uhra.herts.ac.uk/handle/2299/26087", "pdf.pdf"),
(
"https://repositorioinstitucional.ceu.es/handle/10637/2741",
"Aquaporin_1_JAMartin_et_al_MedSport_2009.pdf",
),
# Pangaea
("https://doi.org/10.1594/PANGAEA.954547", "Gubbio_age.tab"),
("https://doi.pangaea.de/10.1594/PANGAEA.954543", "AA_age.tab"),
]

def test_get_repositories(location, files, ignored_files, dh_kwargs, tmpdir):
datahugger.get(location, tmpdir, **dh_kwargs)

def test_url_checker():
assert not _is_url("82949")
assert _is_url("https://doi.org/10.5281/zenodo.6614829")
if files:
assert PosixPath(tmpdir, files).exists()

if ignored_files:
assert not PosixPath(tmpdir, ignored_files).exists()

@pytest.mark.parametrize("url_or_id,output_file", TESTS_URLS)
def test_load_dyno(url_or_id, output_file, tmpdir):
"""Load repository with the generic loader."""
datahugger.get(url_or_id, tmpdir)

assert Path(tmpdir, output_file).exists()
@pytest.mark.skip("Not implemented")
def test_info(location, files, ignored_files, dh_kwargs, tmpdir):
dh_info = datahugger.info(location)


@pytest.mark.parametrize(
"url_or_id",
[
("10.5281/zenodo.6614829"),
("https://zenodo.org/record/6614829"),
("https://doi.org/10.5281/zenodo.6614829"),
],
)
def test_load_zenodo_6614829(url_or_id, tmpdir):
datahugger.get(url_or_id, tmpdir, max_file_size=1e6)

assert Path(tmpdir, "quasiperiod.m").exists()
assert not Path(tmpdir, "quasiperiod.txt.gz").exists()


def test_load_github_cbsodata(tmpdir):
datahugger.get("https://github.com/j535d165/cbsodata", tmpdir)


def test_info_without_loading(tmpdir):
dh_get = datahugger.get("https://osf.io/wdzh5/", output_folder=".", print_only=True)

dh_info = datahugger.info("https://osf.io/wdzh5/")

assert dh_get.dataset.files == dh_info.files
assert files in dh_info.files
111 changes: 111 additions & 0 deletions tests/test_repositories.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
[[zenodo]]
location = "10.5281/zenodo.6625880"
files = "README.md"

[[zenodo]]
location = "10.5281/zenodo.6614829"
files = "quasiperiod.m"
ignored_files="quasiperiod.txt.gz"

[zenodo.options]
max_file_size=1000000

[[zenodo]]
location = "https://zenodo.org/record/6614829"
files = "quasiperiod.m"
ignored_files="quasiperiod.txt.gz"

[zenodo.options]
max_file_size=1000000

[[zenodo]]
location = "https://doi.org/10.5281/zenodo.6614829"
files = "quasiperiod.m"
ignored_files="quasiperiod.txt.gz"

[zenodo.options]
max_file_size=1000000

[[dataverse]]
location = "https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/KBHLOD"
files = "tutorial1.py"

[[dataverse]]
location = "https://doi.org/10.7910/DVN/KBHLOD"
files = "tutorial1.py"

[[dataverse]]
location = "https://hdl.handle.net/10622/NHJZUD"
files = "ERRHS_7_01_data_1795.tab"

[[figshare]]
location = "https://doi.org/10.6084/m9.figshare.8851784.v1"
files = "cross_year_data2.csv"

[[figshare]]
location = "https://figshare.com/articles/dataset/Long-term_behavioral_repeatability_in_wild_adult_and_captive_juvenile_turtles_implications_for_personality_development/8851784"
files = "cross_year_data2.csv"

[[figshare]]
location = "https://doi.org/10.15131/shef.data.22010159.v2"
files = "ScHARR QUIT evaluation statistical and health economic analysis plan.pdf"

[[djehuty]]
location = "https://doi.org/10.4121/21989216.v1"
files = "README.txt"

[[dryad]]
location = "https://datadryad.org/stash/dataset/doi:10.5061/dryad.31zcrjdm5"
files = "ReadmeFile.txt"

[[dryad]]
location = "https://doi.org/10.5061/dryad.31zcrjdm5"
files = "ReadmeFile.txt"

[[osf]]
location = "https://osf.io/ews27/"
files = "Cross-comparison/amarlt1"

[[osf]]
location = "https://osf.io/kq573/"
files = "nest_area_data.xlsx"

[[osf]]
location = "https://doi.org/10.17605/OSF.IO/KQ573"
files = "nest_area_data.xlsx"

[[osf]]
location = "https://doi.org/10.17605/OSF.IO/9X6CA"
files = "jobs.sh"

[[mendeley]]
location = "https://doi.org/10.17632/p6wmtv6t5g.2"
files = "READMI Stranding Sea Turtle records.pdf"

[[mendeley]]
location = "https://doi.org/10.17632/p6wmtv6t5g"
files = "READMI Stranding Sea Turtle records.pdf"

[[dataone]]
location = "https://doi.org/10.18739/A2KH0DZ42"
files = "2012F_Temperature_Data.csv"

[[dspace]]
location = "https://uhra.herts.ac.uk/handle/2299/26087"
files = "pdf.pdf"

[[dspace]]
location = "https://repositorioinstitucional.ceu.es/handle/10637/2741"
files = "Aquaporin_1_JAMartin_et_al_MedSport_2009.pdf"

[[pangaea]]
location = "https://doi.org/10.1594/PANGAEA.954547"
files = "Gubbio_age.tab"

[[pangaea]]
location = "https://doi.pangaea.de/10.1594/PANGAEA.954543"
files = "AA_age.tab"

[[github]]
location = "https://github.com/j535d165/cbsodata"
files = "cbsodata-main/README.md"
6 changes: 6 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from datahugger.utils import _is_url
from datahugger.utils import get_re3data_repositories


def test_url_checker():
    """A full https DOI link is a URL; a bare numeric identifier is not."""
    assert _is_url("https://doi.org/10.5281/zenodo.6614829")
    assert not _is_url("82949")


def test_re3data_repos():
    """Smoke test: fetching the re3data repository registry completes without error.

    NOTE(review): presumably requires network access to the re3data service —
    confirm against the implementation of ``get_re3data_repositories``.
    """
    get_re3data_repositories()

0 comments on commit a203837

Please sign in to comment.