Skip to content

Commit

Permalink
deriver tests
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed May 20, 2024
1 parent 50a90a0 commit b0ca69a
Show file tree
Hide file tree
Showing 9 changed files with 608 additions and 18 deletions.
1 change: 1 addition & 0 deletions tests/trove/derive/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# this test package exports no public names
__all__ = ()
84 changes: 84 additions & 0 deletions tests/trove/derive/_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import datetime
from unittest import mock, TestCase
import typing

from ._inputs import DERIVER_TEST_DOCS, DeriverTestDoc


# unique sentinel (compared with `is`) used as an `expected_outputs` value
SHOULD_SKIP = object()  # for deriver inputs that should be skipped


class BaseIndexcardDeriverTest(TestCase):
    """Shared harness for testing one deriver class against `DERIVER_TEST_DOCS`.

    Subclasses set `deriver_class` and `expected_outputs`; the two test
    methods are attached to each subclass by `__init_subclass__`, so this
    base class itself defines no `test_*` methods.
    """

    #######
    # implement these things:

    # a subclass of IndexcardDeriver
    deriver_class: type

    # dictionary with the same keys as `DERIVER_TEST_DOCS` and values that
    # are either `SHOULD_SKIP` (above) or strings that will be passed as
    # `expected_text` to `assert_derived_texts_equal`
    expected_outputs: dict

    # (optional override, for when equality isn't so easy)
    def assert_derived_texts_equal(self, expected_text: str, actual_text: str) -> None:
        self.assertEqual(expected_text, actual_text)

    #######
    # don't override anything else

    test_should_skip: typing.Callable[['BaseIndexcardDeriverTest'], None]
    test_derive_card_as_text: typing.Callable[['BaseIndexcardDeriverTest'], None]

    def __init_subclass__(cls, **kwargs):
        # chain to the parent hook first -- `unittest.TestCase` defines its own
        # `__init_subclass__` on newer pythons, and class keyword arguments
        # must be passed along for cooperative inheritance to work
        super().__init_subclass__(**kwargs)
        # add test methods on subclasses (but not the base class!)
        cls.test_should_skip = _test_should_skip
        cls.test_derive_card_as_text = _test_derive_card_as_text

    def setUp(self):
        super().setUp()
        # replace `IDObfuscator.encode` with a function that returns the given
        # object's `id` attribute, for the duration of each test
        _patcher = mock.patch('share.util.IDObfuscator.encode', new=lambda x: x.id)
        _patcher.start()
        self.addCleanup(_patcher.stop)

    def _get_deriver(self, input_doc: DeriverTestDoc):
        # build a `deriver_class` instance around mocks with fixed placeholder
        # values, so only the rdf from `input_doc` varies between test cases
        _mock_suid = mock.Mock()
        _mock_suid.id = '--suid_id--'
        _mock_suid.get_date_first_seen.return_value = datetime.datetime(2345, 1, 1)
        # the "backcompat" suid is the same mock
        _mock_suid.get_backcompat_sharev2_suid.return_value = _mock_suid
        _mock_suid.identifier = '--sourceunique-id--'
        _mock_suid.source_config.label = '--sourceconfig-label--'
        _mock_suid.source_config.source.long_title = '--source-title--'

        _mock_indexcard_rdf = mock.Mock()
        _mock_indexcard_rdf.id = '--indexcardf-id--'
        _mock_indexcard_rdf.modified = datetime.datetime(2345, 2, 2)
        _mock_indexcard_rdf.as_rdf_tripledict.return_value = input_doc.tripledict
        _mock_indexcard_rdf.focus_iri = input_doc.focus_iri
        _mock_indexcard_rdf.from_raw_datum_id = '--rawdatum-id--'
        _mock_indexcard_rdf.indexcard.id = '--indexcard-id--'
        _mock_indexcard_rdf.indexcard.source_record_suid = _mock_suid
        return self.deriver_class(_mock_indexcard_rdf)

    def _iter_test_cases(self):
        # yield `(input_key, deriver, expected_output)` for each test doc;
        # raises NotImplementedError when `expected_outputs` has no value
        # for one of the keys in `DERIVER_TEST_DOCS`
        for _input_key, _input_doc in DERIVER_TEST_DOCS.items():
            _expected_output = self.expected_outputs.get(_input_key)
            if _expected_output is None:
                raise NotImplementedError(f'{self.__class__.__qualname__}.expected_outputs["{_input_key}"]')
            with self.subTest(input_key=_input_key):
                yield (_input_key, self._get_deriver(_input_doc), _expected_output)


def _test_should_skip(self: BaseIndexcardDeriverTest) -> None:
    # check that each deriver's `should_skip()` is true exactly for the inputs
    # whose expected output is the `SHOULD_SKIP` sentinel
    for _input_key, _deriver, _expected_output in self._iter_test_cases():
        # enter the subtest here, in the frame where the assertion runs, so a
        # failure is recorded for this input and the remaining inputs still run
        with self.subTest(input_key=_input_key):
            self.assertEqual(
                _expected_output is SHOULD_SKIP,
                _deriver.should_skip(),
            )


def _test_derive_card_as_text(self: BaseIndexcardDeriverTest) -> None:
    # check the text derived from each non-skipped input against the expected
    # text, using the (overridable) `assert_derived_texts_equal`
    for _input_key, _deriver, _expected_output in self._iter_test_cases():
        if _expected_output is SHOULD_SKIP:
            continue  # nothing to derive for inputs the deriver should skip
        # enter the subtest here, in the frame where the assertion runs, so a
        # failure is recorded for this input and the remaining inputs still run
        with self.subTest(input_key=_input_key):
            _output = _deriver.derive_card_as_text()
            self.assert_derived_texts_equal(_expected_output, _output)
222 changes: 222 additions & 0 deletions tests/trove/derive/_inputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
from datetime import date
import dataclasses
from primitive_metadata import primitive_rdf as rdf

from trove.vocab.namespaces import (
SKOS,
DCAT,
RDF,
DCTERMS,
OSFMAP,
FOAF,
OWL,
SHAREv2,
)


# made-up vocabulary namespace used by the 'blarg-*' test docs below
BLARG = rdf.IriNamespace('http://blarg.example/vocab/')


# one input document for deriver tests: an rdf graph plus its focus
@dataclasses.dataclass
class DeriverTestDoc:
    # iri of the resource the document is "about" (a key in `tripledict`)
    focus_iri: str
    # the rdf graph, as nested {subject: {predicate: {objects}}}
    tripledict: rdf.RdfTripleDictionary


# input documents shared by every deriver test, keyed by a short name;
# each deriver test class supplies `expected_outputs` with these same keys
DERIVER_TEST_DOCS: dict[str, DeriverTestDoc] = {
    # focus typed only with a BLARG type; creator is described in the same graph
    'blarg-item': DeriverTestDoc(BLARG.my_item, {
        BLARG.my_item: {
            RDF.type: {BLARG.Item},
            DCTERMS.title: {rdf.literal('title', language='en')},
            DCTERMS.creator: {BLARG.me},
            DCTERMS.created: {rdf.literal('2024-02-14')},
        },
        BLARG.me: {
            RDF.type: {FOAF.Person},
            FOAF.name: {rdf.literal('me me')},
        },
    }),
    # like 'blarg-item', but the focus is also typed as OSFMAP.Project
    'blarg-project': DeriverTestDoc(BLARG.my_project, {
        BLARG.my_project: {
            RDF.type: {BLARG.Item, OSFMAP.Project},
            DCTERMS.title: {rdf.literal('title', language='en')},
            # NOTE(review): `BLARG.ME` differs in case from the `BLARG.me` node
            # described below, so presumably this creator has no name in the
            # graph -- confirm whether that is intentional
            DCTERMS.creator: {BLARG.ME},
            DCTERMS.created: {rdf.literal('2024-02-14')},
        },
        BLARG.me: {
            RDF.type: {FOAF.Person},
            FOAF.name: {rdf.literal('me me')},
        },
    }),
    # focus typed with SHAREv2 types, with subjects given as text literals
    # (some of them '|'-delimited paths) and a chain of `isPartOf` parents
    'sharev2-with-subjects': DeriverTestDoc('http://osf.example/chair/', {
        'http://osf.example/chair/': {
            RDF.type: {
                SHAREv2.CreativeWork,
                SHAREv2.Publication,
                SHAREv2.Registration,
            },
            DCTERMS.conformsTo: {
                rdf.blanknode({FOAF.name: {rdf.literal("Open-Ended Registration")}}),
            },
            DCTERMS.created: {rdf.literal(date(2019, 1, 23))},
            DCTERMS.creator: {'mailto:[email protected]'},
            DCTERMS.date: {rdf.literal(date(2019, 1, 23))},
            DCTERMS.identifier: {rdf.literal("http://osf.example/chair/")},
            DCTERMS.isPartOf: {'http://osf.example/vroom/'},
            DCTERMS.subject: {
                rdf.literal('Architecture'),
                rdf.literal('Biology'),
                rdf.literal('Custom biologyyyy'),
                rdf.literal('bepress|Architecture'),
                rdf.literal('bepress|Life Sciences|Biology'),
                rdf.literal('foo|Custom life sciencesssss|Custom biologyyyy'),
            },
            DCTERMS.title: {rdf.literal("Assorted chair")},
            OSFMAP.affiliation: {'http://wassa.example'},
        },
        # grandparent of the focus (parent of 'vroom')
        'http://osf.example/mdept/': {
            RDF.type: {
                SHAREv2.CreativeWork,
                SHAREv2.Publication,
                SHAREv2.Registration,
            },
            DCTERMS.identifier: {rdf.literal('http://osf.example/mdept/')},
            DCTERMS.title: {rdf.literal("Miscellaneous department")},
        },
        # parent of the focus
        'http://osf.example/vroom/': {
            RDF.type: {
                SHAREv2.CreativeWork,
                SHAREv2.Publication,
                SHAREv2.Registration,
            },
            DCTERMS.identifier: {rdf.literal('http://osf.example/vroom/')},
            DCTERMS.isPartOf: {'http://osf.example/mdept/'},
            DCTERMS.title: {rdf.literal("Various room")},
        },
        # creator of the focus
        'mailto:[email protected]': {
            RDF.type: {
                FOAF.Person,
                SHAREv2.Agent,
                SHAREv2.Person,
            },
            DCTERMS.identifier: {
                rdf.literal('http://osf.example/rando/'),
                rdf.literal('mailto:[email protected]'),
            },
            OWL.sameAs: {'http://osf.example/rando/'},
            FOAF.name: {rdf.literal('Some Rando')},
            OSFMAP.affiliation: {'http://wassa.example'},
        },
        # institution affiliated with both the focus and its creator
        'http://wassa.example': {
            RDF.type: {
                FOAF.Organization,
                SHAREv2.Agent,
                SHAREv2.Institution,
                SHAREv2.Organization,
            },
            FOAF.name: {rdf.literal('Wassamatter University')},
        },
    }),
    # focus typed as OSFMAP.Registration, with related resources (schema,
    # subject, licenses, file, component, project, agents) described alongside
    'osfmap-registration': DeriverTestDoc('https://osf.example/2c4st', {
        # registration schema (the focus `conformsTo` this)
        'https://api.osf.example/v2/schemas/registrations/564d31db8c5e4a7c9694b2be/': {
            DCTERMS.title: {rdf.literal("Open-Ended Registration")},
        },
        # subject of the focus, as a skos concept in the bepress scheme
        'https://api.osf.example/v2/subjects/584240da54be81056cecaae5': {
            RDF.type: {SKOS.Concept},
            SKOS.inScheme: {'https://bepress.com/reference_guide_dc/disciplines/'},
            SKOS.prefLabel: {rdf.literal('Education')},
        },
        'https://bepress.com/reference_guide_dc/disciplines/': {
            RDF.type: {SKOS.ConceptScheme},
            DCTERMS.title: {rdf.literal('bepress Digital Commons Three-Tiered Taxonomy')},
        },
        # hosting institution of the focus
        'https://cos.example/': {
            RDF.type: {DCTERMS.Agent, FOAF.Organization},
            DCTERMS.identifier: {rdf.literal('https://cos.example/'), rdf.literal('https://ror.example/05d5mza29')},
            OWL.sameAs: {'https://ror.example/05d5mza29'},
            FOAF.name: {rdf.literal('Center for Open Science')},
        },
        # license of the focus
        'https://creativecommons.example/licenses/by-nc-nd/4.0/legalcode': {
            DCTERMS.identifier: {rdf.literal('https://creativecommons.example/licenses/by-nc-nd/4.0/legalcode')},
            FOAF.name: {rdf.literal('CC-By Attribution-NonCommercial-NoDerivatives 4.0 International')},
        },
        # license of the child component
        'https://creativecommons.example/licenses/by/4.0/legalcode': {
            DCTERMS.identifier: {rdf.literal('https://creativecommons.example/licenses/by/4.0/legalcode')},
            FOAF.name: {rdf.literal('CC-By Attribution 4.0 International')},
        },
        # the focus resource
        'https://osf.example/2c4st': {
            RDF.type: {OSFMAP.Registration},
            DCTERMS.conformsTo: {'https://api.osf.example/v2/schemas/registrations/564d31db8c5e4a7c9694b2be/'},
            DCTERMS.created: {date(2021, 10, 18)},
            DCTERMS.creator: {'https://osf.example/bhcjn'},
            DCTERMS.dateCopyrighted: {rdf.literal('2021')},
            DCTERMS.description: {rdf.literal('This registration tree is intended to demonstrate linkages between the OSF view of a Registration and the Internet Archive view')},
            DCTERMS.hasPart: {'https://osf.example/482n5'},
            DCTERMS.identifier: {rdf.literal('https://doi.example/10.17605/OSF.IO/2C4ST'), rdf.literal('https://osf.example/2c4st')},
            DCTERMS.isVersionOf: {'https://osf.example/hnm67'},
            DCTERMS.modified: {date(2021, 10, 18)},
            DCTERMS.publisher: {'https://osf.example/registries/osf'},
            DCTERMS.rights: {'https://creativecommons.example/licenses/by-nc-nd/4.0/legalcode'},
            DCTERMS.subject: {'https://api.osf.example/v2/subjects/584240da54be81056cecaae5'},
            DCTERMS.title: {rdf.literal('IA/IMLS Demo')},
            OWL.sameAs: {'https://doi.example/10.17605/OSF.IO/2C4ST'},
            DCAT.accessService: {'https://osf.example'},
            OSFMAP.affiliation: {'https://ror.example/05d5mza29'},
            OSFMAP.archivedAt: {'https://archive.example/details/osf-registrations-2c4st-v1'},
            OSFMAP.contains: {'https://osf.example/2ph9b'},
            OSFMAP.hostingInstitution: {'https://cos.example/'},
            OSFMAP.keyword: {rdf.literal('Demo'), rdf.literal('IA'), rdf.literal('IMLS'), rdf.literal('OSF')},
        },
        # file contained by the focus
        'https://osf.example/2ph9b': {
            RDF.type: {OSFMAP.File},
            DCTERMS.created: {date(2021, 10, 18)},
            DCTERMS.identifier: {rdf.literal('https://osf.example/2ph9b')},
            DCTERMS.modified: {date(2021, 10, 18)},
            OSFMAP.fileName: {rdf.literal('test_file.txt')},
            OSFMAP.filePath: {rdf.literal('/Archive of OSF Storage/test_file.txt')},
            OSFMAP.isContainedBy: {'https://osf.example/2c4st'},
        },
        # child component (the focus `hasPart` this)
        'https://osf.example/482n5': {
            RDF.type: {OSFMAP.RegistrationComponent},
            DCTERMS.created: {date(2021, 10, 18)},
            DCTERMS.creator: {'https://osf.example/bhcjn'},
            DCTERMS.dateCopyrighted: {rdf.literal('2021')},
            DCTERMS.identifier: {rdf.literal('https://doi.example/10.17605/OSF.IO/482N5'), rdf.literal('https://osf.example/482n5')},
            DCTERMS.publisher: {'https://osf.example/registries/osf'},
            DCTERMS.rights: {'https://creativecommons.example/licenses/by/4.0/legalcode'},
            DCTERMS.title: {rdf.literal('IA/IMLS Demo: Child Component')},
            OWL.sameAs: {'https://doi.example/10.17605/OSF.IO/482N5'},
            OSFMAP.affiliation: {'https://ror.example/05d5mza29'},
        },
        # project the focus `isVersionOf`
        'https://osf.example/hnm67': {
            RDF.type: {OSFMAP.Project},
            DCTERMS.created: {date(2021, 10, 18)},
            DCTERMS.creator: {'https://osf.example/bhcjn'},
            DCTERMS.identifier: {rdf.literal('https://osf.example/hnm67')},
            DCTERMS.publisher: {'https://osf.example'},
            DCTERMS.title: {rdf.literal('IA/IMLS Demo')},
            OSFMAP.affiliation: {'https://ror.example/05d5mza29'},
        },
        # access service of the focus; publisher of the project
        'https://osf.example': {
            RDF.type: {DCTERMS.Agent, FOAF.Organization},
            DCTERMS.identifier: {rdf.literal('https://osf.example')},
            FOAF.name: {rdf.literal('OSF')},
        },
        # publisher of the focus and of the child component
        'https://osf.example/registries/osf': {
            RDF.type: {DCTERMS.Agent, FOAF.Organization},
            DCTERMS.identifier: {rdf.literal('https://osf.example/'), rdf.literal('https://osf.io/registries/osf')},
            FOAF.name: {rdf.literal('OSF Registries')},
        },
        # creator of the focus, the child component, and the project
        'https://osf.example/bhcjn': {
            RDF.type: {DCTERMS.Agent, FOAF.Person},
            DCTERMS.identifier: {rdf.literal('https://osf.example/bhcjn')},
            FOAF.name: {rdf.literal('JW')},
            OSFMAP.affiliation: {'https://ror.example/05d5mza29'},
        },
        # affiliated institution
        'https://ror.example/05d5mza29': {
            RDF.type: {DCTERMS.Agent, FOAF.Organization},
            DCTERMS.identifier: {rdf.literal('https://ror.example/05d5mza29')},
            FOAF.name: {rdf.literal('Center For Open Science')},
        },
    }),
}
46 changes: 46 additions & 0 deletions tests/trove/derive/test_oaidcxml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from trove.derive.oaidc_xml import OaiDcXmlDeriver

from ._base import BaseIndexcardDeriverTest, SHOULD_SKIP


# root-element tags shared by every expected oai_dc document
_OAI_DC_OPEN_TAG = '<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">'
_OAI_DC_CLOSE_TAG = '</oai_dc:dc>'


def _oai_dc_document(*child_elements: str) -> str:
    # wrap the given serialized child elements (in order) in the root element
    return ''.join((_OAI_DC_OPEN_TAG, *child_elements, _OAI_DC_CLOSE_TAG))


class TestOaiDcXmlDeriver(BaseIndexcardDeriverTest):
    # expected oai_dc xml from `OaiDcXmlDeriver` for each shared test doc
    deriver_class = OaiDcXmlDeriver

    expected_outputs = {
        'blarg-item': SHOULD_SKIP,
        'blarg-project': _oai_dc_document(
            '<dc:title xml:lang="en">title</dc:title>',
            '<dc:date>2024-02-14T00:00:00Z</dc:date>',
            '<dc:type>Project</dc:type>',
        ),
        'sharev2-with-subjects': _oai_dc_document(
            '<dc:title>Assorted chair</dc:title>',
            '<dc:creator>Some Rando</dc:creator>',
            '<dc:date>2019-01-23T00:00:00Z</dc:date>',
            '<dc:type>CreativeWork</dc:type>',
            '<dc:type>Publication</dc:type>',
            '<dc:type>Registration</dc:type>',
            '<dc:identifier>http://osf.example/chair/</dc:identifier>',
            '<dc:relation>http://osf.example/vroom/</dc:relation>',
        ),
        'osfmap-registration': _oai_dc_document(
            '<dc:title>IA/IMLS Demo</dc:title>',
            '<dc:creator>JW</dc:creator>',
            '<dc:subject>Education</dc:subject>',
            '<dc:description>This registration tree is intended to demonstrate linkages between the OSF view of a Registration and the Internet Archive view</dc:description>',
            '<dc:publisher>OSF Registries</dc:publisher>',
            '<dc:date>2021-10-18T00:00:00Z</dc:date>',
            '<dc:type>Registration</dc:type>',
            '<dc:identifier>https://doi.example/10.17605/OSF.IO/2C4ST</dc:identifier>',
            '<dc:identifier>https://osf.example/2c4st</dc:identifier>',
            '<dc:relation>https://osf.example/482n5</dc:relation>',
            '<dc:relation>https://osf.example/hnm67</dc:relation>',
            '<dc:rights>https://creativecommons.example/licenses/by-nc-nd/4.0/legalcode</dc:rights>',
        ),
    }
Loading

0 comments on commit b0ca69a

Please sign in to comment.