diff --git a/tests/trove/derive/_base.py b/tests/trove/derive/_base.py index 5fdd84abb..40cdebcb9 100644 --- a/tests/trove/derive/_base.py +++ b/tests/trove/derive/_base.py @@ -9,6 +9,7 @@ class BaseIndexcardDeriverTest(TestCase): + maxDiff = None ####### # implement these things: diff --git a/tests/trove/derive/_inputs.py b/tests/trove/derive/_inputs.py index 02449e52e..e2659c4ee 100644 --- a/tests/trove/derive/_inputs.py +++ b/tests/trove/derive/_inputs.py @@ -10,6 +10,7 @@ OSFMAP, FOAF, OWL, + PROV, SHAREv2, ) @@ -40,7 +41,7 @@ class DeriverTestDoc: BLARG.my_project: { RDF.type: {BLARG.Item, OSFMAP.Project}, DCTERMS.title: {rdf.literal('title', language='en')}, - DCTERMS.creator: {BLARG.ME}, + DCTERMS.creator: {BLARG.me}, DCTERMS.created: {rdf.literal('2024-02-14')}, }, BLARG.me: { @@ -166,6 +167,10 @@ class DeriverTestDoc: OSFMAP.contains: {'https://osf.example/2ph9b'}, OSFMAP.hostingInstitution: {'https://cos.example/'}, OSFMAP.keyword: {rdf.literal('Demo'), rdf.literal('IA'), rdf.literal('IMLS'), rdf.literal('OSF')}, + PROV.qualifiedAttribution: {rdf.blanknode({ + DCAT.hadRole: {OSFMAP['admin-contributor']}, + PROV.agent: {'https://osf.example/bhcjn'}, + })}, }, 'https://osf.example/2ph9b': { RDF.type: {OSFMAP.File}, diff --git a/tests/trove/derive/test_oaidcxml.py b/tests/trove/derive/test_oaidcxml.py index 28d4ce389..10e835add 100644 --- a/tests/trove/derive/test_oaidcxml.py +++ b/tests/trove/derive/test_oaidcxml.py @@ -11,6 +11,7 @@ class TestOaiDcXmlDeriver(BaseIndexcardDeriverTest): 'blarg-project': ( '' 'title' + 'me me' '2024-02-14T00:00:00Z' 'Project' '' diff --git a/tests/trove/derive/test_osfmap_json.py b/tests/trove/derive/test_osfmap_json.py new file mode 100644 index 000000000..23061af5f --- /dev/null +++ b/tests/trove/derive/test_osfmap_json.py @@ -0,0 +1,548 @@ +import json +from trove.derive.osfmap_json import OsfmapJsonDeriver + +from ._base import BaseIndexcardDeriverTest + + +class TestOsfmapJsonDeriver(BaseIndexcardDeriverTest): + deriver_class = OsfmapJsonDeriver + + def assert_derived_texts_equal(self, expected, actual): + self.assertEqual(expected, json.loads(actual)) + + expected_outputs = { + 'blarg-item': { + "@id": "http://blarg.example/vocab/my_item", + "resourceType": [{"@id": "http://blarg.example/vocab/Item"}], + "title": [{ + "@value": "title", + "@language": "en" + }], + "creator": [{ + "@id": "http://blarg.example/vocab/me", + "resourceType": [{"@id": "Person"}], + "name": [{"@value": "me me"}] + }], + "dateCreated": [{"@value": "2024-02-14"}], + }, + 'blarg-project': { + "@id": "http://blarg.example/vocab/my_project", + "resourceType": [ + {"@id": "Project"}, + {"@id": "http://blarg.example/vocab/Item"}, + ], + "title": [{ + "@value": "title", + "@language": "en", + }], + "creator": [{ + "@id": "http://blarg.example/vocab/me", + "resourceType": [{"@id": "Person"}], + "name": [{"@value": "me me"}] + }], + "dateCreated": [{"@value": "2024-02-14"}], + }, + 'sharev2-with-subjects': { + "@id": "http://osf.example/chair/", + "resourceType": [ + {"@id": "sharev2:CreativeWork"}, + {"@id": "sharev2:Publication"}, + {"@id": "sharev2:Registration"}, + ], + "conformsTo": [{"name": [{"@value": "Open-Ended Registration"}]}], + "dateCreated": [ + { + "@value": "2019-01-23", + "@type": "xsd:date" + } + ], + "creator": [ + { + "@id": "mailto:rando@example.com", + "resourceType": [ + {"@id": "Person"}, + {"@id": "sharev2:Agent"}, + {"@id": "sharev2:Person"} + ], + "identifier": [ + {"@value": "http://osf.example/rando/"}, + {"@value": "mailto:rando@example.com"} + ], + "sameAs": [ + {"@id": "http://osf.example/rando/"} + ], + "name": [ + {"@value": "Some Rando"} + ], + "affiliation": [ + { + "@id": "http://wassa.example", + "resourceType": [ + {"@id": "Organization"}, + {"@id": "sharev2:Agent"}, + {"@id": "sharev2:Institution"}, + {"@id": "sharev2:Organization"} + ], + "name": [ + {"@value": "Wassamatter University"} + ] + } + ] + } + ], + "date": [ + { + "@value": "2019-01-23", + "@type": "xsd:date" + } + ], + "identifier": [ + {"@value": "http://osf.example/chair/"} + ], + "isPartOf": [ + { + "@id": "http://osf.example/vroom/", + "resourceType": [ + {"@id": "sharev2:CreativeWork"}, + {"@id": "sharev2:Publication"}, + {"@id": "sharev2:Registration"} + ], + "identifier": [ + {"@value": "http://osf.example/vroom/"} + ], + "isPartOf": [ + { + "@id": "http://osf.example/mdept/", + "resourceType": [ + {"@id": "sharev2:CreativeWork"}, + {"@id": "sharev2:Publication"}, + {"@id": "sharev2:Registration"} + ], + "identifier": [ + {"@value": "http://osf.example/mdept/"} + ], + "title": [ + {"@value": "Miscellaneous department"} + ] + } + ], + "title": [ + {"@value": "Various room"} + ] + } + ], + "subject": [ + {"@value": "Architecture"}, + {"@value": "Biology"}, + {"@value": "Custom biologyyyy"}, + {"@value": "bepress|Architecture"}, + {"@value": "bepress|Life Sciences|Biology"}, + {"@value": "foo|Custom life sciencesssss|Custom biologyyyy"} + ], + "title": [ + {"@value": "Assorted chair"} + ], + "affiliation": [ + { + "@id": "http://wassa.example", + "resourceType": [ + {"@id": "Organization"}, + {"@id": "sharev2:Agent"}, + {"@id": "sharev2:Institution"}, + {"@id": "sharev2:Organization"} + ], + "name": [ + {"@value": "Wassamatter University"} + ] + } + ] + }, + 'osfmap-registration': { + "@id": "https://osf.example/2c4st", + "resourceType": [ + {"@id": "Registration"} + ], + "conformsTo": [ + { + "@id": "https://api.osf.example/v2/schemas/registrations/564d31db8c5e4a7c9694b2be/", + "title": [ + {"@value": "Open-Ended Registration"} + ] + } + ], + "dateCreated": [ + {"@value": "2021-10-18"} + ], + "creator": [ + { + "@id": "https://osf.example/bhcjn", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": "https://osf.example/bhcjn"} + ], + "name": [ + {"@value": "JW"} + ], + "affiliation": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ] + } + ], + 'qualifiedAttribution': [{ + 'agent': [{'@id': 'https://osf.example/bhcjn'}], + 'hadRole': [{'@id': 'osf:admin-contributor'}], + }], + "dateCopyrighted": [ + {"@value": "2021"} + ], + "description": [ + {"@value": "This registration tree is intended to demonstrate linkages between the OSF view of a Registration and the Internet Archive view"} + ], + "hasPart": [ + { + "@id": "https://osf.example/482n5", + "resourceType": [ + {"@id": "RegistrationComponent"} + ], + "dateCreated": [ + {"@value": "2021-10-18"} + ], + "creator": [ + { + "@id": "https://osf.example/bhcjn", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": "https://osf.example/bhcjn"} + ], + "name": [ + {"@value": "JW"} + ], + "affiliation": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ] + } + ], + "dateCopyrighted": [ + {"@value": "2021"} + ], + "identifier": [ + {"@value": "https://doi.example/10.17605/OSF.IO/482N5"}, + {"@value": "https://osf.example/482n5"} + ], + "publisher": [ + { + "@id": "https://osf.example/registries/osf", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://osf.example/"}, + {"@value": "https://osf.io/registries/osf"} + ], + "name": [ + {"@value": "OSF Registries"} + ] + } + ], + "rights": [ + { + "@id": "https://creativecommons.example/licenses/by/4.0/legalcode", + "identifier": [ + {"@value": "https://creativecommons.example/licenses/by/4.0/legalcode"} + ], + "name": [ + {"@value": "CC-By Attribution 4.0 International"} + ] + } + ], + "title": [ + {"@value": "IA/IMLS Demo: Child Component"} + ], + "sameAs": [ + {"@id": "https://doi.example/10.17605/OSF.IO/482N5"} + ], + "affiliation": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ] + } + ], + "identifier": [ + {"@value": "https://doi.example/10.17605/OSF.IO/2C4ST"}, + {"@value": "https://osf.example/2c4st"} + ], + "isVersionOf": [ + { + "@id": "https://osf.example/hnm67", + "resourceType": [ + {"@id": "Project"} + ], + "dateCreated": [ + {"@value": "2021-10-18"} + ], + "creator": [ + { + "@id": "https://osf.example/bhcjn", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Person"} + ], + "identifier": [ + {"@value": "https://osf.example/bhcjn"} + ], + "name": [ + {"@value": "JW"} + ], + "affiliation": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ] + } + ], + "identifier": [ + {"@value": "https://osf.example/hnm67"} + ], + "publisher": [ + { + "@id": "https://osf.example", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://osf.example"} + ], + "name": [ + {"@value": "OSF"} + ] + } + ], + "title": [ + {"@value": "IA/IMLS Demo"} + ], + "affiliation": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ] + } + ], + "dateModified": [ + {"@value": "2021-10-18"} + ], + "publisher": [ + { + "@id": "https://osf.example/registries/osf", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://osf.example/"}, + {"@value": "https://osf.io/registries/osf"} + ], + "name": [ + {"@value": "OSF Registries"} + ] + } + ], + "rights": [ + { + "@id": "https://creativecommons.example/licenses/by-nc-nd/4.0/legalcode", + "identifier": [ + {"@value": "https://creativecommons.example/licenses/by-nc-nd/4.0/legalcode"} + ], + "name": [ + {"@value": "CC-By Attribution-NonCommercial-NoDerivatives 4.0 International"} + ] + } + ], + "subject": [ + { + "@id": "https://api.osf.example/v2/subjects/584240da54be81056cecaae5", + "resourceType": [ + {"@id": "Concept"} + ], + "inScheme": [ + { + "@id": "https://bepress.com/reference_guide_dc/disciplines/", + "resourceType": [ + {"@id": "Concept:Scheme"} + ], + "title": [ + {"@value": "bepress Digital Commons Three-Tiered Taxonomy"} + ] + } + ], + "prefLabel": [ + {"@value": "Education"} + ] + } + ], + "title": [ + {"@value": "IA/IMLS Demo"} + ], + "sameAs": [ + {"@id": "https://doi.example/10.17605/OSF.IO/2C4ST"} + ], + "accessService": [ + { + "@id": "https://osf.example", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://osf.example"} + ], + "name": [ + {"@value": "OSF"} + ] + } + ], + "affiliation": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ], + "archivedAt": [ + {"@id": "https://archive.example/details/osf-registrations-2c4st-v1"} + ], + "osf:contains": [ + { + "@id": "https://osf.example/2ph9b", + "resourceType": [ + {"@id": "File"} + ], + "dateCreated": [ + {"@value": "2021-10-18"} + ], + "identifier": [ + {"@value": "https://osf.example/2ph9b"} + ], + "dateModified": [ + {"@value": "2021-10-18"} + ], + "fileName": [ + {"@value": "test_file.txt"} + ], + "filePath": [ + {"@value": "/Archive of OSF Storage/test_file.txt"} + ], + "isContainedBy": [ + {"@id": "https://osf.example/2c4st"} + ] + } + ], + "hostingInstitution": [ + { + "@id": "https://cos.example/", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://cos.example/"}, + {"@value": "https://ror.example/05d5mza29"} + ], + "sameAs": [ + { + "@id": "https://ror.example/05d5mza29", + "resourceType": [ + {"@id": "Agent"}, + {"@id": "Organization"} + ], + "identifier": [ + {"@value": "https://ror.example/05d5mza29"} + ], + "name": [ + {"@value": "Center For Open Science"} + ] + } + ], + "name": [ + {"@value": "Center for Open Science"} + ] + } + ], + "keyword": [ + {"@value": "Demo"}, + {"@value": "IA"}, + {"@value": "IMLS"}, + {"@value": "OSF"} + ] + }, + } diff --git a/tests/trove/derive/test_sharev2_elastic.py b/tests/trove/derive/test_sharev2_elastic.py index 472ed2b75..1c8aef708 100644 --- a/tests/trove/derive/test_sharev2_elastic.py +++ b/tests/trove/derive/test_sharev2_elastic.py @@ -6,19 +6,17 @@ class TestShareV2ElasticDeriver(BaseIndexcardDeriverTest): - maxDiff = None deriver_class = ShareV2ElasticDeriver def assert_derived_texts_equal(self, expected, actual): - _actual = json.loads(actual) - if expected is None: - print(f'actual:\n{actual}') - else: - self.assertEqual(expected, _actual) + self.assertEqual(expected, json.loads(actual)) expected_outputs = { 'blarg-item': SHOULD_SKIP, 'blarg-project': { + "contributors": [ + "me me" + ], "date": "2024-02-14", "date_created": "2345-01-01T00:00:00", "date_modified": "2345-02-02T00:00:00", @@ -28,6 +26,8 @@ def assert_derived_texts_equal(self, expected, actual): "lists": { "contributors": [ { + "cited_as": "me me", + "name": "me me", "relation": "http://purl.org/dc/terms/creator" } ] diff --git a/trove/derive/osfmap_json.py b/trove/derive/osfmap_json.py index 35856cdfb..1666025f5 100644 --- a/trove/derive/osfmap_json.py +++ b/trove/derive/osfmap_json.py @@ -50,7 +50,7 @@ def rdfobject_as_jsonld(self, rdfobject: rdf.RdfObject) -> dict: rdf.twopledict_from_twopleset(rdfobject), ) elif isinstance(rdfobject, rdf.Literal): - if not rdfobject.datatype_iris: + if not rdfobject.datatype_iris or rdfobject.datatype_iris == {RDF.string}: return {'@value': rdfobject.unicode_value} if RDF.JSON in rdfobject.datatype_iris: # NOTE: does not reset jsonld context (is that a problem?) @@ -62,13 +62,16 @@ def rdfobject_as_jsonld(self, rdfobject: rdf.RdfObject) -> dict: '@language': _language_tag, } # datatype iri (or non-standard language iri) + _datatype_iris = sorted( + ( + osfmap_shorthand().compact_iri(_datatype_iri) + for _datatype_iri in rdfobject.datatype_iris + ), + key=len, + ) return { '@value': rdfobject.unicode_value, - '@type': ( - list(rdfobject.datatype_iris) - if len(rdfobject.datatype_iris) > 1 - else next(iter(rdfobject.datatype_iris)) - ), + '@type': (_datatype_iris if (len(_datatype_iris) > 1) else _datatype_iris[0]), } elif isinstance(rdfobject, str): return {'@id': osfmap_shorthand().compact_iri(rdfobject)} @@ -111,7 +114,7 @@ def __nested_rdfobject_as_jsonld( _nested_obj = ( {} if rdfobject.startswith('_:') # HACK: non-blank blank nodes (stop that) - else {'@id': rdfobject} + else {'@id': osfmap_shorthand().compact_iri(rdfobject)} ) for _pred, _objectset in tripledict[rdfobject].items(): _label = osfmap_shorthand().compact_iri(_pred) @@ -126,22 +129,26 @@ def __nested_rdfobject_as_jsonld( self.__nestvisiting_iris.discard(rdfobject) return _nested_obj - def _list_or_single_value(self, predicate_iri, objectset): + def _list_or_single_value(self, predicate_iri, json_list: list): _only_one_object = OWL.FunctionalProperty in ( OSFMAP_THESAURUS .get(predicate_iri, {}) .get(RDF.type, ()) ) if _only_one_object: - if len(objectset) > 1: + if len(json_list) > 1: raise trove_exceptions.OwlObjection(( f'expected at most one object for <{predicate_iri}>' - f' (got {objectset})' + f' (got {json_list})' )) try: - (_only_obj,) = objectset + (_only_obj,) = json_list except ValueError: return None else: return _only_obj - return list(objectset) + return ( + sorted(json_list, key=json.dumps) + if len(json_list) > 1 + else json_list + ) diff --git a/trove/vocab/osfmap.py b/trove/vocab/osfmap.py index b3f6e46fe..0dd8b8764 100644 --- a/trove/vocab/osfmap.py +++ b/trove/vocab/osfmap.py @@ -17,6 +17,7 @@ FOAF, OSFMAP, OWL, + PROV, RDF, RDFS, SKOS, @@ -543,6 +544,54 @@ literal('hostingInstitution', language='en'), }, }, + PROV.qualifiedAttribution: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('qualifiedAttribution', language='en')}, + }, + PROV.agent: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('agent', language='en')}, + }, + DCAT.hadRole: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('hadRole', language='en')}, + }, + OSFMAP.usage: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('usage', language='en')}, + }, + OSFMAP.storageRegion: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('storageRegion', language='en')}, + }, + OSFMAP.storageByteCount: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('storageByteCount', language='en')}, + }, + OSFMAP.hasOsfAddon: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('hasOsfAddon', language='en')}, + }, + OSFMAP.viewCount: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('viewCount', language='en')}, + }, + OSFMAP.viewSessionCount: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('viewSessionCount', language='en')}, + }, + OSFMAP.downloadCount: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('downloadCount', language='en')}, + }, + OSFMAP.downloadSessionCount: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('downloadSessionCount', language='en')}, + }, + DCTERMS.temporal: { + RDF.type: {RDF.Property}, + JSONAPI_MEMBERNAME: {literal('temporalCoverage', language='en')}, + }, RDFS.label: { RDF.type: {RDF.Property}, RDFS.label: { @@ -708,6 +757,7 @@ literal('hasCedarTemplate', language='en'), }, }, + ### # values: OSFMAP['no-conflict-of-interest']: {