diff --git a/apis_core/apis_entities/abc.py b/apis_core/apis_entities/abc.py
index fedce0282..8edbdef1c 100644
--- a/apis_core/apis_entities/abc.py
+++ b/apis_core/apis_entities/abc.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from django.db import models
 
 #########################
@@ -23,6 +25,11 @@ class Meta:
     def __str__(self):
         return f"{self.forename} {self.surname}"
 
+    @classmethod
+    def rdf_configs(cls):
+        """Return the TOML config paths used to map RDF graphs to this model."""
+        return [Path(__file__).parent / "triple_configs/E21_PersonFromDNB.toml"]
+
 
 class E53_Place(models.Model):
     label = models.CharField(blank=True, default="", max_length=4096)
diff --git a/apis_core/apis_entities/triple_configs/E21_PersonFromDNB.toml b/apis_core/apis_entities/triple_configs/E21_PersonFromDNB.toml
new file mode 100644
index 000000000..326d3f056
--- /dev/null
+++ b/apis_core/apis_entities/triple_configs/E21_PersonFromDNB.toml
@@ -0,0 +1,15 @@
+[[filters]]
+"rdf:type" = "gndo:DifferentiatedPerson"
+
+[attributes]
+forename = ["gndo:forename", "gndo:preferredNameEntityForThePerson/gndo:forename"]
+alternative_names = "gndo:variantNameForThePerson"
+surname = ["gndo:surname", "gndo:preferredNameEntityForThePerson/gndo:surname"]
+start_date_written = "gndo:dateOfBirth"
+end_date_written = "gndo:dateOfDeath"
+same_as = "owl:sameAs"
+profession = "gndo:professionOrOccupation"
+
+# NOTE: this key is still part of the [attributes] table; rdf2.py collects
+# the values of an attribute named "relations" separately, keyed by path.
+relations = ["gndo:placeOfDeath", "gndo:placeOfBirth"]
diff --git a/apis_core/utils/rdf2.py b/apis_core/utils/rdf2.py
new file mode 100644
index 000000000..da7e293de
--- /dev/null
+++ b/apis_core/utils/rdf2.py
@@ -0,0 +1,117 @@
+# SPDX-FileCopyrightText: 2025 Birger Schacht
+# SPDX-License-Identifier: MIT
+
+"""Extract model attributes from RDF graphs using per-model TOML configs."""
+
+import logging
+import tomllib
+from collections import defaultdict
+from pathlib import Path
+
+from AcdhArcheAssets.uri_norm_rules import get_normalized_uri
+from django.apps import apps
+from rdflib import RDF, BNode, Graph
+
+logger = logging.getLogger(__name__)
+
+
+def _filter_matches(graph: Graph, _filter: dict) -> bool:
+    """Return True if the graph contains every pattern of one config filter.
+
+    Each ``predicate = object`` pair of the filter is expanded from CURIE
+    form and looked up in the graph with a wildcard subject.
+    """
+    expand = graph.namespace_manager.expand_curie
+    try:
+        patterns = [
+            (None, expand(predicate), expand(obj))
+            for predicate, obj in _filter.items()
+        ]
+    except ValueError as err:
+        # expand_curie raises ValueError for a malformed CURIE or a prefix
+        # that is not bound in this graph. Such a filter cannot match, and
+        # it must not keep the remaining configs from being tried.
+        logger.debug("Skipping filter %s: %s", _filter, err)
+        return False
+    return all(pattern in graph for pattern in patterns)
+
+
+def find_matching_config(graph: Graph) -> dict | None:
+    """Return the first RDF config with a filter that matches the graph.
+
+    Every installed model that provides ``rdf_configs()`` is asked for its
+    TOML config paths. A config matches as soon as one of its ``filters``
+    matches; the returned dict is the parsed config with the owning model
+    class added under the ``"model"`` key. Returns None if nothing matches.
+    """
+    for model in apps.get_models():
+        if not hasattr(model, "rdf_configs"):
+            continue
+        for path in model.rdf_configs():
+            # TOML documents are UTF-8 by specification; do not fall back to
+            # the locale's default encoding.
+            config = tomllib.loads(Path(path).read_text(encoding="utf-8"))
+            filters = config.get("filters", [])
+            if any(_filter_matches(graph, _filter) for _filter in filters):
+                logger.debug("Using %s for parsing graph", path)
+                config["model"] = model
+                return config
+    return None
+
+
+def _values_for_path(graph: Graph, curie: str) -> list:
+    """Return the Python values of every object the graph has for a path.
+
+    Blank-node objects are replaced by the objects attached to them, apart
+    from ``rdf:Seq``.
+    """
+    # The path comes from a model's own config file. It is spliced into the
+    # query text because it may be a property path such as ``a:b/a:c``.
+    rows = graph.query("SELECT ?object WHERE { ?subject " + curie + " ?object }")
+    values = []
+    for row in rows:
+        obj = row.object
+        if isinstance(obj, BNode):
+            values.extend(
+                value.toPython()
+                for value in graph.objects(subject=obj)
+                if value != RDF.Seq
+            )
+        else:
+            values.append(obj.toPython())
+    return values
+
+
+def get_something_from_uri(uri: str) -> dict | None:
+    """Parse the RDF document at a URI and map it to model attributes.
+
+    The URI is normalized, parsed into a graph and matched against the
+    available RDF configs. Returns None if no config matches. Otherwise the
+    result dict holds the model class under ``"model"``, a list of values
+    for every configured attribute, and under ``"relations"`` a mapping from
+    each relation path to its list of values.
+    """
+    uri = get_normalized_uri(uri)
+    graph = Graph()
+    graph.parse(uri)
+
+    config = find_matching_config(graph)
+    if config is None:
+        return None
+
+    result = defaultdict(list)
+    result["model"] = config["model"]
+    result["relations"] = defaultdict(list)
+
+    for attribute, curies in config.get("attributes", {}).items():
+        # A single path may be given as a plain string.
+        if isinstance(curies, str):
+            curies = [curies]
+        for curie in curies:
+            values = _values_for_path(graph, curie)
+            if attribute == "relations":
+                # Relations are kept apart and grouped by path.
+                result["relations"][curie].extend(values)
+            else:
+                result[attribute].extend(values)
+    return dict(result)