Adding uniprot adapter. (#319)
* Adding uniprot adapter.

This is still preliminary.

* Fixing mapping test
cmungall authored Oct 18, 2022
1 parent 33bbf39 commit c5c304a
Showing 10 changed files with 337 additions and 35 deletions.
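
Once merged, the new adapter could plausibly be used like the other oaklib implementations. The sketch below is illustrative only; the "uniprot:" selector shorthand and the example accession are assumptions, not anything shown in this commit.

    # Hypothetical usage sketch (not part of this diff): assumes the new adapter
    # is registered under a "uniprot:" selector shorthand.
    from oaklib.selector import get_implementation_from_shorthand

    oi = get_implementation_from_shorthand("uniprot:")
    for curie, label in oi.labels(["UniProtKB:P12345"]):
        print(curie, label)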
12 changes: 6 additions & 6 deletions src/oaklib/cli.py
@@ -578,6 +578,7 @@ def chain_it(v):
else:
if isinstance(impl, SearchInterface):
cfg = create_search_configuration(term)
logging.info(f"Search config: {term} => {cfg}")
chain_it(impl.basic_search(cfg.search_terms[0], config=cfg))
else:
raise NotImplementedError
@@ -2235,12 +2236,7 @@ def relationships(
"""
impl = settings.impl
if output_type == "obo":
writer = StreamingOboWriter(ontology_interface=impl)
elif output_type == "csv":
writer = StreamingCsvWriter(ontology_interface=impl)
else:
writer = StreamingCsvWriter(ontology_interface=impl)
writer = _get_writer(output_type, impl, StreamingCsvWriter)
writer.autolabel = autolabel
writer.output = output
actual_predicates = _process_predicates_arg(predicates)
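
The if/elif/else writer selection above is replaced by a call to a shared _get_writer helper. A minimal sketch of what such a helper plausibly does, inferred from the call site; the body and module paths below are assumptions, not the committed oaklib code.

    # Sketch only: select a streaming writer class for the requested output type,
    # falling back to the supplied default, then instantiate it over the implementation.
    from oaklib.io.streaming_csv_writer import StreamingCsvWriter
    from oaklib.io.streaming_obo_writer import StreamingOboWriter


    def _get_writer(output_type, impl, default_writer_class):
        writer_classes = {
            "obo": StreamingOboWriter,
            "csv": StreamingCsvWriter,
        }
        writer_class = writer_classes.get(output_type, default_writer_class)
        return writer_class(ontology_interface=impl)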
@@ -3683,6 +3679,7 @@ def diff_via_mappings(
logging.info("No term list provided, will compare all mapped terms")
entities = None
actual_predicates = _process_predicates_arg(predicates)
n = 0
for r in calculate_pairwise_relational_diff(
oi,
other_oi,
Expand All @@ -3696,6 +3693,9 @@ def diff_via_mappings(
if filter_category_identical and r.category == DiffCategory(DiffCategory.Identical):
continue
writer.emit(r)
n += 1
if n == 0:
raise ValueError("No mappings extracted")


@main.command()
8 changes: 7 additions & 1 deletion src/oaklib/datamodels/search.py
@@ -8,7 +8,11 @@
SearchProperty,
SearchTermSyntax,
)
from oaklib.datamodels.vocabulary import LABEL_PREDICATE, SYNONYM_PREDICATES
from oaklib.datamodels.vocabulary import (
IDENTIFIER_PREDICATE,
LABEL_PREDICATE,
SYNONYM_PREDICATES,
)
from oaklib.types import PRED_CURIE

DEFAULT_SEARCH_PROPERTIES = [SearchProperty.LABEL, SearchProperty.ALIAS]
@@ -68,6 +72,8 @@ def search_properties_to_predicates(props: List[SearchProperty]) -> List[PRED_CU
for p in props:
if p == SearchProperty(SearchProperty.LABEL):
preds.add(LABEL_PREDICATE)
elif p == SearchProperty(SearchProperty.IDENTIFIER):
preds.add(IDENTIFIER_PREDICATE)
elif p == SearchProperty(SearchProperty.ALIAS):
preds.update(SYNONYM_PREDICATES + [LABEL_PREDICATE])
elif p == SearchProperty(SearchProperty.ANYTHING):
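
A short usage sketch of the extended mapping, showing the new IDENTIFIER branch alongside LABEL; the expected predicates follow the vocabulary constants added in this commit, and the return order is an implementation detail.

    # Illustrative only: expand search properties into predicate CURIEs.
    from oaklib.datamodels.search import search_properties_to_predicates
    from oaklib.datamodels.search_datamodel import SearchProperty

    preds = search_properties_to_predicates(
        [SearchProperty(SearchProperty.LABEL), SearchProperty(SearchProperty.IDENTIFIER)]
    )
    # Expected to include rdfs:label (LABEL_PREDICATE) and rdf:ID (IDENTIFIER_PREDICATE)
    print(preds)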
14 changes: 10 additions & 4 deletions src/oaklib/datamodels/search_datamodel.yaml
@@ -1,11 +1,17 @@
id: https://w3id.org/linkml/search_datamodel
title: search datamodel
name: search-datamodel
description: >-
A datamodel for representing a search configuration and results. This is intended to provide a unified layer over
both (a) how searches are *parameterized* (b) the structure of search *results*. The scope is any kind of service
that provides search over *named entities*, including ontology concepts. It is not intended to cover generic
description: |-
A datamodel for representing a search configuration and results.
This is intended to provide a unified layer over both:
- (a) how searches are *parameterized*
- (b) the structure of search *results*.
The scope is any kind of service that provides search over *named entities*, including ontology concepts. It is not intended to cover generic
search results, e.g. google search, although parts could be generalized for this purpose.
license: https://creativecommons.org/publicdomain/zero/1.0/

prefixes:
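
As the reworded description says, the datamodel separates how a search is parameterized from how its results are structured. A hedged sketch of the parameterization side from Python; the constructor keyword is assumed from the generated dataclass.

    # Sketch: a search configuration that matches on identifiers only.
    from oaklib.datamodels.search import SearchConfiguration
    from oaklib.datamodels.search_datamodel import SearchProperty

    cfg = SearchConfiguration(properties=[SearchProperty(SearchProperty.IDENTIFIER)])
    # The results side of the datamodel then describes what a conforming service
    # returns for this configuration (the matched entity, which property matched, etc.).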
3 changes: 3 additions & 0 deletions src/oaklib/datamodels/vocabulary.py
@@ -12,6 +12,8 @@

APP_NAME = "ontology-access-kit"

IDENTIFIER_PREDICATE = "rdf:ID"

# TODO: replace with oio vocab
LABEL_PREDICATE = omd.slots.label.curie
HAS_EXACT_SYNONYM = omd.slots.has_exact_synonym.curie
@@ -51,6 +53,7 @@
RDF_TYPE = "rdf:type"
EQUIVALENT_CLASS = "owl:equivalentClass"
OWL_SAME_AS = "owl:sameAs"
RDF_SEE_ALSO = "rdfs:seeAlso"
PART_OF = "BFO:0000050"
LOCATED_IN = "RO:0001025"
DEVELOPS_FROM = "RO:0002202"
52 changes: 37 additions & 15 deletions src/oaklib/implementations/sparql/abstract_sparql_implementation.py
@@ -23,11 +23,12 @@
SearchConfiguration,
search_properties_to_predicates,
)
from oaklib.datamodels.search_datamodel import SearchProperty, SearchTermSyntax
from oaklib.datamodels.search_datamodel import SearchTermSyntax
from oaklib.datamodels.vocabulary import (
ALL_MATCH_PREDICATES,
DEFAULT_PREFIX_MAP,
HAS_DEFINITION_URI,
IDENTIFIER_PREDICATE,
IS_A,
LABEL_PREDICATE,
OBO_PURL,
@@ -162,6 +163,14 @@ def _label_uri(self):
else RDFS.label
)

def _mapping_predicates(self):
preds = ALL_MATCH_PREDICATES
omm = self.ontology_metamodel_mapper
if omm:
return [omm.map_curie(pred, unmapped_reflexive=True) for pred in preds]
else:
return preds

def _definition_uri(self):
return (
self.ontology_metamodel_mapper.definition_uri()
@@ -221,9 +230,10 @@ def obsoletes(self) -> Iterable[CURIE]:
yield self.uri_to_curie(row["s"]["value"])

def simple_mappings_by_curie(self, curie: CURIE) -> Iterable[Tuple[PRED_CURIE, CURIE]]:
mapping_preds = self._mapping_predicates()
uri = self.curie_to_sparql(curie)
query = SparqlQuery(select=["?p ?o"], distinct=True, where=[f"{uri} ?p ?o"])
query.add_values("p", [self.curie_to_sparql(p) for p in ALL_MATCH_PREDICATES])
query.add_values("p", [self.curie_to_sparql(p) for p in mapping_preds])
bindings = self._query(query.query_str())
for row in bindings:
yield (self.uri_to_curie(row["p"]["value"]), self.uri_to_curie(row["o"]["value"]))
@@ -243,7 +253,7 @@ def list_of_named_graphs(self) -> List[URI]:
sw.setReturnFormat(JSON)
check_limit()
ret = sw.queryAndConvert()
logging.info(f"RET={ret}")
logging.debug(f"RET={ret}")
self._list_of_named_graphs = [row["g"]["value"] for row in ret["results"]["bindings"]]
return self._list_of_named_graphs

@@ -290,7 +300,7 @@ def tr(v: Identifier):
sw.setReturnFormat(JSON)
check_limit()
ret = sw.queryAndConvert()
logging.info(f"RET={ret}")
logging.debug(f"queryResults={ret}")
return ret["results"]["bindings"]

def _triples(
@@ -605,8 +615,9 @@ def basic_search(
self, search_term: str, config: SearchConfiguration = SEARCH_CONFIG
) -> Iterable[CURIE]:
if ":" in search_term and " " not in search_term:
logging.debug(f"Not performing search on what looks like a CURIE: {search_term}")
return
# logging.error(f"Not performing search on what looks like a CURIE: {search_term}")
# return
search_term = self.curie_to_uri(search_term)

if self._is_blazegraph():
filter_clause = f'?v bds:search "{search_term}"'
@@ -631,18 +642,29 @@
self.ontology_metamodel_mapper.map_curie(pred, unmapped_reflexive=True)[0]
for pred in preds
]
preds = [self.curie_to_sparql(p) for p in preds]
if len(preds) == 1:
where = [f"?s {preds[0]} ?v "]
if preds == [IDENTIFIER_PREDICATE]:
where = ["?v a ?s_cls", "BIND(?v AS ?s)"]
query = SparqlQuery(select=["?s"], where=where + [filter_clause])
else:
where = ["?s ?p ?v ", f'VALUES ?p {{ {" ".join(preds)} }}']
query = SparqlQuery(select=["?s"], where=where + [filter_clause])
# print(f"Search query: {query.query_str()}")
non_id_preds = [pred for pred in preds if pred != IDENTIFIER_PREDICATE]
non_id_preds = [self.curie_to_sparql(p) for p in non_id_preds]
if len(non_id_preds) == 1:
where = [f"?s {preds[0]} ?v "]
elif len(non_id_preds) == 1:
raise ValueError("Logic error; this should be handled by above clause")
else:
where = ["?s ?p ?v ", f'VALUES ?p {{ {" ".join(non_id_preds)} }}']
if IDENTIFIER_PREDICATE in preds:
raise NotImplementedError(
f"Cannot mix identifier and non-identifier preds: {preds}"
)
query = SparqlQuery(select=["?s"], where=where + [filter_clause])
logging.info(f"Search query: {query.query_str()}")
bindings = self._query(query, prefixes=DEFAULT_PREFIX_MAP)
for row in bindings:
yield self.uri_to_curie(row["s"]["value"])
if SearchProperty(SearchProperty.IDENTIFIER) in config.properties:
raise NotImplementedError
# if SearchProperty(SearchProperty.IDENTIFIER) in config.properties:
# raise NotImplementedError

# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Implements: OboGraphInterface
@@ -665,7 +687,7 @@ def hierarchical_descendants(self, start_curies: Union[CURIE, List[CURIE]]) -> I
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

def get_sssom_mappings_by_curie(self, curie: CURIE) -> Iterable[Mapping]:
pred_uris = [self.curie_to_sparql(pred) for pred in ALL_MATCH_PREDICATES]
pred_uris = [self.curie_to_sparql(pred) for pred in self._mapping_predicates()]
# input curie is subject
query = SparqlQuery(
select=["?p", "?o"],
@@ -66,7 +66,7 @@ class UbergraphImplementation(
.. code:: python
>>> oi = UbergraphImplementation.create()
>>> oi = UbergraphImplementation()
The default ubergraph endpoint will be assumed
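
Tying this docstring fix to the search changes above: an identifier-only configuration now routes through the new "?v a ?s_cls" / "BIND(?v AS ?s)" branch, matching entity URIs rather than literal properties. A hedged end-to-end sketch; the import path, endpoint behaviour, and example search string are assumptions.

    # Sketch only: identifier-only search against a SPARQL-backed implementation.
    from oaklib.datamodels.search import SearchConfiguration
    from oaklib.datamodels.search_datamodel import SearchProperty
    from oaklib.implementations.ubergraph.ubergraph_implementation import (
        UbergraphImplementation,
    )

    oi = UbergraphImplementation()  # default Ubergraph endpoint assumed
    cfg = SearchConfiguration(properties=[SearchProperty(SearchProperty.IDENTIFIER)])
    for curie in oi.basic_search("0000955", config=cfg):
        print(curie)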
(Diffs for the remaining changed files are not rendered in this view.)