From b2507c4c19150757538d2d1296ec5c13ef8fbb39 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:15:40 +0100 Subject: [PATCH 1/9] Lazy load converter from Bioregistry Depends on https://github.com/biopragmatics/bioregistry/pull/652, helps solve #7 --- src/bioontologies/obograph.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/bioontologies/obograph.py b/src/bioontologies/obograph.py index fa98cb9..873b34f 100644 --- a/src/bioontologies/obograph.py +++ b/src/bioontologies/obograph.py @@ -9,8 +9,7 @@ from operator import attrgetter from typing import Any, Iterable, List, Mapping, Optional, Set, Tuple, Union -from bioregistry import curie_to_str, manager -from curies import Converter +from bioregistry import curie_to_str, manager, get_default_converter from pydantic import BaseModel, Field from tqdm.auto import tqdm from typing_extensions import Literal @@ -40,7 +39,6 @@ MaybeCURIE = Union[Tuple[str, str], Tuple[None, None]] -converter = Converter.from_reverse_prefix_map(manager.get_reverse_prefix_map(include_prefixes=True)) class StandardizeMixin: @@ -571,7 +569,7 @@ def _compress_uri_or_curie_or_str( if cv: return cv - prefix, identifier = converter.parse_uri(s) + prefix, identifier = get_default_converter().parse_uri(s) if prefix and identifier: if prefix == "obo" and "#" in identifier: return _parse_obo_rel(s, identifier) From 2daa6713a6e9db59e8932e7a6e55461b8a215f86 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:16:08 +0100 Subject: [PATCH 2/9] Add function for guessing the version of parse results --- src/bioontologies/robot.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/bioontologies/robot.py b/src/bioontologies/robot.py index c86aa9c..2454b22 100644 --- a/src/bioontologies/robot.py +++ b/src/bioontologies/robot.py @@ -92,6 +92,15 @@ def guess(self, prefix: str) -> Graph: return id_to_graph[CANONICAL[prefix]] raise ValueError(f"Several graphs in {prefix}: 
{sorted(id_to_graph)}") + def guess_version(self, prefix: str) -> Optional[str]: + """Guess the version.""" + try: + graph = self.guess(prefix) + except ValueError: + return None + else: + return graph.version or graph.version_iri + def get_obograph_by_iri( iri: str, From 3013a222a422d40d8f4640608724370b7d799418 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:16:26 +0100 Subject: [PATCH 3/9] Use robot merge --- src/bioontologies/robot.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/bioontologies/robot.py b/src/bioontologies/robot.py index 2454b22..20b3b25 100644 --- a/src/bioontologies/robot.py +++ b/src/bioontologies/robot.py @@ -214,6 +214,7 @@ def convert_to_obograph( input_is_iri: bool = False, extra_args: Optional[List[str]] = None, from_iri: Optional[str] = None, + merge: bool = True, ) -> ParseResults: """Convert a local OWL file to a JSON file. @@ -232,6 +233,7 @@ def convert_to_obograph( :param extra_args: Extra positional arguments to pass in the command line :param from_iri: Use this parameter to say what IRI the graph came from + :param merge: Use ROBOT's merge command to squash all graphs together :returns: An object with the parsed OBO Graph JSON and text output from the ROBOT conversion program @@ -252,6 +254,7 @@ def convert_to_obograph( output_path=path, fmt="json", extra_args=extra_args, + merge=merge, ) messages = ret.strip().splitlines() graph_document_raw = json.loads(path.read_text()) @@ -310,21 +313,31 @@ def convert( output_path: Union[str, Path], input_flag: Optional[Literal["-i", "-I"]] = None, *, + merge: bool = True, fmt: Optional[str] = None, extra_args: Optional[List[str]] = None, ) -> str: """Convert an OBO file to an OWL file with ROBOT.""" if input_flag is None: input_flag = "-I" if _is_remote(input_path) else "-i" - args = [ - "robot", - "convert", - input_flag, - str(input_path), - "-o", - str(output_path), - *(extra_args or []), - ] + if merge: 
+ args = [ + "robot", + "merge", + input_flag, + str(input_path), + "convert", + ] + else: + args = [ + "robot", + "convert", + input_flag, + str(input_path), + ] + args.extend(("-o", str(output_path))) + if extra_args: + args.extend(extra_args) if fmt: args.extend(("--format", fmt)) logger.debug("Running shell command: %s", args) From 02fbbdbf6a3b78abe493ccb2edf6aa7871679f84 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:16:46 +0100 Subject: [PATCH 4/9] Add function for loading local obo graph json --- src/bioontologies/__init__.py | 8 +++++++- src/bioontologies/robot.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/bioontologies/__init__.py b/src/bioontologies/__init__.py index d24e621..0de08ae 100644 --- a/src/bioontologies/__init__.py +++ b/src/bioontologies/__init__.py @@ -2,10 +2,16 @@ """Tools for biomedical ontologies.""" -from .robot import convert_to_obograph, get_obograph_by_iri, get_obograph_by_prefix +from .robot import ( + convert_to_obograph, + get_obograph_by_iri, + get_obograph_by_path, + get_obograph_by_prefix, +) __all__ = [ "convert_to_obograph", "get_obograph_by_prefix", "get_obograph_by_iri", + "get_obograph_by_path", ] diff --git a/src/bioontologies/robot.py b/src/bioontologies/robot.py index 20b3b25..3458ba1 100644 --- a/src/bioontologies/robot.py +++ b/src/bioontologies/robot.py @@ -35,6 +35,7 @@ # Processors "get_obograph_by_prefix", "get_obograph_by_iri", + "get_obograph_by_path", ] logger = logging.getLogger(__name__) @@ -111,6 +112,20 @@ def get_obograph_by_iri( return ParseResults(graph_document=graph_document, iri=iri) +def get_obograph_by_path( + path: Union[str, Path], + *, + iri: Optional[str] = None +) -> ParseResults: + """Get an ontology by its OBO Graph JSON file path.""" + res_json = json.loads(Path(path).resolve().read_text()) + graph_document = GraphDocument(**res_json) + if iri is None: + if graph_document.graphs and len(graph_document.graphs) == 1: + 
iri = graph_document.graphs[0].id + return ParseResults(graph_document=graph_document, iri=iri) + + def get_obograph_by_prefix( prefix: str, *, From 65fdc8370e485bf69776344236f0609aad696d50 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:17:05 +0100 Subject: [PATCH 5/9] Better access to properties --- src/bioontologies/obograph.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/bioontologies/obograph.py b/src/bioontologies/obograph.py index 873b34f..ad0c25f 100644 --- a/src/bioontologies/obograph.py +++ b/src/bioontologies/obograph.py @@ -83,6 +83,7 @@ def val_curie(self) -> str: def standardize(self): """Standardize this property.""" + self.val = self.val.replace("\n", " ") self.pred_prefix, self.pred_identifier = _parse_uri_or_curie_or_str(self.pred) self.val_prefix, self.val_identifier = _parse_uri_or_curie_or_str(self.val) self.standardized = True @@ -271,6 +272,13 @@ def xrefs(self) -> List[Xref]: return self.meta.xrefs return [] + @property + def properties(self) -> List[Property]: + if not self.meta or self.meta.basicPropertyValues is None: + return [] + # TODO filter out ones grabbed by other getters + return self.meta.basicPropertyValues + @property def replaced_by(self) -> Optional[str]: """Get the identifier that this node was replaced by.""" From 43fe70cac3c7839b30609e997156152c983aac28 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:17:31 +0100 Subject: [PATCH 6/9] Inline upgrading Related to #7 --- src/bioontologies/obograph.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bioontologies/obograph.py b/src/bioontologies/obograph.py index ad0c25f..f84290e 100644 --- a/src/bioontologies/obograph.py +++ b/src/bioontologies/obograph.py @@ -14,7 +14,6 @@ from tqdm.auto import tqdm from typing_extensions import Literal -from . 
import upgrade from .relations import ground_relation __all__ = [ @@ -571,9 +570,11 @@ def _parse_obo_rel(s: str, identifier: str) -> Union[Tuple[str, str], Tuple[None def _compress_uri_or_curie_or_str( s: str, *, debug: bool = False ) -> Union[Tuple[str, str], Tuple[None, str]]: + from .upgrade import insert, upgrade + s = s.replace(" ", "") - cv = upgrade.upgrade(s) + cv = upgrade(s) if cv: return cv @@ -593,7 +594,7 @@ def _compress_uri_or_curie_or_str( if s.startswith(x): prefix, identifier = ground_relation(s[len(x) :]) if prefix and identifier: - upgrade.insert(s, prefix, identifier) + insert(s, prefix, identifier) return prefix, identifier elif s not in WARNED: tqdm.write(f"could not parse legacy RO: {s}") From 6e6f69b492706be721d3898cf518fde6068395dc Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:17:46 +0100 Subject: [PATCH 7/9] Update data.tsv --- src/bioontologies/upgrade/data.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bioontologies/upgrade/data.tsv b/src/bioontologies/upgrade/data.tsv index 08b80ee..df84566 100644 --- a/src/bioontologies/upgrade/data.tsv +++ b/src/bioontologies/upgrade/data.tsv @@ -60,6 +60,7 @@ http://purl.obolibrary.org/obo/nbo#has_participant ro 0000057 http://purl.obolibrary.org/obo/ncbitaxon#has_rank debio 0000023 http://purl.obolibrary.org/obo/ncbitaxon/subsets/taxslim#has_rank debio 0000023 http://purl.obolibrary.org/obo/obo#_PATO_0000047 pato 0000047 +http://purl.obolibrary.org/obo/pato#seeAlso rdf seeAlso http://purl.obolibrary.org/obo/so#has_origin debio 0000025 http://purl.obolibrary.org/obo/uberon/core#conduit_for ro 0002570 http://purl.obolibrary.org/obo/uberon/core#existence_starts_and_ends_during ro 0002491 From f0c9e71a75bd7f3a51a0751fe3cb44464b6d103e Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:22:32 +0100 Subject: [PATCH 8/9] Update setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg 
index 6054720..146e947 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,7 @@ keywords = [options] install_requires = - bioregistry>=0.5.86 + bioregistry>=0.6.13 curies>=0.3.0 requests pydantic From 57bf12b103c1e780e02d58f229066225b918c5e9 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 14 Nov 2022 14:23:52 +0100 Subject: [PATCH 9/9] Flake --- src/bioontologies/obograph.py | 4 ++-- src/bioontologies/robot.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/bioontologies/obograph.py b/src/bioontologies/obograph.py index f84290e..cb66413 100644 --- a/src/bioontologies/obograph.py +++ b/src/bioontologies/obograph.py @@ -9,7 +9,7 @@ from operator import attrgetter from typing import Any, Iterable, List, Mapping, Optional, Set, Tuple, Union -from bioregistry import curie_to_str, manager, get_default_converter +from bioregistry import curie_to_str, get_default_converter, manager from pydantic import BaseModel, Field from tqdm.auto import tqdm from typing_extensions import Literal @@ -39,7 +39,6 @@ MaybeCURIE = Union[Tuple[str, str], Tuple[None, None]] - class StandardizeMixin: """A mixin for classes representing standardizable data.""" @@ -273,6 +272,7 @@ def xrefs(self) -> List[Xref]: @property def properties(self) -> List[Property]: + """Get the properties for this node.""" if not self.meta or self.meta.basicPropertyValues is None: return [] # TODO filter out ones grabbed by other getters diff --git a/src/bioontologies/robot.py b/src/bioontologies/robot.py index 3458ba1..3f0ae0e 100644 --- a/src/bioontologies/robot.py +++ b/src/bioontologies/robot.py @@ -112,11 +112,7 @@ def get_obograph_by_iri( return ParseResults(graph_document=graph_document, iri=iri) -def get_obograph_by_path( - path: Union[str, Path], - *, - iri: Optional[str] = None -) -> ParseResults: +def get_obograph_by_path(path: Union[str, Path], *, iri: Optional[str] = None) -> ParseResults: """Get an ontology by its OBO Graph JSON file path.""" res_json = 
json.loads(Path(path).resolve().read_text()) graph_document = GraphDocument(**res_json)