From f6e8e4a10feb53c225d412db77733407f7665e46 Mon Sep 17 00:00:00 2001 From: Vincent Verelst Date: Tue, 12 Mar 2024 16:37:03 +0100 Subject: [PATCH] moved metadata_from_stac function from connection to metadata module #527 --- openeo/metadata.py | 59 +++++++++++++++++++++++++++++++++ openeo/rest/connection.py | 69 +++++---------------------------------- 2 files changed, 68 insertions(+), 60 deletions(-) diff --git a/openeo/metadata.py b/openeo/metadata.py index 31d97513f..cf59aa211 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import pystac import warnings from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union @@ -522,3 +523,61 @@ def _repr_html_(self): def __str__(self) -> str: bands = self.band_names if self.has_band_dimension() else "no bands dimension" return f"CollectionMetadata({self.extent} - {bands} - {self.dimension_names()})" + + +def metadata_from_stac(url: str) -> CubeMetadata: + """ + Reads the band metadata from a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` + + :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection + :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
+ """ + + def get_band_names(asst: pystac.Asset) -> List[Band]: + return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] + + def is_band_asset(asset: pystac.Asset) -> bool: + return "eo:bands" in asset.extra_fields + + stac_object = pystac.read_file(href=url) + + band_names = [] + collection = None + + if isinstance(stac_object, pystac.Item): + item = stac_object + if "eo:bands" in item.properties: + eo_bands_location = item.properties + elif item.get_collection() is not None: + collection = item.get_collection() + eo_bands_location = item.get_collection().summaries.lists + else: + eo_bands_location = {} + band_names = [Band(b["name"]) for b in eo_bands_location.get("eo:bands", [])] + + elif isinstance(stac_object, pystac.Collection): + collection = stac_object + band_names = [Band(b["name"]) for b in collection.summaries.lists.get("eo:bands", [])] + + # Summaries is not a required field in a STAC collection, so also check the assets + for itm in collection.get_items(): + band_assets = { + asset_id: asset + for asset_id, asset in dict(sorted(itm.get_assets().items())).items() + if is_band_asset(asset) + } + + for asset in band_assets.values(): + asset_band_names = get_band_names(asset) + for asset_band_name in asset_band_names: + if asset_band_name not in band_names: + band_names.append(asset_band_name) + + else: + assert isinstance(stac_object, pystac.Catalog) + catalog = stac_object + band_names = [Band(b["name"]) for b in catalog.extra_fields.get("summaries", {}).get("eo:bands", [])] + + band_dimension = BandDimension(name="bands", bands=band_names) + metadata = CubeMetadata(dimensions=[band_dimension]) + return metadata diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py index 7d3e02eab..8aa95210c 100644 --- a/openeo/rest/connection.py +++ b/openeo/rest/connection.py @@ -7,7 +7,6 @@ import json import logging import os -import pystac import shlex import sys import warnings @@ -28,7 +27,14 @@ from openeo.internal.jupyter 
import VisualDict, VisualList from openeo.internal.processes.builder import ProcessBuilderBase from openeo.internal.warnings import deprecated, legacy_alias -from openeo.metadata import Band, BandDimension, CollectionMetadata, CubeMetadata, SpatialDimension, TemporalDimension +from openeo.metadata import ( + Band, + BandDimension, + CollectionMetadata, + SpatialDimension, + TemporalDimension, + metadata_from_stac, +) from openeo.rest import ( CapabilitiesException, OpenEoApiError, @@ -1150,63 +1156,6 @@ def datacube_from_json(self, src: Union[str, Path], parameters: Optional[dict] = """ return self.datacube_from_flat_graph(load_json_resource(src), parameters=parameters) - def metadata_from_stac(self, url: str) -> CubeMetadata: - """ - Reads the band metadata a static STAC catalog or a STAC API Collection and returns it as a :py:class:`CubeMetadata` - - :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection - :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url. 
- """ - - def get_band_names(asst: pystac.Asset) -> List[Band]: - return [Band(eo_band["name"]) for eo_band in asst.extra_fields["eo:bands"]] - - def is_band_asset(asset: pystac.Asset) -> bool: - return "eo:bands" in asset.extra_fields - - stac_object = pystac.read_file(href=url) - - band_names = [] - collection = None - - if isinstance(stac_object, pystac.Item): - item = stac_object - if "eo:bands" in item.properties: - eo_bands_location = item.properties - elif item.get_collection() is not None: - collection = item.get_collection() - eo_bands_location = item.get_collection().summaries.lists - else: - eo_bands_location = {} - band_names = [Band(b["name"]) for b in eo_bands_location.get("eo:bands", [])] - - elif isinstance(stac_object, pystac.Collection): - collection = stac_object - band_names = [Band(b["name"]) for b in collection.summaries.lists.get("eo:bands", [])] - - # Summaries is not a required field in a STAC collection, so also check the assets - for itm in collection.get_items(): - band_assets = { - asset_id: asset - for asset_id, asset in dict(sorted(itm.get_assets().items())).items() - if is_band_asset(asset) - } - - for asset in band_assets.values(): - asset_band_names = get_band_names(asset) - for asset_band_name in asset_band_names: - if asset_band_name not in band_names: - band_names.append(asset_band_name) - - else: - assert isinstance(stac_object, pystac.Catalog) - catalog = stac_object - band_names = [Band(b["name"]) for b in catalog.extra_fields.get("summaries", {}).get("eo:bands", [])] - - band_dimension = BandDimension(name="bands", bands=band_names) - metadata = CubeMetadata(dimensions=[band_dimension]) - return metadata - @openeo_process def load_collection( self, @@ -1420,7 +1369,7 @@ def load_stac( } cube = self.datacube_from_process(process_id="load_stac", **arguments) try: - cube.metadata = self.metadata_from_stac(url) + cube.metadata = metadata_from_stac(url) except Exception: _log.warning("Python client could not read band metadata 
from URL.") return cube