issue #693 solve merge conflicts
ElienVandermaesenVITO committed Jan 8, 2025
2 parents a64ce16 + 7064cf3 commit a6c6533
Showing 21 changed files with 1,313 additions and 107 deletions.
23 changes: 20 additions & 3 deletions CHANGELOG.md
@@ -9,22 +9,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
- Argument `spatial_extent` in `load_collection` supports Shapely objects and loading GeoJSON from a local path (see the sketch below).
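
A minimal sketch of the new `spatial_extent` inputs, assuming an openEO backend connection; the backend URL, collection id, coordinates and file name are illustrative:

```python
import openeo
from shapely.geometry import box

connection = openeo.connect("openeo.example.com").authenticate_oidc()

# A Shapely geometry as spatial extent
cube = connection.load_collection(
    "SENTINEL2_L2A",
    spatial_extent=box(5.0, 51.0, 5.1, 51.1),
)

# A local GeoJSON file path as spatial extent
cube = connection.load_collection(
    "SENTINEL2_L2A",
    spatial_extent="parcels.geojson",
)
```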

### Changed

### Removed

### Fixed


## [0.36.0] - 2024-12-10

### Added

- Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
- Allow specifying `limit` when listing batch jobs with `Connection.list_jobs()` ([#677](https://github.com/Open-EO/openeo-python-client/issues/677))
- Add `additional` and `job_options` arguments to `Connection.download()`, `DataCube.download()` and related ([#681](https://github.com/Open-EO/openeo-python-client/issues/681)) (see the sketch below)
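
A combined sketch of these three additions, assuming an authenticated `connection` and a `cube` built on it; the URL, limit and option names/values are illustrative:

```python
# A URL as geometries: the client now inserts a load_url node automatically
stats = cube.aggregate_spatial(
    geometries="https://example.com/fields.geojson",
    reducer="mean",
)

# List only the first ten batch jobs
jobs = connection.list_jobs(limit=10)

# Pass extra request fields and backend-specific options at download time
cube.download(
    "result.nc",
    additional={"custom_property": "value"},  # illustrative extra request field
    job_options={"driver-memory": "4g"},  # illustrative backend-specific option
)
```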

### Changed

- `MultiBackendJobManager`: a `costs` column has been added to the tracking databases ([#588](https://github.com/Open-EO/openeo-python-client/issues/588))
- When passing a path/string as `geometry` to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc.:
this is no longer automatically translated to the deprecated, non-standard `read_vector` usage.
Instead, if it is a local GeoJSON file, the GeoJSON data is loaded directly client-side (see the sketch after this list).
([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))

- Move `read()` method from general `JobDatabaseInterface` to more specific `FullDataFrameJobDatabase` ([#680](https://github.com/Open-EO/openeo-python-client/issues/680))
- Align `additional` and `job_options` arguments in `Connection.create_job()`, `DataCube.create_job()` and related.
Also, follow official spec more closely. ([#683](https://github.com/Open-EO/openeo-python-client/issues/683), [Open-EO/openeo-api#276](https://github.com/Open-EO/openeo-api/issues/276))
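
A short sketch of the new geometry handling and the aligned job-creation arguments, again assuming a `cube` on an authenticated connection; file name and option names/values are illustrative:

```python
# A local GeoJSON path is now read client-side and embedded in the
# process graph, instead of triggering the deprecated read_vector process
masked = cube.mask_polygon("parcels.geojson")

# Batch job creation with the aligned arguments
job = cube.create_job(
    title="masked cube",
    additional={"custom_property": "value"},  # illustrative extra request field
    job_options={"driver-memory": "4g"},  # illustrative backend-specific option
)
```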

### Fixed

- `load_stac`: use fallback temporal dimension when no "cube:dimensions" in STAC Collection ([#666](https://github.com/Open-EO/openeo-python-client/issues/666))
- Fix usage of `Parameter.spatial_extent()` with `load_collection` and `filter_bbox` ([#676](https://github.com/Open-EO/openeo-python-client/issues/676)) (illustrated below)
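
For the `Parameter.spatial_extent()` fix, usage along these lines now works as intended (assuming an authenticated `connection`; the collection id is illustrative):

```python
from openeo.api.process import Parameter

spatial_extent = Parameter.spatial_extent()
cube = connection.load_collection("SENTINEL2_L2A", spatial_extent=spatial_extent)
```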


## [0.35.0] - 2024-11-19

1 change: 1 addition & 0 deletions docs/installation.rst
@@ -92,6 +92,7 @@ For example:
- ``rioxarray`` for GeoTIFF support in the assert helpers from ``openeo.testing.results``
- ``geopandas`` for working with dataframes with geospatial support,
(e.g. with :py:class:`~openeo.extra.job_management.MultiBackendJobManager`)
- ``pystac_client`` for creating a STAC API Job Database (e.g. with :py:class:`~openeo.extra.job_management.stac_job_db.STACAPIJobDatabase`)
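
For reference, this optional dependency can be installed with ``pip install pystac-client`` (the PyPI distribution name uses a dash; the importable module is ``pystac_client``).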


Enabling additional features
2 changes: 1 addition & 1 deletion docs/udp.rst
@@ -134,7 +134,7 @@ Some useful parameter helpers (class methods of the :py:class:`~openeo.api.proce
- :py:meth:`Parameter.geojson() <openeo.api.process.Parameter.geojson>` to create
a parameter for specifying a GeoJSON geometry.
- :py:meth:`Parameter.spatial_extent() <openeo.api.process.Parameter.spatial_extent>` to create
a spatial_extent parameter that is exactly the same as the corresponding parameter in `load_collection` and `load_stac`.
a spatial_extent parameter that is exactly the same as the corresponding parameter in ``load_collection`` and ``load_stac``.



2 changes: 1 addition & 1 deletion openeo/_version.py
@@ -1 +1 @@
__version__ = "0.36.0a1"
__version__ = "0.37.0a1"
58 changes: 47 additions & 11 deletions openeo/api/process.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import textwrap
import warnings
from typing import List, Optional, Union

@@ -279,23 +280,15 @@ def bounding_box(
}
return cls(name=name, description=description, schema=schema, **kwargs)

_spatial_extent_description = """Limits the data to process to the specified bounding box or polygons.
For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
For vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.
Empty geometries are ignored.
Set this parameter to null to set no limit for the spatial extent. """

@classmethod
def spatial_extent(
cls,
name: str = "spatial_extent",
description: str = _spatial_extent_description,
description: Optional[str] = None,
**kwargs,
) -> Parameter:
"""
Helper to easily create a 'spatial_extent' parameter, which is compatible with the 'load_collection' argument of
Helper to easily create a 'spatial_extent' parameter, which is compatible with the ``load_collection`` argument of
the same name. This allows to conveniently create user-defined processes that can be applied to a bounding box and vector data
for spatial filtering. It is also possible for users to set to null, and define spatial filtering using other processes.
@@ -307,6 +300,26 @@ def spatial_extent(
.. versionadded:: 0.32.0
"""
if description is None:
description = textwrap.dedent(
"""
Limits the data to process to the specified bounding box or polygons.
For raster data, the process loads the pixel into the data cube if the point
at the pixel center intersects with the bounding box or any of the polygons
(as defined in the Simple Features standard by the OGC).
For vector data, the process loads the geometry into the data cube if the geometry
is fully within the bounding box or any of the polygons (as defined in the
Simple Features standard by the OGC). Empty geometries may only be in the
data cube if no spatial extent has been provided.
Empty geometries are ignored.
Set this parameter to null to set no limit for the spatial extent.
"""
).strip()

schema = [
{
"title": "Bounding Box",
@@ -410,7 +423,7 @@ def geojson(cls, name: str, description: str = "Geometries specified as GeoJSON
@classmethod
def temporal_interval(
cls,
name: str,
name: str = "temporal_extent",
description: str = "Temporal extent specified as two-element array with start and end date/date-time.",
**kwargs,
) -> Parameter:
@@ -441,3 +454,26 @@ def temporal_interval(
},
}
return cls(name=name, description=description, schema=schema, **kwargs)


def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optional[str] = None) -> bool:
"""Helper to check if parameter schema supports given type/subtype"""
# TODO: support checking item type in arrays
if isinstance(schema, dict):
actual_type = schema.get("type")
if isinstance(actual_type, str):
if actual_type != type:
return False
elif isinstance(actual_type, list):
if type not in actual_type:
return False
else:
raise ValueError(actual_type)
if subtype:
if schema.get("subtype") != subtype:
return False
return True
elif isinstance(schema, list):
return any(schema_supports(s, type=type, subtype=subtype) for s in schema)
else:
raise ValueError(schema)
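
A quick usage sketch of two helpers touched in this diff, `Parameter.temporal_interval()` (its `name` now defaults to `"temporal_extent"`) and the new `schema_supports()`; the schema values are illustrative:

```python
from openeo.api.process import Parameter, schema_supports

# temporal_interval() now defaults the parameter name to "temporal_extent"
interval = Parameter.temporal_interval()

# schema_supports() accepts a single schema dict or a list of schemas
assert schema_supports({"type": "object", "subtype": "geojson"}, type="object", subtype="geojson")
assert schema_supports([{"type": "string"}, {"type": "null"}], type="null")
assert not schema_supports({"type": "string"}, type="object")
```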
@@ -41,6 +41,7 @@

_log = logging.getLogger(__name__)


class _Backend(NamedTuple):
"""Container for backend info/settings"""

@@ -70,15 +71,6 @@ def exists(self) -> bool:
"""Does the job database already exist, to read job data from?"""
...

@abc.abstractmethod
def read(self) -> pd.DataFrame:
"""
Read job data from the database as pandas DataFrame.
:return: loaded job data.
"""
...

@abc.abstractmethod
def persist(self, df: pd.DataFrame):
"""
@@ -112,7 +104,6 @@ def get_by_status(self, statuses: List[str], max=None) -> pd.DataFrame:
"""
...


def _start_job_default(row: pd.Series, connection: Connection, *args, **kwargs):
raise NotImplementedError("No 'start_job' callable provided")

@@ -364,9 +355,9 @@ def start_job_thread(self, start_job: Callable[[], BatchJob], job_db: JobDatabas

# Resume from existing db
_log.info(f"Resuming `run_jobs` from existing {job_db}")
df = job_db.read()

self._stop_thread = False

def run_loop():

# TODO: support user-provided `stats`
@@ -810,6 +801,15 @@ def initialize_from_df(self, df: pd.DataFrame, *, on_exists: str = "error"):
# Return self to allow chaining with constructor.
return self

@abc.abstractmethod
def read(self) -> pd.DataFrame:
"""
Read job data from the database as pandas DataFrame.
:return: loaded job data.
"""
...

@property
def df(self) -> pd.DataFrame:
if self._df is None:
@@ -856,6 +856,7 @@ class CsvJobDatabase(FullDataFrameJobDatabase):
.. versionadded:: 0.31.0
"""

def __init__(self, path: Union[str, Path]):
super().__init__()
self.path = Path(path)
@@ -912,6 +913,7 @@ class ParquetJobDatabase(FullDataFrameJobDatabase):
.. versionadded:: 0.31.0
"""

def __init__(self, path: Union[str, Path]):
super().__init__()
self.path = Path(path)
@@ -934,6 +936,7 @@ def read(self) -> pd.DataFrame:
metadata = pyarrow.parquet.read_metadata(self.path)
if b"geo" in metadata.metadata:
import geopandas

return geopandas.read_parquet(self.path)
else:
return pd.read_parquet(self.path)
@@ -1045,6 +1048,7 @@ class ProcessBasedJobCreator:
`feedback and suggestions for improvement <https://github.com/Open-EO/openeo-python-client/issues>`_.
"""

def __init__(
self,
*,
@@ -1077,7 +1081,6 @@ def _get_process_definition(self, connection: Connection) -> Process:
f"Unsupported process definition source udp_id={self._process_id!r} namespace={self._namespace!r}"
)


def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob:
"""
Implementation of the ``start_job`` callable interface
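
A short sketch of the relocated `read()` on the concrete job databases (the CSV file name is illustrative):

```python
from openeo.extra.job_management import CsvJobDatabase

# read() now lives on FullDataFrameJobDatabase subclasses
# such as CsvJobDatabase and ParquetJobDatabase
db = CsvJobDatabase("jobs.csv")
if db.exists():
    df = db.read()  # job tracking data as a pandas DataFrame
```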