Skip to content
This repository has been archived by the owner on Jan 10, 2022. It is now read-only.

Commit

Permalink
add caching logic for WEkEO collections (#123)
Browse files (browse the repository at this point in the history)
* add caching logic for WEkEO collections

* Fix data service tests

Signed-off-by: sherrmann <[email protected]>

* Remove old OpenShift template

Signed-off-by: sherrmann <[email protected]>

Co-authored-by: sherrmann <[email protected]>
  • Loading branch information
lforesta and sophieherrmann authored Oct 23, 2020
1 parent 23ff7eb commit 822dbd3
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 291 deletions.
264 changes: 0 additions & 264 deletions gateway/template.yaml

This file was deleted.

76 changes: 49 additions & 27 deletions services/data/data/dependencies/wekeo_hda.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@


import logging
from typing import Dict, List, Tuple
from typing import Any, Dict, List, Optional, Tuple

from dynaconf import settings
from eodc_openeo_bindings.wekeo_utils import get_collection_metadata, get_filepaths
from nameko.extensions import DependencyProvider

from .cache import cache_json, get_cache_path, get_json_cache
from .links import LinkHandler
from .stac_utils import add_non_csw_info
from ..models import Collection, Collections
Expand Down Expand Up @@ -35,22 +36,24 @@ def __init__(self, service_uri: str, service_user: str, service_password: str) -

LOGGER.debug("Initialized %s", self)

def get_all_products(self) -> Collections:
def get_all_products(self, use_cache: bool = True) -> Collections:
"""Return all products available at the back-end.
Returns:
list -- The list containing information about available products
"""
collection_list = []
for data_id in settings.WHITELIST_WEKEO:
collection_list.append(self.get_product(data_id))
col = self.get_product(data_id, use_cache=use_cache)
if col:
collection_list.append(col)

links = self.link_handler.get_links(collection=True)
collections = Collections(collections=collection_list, links=links)

return collections

def get_product(self, data_id: str) -> Collection:
def get_product(self, data_id: str, use_cache: bool = True) -> Optional[Collection]:
"""Return information about a specific product.
Arguments:
Expand All @@ -59,29 +62,48 @@ def get_product(self, data_id: str) -> Collection:
Returns:
dict -- The product data
"""
wekeo_data_id, _ = self._split_collection_id(data_id)
response = get_collection_metadata(self.service_uri, self.service_user, self.service_password,
wekeo_data_id)

data = response.json()
data["id"] = data_id
data = add_non_csw_info([data])
data = self.link_handler.get_links(data)[0]

collection = Collection(
stac_version=data["stac_version"],
id_=data["id"],
description=data["description"],
license_=data["userTerms"]["termsId"],
extent=data["extent"],
links=data["links"],
title=data["title"],
keywords=data["keywords"],
cube_dimensions=data["cube:dimensions"],
summaries=data["summaries"],
)

return collection
path_to_cache = get_cache_path(settings.CACHE_PATH, data_id, False, settings.DATA_ACCESS_WEKEO)
data: Dict[str, Any] = {}
if use_cache:
datasets = get_json_cache(path_to_cache)
if len(datasets) == 1:
data = datasets[0]
else:
wekeo_data_id, _ = self._split_collection_id(data_id)
response = get_collection_metadata(self.service_uri, self.service_user, self.service_password,
wekeo_data_id)

dataset_wekeo = response.json()
dataset_wekeo["id"] = data_id
datasets_with_static_info: List[Dict[str, Any]] = add_non_csw_info([dataset_wekeo])
datasets_with_static_info = self.link_handler.get_links(datasets_with_static_info)
cache_json(datasets_with_static_info, path_to_cache)
data = datasets_with_static_info[0]

if data:
return Collection(
stac_version=data["stac_version"],
id_=data["id"],
description=data["description"],
license_=data["userTerms"]["termsId"],
extent=data["extent"],
links=data["links"],
title=data["title"],
keywords=data["keywords"],
cube_dimensions=data["cube:dimensions"],
summaries=data["summaries"],
)
return None

def refresh_cache(self, use_cache: bool = False) -> None:
    """Rebuild the cached WEkEO collection metadata.

    Delegates to ``get_all_products`` and discards the returned collections;
    the call is made only for its effect on the cache.

    Args:
        use_cache: Forwarded unchanged to ``get_all_products``. With the
            default ``False`` the cached data is bypassed, so every
            whitelisted collection is requested again and cached anew.
            A bit redundant, because a refresh is already submitted
            through an additional POST.
    """
    LOGGER.debug("Refreshing cache %s", use_cache)
    # Return value intentionally ignored: only the caching side effect matters.
    self.get_all_products(use_cache=use_cache)

def get_filepaths(self, collection_id: str, spatial_extent: Dict, temporal_extent: List) -> Tuple[List, str]:
"""Retrieve a URL list from the WEkEO HDA according to the specified parameters.
Expand Down
2 changes: 2 additions & 0 deletions services/data/data/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ def refresh_cache(self, user: Dict[str, Any] = None, use_cache: bool = False) ->
self.csw_session.refresh_cache(use_cache)
if settings.IS_CSW_SERVER_DC:
self.csw_session_dc.refresh_cache(use_cache)
if settings.IS_HDA_WEKEO:
self.hda_session.refresh_cache(use_cache)

return {
"status": "success",
Expand Down

0 comments on commit 822dbd3

Please sign in to comment.