Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support remote storage for tool-registry + SDK roll changes for platform service #862

Merged
merged 46 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
9374a79
Add more tests
gaya3-zipstack Oct 22, 2024
5a2dbd0
Commit pdm.lock changes
gaya3-zipstack Oct 22, 2024
3a1b94b
Check in with improvements
gaya3-zipstack Oct 23, 2024
945b215
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Oct 24, 2024
a90e2ed
Commit pdm.lock changes
gaya3-zipstack Oct 24, 2024
83e32b6
Add permanent storage here
gaya3-zipstack Oct 24, 2024
ffdaf65
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Oct 24, 2024
f6e2bb8
Commit pdm.lock changes
gaya3-zipstack Oct 24, 2024
c64cb0a
Add skeleton for temporary fs
gaya3-zipstack Oct 25, 2024
d070bb1
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Oct 25, 2024
8ed0d00
Revert "FIX: Rearranged Prompt Card Buttons and Floating Copy Button …
gaya3-zipstack Oct 29, 2024
89564aa
Remove file variants
gaya3-zipstack Oct 29, 2024
4a7d7e7
Merge branch 'main' of https://github.com/Zipstack/unstract into feat…
gaya3-zipstack Nov 7, 2024
6001b33
Remote storage migration
gaya3-zipstack Nov 8, 2024
6c0a7bc
Sample env
gaya3-zipstack Nov 8, 2024
ee6b5c3
Sample env
gaya3-zipstack Nov 8, 2024
8046dbd
Merge branch 'main' into feature/remote_storage
gaya3-zipstack Nov 8, 2024
5de6d72
Merge remote-tracking branch 'origin' into feature/remote_storage
gaya3-zipstack Nov 8, 2024
5d85ec3
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Nov 8, 2024
7fba4f5
revert unwanted changes
gaya3-zipstack Nov 8, 2024
d042008
Wrap the new remote storage support under a feature flag
gaya3-zipstack Nov 11, 2024
e15c455
Rename env variable
gaya3-zipstack Nov 11, 2024
9647f78
Change logging into warning
gaya3-zipstack Nov 13, 2024
ee2b42f
Update sdk version and lock file
gaya3-zipstack Nov 13, 2024
45ad01f
Commit pdm.lock changes
gaya3-zipstack Nov 13, 2024
ce578ee
Spell check
gaya3-zipstack Nov 13, 2024
f66f635
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Nov 13, 2024
1e386c6
Tuck the FileStorage specifics under the feature flag check
gaya3-zipstack Nov 14, 2024
4125c46
Commit pdm.lock changes
gaya3-zipstack Nov 14, 2024
e45018f
Merge from main
gaya3-zipstack Nov 15, 2024
a5e63fa
Merge remote-tracking branch 'origin/feature/remote_storage' into fea…
gaya3-zipstack Nov 15, 2024
481aebe
Support tool registry with remote storage, roll to rc2
gaya3-zipstack Nov 26, 2024
b626176
Merge branch 'main' of https://github.com/Zipstack/unstract into feat…
gaya3-zipstack Nov 26, 2024
4acad9a
Merge
gaya3-zipstack Nov 26, 2024
71bac3b
Commit pdm.lock changes
gaya3-zipstack Nov 26, 2024
65d540c
Add a separate file for config registry file for Remote storage
gaya3-zipstack Dec 2, 2024
09f7e84
Merge from main
gaya3-zipstack Dec 2, 2024
f365929
Merge with remote
gaya3-zipstack Dec 2, 2024
812e16f
Update backend/sample.env
gaya3-zipstack Dec 2, 2024
da93d0f
Revert to latest on main
gaya3-zipstack Dec 2, 2024
26b2543
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Dec 2, 2024
c18ebc3
Commit pdm.lock changes
gaya3-zipstack Dec 2, 2024
9cda99d
SONAR issue to clean up after apt-get installl
gaya3-zipstack Dec 2, 2024
0382369
Commit pdm.lock changes
gaya3-zipstack Dec 3, 2024
c3bba78
Update unstract/tool-registry/src/unstract/tool_registry/tool_utils.py
gaya3-zipstack Dec 3, 2024
7424049
Commit pdm.lock changes
gaya3-zipstack Dec 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ TOOL_REGISTRY_CONFIG_PATH="/data/tool_registry_config"
# Flipt Service
FLIPT_SERVICE_AVAILABLE=False

# Remote storage config for tool registry
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'


# File System Configuration for Workflow and API Execution

Expand Down
9 changes: 8 additions & 1 deletion docker/dockerfiles/platform.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,14 @@ ENV BUILD_CONTEXT_PATH platform-service
ENV BUILD_PACKAGES_PATH unstract
ENV PDM_VERSION 2.16.1

RUN pip install --no-cache-dir -U pip pdm~=${PDM_VERSION}; \
RUN apt-get update; \
gaya3-zipstack marked this conversation as resolved.
Show resolved Hide resolved
apt-get --no-install-recommends install -y \
# unstract sdk
build-essential libmagic-dev; \
\
apt-get clean && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*; \
\
pip install --no-cache-dir -U pip pdm~=${PDM_VERSION}; \
\
# Creates a non-root user with an explicit UID and adds permission to access the /app folder
# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
Expand Down
887 changes: 455 additions & 432 deletions platform-service/pdm.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion platform-service/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies = [
"cryptography>=41.0.7",
"unstract-flags @ file:///${PROJECT_ROOT}/../unstract/flags",
"requests>=2.31.0",
"unstract-sdk~=0.54.0rc1",
"unstract-sdk==0.54.0rc2",
gaya3-zipstack marked this conversation as resolved.
Show resolved Hide resolved
gaya3-zipstack marked this conversation as resolved.
Show resolved Hide resolved
"gcsfs==2024.10.0",
]
requires-python = ">=3.9,<3.11.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,26 @@ def __init__(
self.file_path = file_path

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
self.file_storage, self.file_path = self.__get_storage_credentials()
self.file_storage, self.file_path = self._get_storage_credentials()
self.model_token_data = self._get_model_token_data()

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):

def __get_storage_credentials(self) -> tuple[PermanentFileStorage, str]:
def _get_storage_credentials(self) -> tuple[PermanentFileStorage, str]:
try:
# Not creating constants for now for the keywords below as this
# logic ought to change in the near future to maintain uniformity
# across services
file_storage = json.loads(os.environ.get("FILE_STORAGE_CREDENTIALS"))
provider = FileStorageProvider(file_storage["provider"])
credentials = file_storage["credentials"]
credentials = file_storage.get("credentials", {})
file_path = file_storage["model_prices_file_path"]
return PermanentFileStorage(provider, **credentials), file_path
except KeyError as e:
app.logger.error(
f"Required credentials is missing in the env: {str(e)}"
)
raise e
except FileStorageError as e:
app.logger.error(
"Error while initialising storage: %s",
Expand Down Expand Up @@ -142,8 +147,6 @@ def _fetch_and_save_json(self) -> Optional[dict[str, Any]]:
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
self.file_storage.json_dump(
path=self.file_path,
mode="w",
encoding="utf-8",
data=json_data,
ensure_ascii=False,
indent=4,
Expand Down
1 change: 1 addition & 0 deletions unstract/tool-registry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
# Hence instead, add the dependencies without version constraints where the
# assumption is they are added as direct dependencies in main project itself.
"unstract-tool-sandbox",
"unstract-flags",
]
requires-python = ">=3.9,<3.11.1"
readme = "README.md"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from typing import Any


class FeatureFlag:
    """Names of temporary feature flags."""

    # Flag name used to enable the remote storage feature.
    REMOTE_FILE_STORAGE = "remote_file_storage"


class Tools:
TOOLS_DIRECTORY = "tools"
IMAGE_LATEST_TAG = "latest"
Expand Down
10 changes: 7 additions & 3 deletions unstract/tool-registry/src/unstract/tool_registry/helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from typing import Any, Optional

from unstract.sdk.file_storage import FileStorage, FileStorageProvider
from unstract.tool_registry.constants import PropKey
from unstract.tool_registry.dto import Tool, ToolMeta
from unstract.tool_registry.exceptions import (
Expand All @@ -20,6 +21,7 @@ def __init__(
registry: str,
private_tools_file: str,
public_tools_file: str,
fs: FileStorage = FileStorage(FileStorageProvider.LOCAL),
) -> None:
"""Helper class for ToolRegistry.

Expand All @@ -32,6 +34,7 @@ def __init__(
self.registry_file = registry
self.private_tools_file = private_tools_file
self.public_tools_file = public_tools_file
self.fs = fs
self.tools = self._load_tools_from_registry_file()
if self.tools:
logger.info(f"Loaded tools from registry YAML: {self.tools}")
Expand Down Expand Up @@ -214,7 +217,7 @@ def save_registry(self, data: dict[str, Any]) -> None:
RegistryNotFound: If the registry file cannot be found.
"""
try:
ToolUtils.save_registry(self.registry_file, data=data)
ToolUtils.save_registry(self.registry_file, data=data, fs=self.fs)
except FileNotFoundError:
logger.error(f"File not found: {self.registry_file}")
raise RegistryNotFound()
Expand Down Expand Up @@ -298,7 +301,9 @@ def get_all_tools_from_disk(self) -> dict[str, dict[str, Any]]:
tools = {}
for tool_file in tool_files:
try:
data = ToolUtils.get_all_tools_from_disk(file_path=tool_file)
data = ToolUtils.get_all_tools_from_disk(
file_path=tool_file, fs=self.fs
)
if not data:
logger.info(f"No data from {tool_file}")
tool_version_list = [
Expand All @@ -309,7 +314,6 @@ def get_all_tools_from_disk(self) -> dict[str, dict[str, Any]]:
tools.update(data)
except FileNotFoundError:
logger.warning(f"Unable to find tool file to load tools: {tool_file}")
pass
return tools

def get_tool_data_by_id(self, tool_uid: str) -> dict[str, Any]:
Expand Down
53 changes: 50 additions & 3 deletions unstract/tool-registry/src/unstract/tool_registry/tool_registry.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,37 @@
import json
import logging
import os
from typing import Any, Optional

from unstract.tool_registry.constants import PropKey, ToolJsonField, ToolKey
from unstract.tool_registry.constants import (
FeatureFlag,
PropKey,
ToolJsonField,
ToolKey,
)
from unstract.tool_registry.dto import Tool
from unstract.tool_registry.exceptions import InvalidToolURLException
from unstract.tool_registry.helper import ToolRegistryHelper
from unstract.tool_registry.schema_validator import JsonSchemaValidator
from unstract.tool_registry.tool_utils import ToolUtils

from unstract.flags.feature_flag import check_feature_flag_status

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
from unstract.sdk.exceptions import FileStorageError
from unstract.sdk.file_storage import FileStorageProvider, PermanentFileStorage

logger = logging.getLogger(__name__)


class ToolRegistry:
REGISTRY_FILE = "registry.yaml"
PRIVATE_TOOL_CONFIG_FILE = "private_tools.json"
PUBLIC_TOOL_CONFIG_FILE = "public_tools.json"
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
PRIVATE_TOOL_CONFIG_FILE = "private_tools_remote_storage.json"
PUBLIC_TOOL_CONFIG_FILE = "public_tools_remote_storage.json"
else:
PRIVATE_TOOL_CONFIG_FILE = "private_tools.json"
PUBLIC_TOOL_CONFIG_FILE = "public_tools.json"

def __init__(
self,
Expand Down Expand Up @@ -45,12 +61,43 @@ def __init__(
"Env 'TOOL_REGISTRY_CONFIG_PATH' is not set, please add the tool "
"registry JSONs and YAML to a directory and set the env."
)

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
self.fs = self._get_storage_credentials()
else:
self.fs = None
self.helper = ToolRegistryHelper(
registry=os.path.join(directory, registry_file),
private_tools_file=os.path.join(directory, private_tools),
public_tools_file=os.path.join(directory, public_tools),
fs=self.fs,
)

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):

def _get_storage_credentials(self) -> PermanentFileStorage:
    """Build the file storage client used by the tool registry.

    Reads the ``TOOL_REGISTRY_STORAGE_CREDENTIALS`` environment variable,
    parses it as JSON and uses its ``provider`` entry plus the optional
    ``credentials`` mapping to construct the storage.

    Returns:
        PermanentFileStorage: Storage created from the configured provider
            and credentials.

    Raises:
        KeyError: If the env variable is unset or has no ``provider`` key.
        FileStorageError: If raised while creating the storage; it is
            logged and re-raised.
    """
    try:
        # Not creating constants for now for the keywords below as this
        # logic ought to change in the near future to maintain uniformity
        # across services
        # The fallback must be a JSON string: json.loads() rejects a dict
        # with a TypeError, which would hide the "missing env" error below.
        file_storage = json.loads(
            os.environ.get("TOOL_REGISTRY_STORAGE_CREDENTIALS", "{}")
        )
        provider = FileStorageProvider(file_storage["provider"])
        credentials = file_storage.get("credentials", {})
        return PermanentFileStorage(provider, **credentials)
    except KeyError as e:
        logger.error(f"Required credentials is missing in the env: {str(e)}")
        raise
    except FileStorageError as e:
        logger.error(
            "Error while initialising storage: %s",
            e,
            stack_info=True,
            exc_info=True,
        )
        raise

def load_all_tools_to_disk(self) -> None:
self.helper.load_all_tools_to_disk()

Expand Down
57 changes: 44 additions & 13 deletions unstract/tool-registry/src/unstract/tool_registry/tool_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@

import yaml
from unstract.sdk.adapters.enums import AdapterTypes
from unstract.tool_registry.constants import AdapterPropertyKey, Tools
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
from unstract.tool_registry.constants import AdapterPropertyKey, FeatureFlag, Tools
from unstract.tool_registry.dto import AdapterProperties, Spec, Tool, ToolMeta
from unstract.tool_registry.exceptions import InvalidToolURLException, RegistryNotFound

from unstract.flags.feature_flag import check_feature_flag_status

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -55,39 +58,67 @@ def get_tool_meta_from_tool_url(registry_tool: str) -> ToolMeta:
)

@staticmethod
def save_tools_in_to_disk(file_path: str, data: dict[str, Any]) -> None:
with open(file_path, "w") as json_file:
json.dump(data, json_file)
def save_tools_in_to_disk(
file_path: str,
data: dict[str, Any],
fs: FileStorage = FileStorage(FileStorageProvider.LOCAL),
) -> None:
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
fs.json_dump(path=file_path, mode="w", encoding="utf-8", data=data)
else:
with open(file_path, "w") as json_file:
json.dump(data, json_file)

@staticmethod
def get_all_tools_from_disk(file_path: str) -> dict[str, Any]:
def get_all_tools_from_disk(
file_path: str, fs: FileStorage = FileStorage(FileStorageProvider.LOCAL)
) -> dict[str, Any]:
try:
with open(file_path) as json_file:
data: dict[str, Any] = json.load(json_file)
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
data = fs.json_load(file_path)
else:
with open(file_path) as json_file:
data: dict[str, Any] = json.load(json_file)
return data
except json.JSONDecodeError as e:
logger.warning(f"Error loading tools from {file_path}: {e}")
return {}

@staticmethod
def save_registry(file_path: str, data: dict[str, Any]) -> None:
with open(file_path, "w") as file:
yaml.dump(data, file, default_flow_style=False)
def save_registry(
file_path: str,
data: dict[str, Any],
fs: FileStorage = FileStorage(FileStorageProvider.LOCAL),
) -> None:
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
fs.yaml_dump(path=file_path, mode="w", encoding="utf-8", data=data)
else:
with open(file_path, "w") as file:
yaml.dump(data, file, default_flow_style=False)

@staticmethod
def get_registry(file_path: str, raise_exc: bool = False) -> dict[str, Any]:
def get_registry(
file_path: str,
fs: FileStorage = FileStorage(FileStorageProvider.LOCAL),
raise_exc: bool = False,
) -> dict[str, Any]:
"""Get Registry File.

Args:
file_path (str): file path of registry.yaml
fs (FileStorage): file storage to be used

Returns:
dict[str, Any]: yaml data
"""
yml_data: dict[str, Any] = {}
try:
with open(file_path) as file:
yml_data = yaml.safe_load(file)
logger.debug(f"Reading tool registry YAML: {file_path}")
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
yml_data = fs.yaml_load(file_path)
else:
with open(file_path) as file:
yml_data = yaml.safe_load(file)
except FileNotFoundError:
logger.warning(f"Could not find tool registry YAML: {str(file_path)}")
if raise_exc:
Expand Down
Loading