-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* SDK & adapters merge - First cut * Refactor unstract.sdk.core.* back to unstract.sdk.* * Pick up latest changes * Remove LICENSE * Refactoring adapters.py to adapter.py to resolve name conflict * Remove dependency on adapters * Update lock file * Fix regex pattern * pin llama-index-core version --------- Signed-off-by: Gayathri <[email protected]>
- Loading branch information
1 parent
623807c
commit a424e5b
Showing
179 changed files
with
7,009 additions
and
390 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
__version__ = "0.38.1" | ||
__version__ = "0.39.0" | ||
|
||
|
||
|
||
def get_sdk_version(): | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import logging | ||
from logging import NullHandler | ||
from typing import Any | ||
|
||
# Attach a no-op handler to the package logger so that, unless the host
# application configures logging, records from this package are dropped
# instead of reaching the interpreter's last-resort stderr handler.
_package_logger = logging.getLogger(__name__)
_package_logger.addHandler(NullHandler())

# Shape of an adapter registry: a dict of str keys to dict[str, Any] entries.
AdapterDict = dict[str, dict[str, Any]]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
import logging | ||
from typing import Any | ||
|
||
from singleton_decorator import singleton | ||
|
||
from unstract.sdk.adapters import AdapterDict | ||
from unstract.sdk.adapters.base import Adapter | ||
from unstract.sdk.adapters.constants import Common | ||
from unstract.sdk.adapters.embedding import adapters as embedding_adapters | ||
from unstract.sdk.adapters.llm import adapters as llm_adapters | ||
from unstract.sdk.adapters.ocr import adapters as ocr_adapters | ||
from unstract.sdk.adapters.vectordb import adapters as vectordb_adapters | ||
from unstract.sdk.adapters.x2text import adapters as x2text_adapters | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
# Declared as a singleton so the merged adapter registry is built only once
# rather than on every instantiation.
@singleton
class Adapterkit:
    """Single lookup point over all adapter registries.

    Merges the embedding, LLM, vector DB, X2Text and OCR registries into one
    mapping keyed by adapter ID, and offers helpers to resolve, instantiate
    and list the registered adapter classes.
    """

    def __init__(self) -> None:
        # Dict union: if two registries share an ID, the right-most one wins.
        self._adapters: AdapterDict = (
            embedding_adapters
            | llm_adapters
            | vectordb_adapters
            | x2text_adapters
            | ocr_adapters
        )

    @property
    def adapters(self) -> AdapterDict:
        """Return the merged registry (the internal dict, not a copy)."""
        return self._adapters

    def get_adapter_class_by_adapter_id(
        self, adapter_id: str
    ) -> type[Adapter]:
        """Look up the adapter class registered under ``adapter_id``.

        Args:
            adapter_id (str): Identifies the adapter to look up

        Raises:
            RuntimeError: If no adapter is registered under ``adapter_id``

        Returns:
            type[Adapter]: The adapter class itself, not an instance
        """
        if adapter_id not in self._adapters:
            raise RuntimeError(f"Couldn't obtain adapter for {adapter_id}")
        adapter_class: type[Adapter] = self._adapters[adapter_id][
            Common.METADATA
        ][Common.ADAPTER]
        return adapter_class

    def get_adapter_by_id(
        self, adapter_id: str, *args: Any, **kwargs: Any
    ) -> Adapter:
        """Instantiate and return an adapter.

        Args:
            adapter_id (str): Identifies the adapter to create
            *args: Positional arguments forwarded to the adapter constructor
            **kwargs: Keyword arguments forwarded to the adapter constructor

        Raises:
            RuntimeError: If the ID is invalid/adapter is missing

        Returns:
            Adapter: Concrete impl of the `Adapter` base
        """
        adapter_class = self.get_adapter_class_by_adapter_id(adapter_id)
        return adapter_class(*args, **kwargs)

    def get_adapters_list(self) -> list[dict[str, Any]]:
        """Describe every registered adapter.

        Returns:
            list[dict[str, Any]]: One dict per adapter with the keys ``id``,
                ``name``, ``class_name``, ``description``, ``icon``,
                ``adapter_type`` (the ``.name`` of the value returned by
                ``get_adapter_type()``) and ``json_schema``
        """
        described: list[dict[str, Any]] = []
        # Only the registry entries are needed; the keys are not used here.
        for registry_entry in self._adapters.values():
            adapter_class: type[Adapter] = registry_entry[Common.METADATA][
                Common.ADAPTER
            ]
            described.append(
                {
                    "id": adapter_class.get_id(),
                    "name": adapter_class.get_name(),
                    "class_name": adapter_class.__name__,
                    "description": adapter_class.get_description(),
                    "icon": adapter_class.get_icon(),
                    "adapter_type": adapter_class.get_adapter_type().name,
                    "json_schema": adapter_class.get_json_schema(),
                }
            )
        return described
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import logging | ||
from abc import ABC, abstractmethod | ||
|
||
from unstract.sdk.adapters.enums import AdapterTypes | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Adapter(ABC):
    """Abstract base class for adapters.

    Subclasses must override every static accessor below as well as
    ``test_connection``; the bodies here are placeholders only.
    """

    def __init__(self, name: str):
        self.name = name

    @staticmethod
    @abstractmethod
    def get_id() -> str:
        """Return the adapter's identifier (placeholder returns "")."""
        return ""

    @staticmethod
    @abstractmethod
    def get_name() -> str:
        """Return the adapter's name (placeholder returns "")."""
        return ""

    @staticmethod
    @abstractmethod
    def get_description() -> str:
        """Return the adapter's description (placeholder returns "")."""
        return ""

    @staticmethod
    @abstractmethod
    def get_icon() -> str:
        """Return the adapter's icon string (placeholder returns "")."""
        return ""

    @staticmethod
    @abstractmethod
    def get_json_schema() -> str:
        """Return the adapter's JSON schema as a string (placeholder "")."""
        return ""

    @staticmethod
    @abstractmethod
    def get_adapter_type() -> AdapterTypes:
        """Return the adapter's type.

        NOTE(review): the placeholder below is an empty string, which does
        not match the ``AdapterTypes`` annotation; overrides are expected to
        return a real ``AdapterTypes`` member.
        """
        return ""

    @abstractmethod
    def test_connection(self) -> bool:
        """Override to test connection for an adapter.

        Returns:
            bool: Flag indicating if the credentials are valid or not
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
class Common:
    """String constants shared by the adapter packages."""

    # Keys such as METADATA and ADAPTER index into an adapter registry entry.
    METADATA = "metadata"
    MODULE = "module"
    ADAPTER = "adapter"
    SRC_FOLDER = "src"
    ADAPTER_METADATA = "adapter_metadata"
    ICON = "icon"
    ADAPTER_ID = "adapter_id"
    ADAPTER_TYPE = "adapter_type"

    # Fallback error text.
    DEFAULT_ERR_MESSAGE = "Something went wrong"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from unstract.sdk.adapters import AdapterDict | ||
from unstract.sdk.adapters.embedding.register import EmbeddingRegistry | ||
|
||
# Module-level registry of embedding adapters. It starts empty and is handed
# to EmbeddingRegistry.register_adapters(); the call's return value is
# discarded, so the registry is presumably filled in place.
adapters: AdapterDict = {}
EmbeddingRegistry.register_adapters(adapters)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Unstract Azure Open AI Embedding Adapter | ||
|
||
This package provides the functionality required to integrate with Azure OpenAI Embedding | ||
Version supported |
25 changes: 25 additions & 0 deletions
25
src/unstract/sdk/adapters/embedding/azure_open_ai/pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[build-system] | ||
requires = ["pdm-backend"] | ||
build-backend = "pdm.backend" | ||
|
||
|
||
[project] | ||
name = "unstract-azure-open-ai-embedding" | ||
version = "0.0.1" | ||
description = "Azure Open AI Embedding" | ||
authors = [ | ||
{name = "Zipstack Inc.", email = "[email protected]"}, | ||
] | ||
dependencies = [ | ||
] | ||
requires-python = ">=3.9" | ||
readme = "README.md" | ||
classifiers = [ | ||
"Programming Language :: Python" | ||
] | ||
license = {text = "MIT"} | ||
|
||
[tool.pdm.build] | ||
includes = ["src"] | ||
package-dir = "src" | ||
# source-includes = ["tests"] |
9 changes: 9 additions & 0 deletions
9
src/unstract/sdk/adapters/embedding/azure_open_ai/src/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from .azure_open_ai import AzureOpenAI | ||
|
||
# Metadata describing the adapter exposed by this package; the "adapter"
# entry holds the adapter class itself, not an instance.
metadata = {
    "name": AzureOpenAI.__name__,
    "version": "1.0.0",
    "adapter": AzureOpenAI,
    "description": "AzureOpenAI embedding adapter",
    "is_active": True,
}
75 changes: 75 additions & 0 deletions
75
src/unstract/sdk/adapters/embedding/azure_open_ai/src/azure_open_ai.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import os | ||
from typing import Any | ||
|
||
from llama_index.core.embeddings import BaseEmbedding | ||
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding | ||
|
||
from unstract.sdk.adapters.embedding.embedding_adapter import EmbeddingAdapter | ||
from unstract.sdk.adapters.embedding.helper import EmbeddingHelper | ||
from unstract.sdk.adapters.exceptions import AdapterError | ||
|
||
|
||
class Constants:
    """Setting keys and fixed values used by the Azure OpenAI adapter."""

    # Keys looked up in the adapter's settings dict.
    ADAPTER_NAME = "adapter_name"
    MODEL = "model"
    API_KEY = "api_key"
    API_VERSION = "api_version"
    AZURE_ENDPOINT = "azure_endpoint"
    DEPLOYMENT_NAME = "deployment_name"

    # Not a settings key: the fixed value passed as ``api_type``.
    API_TYPE = "azure"
|
||
|
||
class AzureOpenAI(EmbeddingAdapter):
    """Embedding adapter that builds a llama-index ``AzureOpenAIEmbedding``."""

    def __init__(self, settings: dict[str, Any]):
        """Store the adapter settings.

        Args:
            settings (dict[str, Any]): Adapter configuration; read later
                using the keys declared in ``Constants``
        """
        super().__init__("AzureOpenAIEmbedding")
        self.config = settings

    @staticmethod
    def get_id() -> str:
        """Return the identifier of this adapter."""
        return "azureopenai|9770f3f6-f8ba-4fa0-bb3a-bef48a00e66f"

    @staticmethod
    def get_name() -> str:
        """Return the name of this adapter."""
        return "AzureOpenAIEmbedding"

    @staticmethod
    def get_description() -> str:
        """Return the description of this adapter."""
        return "AzureOpenAI Embedding"

    @staticmethod
    def get_provider() -> str:
        """Return the provider name of this adapter."""
        return "azure"

    @staticmethod
    def get_icon() -> str:
        """Return the icon path of this adapter."""
        return "/icons/adapter-icons/AzureopenAI.png"

    @staticmethod
    def get_json_schema() -> str:
        """Return the contents of ``static/json_schema.json`` as a string.

        The file is resolved relative to this module's directory.
        """
        schema_path = os.path.join(
            os.path.dirname(__file__), "static", "json_schema.json"
        )
        # Context manager guarantees the handle is closed even if read()
        # raises; JSON is UTF-8, so do not rely on the platform default.
        with open(schema_path, encoding="utf-8") as schema_file:
            return schema_file.read()

    def get_embedding_instance(self) -> BaseEmbedding:
        """Build the embedding client from the stored settings.

        Raises:
            AdapterError: If reading the settings or constructing the
                client fails; the original exception is chained as the cause

        Returns:
            BaseEmbedding: A configured ``AzureOpenAIEmbedding``
        """
        try:
            embedding_batch_size = EmbeddingHelper.get_embedding_batch_size(
                config=self.config
            )
            # NOTE(review): str() turns a missing setting (None) into the
            # literal "None"; confirm the settings are validated upstream.
            embedding: BaseEmbedding = AzureOpenAIEmbedding(
                model=str(self.config.get(Constants.MODEL)),
                deployment_name=str(self.config.get(Constants.DEPLOYMENT_NAME)),
                api_key=str(self.config.get(Constants.API_KEY)),
                api_version=str(self.config.get(Constants.API_VERSION)),
                azure_endpoint=str(self.config.get(Constants.AZURE_ENDPOINT)),
                embed_batch_size=embedding_batch_size,
                api_type=Constants.API_TYPE,
            )
            return embedding
        except Exception as e:
            # Boundary translation: surface any failure as AdapterError while
            # keeping the underlying exception as the explicit cause.
            raise AdapterError(str(e)) from e

    def test_connection(self) -> bool:
        """Check the settings by testing a freshly built embedding client.

        Returns:
            bool: Result of ``EmbeddingHelper.test_embedding_instance``
        """
        embedding = self.get_embedding_instance()
        test_result: bool = EmbeddingHelper.test_embedding_instance(embedding)
        return test_result
Oops, something went wrong.