From f7f1427708ea85e771fbd8c47f8a7c0e46a67c78 Mon Sep 17 00:00:00 2001 From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Date: Fri, 28 Jun 2024 12:07:32 +0530 Subject: [PATCH] Support Ollama embedding (#67) * Support Ollama * Update adapter version * Multiple exception handling --- pdm.lock | 34 +++++++++++++++------- pyproject.toml | 2 +- src/unstract/sdk/__init__.py | 2 +- src/unstract/sdk/utils/callback_manager.py | 2 +- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/pdm.lock b/pdm.lock index 711df719..820b2a74 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "docs", "lint", "test"] strategy = ["cross_platform"] lock_version = "4.4.1" -content_hash = "sha256:4c5471966bc1ed5aa9e777a8cebbadd5a6af38486dc1e45d261ab445219f5e78" +content_hash = "sha256:59b7a22c5ee38ad98f6897c17299eb64d2161855742d8fb36b41bbaef5e0e7fa" [[package]] name = "aiohttp" @@ -688,12 +688,12 @@ files = [ [[package]] name = "fsspec" -version = "2024.6.0" +version = "2024.6.1" requires_python = ">=3.8" summary = "File-system specification" files = [ - {file = "fsspec-2024.6.0-py3-none-any.whl", hash = "sha256:58d7122eb8a1a46f7f13453187bfea4972d66bf01618d37366521b1998034cee"}, - {file = "fsspec-2024.6.0.tar.gz", hash = "sha256:f579960a56e6d8038a9efc8f9c77279ec12e6299aa86b0769a7e9c46b94527c2"}, + {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"}, + {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"}, ] [[package]] @@ -763,7 +763,7 @@ files = [ [[package]] name = "google-cloud-aiplatform" -version = "1.56.0" +version = "1.57.0" requires_python = ">=3.8" summary = "Vertex AI API client library" dependencies = [ @@ -780,8 +780,8 @@ dependencies = [ "shapely<3.0.0dev", ] files = [ - {file = "google-cloud-aiplatform-1.56.0.tar.gz", hash = "sha256:d4cfb085427dac01142915f523949ac2955d6c7f148d95017d3286a77caf5d5e"}, - {file = "google_cloud_aiplatform-1.56.0-py2.py3-none-any.whl", hash = "sha256:ee1ab3bd115c3caebf8ddfd3e47eeb8396a3ec2fc5f5baf1a5c295c8d64333ab"}, + {file = "google-cloud-aiplatform-1.57.0.tar.gz", hash = "sha256:113905f100cb0a9ad744a2445a7675f92f28600233ba499614aa704d11a809b7"}, + {file = "google_cloud_aiplatform-1.57.0-py2.py3-none-any.whl", hash = "sha256:ca5391a56e0cc8f4ed39a2beb7be02f51936ff04fd5304775a72a86c345d0e47"}, ] [[package]] @@ -1433,6 +1433,19 @@ files = [ {file = "llama_index_embeddings_google-0.1.5.tar.gz", hash = "sha256:989f87e249661a5a531972c4ae52cd587e485291b25e76c084db204f97f0087b"}, ] +[[package]] +name = "llama-index-embeddings-ollama" +version = "0.1.2" +requires_python = ">=3.8.1,<4.0" +summary = "llama-index embeddings ollama integration" +dependencies = [ + "llama-index-core<0.11.0,>=0.10.1", +] +files = [ + {file = "llama_index_embeddings_ollama-0.1.2-py3-none-any.whl", hash = "sha256:ac7afabfa1134059af351b021e05e256bf86dd15e5176ffa5ab0305bcf03b33f"}, + {file = "llama_index_embeddings_ollama-0.1.2.tar.gz", hash = "sha256:a9e0809bddd2e4ad888f249519edc7e3d339c74e4e03fc5a40c3060dc41d47a9"}, +] + [[package]] name = "llama-index-embeddings-openai" version = "0.1.10" @@ -3416,7 +3429,7 @@ files = [ [[package]] name = "unstract-adapters" -version = "0.19.2" +version = "0.20.1" requires_python = "<3.12,>=3.9" summary = "Unstract interface for LLMs, Embeddings and VectorDBs" dependencies = [ @@ -3425,6 +3438,7 @@ dependencies = [ "llama-index-embeddings-azure-openai==0.1.6", "llama-index-embeddings-azure-openai==0.1.6", "llama-index-embeddings-google==0.1.5", + "llama-index-embeddings-ollama==0.1.2", "llama-index-llms-anthropic==0.1.11", "llama-index-llms-anyscale==0.1.3", "llama-index-llms-azure-openai==0.1.5", @@ -3443,8 +3457,8 @@ dependencies = [ "singleton-decorator~=1.0.0", ] files = [ - {file = "unstract_adapters-0.19.2-py3-none-any.whl", hash = "sha256:0c8d534964c62c4cc49fb2e4346438a2d6aac3abef517ece591a2a39cffb931b"}, - {file = "unstract_adapters-0.19.2.tar.gz", hash = "sha256:1f1961c43adee9d8b0f617098482671cf4830324e304e6b08b5c7eb9a2f40ed5"}, + {file = "unstract_adapters-0.20.1-py3-none-any.whl", hash = "sha256:e1f291016ab81f2fa46bc5ac0629905a9977198a8e5ad3077b8b188383fbd0c3"}, + {file = "unstract_adapters-0.20.1.tar.gz", hash = "sha256:4c9b10bc5ae6f0d395088ba4f3d2b27bcfade1b2f1666ccf2611723336d33571"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index c35bbf54..48cc0807 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "python-magic~=0.4.27", "python-dotenv==1.0.0", # LLM Triad - "unstract-adapters~=0.19.2", + "unstract-adapters~=0.20.1", "llama-index==0.10.38", "tiktoken~=0.4.0", "transformers==4.37.0", diff --git a/src/unstract/sdk/__init__.py b/src/unstract/sdk/__init__.py index fba2699c..4ca381ca 100644 --- a/src/unstract/sdk/__init__.py +++ b/src/unstract/sdk/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.33.1" +__version__ = "0.34.0" def get_sdk_version(): diff --git a/src/unstract/sdk/utils/callback_manager.py b/src/unstract/sdk/utils/callback_manager.py index 5617f145..3931c452 100644 --- a/src/unstract/sdk/utils/callback_manager.py +++ b/src/unstract/sdk/utils/callback_manager.py @@ -138,7 +138,7 @@ def get_tokenizer( model_name ).encode return tokenizer - except ValueError as e: + except (KeyError, ValueError) as e: logger.warning(str(e)) return fallback_tokenizer