feat: Bumped SDK version to 0.50.0 in services and tools (#768)
* Bumped SDK version to 0.50.0 in services and tools, updated platform service API to fetch adapter information

* Commit pdm.lock changes

* Bumped SDK version to 0.50.0 in prompt-service

* Commit pdm.lock changes

* Commit pdm.lock changes

---------

Co-authored-by: Ritwik G <[email protected]>
Co-authored-by: ritwik-g <[email protected]>
Co-authored-by: chandrasekharan-zipstack <[email protected]>
Co-authored-by: ali <[email protected]>
Co-authored-by: muhammad-ali-e <[email protected]>
6 people authored Oct 7, 2024
1 parent 844b239 commit 5c4f9bc
Showing 19 changed files with 613 additions and 492 deletions.
299 changes: 150 additions & 149 deletions backend/pdm.lock

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions backend/prompt_studio/prompt_studio_core/prompt_studio_helper.py
@@ -1,6 +1,7 @@
import json
import logging
import os
import time
import uuid
from pathlib import Path
from typing import Any, Callable, Optional
@@ -338,6 +339,7 @@ def index_document(
)
file_path = str(Path(file_path) / file_name)

start_time = time.time()
logger.info(f"[{tool_id}] Indexing started for doc: {file_name}")
PromptStudioHelper._publish_log(
{"tool_id": tool_id, "run_id": run_id, "doc_name": file_name},
@@ -367,12 +369,16 @@ def index_document(
process_text=process_text,
)

logger.info(f"[{tool_id}] Indexing successful for doc: {file_name}")
elapsed_time = time.time() - start_time
logger.info(
f"[{tool_id}] Indexing successful for doc: {file_name},"
f" took {elapsed_time:.3f}s"
)
PromptStudioHelper._publish_log(
{"tool_id": tool_id, "run_id": run_id, "doc_name": file_name},
LogLevels.INFO,
LogLevels.RUN,
"Indexing successful",
f"Indexing successful, took {elapsed_time:.3f}s",
)

return doc_id.get("output")
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import logging
import os
import time
import uuid
from pathlib import Path
from typing import Any, Callable, Optional
@@ -343,6 +344,7 @@ def index_document(
logger.error(f"No tool instance found for the ID {tool_id}")
raise ToolNotValid()

start_time = time.time()
logger.info(f"[{tool_id}] Indexing started for doc: {file_name}")
PromptStudioHelper._publish_log(
{"tool_id": tool_id, "run_id": run_id, "doc_name": file_name},
@@ -372,12 +374,17 @@ def index_document(
process_text=process_text,
)

elapsed_time = time.time() - start_time
logger.info(
f"[{tool_id}] Indexing successful for doc: {file_name},"
f" took {elapsed_time:.3f}s"
)
logger.info(f"[{tool_id}] Indexing successful for doc: {file_name}")
PromptStudioHelper._publish_log(
{"tool_id": tool_id, "run_id": run_id, "doc_name": file_name},
LogLevels.INFO,
LogLevels.RUN,
"Indexing successful",
f"Indexing successful, took {elapsed_time:.3f}s",
)

return doc_id.get("output")
2 changes: 1 addition & 1 deletion backend/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
"python-socketio==5.9.0", # For log_events
"social-auth-app-django==5.3.0", # For OAuth
"social-auth-core==4.4.2", # For OAuth
"unstract-sdk~=0.49.0",
"unstract-sdk~=0.50.0",
# ! IMPORTANT!
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
415 changes: 254 additions & 161 deletions pdm.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -28,21 +28,23 @@ def get_adapter_instance_from_db(
"""
if check_feature_flag_status(FeatureFlag.MULTI_TENANCY_V2):
query = (
"SELECT id, adapter_id, adapter_metadata_b FROM "
f'"{DB_SCHEMA}".{DBTableV2.ADAPTER_INSTANCE} x '
"SELECT id, adapter_id, adapter_name, adapter_type, adapter_metadata_b"
f' FROM "{DB_SCHEMA}".{DBTableV2.ADAPTER_INSTANCE} x '
f"WHERE id='{adapter_instance_id}' and "
f"organization_id='{organization_uid}'"
)
else:
query = (
f"SELECT id, adapter_id, adapter_metadata_b FROM "
f'"{organization_id}".adapter_adapterinstance x '
f"SELECT id, adapter_id, adapter_name, adapter_type, adapter_metadata_b"
f' FROM "{organization_id}".adapter_adapterinstance x '
f"WHERE id='{adapter_instance_id}'"
)
cursor = db.execute_sql(query)
result_row = cursor.fetchone()
if not result_row:
raise APIError(message="Adapter not found", code=404)
raise APIError(
message=f"Adapter '{adapter_instance_id}' not found", code=404
)
columns = [desc[0] for desc in cursor.description]
data_dict: dict[str, Any] = dict(zip(columns, result_row))
cursor.close()
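
For reference, a minimal standalone sketch of how the extra columns now returned by get_adapter_instance_from_db might be consumed downstream. Only the column names come from the updated query above; the helper name and sample values below are made up for illustration.

```python
from typing import Any


def summarize_adapter(record: dict[str, Any]) -> dict[str, Any]:
    """Shape a row returned by get_adapter_instance_from_db (shown above)
    into a small summary; only the column names are taken from the query."""
    return {
        "id": record["id"],
        "adapter_id": record["adapter_id"],
        "adapter_name": record["adapter_name"],  # column added by this change
        "adapter_type": record["adapter_type"],  # column added by this change
    }


# Example with a made-up row, mirroring the columns in the updated SELECT.
print(summarize_adapter({
    "id": "1f0c...",
    "adapter_id": "azureopenai|gpt-4o",
    "adapter_name": "My Azure LLM",
    "adapter_type": "LLM",
    "adapter_metadata_b": b"...",
}))
```
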
291 changes: 146 additions & 145 deletions prompt-service/pdm.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion prompt-service/pyproject.toml
@@ -15,7 +15,7 @@ dependencies = [
"flask~=3.0",
"llama-index==0.10.58",
"python-dotenv==1.0.0",
"unstract-sdk~=0.49.0",
"unstract-sdk~=0.50.0",
"redis>=5.0.3",
"unstract-core @ file:///${PROJECT_ROOT}/../unstract/core",
"unstract-flags @ file:///${PROJECT_ROOT}/../unstract/flags",
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -41,7 +41,7 @@ hook-check-django-migrations = [
"psycopg2-binary==2.9.9",
"python-dotenv==1.0.0",
"python-magic==0.4.27",
"unstract-sdk~=0.49.0",
"unstract-sdk~=0.50.0",
"-e unstract-connectors @ file:///${PROJECT_ROOT}/unstract/connectors",
"-e unstract-core @ file:///${PROJECT_ROOT}/unstract/core",
"-e unstract-flags @ file:///${PROJECT_ROOT}/unstract/flags",
2 changes: 1 addition & 1 deletion tools/classifier/requirements.txt
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.49.0
unstract-sdk~=0.50.0
2 changes: 1 addition & 1 deletion tools/classifier/src/config/properties.json
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "File Classifier",
"functionName": "classify",
"toolVersion": "0.0.35",
"toolVersion": "0.0.36",
"description": "Classifies a file into a bin based on its contents",
"input": {
"description": "File to be classified"
2 changes: 1 addition & 1 deletion tools/structure/requirements.txt
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.49.0
unstract-sdk~=0.50.0
2 changes: 1 addition & 1 deletion tools/structure/src/config/properties.json
@@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Structure Tool",
"functionName": "structure_tool",
"toolVersion": "0.0.42",
"toolVersion": "0.0.44",
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
"input": {
"description": "File that needs to be indexed and parsed for answers"
45 changes: 30 additions & 15 deletions tools/structure/src/main.py
@@ -13,10 +13,18 @@
from unstract.sdk.utils import ToolUtils
from unstract.sdk.utils.common_utils import CommonUtils

PAID_FEATURE_MSG = (
"It is a cloud / enterprise feature. If you have purchased a plan and still "
"face this issue, please contact support"
)


class StructureTool(BaseTool):
def validate(self, input_file: str, settings: dict[str, Any]) -> None:
pass
enable_challenge: bool = settings.get(SettingsKeys.ENABLE_CHALLENGE, False)
challenge_llm: str = settings.get(SettingsKeys.CHALLENGE_LLM_ADAPTER_ID, "")
if enable_challenge and not challenge_llm:
raise ValueError("Challenge LLM is not set after enabling Challenge")

def run(
self,
@@ -25,12 +33,14 @@ def run(
output_dir: str,
) -> None:
prompt_registry_id: str = settings[SettingsKeys.PROMPT_REGISTRY_ID]
challenge_llm: str = settings[SettingsKeys.CHALLENGE_LLM_ADAPTER_ID]
enable_challenge: bool = settings[SettingsKeys.ENABLE_CHALLENGE]
summarize_as_source: bool = settings[SettingsKeys.SUMMARIZE_AS_SOURCE]
single_pass_extraction_mode: bool = settings[
SettingsKeys.SINGLE_PASS_EXTRACTION_MODE
]
enable_challenge: bool = settings.get(SettingsKeys.ENABLE_CHALLENGE, False)
summarize_as_source: bool = settings.get(
SettingsKeys.SUMMARIZE_AS_SOURCE, False
)
single_pass_extraction_mode: bool = settings.get(
SettingsKeys.SINGLE_PASS_EXTRACTION_MODE, False
)
challenge_llm: str = settings.get(SettingsKeys.CHALLENGE_LLM_ADAPTER_ID, "")
responder: PromptTool = PromptTool(
tool=self,
prompt_port=self.get_env_or_die(SettingsKeys.PROMPT_PORT),
@@ -42,7 +52,7 @@ def run(
tool=self, prompt_registry_id=prompt_registry_id
)
tool_metadata = exported_tool[SettingsKeys.TOOL_METADATA]
self.stream_log(f"Tool Metadata retrived succesfully: {tool_metadata}")
self.stream_log(f"Tool metadata retrieved successfully: {tool_metadata}")
except Exception as e:
self.stream_error_and_exit(f"Error loading structure definition: {e}")

@@ -84,18 +94,20 @@ def run(
}
# TODO: Need to split extraction and indexing
# to avoid unwanted indexing
self.stream_log("Indexing document")
source_file_name = self.get_exec_metadata.get(MetadataKey.SOURCE_NAME)
self.stream_log(f"Indexing document '{source_file_name}'")
usage_kwargs: dict[Any, Any] = dict()
usage_kwargs[SettingsKeys.RUN_ID] = run_id
usage_kwargs[SettingsKeys.FILE_NAME] = (
self.get_exec_metadata.get(MetadataKey.SOURCE_NAME),
)
usage_kwargs[SettingsKeys.FILE_NAME] = source_file_name

process_text = None
try:
from helper import process_text # type: ignore [attr-defined]
except ImportError:
self.stream_log("Function 'process_text' is not found")
self.stream_log(
f"Function to higlight context is not found. {PAID_FEATURE_MSG}",
level=LogLevel.WARN,
)

if tool_settings[SettingsKeys.ENABLE_SINGLE_PASS_EXTRACTION]:
index.index(
@@ -186,7 +198,7 @@ def run(
# To check if the prompt has table enforce type selected.
pass

self.stream_log("Fetching responses for prompts...")
self.stream_log(f"Fetching responses for {len(outputs)} prompt(s)...")
prompt_service_resp = responder.answer_prompt(
payload=payload,
)
@@ -211,7 +223,10 @@ def run(
highlight_data = transform_dict(epilogue, tool_data_dir)
metadata[SettingsKeys.HIGHLIGHT_DATA] = highlight_data
except ImportError:
self.stream_log("Function 'transform_dict' is not found")
self.stream_log(
f"Highlight metadata is not added. {PAID_FEATURE_MSG}",
level=LogLevel.WARN,
)
# Update the dictionary with modified metadata
structured_output_dict[SettingsKeys.METADATA] = metadata
structured_output = json.dumps(structured_output_dict)
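
The validate() guard added above fails fast when challenge mode is enabled without a challenge LLM configured. A standalone sketch of the same check, using plain string keys as stand-ins for the SettingsKeys constants (the actual key names are assumptions):

```python
def validate_challenge_settings(settings: dict) -> None:
    # Mirrors the guard added to StructureTool.validate(): challenge mode
    # requires a challenge LLM adapter to be configured.
    enable_challenge: bool = settings.get("enable_challenge", False)
    challenge_llm: str = settings.get("challenge_llm_adapter_id", "")
    if enable_challenge and not challenge_llm:
        raise ValueError("Challenge LLM is not set after enabling Challenge")


# Example: this configuration would be rejected before the run starts.
try:
    validate_challenge_settings({"enable_challenge": True})
except ValueError as err:
    print(err)
```
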
3 changes: 0 additions & 3 deletions tools/text_extractor/README.md
@@ -112,6 +112,3 @@ docker run -it \
--log-level DEBUG
```

License
This project is licensed under the Zipstack Inc.
2 changes: 1 addition & 1 deletion tools/text_extractor/requirements.txt
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.49.0
unstract-sdk~=0.50.0
2 changes: 1 addition & 1 deletion tools/text_extractor/src/config/properties.json
@@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Text Extractor",
"functionName": "text_extractor",
"toolVersion": "0.0.33",
"toolVersion": "0.0.34",
"description": "The Text Extractor is a powerful tool designed to convert documents to its text form or Extract texts from documents",
"input": {
"description": "Document"
2 changes: 1 addition & 1 deletion unstract/tool-registry/pyproject.toml
@@ -13,7 +13,7 @@ dependencies = [
"docker~=6.1.3",
"jsonschema~=4.18.2",
"PyYAML~=6.0.1",
"unstract-sdk~=0.49.0",
"unstract-sdk~=0.50.0",
# ! IMPORTANT!
# Local dependencies usually need to be added as:
# https://pdm-project.org/latest/usage/dependency/#local-dependencies
1 change: 0 additions & 1 deletion worker/sample.env
@@ -14,7 +14,6 @@ LOG_LEVEL="INFO"

# Flag to decide whether to clean up/ remove the tool container after execution.
# (Default: True)

REMOVE_CONTAINER_ON_EXIT=True

# Client module path of the container engine to be used.
