Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes in structure tool and bumped sdk versions #876

Merged
merged 7 commits into [base branch] from [head branch]
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 80 additions & 76 deletions backend/pdm.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ dev = [
"-e unstract-tool-registry @ file:///${PROJECT_ROOT}/../unstract/tool-registry",
"-e unstract-tool-sandbox @ file:///${PROJECT_ROOT}/../unstract/tool-sandbox",
"-e unstract-workflow-execution @ file:///${PROJECT_ROOT}/../unstract/workflow-execution",
"-e unstract-filesystem @ file:///${PROJECT_ROOT}/../unstract/filesystem",
]

[tool.pytest.ini_options]
Expand Down
190 changes: 102 additions & 88 deletions pdm.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ hook-check-django-migrations = [
"-e unstract-tool-registry @ file:///${PROJECT_ROOT}/unstract/tool-registry",
"-e unstract-tool-sandbox @ file:///${PROJECT_ROOT}/unstract/tool-sandbox",
"-e unstract-workflow-execution @ file:///${PROJECT_ROOT}/unstract/workflow-execution",
"-e unstract-filesystem @ file:///${PROJECT_ROOT}/unstract/filesystem"
]

[tool.black]
Expand Down
2 changes: 1 addition & 1 deletion tools/classifier/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.1
unstract-sdk~=0.54.0rc2
chandrasekharan-zipstack marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion tools/structure/requirements.txt
chandrasekharan-zipstack marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.2
unstract-sdk~=0.54.0rc2
25 changes: 19 additions & 6 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Callable, Optional

from constants import SettingsKeys # type: ignore [attr-defined]
from unstract.sdk.constants import LogLevel, LogState, MetadataKey
from unstract.sdk.constants import LogLevel, LogState, MetadataKey, ToolEnv
from unstract.sdk.index import Index
from unstract.sdk.prompt import PromptTool
from unstract.sdk.tool.base import BaseTool
Expand Down Expand Up @@ -97,7 +97,10 @@ def run(
_, file_name = os.path.split(input_file)
if summarize_as_source:
file_name = SettingsKeys.SUMMARIZE
tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
if hasattr(self, "workflow_filestorage"):
tool_data_dir = Path(self.get_env_or_die(ToolEnv.EXECUTION_DATA_DIR))
else:
tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
execution_run_data_folder = Path(
self.get_env_or_die(SettingsKeys.EXECUTION_RUN_DATA_FOLDER)
)
Expand Down Expand Up @@ -268,8 +271,13 @@ def run(
self.stream_log("Writing parsed output...")
source_name = self.get_exec_metadata.get(MetadataKey.SOURCE_NAME)
output_path = Path(output_dir) / f"{Path(source_name).stem}.json"
with open(output_path, "w", encoding="utf-8") as f:
f.write(structured_output)
if hasattr(self, "workflow_filestorage"):
self.workflow_filestorage.json_dump(
path=output_path, data=structured_output_dict
)
else:
with open(output_path, "w", encoding="utf-8") as f:
f.write(structured_output)
except OSError as e:
self.stream_error_and_exit(f"Error creating output file: {e}")
except json.JSONDecodeError as e:
Expand Down Expand Up @@ -351,8 +359,13 @@ def _summarize_and_index(
structure_output = json.loads(response[SettingsKeys.STRUCTURE_OUTPUT])
summarized_context = structure_output.get(SettingsKeys.DATA, "")
self.stream_log("Writing summarized context to a file")
with open(summarize_file_path, "w", encoding="utf-8") as f:
f.write(summarized_context)
if hasattr(self, "workflow_filestorage"):
self.workflow_filestorage.write(
path=summarize_file_path, mode="w", data=summarized_context
)
else:
with open(summarize_file_path, "w", encoding="utf-8") as f:
f.write(summarized_context)

self.stream_log("Indexing summarized context")
summarize_file_hash: str = ToolUtils.get_hash_from_file(
Expand Down
2 changes: 1 addition & 1 deletion tools/text_extractor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.53.1
unstract-sdk~=0.54.0rc2
3,706 changes: 3,705 additions & 1 deletion unstract/filesystem/pdm.lock

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion unstract/filesystem/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ description = "Unstract filesystem package"
authors = [
{name = "Zipstack Inc.", email = "[email protected]"},
]
dependencies = []
dependencies = [
"unstract-sdk~=0.54.0rc2",
]
requires-python = ">=3.9,<3.11.1"
readme = "README.md"

Expand Down
2 changes: 1 addition & 1 deletion unstract/filesystem/src/unstract/filesystem/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This module exposes the main classes for file storage handling.
"""

from unstract.sdk.file_storage.shared_temporary import SharedTemporaryFileStorage
from unstract.sdk.file_storage import SharedTemporaryFileStorage

from .file_storage_types import FileStorageType
from .filesystem import FileSystem
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,9 @@ def get_execution_dir(
path_prefix = ToolsUtils.get_env(
ToolRuntimeVariable.WORKFLOW_EXECUTION_DIR_PREFIX
)
execution_dir = Path(path_prefix) / organization_id / workflow_id / execution_id
execution_dir = (
Path(path_prefix) / organization_id / str(workflow_id) / str(execution_id)
)

return str(execution_dir)

Expand All @@ -207,5 +209,7 @@ def get_api_execution_dir(
str: The directory path for the execution.
"""
path_prefix = ToolsUtils.get_env(ToolRuntimeVariable.API_EXECUTION_DIR_PREFIX)
execution_dir = Path(path_prefix) / organization_id / workflow_id / execution_id
execution_dir = (
Path(path_prefix) / organization_id / str(workflow_id) / str(execution_id)
)
return str(execution_dir)