Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes in structure tool and bumped sdk versions #876

Merged
merged 7 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ dev = [
"-e unstract-tool-registry @ file:///${PROJECT_ROOT}/../unstract/tool-registry",
"-e unstract-tool-sandbox @ file:///${PROJECT_ROOT}/../unstract/tool-sandbox",
"-e unstract-workflow-execution @ file:///${PROJECT_ROOT}/../unstract/workflow-execution",
"-e unstract-filesystem @ file:///${PROJECT_ROOT}/../unstract/filesystem",
]

[tool.pytest.ini_options]
Expand Down
16 changes: 15 additions & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ hook-check-django-migrations = [
"-e unstract-tool-registry @ file:///${PROJECT_ROOT}/unstract/tool-registry",
"-e unstract-tool-sandbox @ file:///${PROJECT_ROOT}/unstract/tool-sandbox",
"-e unstract-workflow-execution @ file:///${PROJECT_ROOT}/unstract/workflow-execution",
"-e unstract-filesystem @ file:///${PROJECT_ROOT}/unstract/filesystem"
]

[tool.black]
Expand Down
2 changes: 1 addition & 1 deletion tools/classifier/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
-unstract-sdk~=0.53.1
+unstract-sdk~=0.54.0rc2
chandrasekharan-zipstack marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion tools/structure/requirements.txt
chandrasekharan-zipstack marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
-unstract-sdk~=0.53.2
+unstract-sdk~=0.54.0rc2
25 changes: 19 additions & 6 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Callable, Optional

from constants import SettingsKeys # type: ignore [attr-defined]
-from unstract.sdk.constants import LogLevel, LogState, MetadataKey
+from unstract.sdk.constants import LogLevel, LogState, MetadataKey, ToolEnv
from unstract.sdk.index import Index
from unstract.sdk.prompt import PromptTool
from unstract.sdk.tool.base import BaseTool
Expand Down Expand Up @@ -97,7 +97,10 @@ def run(
_, file_name = os.path.split(input_file)
if summarize_as_source:
file_name = SettingsKeys.SUMMARIZE
-tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
+if hasattr(self, "workflow_filestorage"):
+    tool_data_dir = Path(self.get_env_or_die(ToolEnv.EXECUTION_DATA_DIR))
+else:
+    tool_data_dir = Path(self.get_env_or_die(SettingsKeys.TOOL_DATA_DIR))
execution_run_data_folder = Path(
self.get_env_or_die(SettingsKeys.EXECUTION_RUN_DATA_FOLDER)
)
Expand Down Expand Up @@ -268,8 +271,13 @@ def run(
self.stream_log("Writing parsed output...")
source_name = self.get_exec_metadata.get(MetadataKey.SOURCE_NAME)
output_path = Path(output_dir) / f"{Path(source_name).stem}.json"
-with open(output_path, "w", encoding="utf-8") as f:
-    f.write(structured_output)
+if hasattr(self, "workflow_filestorage"):
+    self.workflow_filestorage.json_dump(
+        path=output_path, data=structured_output_dict
+    )
+else:
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(structured_output)
except OSError as e:
self.stream_error_and_exit(f"Error creating output file: {e}")
except json.JSONDecodeError as e:
Expand Down Expand Up @@ -351,8 +359,13 @@ def _summarize_and_index(
structure_output = json.loads(response[SettingsKeys.STRUCTURE_OUTPUT])
summarized_context = structure_output.get(SettingsKeys.DATA, "")
self.stream_log("Writing summarized context to a file")
-with open(summarize_file_path, "w", encoding="utf-8") as f:
-    f.write(summarized_context)
+if hasattr(self, "workflow_filestorage"):
+    self.workflow_filestorage.write(
+        path=summarize_file_path, mode="w", data=summarized_context
+    )
+else:
+    with open(summarize_file_path, "w", encoding="utf-8") as f:
+        f.write(summarized_context)

self.stream_log("Indexing summarized context")
summarize_file_hash: str = ToolUtils.get_hash_from_file(
Expand Down
2 changes: 1 addition & 1 deletion tools/text_extractor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Add your dependencies here

# Required for all unstract tools
-unstract-sdk~=0.53.1
+unstract-sdk~=0.54.0rc2
Loading
Loading