From 557e9432cb32893aa1de122ad76f8e19620497d1 Mon Sep 17 00:00:00 2001 From: ali-zipstack Date: Thu, 23 Jan 2025 11:54:34 +0530 Subject: [PATCH 1/2] passing tags to x2text from tools --- tools/classifier/requirements.txt | 2 +- tools/classifier/src/config/properties.json | 2 +- tools/classifier/src/helper.py | 6 ++++-- tools/structure/requirements.txt | 2 +- tools/structure/src/config/properties.json | 2 +- tools/structure/src/main.py | 3 +++ tools/text_extractor/requirements.txt | 2 +- tools/text_extractor/src/config/properties.json | 2 +- tools/text_extractor/src/main.py | 4 ++-- 9 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tools/classifier/requirements.txt b/tools/classifier/requirements.txt index fd8618df9..bc03dfa4c 100644 --- a/tools/classifier/requirements.txt +++ b/tools/classifier/requirements.txt @@ -1,6 +1,6 @@ # Add your dependencies here # Required for all unstract tools -unstract-sdk~=0.55.0rc2 +unstract-sdk~=0.55.0rc3 # Required for remote storage support s3fs[boto3]==2024.6.0 diff --git a/tools/classifier/src/config/properties.json b/tools/classifier/src/config/properties.json index f85c243c8..0e7eb073a 100644 --- a/tools/classifier/src/config/properties.json +++ b/tools/classifier/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "File Classifier", "functionName": "classify", - "toolVersion": "0.0.47", + "toolVersion": "0.0.48", "description": "Classifies a file into a bin based on its contents", "input": { "description": "File to be classified" diff --git a/tools/classifier/src/helper.py b/tools/classifier/src/helper.py index 139017937..66179d685 100644 --- a/tools/classifier/src/helper.py +++ b/tools/classifier/src/helper.py @@ -152,11 +152,13 @@ def _extract_from_adapter(self, file: str, adapter_id: str) -> Optional[str]: try: if self.tool.workflow_filestorage: extraction_result: TextExtractionResult = x2text.process( - input_file_path=file, fs=self.tool.workflow_filestorage + input_file_path=file, + fs=self.tool.workflow_filestorage, + tags=self.tool.tags, ) else: extraction_result: TextExtractionResult = x2text.process( - input_file_path=file + input_file_path=file, tags=self.tool.tags ) extracted_text: str = extraction_result.extracted_text return extracted_text diff --git a/tools/structure/requirements.txt b/tools/structure/requirements.txt index fd8618df9..bc03dfa4c 100644 --- a/tools/structure/requirements.txt +++ b/tools/structure/requirements.txt @@ -1,6 +1,6 @@ # Add your dependencies here # Required for all unstract tools -unstract-sdk~=0.55.0rc2 +unstract-sdk~=0.55.0rc3 # Required for remote storage support s3fs[boto3]==2024.6.0 diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json index 2c88344fd..4b13b2af1 100644 --- a/tools/structure/src/config/properties.json +++ b/tools/structure/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "Structure Tool", "functionName": "structure_tool", - "toolVersion": "0.0.57", + "toolVersion": "0.0.58", "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio", "input": { "description": "File that needs to be indexed and parsed for answers" diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py index a420486a1..09f400f8b 100644 --- a/tools/structure/src/main.py +++ b/tools/structure/src/main.py @@ -149,6 +149,7 @@ def run( reindex=True, usage_kwargs=usage_kwargs, process_text=process_text, + tags=self.tags, **( {"fs": self.workflow_filestorage} if self.workflow_filestorage is not None @@ -191,6 +192,7 @@ def run( reindex=reindex, usage_kwargs=usage_kwargs, process_text=process_text, + tags=self.tags, **( {"fs": self.workflow_filestorage} if self.workflow_filestorage is not None @@ -399,6 +401,7 @@ def _summarize_and_index( chunk_size=0, chunk_overlap=0, usage_kwargs=usage_kwargs, + tags=self.tags, **( {"fs": self.workflow_filestorage} if self.workflow_filestorage is not None diff --git a/tools/text_extractor/requirements.txt b/tools/text_extractor/requirements.txt index fd8618df9..bc03dfa4c 100644 --- a/tools/text_extractor/requirements.txt +++ b/tools/text_extractor/requirements.txt @@ -1,6 +1,6 @@ # Add your dependencies here # Required for all unstract tools -unstract-sdk~=0.55.0rc2 +unstract-sdk~=0.55.0rc3 # Required for remote storage support s3fs[boto3]==2024.6.0 diff --git a/tools/text_extractor/src/config/properties.json b/tools/text_extractor/src/config/properties.json index 45d272f56..43c25ae1e 100644 --- a/tools/text_extractor/src/config/properties.json +++ b/tools/text_extractor/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "Text Extractor", "functionName": "text_extractor", - "toolVersion": "0.0.44", + "toolVersion": "0.0.45", "description": "The Text Extractor is a powerful tool designed to convert documents to its text form or Extract texts from documents", "input": { "description": "Document" diff --git a/tools/text_extractor/src/main.py b/tools/text_extractor/src/main.py index ab51a2198..65f3e916e 100644 --- a/tools/text_extractor/src/main.py +++ b/tools/text_extractor/src/main.py @@ -65,11 +65,11 @@ def run( self.stream_log("Text extraction adapter has been created successfully.") if self.workflow_filestorage: extraction_result: TextExtractionResult = text_extraction_adapter.process( - input_file_path=input_file, fs=self.workflow_filestorage + input_file_path=input_file, fs=self.workflow_filestorage, tags=self.tags ) else: extraction_result: TextExtractionResult = text_extraction_adapter.process( - input_file_path=input_file + input_file_path=input_file, tags=self.tags ) extracted_text = self.convert_to_actual_string(extraction_result.extracted_text) From 93622a21115bfe27bade74ae6912a5b134e64095 Mon Sep 17 00:00:00 2001 From: ali-zipstack Date: Thu, 23 Jan 2025 15:52:21 +0530 Subject: [PATCH 2/2] bumped sdk version in tools --- tools/classifier/requirements.txt | 2 +- tools/structure/requirements.txt | 2 +- tools/text_extractor/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/classifier/requirements.txt b/tools/classifier/requirements.txt index bc03dfa4c..77e9841a2 100644 --- a/tools/classifier/requirements.txt +++ b/tools/classifier/requirements.txt @@ -1,6 +1,6 @@ # Add your dependencies here # Required for all unstract tools -unstract-sdk~=0.55.0rc3 +unstract-sdk~=0.56.0rc1 # Required for remote storage support s3fs[boto3]==2024.6.0 diff --git a/tools/structure/requirements.txt b/tools/structure/requirements.txt index bc03dfa4c..77e9841a2 100644 --- a/tools/structure/requirements.txt +++ b/tools/structure/requirements.txt @@ -1,6 +1,6 @@ # Add your dependencies here # Required for all unstract tools -unstract-sdk~=0.55.0rc3 +unstract-sdk~=0.56.0rc1 # Required for remote storage support s3fs[boto3]==2024.6.0 diff --git a/tools/text_extractor/requirements.txt b/tools/text_extractor/requirements.txt index bc03dfa4c..77e9841a2 100644 --- a/tools/text_extractor/requirements.txt +++ b/tools/text_extractor/requirements.txt @@ -1,6 +1,6 @@ # Add your dependencies here # Required for all unstract tools -unstract-sdk~=0.55.0rc3 +unstract-sdk~=0.56.0rc1 # Required for remote storage support s3fs[boto3]==2024.6.0