Skip to content

Commit

Permalink
passing tags to x2text from tools
Browse files — browse the repository at this point in the history
  • Loading branch information
muhammad-ali-e committed Jan 23, 2025
1 parent 04013dc commit 557e943
Show file tree
Hide file tree
Showing 9 changed files with 15 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tools/classifier/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.55.0rc2
unstract-sdk~=0.55.0rc3
# Required for remote storage support
s3fs[boto3]==2024.6.0
2 changes: 1 addition & 1 deletion tools/classifier/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "File Classifier",
"functionName": "classify",
"toolVersion": "0.0.47",
"toolVersion": "0.0.48",
"description": "Classifies a file into a bin based on its contents",
"input": {
"description": "File to be classified"
Expand Down
6 changes: 4 additions & 2 deletions tools/classifier/src/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,13 @@ def _extract_from_adapter(self, file: str, adapter_id: str) -> Optional[str]:
try:
if self.tool.workflow_filestorage:
extraction_result: TextExtractionResult = x2text.process(
input_file_path=file, fs=self.tool.workflow_filestorage
input_file_path=file,
fs=self.tool.workflow_filestorage,
tags=self.tool.tags,
)
else:
extraction_result: TextExtractionResult = x2text.process(
input_file_path=file
input_file_path=file, tags=self.tool.tags
)
extracted_text: str = extraction_result.extracted_text
return extracted_text
Expand Down
2 changes: 1 addition & 1 deletion tools/structure/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.55.0rc2
unstract-sdk~=0.55.0rc3
# Required for remote storage support
s3fs[boto3]==2024.6.0
2 changes: 1 addition & 1 deletion tools/structure/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Structure Tool",
"functionName": "structure_tool",
"toolVersion": "0.0.57",
"toolVersion": "0.0.58",
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
"input": {
"description": "File that needs to be indexed and parsed for answers"
Expand Down
3 changes: 3 additions & 0 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def run(
reindex=True,
usage_kwargs=usage_kwargs,
process_text=process_text,
tags=self.tags,
**(
{"fs": self.workflow_filestorage}
if self.workflow_filestorage is not None
Expand Down Expand Up @@ -191,6 +192,7 @@ def run(
reindex=reindex,
usage_kwargs=usage_kwargs,
process_text=process_text,
tags=self.tags,
**(
{"fs": self.workflow_filestorage}
if self.workflow_filestorage is not None
Expand Down Expand Up @@ -399,6 +401,7 @@ def _summarize_and_index(
chunk_size=0,
chunk_overlap=0,
usage_kwargs=usage_kwargs,
tags=self.tags,
**(
{"fs": self.workflow_filestorage}
if self.workflow_filestorage is not None
Expand Down
2 changes: 1 addition & 1 deletion tools/text_extractor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
unstract-sdk~=0.55.0rc2
unstract-sdk~=0.55.0rc3
# Required for remote storage support
s3fs[boto3]==2024.6.0
2 changes: 1 addition & 1 deletion tools/text_extractor/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Text Extractor",
"functionName": "text_extractor",
"toolVersion": "0.0.44",
"toolVersion": "0.0.45",
"description": "The Text Extractor is a powerful tool designed to convert documents to its text form or Extract texts from documents",
"input": {
"description": "Document"
Expand Down
4 changes: 2 additions & 2 deletions tools/text_extractor/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ def run(
self.stream_log("Text extraction adapter has been created successfully.")
if self.workflow_filestorage:
extraction_result: TextExtractionResult = text_extraction_adapter.process(
input_file_path=input_file, fs=self.workflow_filestorage
input_file_path=input_file, fs=self.workflow_filestorage, tags=self.tags
)
else:
extraction_result: TextExtractionResult = text_extraction_adapter.process(
input_file_path=input_file
input_file_path=input_file, tags=self.tags
)
extracted_text = self.convert_to_actual_string(extraction_result.extracted_text)

Expand Down

0 comments on commit 557e943

Please sign in to comment.