Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

passing tags to x2text from tools #1086

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/classifier/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
-unstract-sdk~=0.55.0rc2
+unstract-sdk~=0.55.0rc3
# Required for remote storage support
s3fs[boto3]==2024.6.0
2 changes: 1 addition & 1 deletion tools/classifier/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "File Classifier",
"functionName": "classify",
-"toolVersion": "0.0.47",
+"toolVersion": "0.0.48",
"description": "Classifies a file into a bin based on its contents",
"input": {
"description": "File to be classified"
Expand Down
6 changes: 4 additions & 2 deletions tools/classifier/src/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,13 @@ def _extract_from_adapter(self, file: str, adapter_id: str) -> Optional[str]:
try:
if self.tool.workflow_filestorage:
extraction_result: TextExtractionResult = x2text.process(
-input_file_path=file, fs=self.tool.workflow_filestorage
+input_file_path=file,
+fs=self.tool.workflow_filestorage,
+tags=self.tool.tags,
)
else:
extraction_result: TextExtractionResult = x2text.process(
-input_file_path=file
+input_file_path=file, tags=self.tool.tags
)
extracted_text: str = extraction_result.extracted_text
return extracted_text
Expand Down
2 changes: 1 addition & 1 deletion tools/structure/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
-unstract-sdk~=0.55.0rc2
+unstract-sdk~=0.55.0rc3
# Required for remote storage support
s3fs[boto3]==2024.6.0
2 changes: 1 addition & 1 deletion tools/structure/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Structure Tool",
"functionName": "structure_tool",
-"toolVersion": "0.0.57",
+"toolVersion": "0.0.58",
"description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
"input": {
"description": "File that needs to be indexed and parsed for answers"
Expand Down
3 changes: 3 additions & 0 deletions tools/structure/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def run(
reindex=True,
usage_kwargs=usage_kwargs,
process_text=process_text,
+tags=self.tags,
**(
{"fs": self.workflow_filestorage}
if self.workflow_filestorage is not None
Expand Down Expand Up @@ -191,6 +192,7 @@ def run(
reindex=reindex,
usage_kwargs=usage_kwargs,
process_text=process_text,
+tags=self.tags,
**(
{"fs": self.workflow_filestorage}
if self.workflow_filestorage is not None
Expand Down Expand Up @@ -399,6 +401,7 @@ def _summarize_and_index(
chunk_size=0,
chunk_overlap=0,
usage_kwargs=usage_kwargs,
+tags=self.tags,
**(
{"fs": self.workflow_filestorage}
if self.workflow_filestorage is not None
Expand Down
2 changes: 1 addition & 1 deletion tools/text_extractor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Add your dependencies here

# Required for all unstract tools
-unstract-sdk~=0.55.0rc2
+unstract-sdk~=0.55.0rc3
# Required for remote storage support
s3fs[boto3]==2024.6.0
2 changes: 1 addition & 1 deletion tools/text_extractor/src/config/properties.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"schemaVersion": "0.0.1",
"displayName": "Text Extractor",
"functionName": "text_extractor",
-"toolVersion": "0.0.44",
+"toolVersion": "0.0.45",
"description": "The Text Extractor is a powerful tool designed to convert documents to its text form or Extract texts from documents",
"input": {
"description": "Document"
Expand Down
4 changes: 2 additions & 2 deletions tools/text_extractor/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ def run(
self.stream_log("Text extraction adapter has been created successfully.")
if self.workflow_filestorage:
extraction_result: TextExtractionResult = text_extraction_adapter.process(
-input_file_path=input_file, fs=self.workflow_filestorage
+input_file_path=input_file, fs=self.workflow_filestorage, tags=self.tags
)
else:
extraction_result: TextExtractionResult = text_extraction_adapter.process(
-input_file_path=input_file
+input_file_path=input_file, tags=self.tags
)
extracted_text = self.convert_to_actual_string(extraction_result.extracted_text)

Expand Down
Loading