Skip to content

Commit

Permalink
Merge branch 'main' into feat/support-readonly-for-custom-rjsf-widget…
Browse files Browse the repository at this point in the history
…s-and-transform-llmwhisperer-v2-schema-for-cloud
  • Loading branch information
jaseemjaskp authored Jan 23, 2025
2 parents 563f303 + 104a82b commit c5eb859
Show file tree
Hide file tree
Showing 60 changed files with 3,256 additions and 2,884 deletions.
49 changes: 8 additions & 41 deletions .github/workflows/ci-container-build.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,14 @@
name: Container Image Build Test for PRs

env:
VERSION: ci-test # Used for docker tag

on:
push:
branches:
- main
- development
paths:
- 'backend/**'
- 'frontend/**'
- 'unstract/**'
- 'platform-service/**'
- 'x2text-service/**'
- 'runner/**'
- 'docker/dockerfiles/**'
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
branches:
- main
- development
paths:
- 'backend/**'
- 'frontend/**'
- 'unstract/**'
- 'platform-service/**'
- 'x2text-service/**'
- 'runner/**'
- 'docker/dockerfiles/**'
workflow_dispatch:

jobs:
build:
Expand All @@ -42,27 +22,14 @@ jobs:
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Container Build
working-directory: ./docker
run: |
docker compose -f docker-compose.build.yaml build
- name: Container Run
working-directory: ./docker
run: |
cp ../backend/sample.env ../backend/.env
cp ../platform-service/sample.env ../platform-service/.env
cp ../prompt-service/sample.env ../prompt-service/.env
cp ../runner/sample.env ../runner/.env
cp ../x2text-service/sample.env ../x2text-service/.env
cp sample.essentials.env essentials.env
cp sample.env .env
docker compose -f docker-compose.yaml up -d
sleep 10
docker compose -f docker-compose.yaml ps -a
./run-platform.sh -b
sleep 30
docker compose -f docker/docker-compose.yaml ps -a
# Get the names of exited containers
custom_format="{{.Name}}\t{{.Image}}\t{{.Service}}"
EXITED_CONTAINERS=$(docker compose -f docker-compose.yaml ps -a --filter status=exited --format "$custom_format")
EXITED_CONTAINERS=$(docker compose -f docker/docker-compose.yaml ps -a --filter status=exited --format "$custom_format")
line_count=$(echo "$EXITED_CONTAINERS" | wc -l)
Expand All @@ -76,9 +43,9 @@ jobs:
# Print logs of exited containers
IFS=$'\n'
for SERVICE in $SERVICE; do
docker compose -f docker-compose.yaml logs "$SERVICE"
docker compose -f docker/docker-compose.yaml logs "$SERVICE"
done
docker compose -f docker-compose.yaml down -v
docker compose -f docker/docker-compose.yaml down -v
exit 1
fi
docker compose -f docker-compose.yaml down -v
docker compose -f docker/docker-compose.yaml down -v
1,002 changes: 501 additions & 501 deletions backend/pdm.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"boolean":"boolean",
"json":"json",
"table":"table",
"record":"record"
"record":"record",
"line_item":"line-item"
},
"output_processing":{
"DEFAULT":"Default"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def update_or_create_prompt_output(

output = outputs.get(prompt.prompt_key)
# TODO: use enums here
if prompt.enforce_type in {"json", "table", "record"}:
if prompt.enforce_type in {"json", "table", "record", "line-item"}:
output = json.dumps(output)
profile_manager = default_profile
eval_metrics = outputs.get(f"{prompt.prompt_key}__evaluation", [])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Django 4.2.1 on 2025-01-09 21:09

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema migration: extend ``ToolStudioPrompt.enforce_type`` choices.

    Adds the new ``"line-item"`` choice alongside the existing Text, number,
    email, date, boolean, json, table and record choices. Auto-generated by
    Django (4.2.1); the choice list must stay in sync with the ``EnforceType``
    TextChoices enum in ``prompt_studio_v2/models.py``.
    """

    dependencies = [
        # Follows the previous alteration of the `required` field on the model.
        ("prompt_studio_v2", "0005_alter_toolstudioprompt_required"),
    ]

    operations = [
        migrations.AlterField(
            model_name="toolstudioprompt",
            name="enforce_type",
            field=models.TextField(
                blank=True,
                choices=[
                    ("Text", "Response sent as Text"),
                    ("number", "Response sent as number"),
                    ("email", "Response sent as email"),
                    ("date", "Response sent as date"),
                    ("boolean", "Response sent as boolean"),
                    ("json", "Response sent as json"),
                    ("table", "Response sent as table"),
                    (
                        "record",
                        # NOTE(review): "distint" looks like a typo for
                        # "distinct"; the same text exists in models.py, so
                        # fix both together to avoid a spurious migration diff.
                        "Response sent for records. Entries of records are list of logical and organized individual entities with distint values",
                    ),
                    (
                        "line-item",
                        # NOTE(review): "is large a" reads as a typo for
                        # "is a large"; same text exists in models.py — fix
                        # in both places in one change.
                        "Response sent as line-item which is large a JSON output. If extraction stopped due to token limitation, we try to continue extraction from where it stopped",
                    ),
                ],
                db_comment="Field to store the type in which the response to be returned.",
                default="Text",
            ),
        ),
    ]
6 changes: 6 additions & 0 deletions backend/prompt_studio/prompt_studio_v2/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ class EnforceType(models.TextChoices):
"logical and organized individual "
"entities with distint values"
)
LINE_ITEM = "line-item", (
"Response sent as line-item "
"which is large a JSON output. "
"If extraction stopped due to token limitation, "
"we try to continue extraction from where it stopped"
)

class PromptType(models.TextChoices):
PROMPT = "PROMPT", "Response sent as Text"
Expand Down
2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-socketio==5.9.0", # For log_events
"social-auth-app-django==5.3.0", # For OAuth
"social-auth-core==4.4.2", # For OAuth
"unstract-sdk~=0.54.0rc12",
"unstract-sdk~=0.55.0rc2",
# ! IMPORTANT!
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
Expand Down
4 changes: 2 additions & 2 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ REMOTE_PROMPT_STUDIO_FILE_PATH=

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.55"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.56"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.55"
STRUCTURE_TOOL_IMAGE_TAG="0.0.56"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Expand Down
6 changes: 5 additions & 1 deletion backend/utils/FileValidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.template.defaultfilters import filesizeformat
from django.utils.translation import gettext_lazy as _
from typing_extensions import NotRequired, Unpack
from unstract.sdk.file_storage.constants import FileOperationParams


class FileValidationParam(TypedDict):
Expand Down Expand Up @@ -69,7 +70,10 @@ def _check_file_extension(self, file: InMemoryUploadedFile) -> None:
raise ValidationError(message)

def _check_file_mime_type(self, file: InMemoryUploadedFile) -> None:
mimetype = magic.from_buffer(file.read(2048), mime=True)
# TODO: Need to optimise; avoid reading the entire file.
mimetype = magic.from_buffer(
file.read(FileOperationParams.READ_ENTIRE_LENGTH), mime=True
)
file.seek(0) # Reset the file pointer to the start

if self.allowed_mimetypes and mimetype not in self.allowed_mimetypes:
Expand Down
15 changes: 2 additions & 13 deletions backend/workflow_manager/endpoint_v2/base_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
from unstract.connectors.filesystems.unstract_file_system import UnstractFileSystem
from unstract.flags.feature_flag import check_feature_flag_status

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
from unstract.filesystem import FileStorageType, FileSystem


class BaseConnector(ExecutionFileHandler):
"""Base class for connectors providing common methods and utilities."""
Expand Down Expand Up @@ -86,16 +83,8 @@ def get_json_schema(cls, file_path: str) -> dict[str, Any]:
json.JSONDecodeError: If there is an issue decoding the JSON file.
"""
try:
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
file_system = FileSystem(FileStorageType.WORKFLOW_EXECUTION)
file_storage = file_system.get_file_storage()
file_contents = file_storage.read(
path=file_path, mode="r", encoding="utf-8"
)
schema: dict[str, Any] = json.load(file_contents)
else:
with open(file_path, encoding="utf-8") as file:
schema: dict[str, Any] = json.load(file)
with open(file_path, encoding="utf-8") as file:
schema: dict[str, Any] = json.load(file)
except OSError:
schema = {}
return schema
Expand Down
16 changes: 11 additions & 5 deletions backend/workflow_manager/endpoint_v2/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
SourceConnectorNotConfigured,
)
from workflow_manager.endpoint_v2.models import WorkflowEndpoint
from workflow_manager.file_execution.models import WorkflowFileExecution
from workflow_manager.workflow_v2.execution import WorkflowExecutionServiceHelper
from workflow_manager.workflow_v2.file_history_helper import FileHistoryHelper
from workflow_manager.workflow_v2.models.workflow import Workflow
Expand Down Expand Up @@ -596,11 +597,14 @@ def _copy_file_to_destination(
# Update the seek position
seek_position += len(chunk)

def add_file_to_volume(self, input_file_path: str, file_hash: FileHash) -> str:
def add_file_to_volume(
self, input_file_path: str, workflow_file_execution: WorkflowFileExecution
) -> str:
"""Add input file to execution directory.
Args:
input_file_path (str): source file
workflow_file_execution: WorkflowFileExecution model
Raises:
InvalidSource: _description_
Expand All @@ -614,18 +618,20 @@ def add_file_to_volume(self, input_file_path: str, file_hash: FileHash) -> str:
file_content_hash = self.add_input_from_connector_to_volume(
input_file_path=input_file_path,
)
if file_content_hash != file_hash.file_hash:
if file_content_hash != workflow_file_execution.file_hash:
raise FileHashMismatched()
elif connection_type == WorkflowEndpoint.ConnectionType.API:
self.add_input_from_api_storage_to_volume(input_file_path=input_file_path)
if file_name != file_hash.file_name:
if file_name != workflow_file_execution.file_name:
raise FileHashNotFound()
file_content_hash = file_hash.file_hash
file_content_hash = workflow_file_execution.file_hash
else:
raise InvalidSourceConnectionType()

self.add_metadata_to_volume(
input_file_path=input_file_path, source_hash=file_content_hash
input_file_path=input_file_path,
file_execution_id=workflow_file_execution.id,
source_hash=file_content_hash,
)
return file_name

Expand Down
10 changes: 5 additions & 5 deletions backend/workflow_manager/workflow_v2/workflow_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,14 @@ def _process_file(
workflow_file_execution: WorkflowFileExecution,
) -> Optional[str]:
error: Optional[str] = None
# Multiple run_ids are linked to an execution_id
# Each run_id corresponds to workflow runs for a single file
# It should be the uuid of workflow_file_execution
file_execution_id = str(workflow_file_execution.id)
file_name = source.add_file_to_volume(
input_file_path=input_file, file_hash=file_hash
input_file_path=input_file, workflow_file_execution=workflow_file_execution
)
try:
# Multiple run_ids are linked to an execution_id
# Each run_id corresponds to workflow runs for a single file
# It should be the uuid of workflow_file_execution
file_execution_id = str(workflow_file_execution.id)
execution_service.file_execution_id = file_execution_id
execution_service.initiate_tool_execution(
current_file_idx, total_files, file_name, single_step
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
) : null}
</div>
<div>
<Tooltip title="Manage Documents">
<Tooltip title="Manage Document Variants">
<Button
className="doc-manager-btn"
onClick={() => setOpenManageDocsModal(true)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ function ManageDocsModal({

const columns = [
{
title: "Document",
title: "Document Variants",
dataIndex: "document",
key: "document",
},
Expand Down Expand Up @@ -580,7 +580,7 @@ function ManageDocsModal({
setIsUploading(false);
setAlertDetails({
type: "error",
content: "Failed to upload",
content: info?.file?.response?.errors[0]?.detail || "Failed to Upload",
});
}
};
Expand Down Expand Up @@ -663,7 +663,7 @@ function ManageDocsModal({
<SpaceWrapper>
<Space>
<Typography.Text className="add-cus-tool-header">
Manage Documents
Manage Document Variants
</Typography.Text>
</Space>
<div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) {
setIsDocLoading(true);
try {
const res = await axiosPrivate.get(fileUrlEndpoint);
const base64String = res?.data?.data || "";
const base64String = res?.data?.data?.data || "";
const blob = base64toBlob(base64String);
setFileUrl(URL.createObjectURL(blob));
} catch (err) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ function OutputForDocModal({

const columns = [
{
title: "Document",
title: "Document Variants",
dataIndex: "document",
key: "document",
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
Row,
Select,
Space,
Tag,
Typography,
} from "antd";
import { useEffect, useRef, useState } from "react";
Expand Down Expand Up @@ -251,6 +252,22 @@ function PromptCardItems({
</Space>
</Button>
</Space>
<Space>
{details?.enable_highlight &&
["json", "table", "record"].includes(enforceType) && (
<Tag
color="red"
style={{
whiteSpace: "normal",
wordWrap: "break-word",
minWidth: "200px",
}}
>
Highlighting is not supported when enforce type is{" "}
{enforceType}
</Tag>
)}
</Space>
<Space>
{(enforceType === TABLE_ENFORCE_TYPE ||
enforceType === RECORD_ENFORCE_TYPE) &&
Expand Down
Loading

0 comments on commit c5eb859

Please sign in to comment.