[FEATURE] Changes required for highlighting (#555)

* Changes for highlighting in prompt studio Signed-off-by: Deepak <[email protected]> * Precommit fixes Signed-off-by: Deepak <[email protected]> * Send metadata always from prompt-studio Signed-off-by: Deepak <[email protected]> * pre-commit fix Signed-off-by: Deepak <[email protected]> * SDK version bump Signed-off-by: Deepak <[email protected]> * Dependency resolution Signed-off-by: Deepak <[email protected]> * Update pdm.lock for unstract/core * Send metadata by default and remove it based on include_metadata Signed-off-by: Deepak <[email protected]> * Minor refactor for prompt-service Signed-off-by: Deepak <[email protected]> * Minor fix in structure tool Signed-off-by: Deepak <[email protected]> * Update pdm.lock for prompt-service * Update pdm.lock for backend * Update pdm.lock for root * Highlight related changes in tool Signed-off-by: Deepak <[email protected]> * Optimized code and reverted unwanted code Signed-off-by: Deepak <[email protected]> * Bumped structure tool version Signed-off-by: Deepak <[email protected]> --------- Signed-off-by: Deepak <[email protected]> Signed-off-by: Deepak K <[email protected]> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Zipstack · Aug 16, 2024 · b6057e8 · b6057e8
1 parent 27f3e91
commit b6057e8
Show file tree

Hide file tree

Showing 28 changed files with 360 additions and 215 deletions.
diff --git a/backend/api/api_deployment_views.py b/backend/api/api_deployment_views.py
@@ -74,6 +74,7 @@ def get(
         self, request: Request, org_name: str, api_name: str, api: APIDeployment
     ) -> Response:
         execution_id = request.query_params.get("execution_id")
+        include_metadata = request.query_params.get("include_metadata", False)
         if not execution_id:
             raise InvalidAPIRequest("execution_id shouldn't be empty")
         response: ExecutionResponse = DeploymentHelper.get_execution_status(
@@ -87,6 +88,11 @@ def get(
                 },
                 status=status.HTTP_422_UNPROCESSABLE_ENTITY,
             )
+        if (
+            response.execution_status == CeleryTaskState.SUCCESS.value
+            and not include_metadata
+        ):
+            response.remove_result_metadata_keys()
         return Response(
             {"status": response.execution_status, "message": response.result},
             status=status.HTTP_200_OK,

diff --git a/backend/api/deployment_helper.py b/backend/api/deployment_helper.py
@@ -174,11 +174,13 @@ def execute_workflow(
                 hash_values_of_files=hash_values_of_files,
                 timeout=timeout,
                 execution_id=execution_id,
-                include_metadata=include_metadata,
             )
             result.status_api = DeploymentHelper.construct_status_endpoint(
                 api_endpoint=api.api_endpoint, execution_id=execution_id
             )
+            if not include_metadata:
+                result.remove_result_metadata_keys()
+            cls._send_notification(api=api, result=result)
         except Exception as error:
             DestinationConnector.delete_api_storage_dir(
                 workflow_id=workflow_id, execution_id=execution_id

diff --git a/backend/file_management/file_management_helper.py b/backend/file_management/file_management_helper.py
@@ -223,15 +223,22 @@ def delete_related_files(
     ) -> bool:
         fs = file_system.get_fsspec_fs()
         base_path = FileManagerHelper._get_base_path(file_system, path)
-
         base_file_name, _ = os.path.splitext(file_name)
-        file_name_txt = base_file_name + ".txt"
-
-        for directory in directories:
-            file_path = str(Path(base_path) / directory / file_name_txt)
+        pattern = f"{base_file_name}.*"
+        file_paths = FileManagerHelper._find_files(fs, base_path, directories, pattern)
+        for file_path in file_paths:
             FileManagerHelper._delete_file(fs, file_path)
         return True
 
+    @staticmethod
+    def _find_files(fs, base_path: str, directories: list[str], pattern: str):
+        file_paths = []
+        for directory in directories:
+            directory_path = str(Path(base_path) / directory)
+            for file in fs.glob(f"{directory_path}/{pattern}"):
+                file_paths.append(file)
+        return file_paths
+
     @staticmethod
     def handle_sub_directory_for_tenants(
         org_id: str, user_id: str, tool_id: str, is_create: bool

diff --git a/backend/prompt_studio/processor_loader.py b/backend/prompt_studio/processor_loader.py
@@ -72,3 +72,13 @@ def load_plugins() -> list[Any]:
         logger.info("No processor plugins found.")
 
     return processor_plugins
+
+
+def get_plugin_class_by_name(name: str, plugins: list[Any]) -> Any:
+    """Retrieve a specific plugin class by name."""
+    for plugin in plugins:
+        metadata = plugin[ProcessorConfig.METADATA]
+        if metadata.get(ProcessorConfig.METADATA_NAME) == name:
+            return metadata.get(ProcessorConfig.METADATA_SERVICE_CLASS)
+    logger.warning("Plugin with name '%s' not found.", name)
+    return None
diff --git a/backend/prompt_studio/prompt_studio_core/constants.py b/backend/prompt_studio/prompt_studio_core/constants.py
@@ -62,7 +62,6 @@ class ToolStudioPromptKeys:
     WORD = "word"
     SYNONYMS = "synonyms"
     OUTPUTS = "outputs"
-    ASSERT_PROMPT = "assert_prompt"
     SECTION = "section"
     DEFAULT = "default"
     REINDEX = "reindex"
@@ -89,6 +88,7 @@ class ToolStudioPromptKeys:
     CONTEXT = "context"
     METADATA = "metadata"
     INCLUDE_METADATA = "include_metadata"
+    PLATFORM_POSTAMBLE = "platform_postamble"
 
 
 class FileViewTypes:

diff --git a/backend/prompt_studio/prompt_studio_core/prompt_studio_helper.py b/backend/prompt_studio/prompt_studio_core/prompt_studio_helper.py
@@ -3,7 +3,7 @@
 import os
 import uuid
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 from account.constants import Common
 from account.models import User
@@ -291,6 +291,7 @@ def index_document(
         document_id: str,
         is_summary: bool = False,
         run_id: str = None,
+        text_processor: Optional[type[Any]] = None,
     ) -> Any:
         """Method to index a document.
 
@@ -340,7 +341,9 @@ def index_document(
         # Need to check the user who created profile manager
         # has access to adapters configured in profile manager
         PromptStudioHelper.validate_profile_manager_owner_access(default_profile)
-
+        process_text = None
+        if text_processor:
+            process_text = text_processor.process
         doc_id = PromptStudioHelper.dynamic_indexer(
             profile_manager=default_profile,
             tool_id=tool_id,
@@ -351,6 +354,7 @@ def index_document(
             reindex=True,
             run_id=run_id,
             user_id=user_id,
+            process_text=process_text,
         )
 
         logger.info(f"[{tool_id}] Indexing successful for doc: {file_name}")
@@ -372,6 +376,7 @@ def prompt_responder(
         id: Optional[str] = None,
         run_id: str = None,
         profile_manager_id: Optional[str] = None,
+        text_processor: Optional[type[Any]] = None,
     ) -> Any:
         """Execute chain/single run of the prompts. Makes a call to prompt
         service and returns the dict of response.
@@ -398,19 +403,26 @@ def prompt_responder(
 
         if id:
             return PromptStudioHelper._execute_single_prompt(
-                id,
-                doc_path,
-                doc_name,
-                tool_id,
-                org_id,
-                user_id,
-                document_id,
-                run_id,
-                profile_manager_id,
+                id=id,
+                doc_path=doc_path,
+                doc_name=doc_name,
+                tool_id=tool_id,
+                org_id=org_id,
+                user_id=user_id,
+                document_id=document_id,
+                run_id=run_id,
+                profile_manager_id=profile_manager_id,
+                text_processor=text_processor,
             )
         else:
             return PromptStudioHelper._execute_prompts_in_single_pass(
-                doc_path, tool_id, org_id, user_id, document_id, run_id
+                doc_path=doc_path,
+                tool_id=tool_id,
+                org_id=org_id,
+                user_id=user_id,
+                document_id=document_id,
+                run_id=run_id,
+                text_processor=text_processor,
             )
 
     @staticmethod
@@ -424,6 +436,7 @@ def _execute_single_prompt(
         document_id,
         run_id,
         profile_manager_id,
+        text_processor: Optional[type[Any]] = None,
     ):
         prompt_instance = PromptStudioHelper._fetch_prompt_from_id(id)
         prompt_name = prompt_instance.prompt_key
@@ -458,7 +471,9 @@ def _execute_single_prompt(
             LogLevels.RUN,
             "Invoking prompt service",
         )
-
+        process_text = None
+        if text_processor:
+            process_text = text_processor.process
         try:
             response = PromptStudioHelper._fetch_response(
                 doc_path=doc_path,
@@ -470,9 +485,15 @@ def _execute_single_prompt(
                 run_id=run_id,
                 profile_manager_id=profile_manager_id,
                 user_id=user_id,
+                process_text=process_text,
             )
             return PromptStudioHelper._handle_response(
-                response, run_id, prompts, document_id, False, profile_manager_id
+                response=response,
+                run_id=run_id,
+                prompts=prompts,
+                document_id=document_id,
+                is_single_pass=False,
+                profile_manager_id=profile_manager_id,
             )
         except Exception as e:
             logger.error(
@@ -495,7 +516,13 @@ def _execute_single_prompt(
 
     @staticmethod
     def _execute_prompts_in_single_pass(
-        doc_path, tool_id, org_id, user_id, document_id, run_id
+        doc_path,
+        tool_id,
+        org_id,
+        user_id,
+        document_id,
+        run_id,
+        text_processor: Optional[type[Any]] = None,
     ):
         prompts = PromptStudioHelper.fetch_prompt_from_tool(tool_id)
         prompts = [
@@ -513,7 +540,9 @@ def _execute_prompts_in_single_pass(
             LogLevels.RUN,
             "Executing prompts in single pass",
         )
-
+        process_text = None
+        if text_processor:
+            process_text = text_processor.process
         try:
             tool = prompts[0].tool_id
             response = PromptStudioHelper._fetch_single_pass_response(
@@ -524,9 +553,14 @@ def _execute_prompts_in_single_pass(
                 document_id=document_id,
                 run_id=run_id,
                 user_id=user_id,
+                process_text=process_text,
             )
             return PromptStudioHelper._handle_response(
-                response, run_id, prompts, document_id, True
+                response=response,
+                run_id=run_id,
+                prompts=prompts,
+                document_id=document_id,
+                is_single_pass=True,
             )
         except Exception as e:
             logger.error(
@@ -556,7 +590,12 @@ def _get_document_path(org_id, user_id, tool_id, doc_name):
 
     @staticmethod
     def _handle_response(
-        response, run_id, prompts, document_id, is_single_pass, profile_manager_id=None
+        response,
+        run_id,
+        prompts,
+        document_id,
+        is_single_pass,
+        profile_manager_id=None,
     ):
         if response.get("status") == IndexingStatus.PENDING_STATUS.value:
             return {
@@ -586,6 +625,7 @@ def _fetch_response(
         run_id: str,
         user_id: str,
         profile_manager_id: Optional[str] = None,
+        process_text: Optional[Callable[[str], str]] = None,
     ) -> Any:
         """Utility function to invoke prompt service. Used internally.
 
@@ -656,6 +696,7 @@ def _fetch_response(
             is_summary=tool.summarize_as_source,
             run_id=run_id,
             user_id=user_id,
+            process_text=process_text,
         )
         if index_result.get("status") == IndexingStatus.PENDING_STATUS.value:
             return {
@@ -713,6 +754,9 @@ def _fetch_response(
         tool_settings[TSPKeys.PREAMBLE] = tool.preamble
         tool_settings[TSPKeys.POSTAMBLE] = tool.postamble
         tool_settings[TSPKeys.GRAMMAR] = grammar_list
+        tool_settings[TSPKeys.PLATFORM_POSTAMBLE] = getattr(
+            settings, TSPKeys.PLATFORM_POSTAMBLE.upper(), ""
+        )
 
         tool_id = str(tool.tool_id)
 
@@ -760,6 +804,7 @@ def dynamic_indexer(
         is_summary: bool = False,
         reindex: bool = False,
         run_id: str = None,
+        process_text: Optional[Callable[[str], str]] = None,
     ) -> Any:
         """Used to index a file based on the passed arguments.
 
@@ -842,6 +887,7 @@ def dynamic_indexer(
                 reindex=reindex,
                 output_file_path=extract_file_path,
                 usage_kwargs=usage_kwargs.copy(),
+                process_text=process_text,
             )
 
             PromptStudioIndexHelper.handle_index_manager(
@@ -875,6 +921,7 @@ def _fetch_single_pass_response(
         user_id: str,
         document_id: str,
         run_id: str = None,
+        process_text: Optional[Callable[[str], str]] = None,
     ) -> Any:
         tool_id: str = str(tool.tool_id)
         outputs: list[dict[str, Any]] = []
@@ -910,6 +957,7 @@ def _fetch_single_pass_response(
             document_id=document_id,
             run_id=run_id,
             user_id=user_id,
+            process_text=process_text,
         )
         if index_result.get("status") == IndexingStatus.PENDING_STATUS.value:
             return {