Skip to content

Commit

Permalink
Merge branch 'main' into feat/support-readonly-for-custom-rjsf-widget…
Browse files Browse the repository at this point in the history
…s-and-transform-llmwhisperer-v2-schema-for-cloud
  • Loading branch information
jaseemjaskp authored Jan 23, 2025
2 parents 563f303 + 104a82b commit c5eb859
Show file tree
Hide file tree
Showing 60 changed files with 3,256 additions and 2,884 deletions.
49 changes: 8 additions & 41 deletions .github/workflows/ci-container-build.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,14 @@
name: Container Image Build Test for PRs

env:
VERSION: ci-test # Used for docker tag

on:
push:
branches:
- main
- development
paths:
- 'backend/**'
- 'frontend/**'
- 'unstract/**'
- 'platform-service/**'
- 'x2text-service/**'
- 'runner/**'
- 'docker/dockerfiles/**'
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
branches:
- main
- development
paths:
- 'backend/**'
- 'frontend/**'
- 'unstract/**'
- 'platform-service/**'
- 'x2text-service/**'
- 'runner/**'
- 'docker/dockerfiles/**'
workflow_dispatch:

jobs:
build:
Expand All @@ -42,27 +22,14 @@ jobs:
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Container Build
working-directory: ./docker
run: |
docker compose -f docker-compose.build.yaml build
- name: Container Run
working-directory: ./docker
run: |
cp ../backend/sample.env ../backend/.env
cp ../platform-service/sample.env ../platform-service/.env
cp ../prompt-service/sample.env ../prompt-service/.env
cp ../runner/sample.env ../runner/.env
cp ../x2text-service/sample.env ../x2text-service/.env
cp sample.essentials.env essentials.env
cp sample.env .env
docker compose -f docker-compose.yaml up -d
sleep 10
docker compose -f docker-compose.yaml ps -a
./run-platform.sh -b
sleep 30
docker compose -f docker/docker-compose.yaml ps -a
# Get the names of exited containers
custom_format="{{.Name}}\t{{.Image}}\t{{.Service}}"
EXITED_CONTAINERS=$(docker compose -f docker-compose.yaml ps -a --filter status=exited --format "$custom_format")
EXITED_CONTAINERS=$(docker compose -f docker/docker-compose.yaml ps -a --filter status=exited --format "$custom_format")
line_count=$(echo "$EXITED_CONTAINERS" | wc -l)
Expand All @@ -76,9 +43,9 @@ jobs:
# Print logs of exited containers
IFS=$'\n'
for SERVICE in $SERVICE; do
docker compose -f docker-compose.yaml logs "$SERVICE"
docker compose -f docker/docker-compose.yaml logs "$SERVICE"
done
docker compose -f docker-compose.yaml down -v
docker compose -f docker/docker-compose.yaml down -v
exit 1
fi
docker compose -f docker-compose.yaml down -v
docker compose -f docker/docker-compose.yaml down -v
1,002 changes: 501 additions & 501 deletions backend/pdm.lock

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"boolean":"boolean",
"json":"json",
"table":"table",
"record":"record"
"record":"record",
"line_item":"line-item"
},
"output_processing":{
"DEFAULT":"Default"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def update_or_create_prompt_output(

output = outputs.get(prompt.prompt_key)
# TODO: use enums here
if prompt.enforce_type in {"json", "table", "record"}:
if prompt.enforce_type in {"json", "table", "record", "line-item"}:
output = json.dumps(output)
profile_manager = default_profile
eval_metrics = outputs.get(f"{prompt.prompt_key}__evaluation", [])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Django 4.2.1 on 2025-01-09 21:09

from django.db import migrations, models


class Migration(migrations.Migration):
    """Schema migration: extend ``ToolStudioPrompt.enforce_type`` choices.

    Adds the new ``"line-item"`` choice alongside the existing Text, number,
    email, date, boolean, json, table and record choices. Auto-generated by
    Django (4.2.1); the choice list must stay in sync with the ``EnforceType``
    TextChoices enum in ``prompt_studio_v2/models.py``.
    """

    dependencies = [
        # Follows the previous alteration of the `required` field on the model.
        ("prompt_studio_v2", "0005_alter_toolstudioprompt_required"),
    ]

    operations = [
        migrations.AlterField(
            model_name="toolstudioprompt",
            name="enforce_type",
            field=models.TextField(
                blank=True,
                choices=[
                    ("Text", "Response sent as Text"),
                    ("number", "Response sent as number"),
                    ("email", "Response sent as email"),
                    ("date", "Response sent as date"),
                    ("boolean", "Response sent as boolean"),
                    ("json", "Response sent as json"),
                    ("table", "Response sent as table"),
                    (
                        "record",
                        # NOTE(review): "distint" looks like a typo for
                        # "distinct"; the same text exists in models.py, so
                        # fix both together to avoid a spurious migration diff.
                        "Response sent for records. Entries of records are list of logical and organized individual entities with distint values",
                    ),
                    (
                        "line-item",
                        # NOTE(review): "is large a" reads as a typo for
                        # "is a large"; same text exists in models.py — fix
                        # in both places in one change.
                        "Response sent as line-item which is large a JSON output. If extraction stopped due to token limitation, we try to continue extraction from where it stopped",
                    ),
                ],
                db_comment="Field to store the type in which the response to be returned.",
                default="Text",
            ),
        ),
    ]
6 changes: 6 additions & 0 deletions backend/prompt_studio/prompt_studio_v2/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ class EnforceType(models.TextChoices):
"logical and organized individual "
"entities with distint values"
)
LINE_ITEM = "line-item", (
"Response sent as line-item "
"which is large a JSON output. "
"If extraction stopped due to token limitation, "
"we try to continue extraction from where it stopped"
)

class PromptType(models.TextChoices):
PROMPT = "PROMPT", "Response sent as Text"
Expand Down
2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-socketio==5.9.0", # For log_events
"social-auth-app-django==5.3.0", # For OAuth
"social-auth-core==4.4.2", # For OAuth
"unstract-sdk~=0.54.0rc12",
"unstract-sdk~=0.55.0rc2",
# ! IMPORTANT!
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
Expand Down
4 changes: 2 additions & 2 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ REMOTE_PROMPT_STUDIO_FILE_PATH=

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.55"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.56"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.55"
STRUCTURE_TOOL_IMAGE_TAG="0.0.56"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Expand Down
6 changes: 5 additions & 1 deletion backend/utils/FileValidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.template.defaultfilters import filesizeformat
from django.utils.translation import gettext_lazy as _
from typing_extensions import NotRequired, Unpack
from unstract.sdk.file_storage.constants import FileOperationParams


class FileValidationParam(TypedDict):
Expand Down Expand Up @@ -69,7 +70,10 @@ def _check_file_extension(self, file: InMemoryUploadedFile) -> None:
raise ValidationError(message)

def _check_file_mime_type(self, file: InMemoryUploadedFile) -> None:
mimetype = magic.from_buffer(file.read(2048), mime=True)
# TODO: Need to optimise; avoid reading the entire file.
mimetype = magic.from_buffer(
file.read(FileOperationParams.READ_ENTIRE_LENGTH), mime=True
)
file.seek(0) # Reset the file pointer to the start

if self.allowed_mimetypes and mimetype not in self.allowed_mimetypes:
Expand Down
15 changes: 2 additions & 13 deletions backend/workflow_manager/endpoint_v2/base_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
from unstract.connectors.filesystems.unstract_file_system import UnstractFileSystem
from unstract.flags.feature_flag import check_feature_flag_status

if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
from unstract.filesystem import FileStorageType, FileSystem


class BaseConnector(ExecutionFileHandler):
"""Base class for connectors providing common methods and utilities."""
Expand Down Expand Up @@ -86,16 +83,8 @@ def get_json_schema(cls, file_path: str) -> dict[str, Any]:
json.JSONDecodeError: If there is an issue decoding the JSON file.
"""
try:
if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
file_system = FileSystem(FileStorageType.WORKFLOW_EXECUTION)
file_storage = file_system.get_file_storage()
file_contents = file_storage.read(
path=file_path, mode="r", encoding="utf-8"
)
schema: dict[str, Any] = json.load(file_contents)
else:
with open(file_path, encoding="utf-8") as file:
schema: dict[str, Any] = json.load(file)
with open(file_path, encoding="utf-8") as file:
schema: dict[str, Any] = json.load(file)
except OSError:
schema = {}
return schema
Expand Down
16 changes: 11 additions & 5 deletions backend/workflow_manager/endpoint_v2/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
SourceConnectorNotConfigured,
)
from workflow_manager.endpoint_v2.models import WorkflowEndpoint
from workflow_manager.file_execution.models import WorkflowFileExecution
from workflow_manager.workflow_v2.execution import WorkflowExecutionServiceHelper
from workflow_manager.workflow_v2.file_history_helper import FileHistoryHelper
from workflow_manager.workflow_v2.models.workflow import Workflow
Expand Down Expand Up @@ -596,11 +597,14 @@ def _copy_file_to_destination(
# Update the seek position
seek_position += len(chunk)

def add_file_to_volume(self, input_file_path: str, file_hash: FileHash) -> str:
def add_file_to_volume(
self, input_file_path: str, workflow_file_execution: WorkflowFileExecution
) -> str:
"""Add input file to execution directory.
Args:
input_file_path (str): source file
workflow_file_execution: WorkflowFileExecution model
Raises:
InvalidSource: _description_
Expand All @@ -614,18 +618,20 @@ def add_file_to_volume(self, input_file_path: str, file_hash: FileHash) -> str:
file_content_hash = self.add_input_from_connector_to_volume(
input_file_path=input_file_path,
)
if file_content_hash != file_hash.file_hash:
if file_content_hash != workflow_file_execution.file_hash:
raise FileHashMismatched()
elif connection_type == WorkflowEndpoint.ConnectionType.API:
self.add_input_from_api_storage_to_volume(input_file_path=input_file_path)
if file_name != file_hash.file_name:
if file_name != workflow_file_execution.file_name:
raise FileHashNotFound()
file_content_hash = file_hash.file_hash
file_content_hash = workflow_file_execution.file_hash
else:
raise InvalidSourceConnectionType()

self.add_metadata_to_volume(
input_file_path=input_file_path, source_hash=file_content_hash
input_file_path=input_file_path,
file_execution_id=workflow_file_execution.id,
source_hash=file_content_hash,
)
return file_name

Expand Down
10 changes: 5 additions & 5 deletions backend/workflow_manager/workflow_v2/workflow_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,14 @@ def _process_file(
workflow_file_execution: WorkflowFileExecution,
) -> Optional[str]:
error: Optional[str] = None
# Multiple run_ids are linked to an execution_id
# Each run_id corresponds to workflow runs for a single file
# It should be the uuid of workflow_file_execution
file_execution_id = str(workflow_file_execution.id)
file_name = source.add_file_to_volume(
input_file_path=input_file, file_hash=file_hash
input_file_path=input_file, workflow_file_execution=workflow_file_execution
)
try:
# Multiple run_ids are linked to an execution_id
# Each run_id corresponds to workflow runs for a single file
# It should be the uuid of workflow_file_execution
file_execution_id = str(workflow_file_execution.id)
execution_service.file_execution_id = file_execution_id
execution_service.initiate_tool_execution(
current_file_idx, total_files, file_name, single_step
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ function DocumentManager({ generateIndex, handleUpdateTool, handleDocChange }) {
) : null}
</div>
<div>
<Tooltip title="Manage Documents">
<Tooltip title="Manage Document Variants">
<Button
className="doc-manager-btn"
onClick={() => setOpenManageDocsModal(true)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ function ManageDocsModal({

const columns = [
{
title: "Document",
title: "Document Variants",
dataIndex: "document",
key: "document",
},
Expand Down Expand Up @@ -580,7 +580,7 @@ function ManageDocsModal({
setIsUploading(false);
setAlertDetails({
type: "error",
content: "Failed to upload",
content: info?.file?.response?.errors[0]?.detail || "Failed to Upload",
});
}
};
Expand Down Expand Up @@ -663,7 +663,7 @@ function ManageDocsModal({
<SpaceWrapper>
<Space>
<Typography.Text className="add-cus-tool-header">
Manage Documents
Manage Document Variants
</Typography.Text>
</Space>
<div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function OutputAnalyzerCard({ doc, selectedPrompts, totalFields }) {
setIsDocLoading(true);
try {
const res = await axiosPrivate.get(fileUrlEndpoint);
const base64String = res?.data?.data || "";
const base64String = res?.data?.data?.data || "";
const blob = base64toBlob(base64String);
setFileUrl(URL.createObjectURL(blob));
} catch (err) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ function OutputForDocModal({

const columns = [
{
title: "Document",
title: "Document Variants",
dataIndex: "document",
key: "document",
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
Row,
Select,
Space,
Tag,
Typography,
} from "antd";
import { useEffect, useRef, useState } from "react";
Expand Down Expand Up @@ -251,6 +252,22 @@ function PromptCardItems({
</Space>
</Button>
</Space>
<Space>
{details?.enable_highlight &&
["json", "table", "record"].includes(enforceType) && (
<Tag
color="red"
style={{
whiteSpace: "normal",
wordWrap: "break-word",
minWidth: "200px",
}}
>
Highlighting is not supported when enforce type is{" "}
{enforceType}
</Tag>
)}
</Space>
<Space>
{(enforceType === TABLE_ENFORCE_TYPE ||
enforceType === RECORD_ENFORCE_TYPE) &&
Expand Down
Loading

0 comments on commit c5eb859

Please sign in to comment.