Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2 changes of PromptStudio-Document-Index-Output-Managers #477

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.contrib import admin

from .models import DocumentManager

# Register DocumentManager with the Django admin site (no custom ModelAdmin is supplied).
admin.site.register(DocumentManager)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class PromptStudioDocumentManagerConfig(AppConfig):
    """Django app configuration for the prompt studio document manager (v2)."""

    name = "prompt_studio.prompt_studio_document_manager_v2"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class PSDMKeys:
    """String constants naming the ``DocumentManager`` fields."""

    DOCUMENT_ID = "document_id"
    DOCUMENT_NAME = "document_name"
    TOOL = "tool"
56 changes: 56 additions & 0 deletions backend/prompt_studio/prompt_studio_document_manager_v2/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import uuid

from account_v2.models import User
from django.db import models
from prompt_studio.prompt_studio_core_v2.models import CustomTool
from utils.models.base_model import BaseModel


class DocumentManager(BaseModel):
    """Document record belonging to a prompt studio tool.

    Each row stores a document name together with the ``CustomTool`` it is
    attached to. The ``unique_document_name_tool_index`` constraint allows a
    given document name only once per tool.
    """

    # Primary key, generated with uuid4 when no value is supplied.
    document_id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)

    document_name = models.CharField(
        blank=False,
        null=False,
        editable=False,
        db_comment="Field to store the document name",
    )

    # Owning tool; deleting the tool cascades to its documents.
    tool = models.ForeignKey(
        CustomTool,
        related_name="document_managers",
        on_delete=models.CASCADE,
        blank=False,
        null=False,
    )

    # Audit references; they are set to NULL when the referenced user is deleted.
    created_by = models.ForeignKey(
        User,
        related_name="document_managers_created",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        editable=False,
    )

    modified_by = models.ForeignKey(
        User,
        related_name="document_managers_modified",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        editable=False,
    )

    class Meta:
        db_table = "document_manager_v2"
        verbose_name = "Document Manager"
        verbose_name_plural = "Document Managers"
        constraints = [
            models.UniqueConstraint(
                name="unique_document_name_tool_index",
                fields=["document_name", "tool"],
            ),
        ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import logging

from prompt_studio.prompt_studio_core_v2.models import CustomTool

from .models import DocumentManager

logger = logging.getLogger(__name__)


class PromptStudioDocumentHelper:
    """Static helpers for creating and deleting ``DocumentManager`` records."""

    @staticmethod
    def create(tool_id: str, document_name: str) -> DocumentManager:
        """Create and return a document named ``document_name`` for a tool.

        The tool is fetched by primary key (``tool_id``) before the document
        row is created; any lookup or creation error propagates to the caller.
        """
        owning_tool: CustomTool = CustomTool.objects.get(pk=tool_id)
        new_document: DocumentManager = DocumentManager.objects.create(
            document_name=document_name,
            tool=owning_tool,
        )
        logger.info("Successfully created the record")
        return new_document

    @staticmethod
    def delete(document_id: str) -> None:
        """Fetch the document with primary key ``document_id`` and delete it."""
        DocumentManager.objects.get(pk=document_id).delete()
        logger.info("Successfully deleted the record")
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Any

from backend.serializers import AuditSerializer

from .constants import PSDMKeys
from .models import DocumentManager


class PromptStudioDocumentManagerSerializer(AuditSerializer):
    """Serializer for ``DocumentManager`` that emits a reduced set of keys."""

    class Meta:
        model = DocumentManager
        fields = "__all__"

    def to_representation(self, instance: DocumentManager) -> dict[str, Any]:
        """Return the parent representation limited to name, tool and id.

        Keys missing from the parent representation are simply left out.
        """
        full_rep: dict[str, str] = super().to_representation(instance)
        trimmed: dict[str, Any] = {}
        for key in (PSDMKeys.DOCUMENT_NAME, PSDMKeys.TOOL, PSDMKeys.DOCUMENT_ID):
            if key in full_rep:
                trimmed[key] = full_rep[key]
        return trimmed
24 changes: 24 additions & 0 deletions backend/prompt_studio/prompt_studio_document_manager_v2/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from django.urls import path
from rest_framework.urlpatterns import format_suffix_patterns

from .views import PromptStudioDocumentManagerView

# Collection view: GET is dispatched to ``list`` and POST to ``create``.
prompt_studio_documents_list = PromptStudioDocumentManagerView.as_view(
    {"get": "list", "post": "create"}
)

# Single-record view: GET is dispatched to ``retrieve``.
# NOTE(review): no route below references this view - confirm whether a
# detail path is meant to be exposed.
prompt_studio_documents_detail = PromptStudioDocumentManagerView.as_view(
    {"get": "retrieve"}
)

urlpatterns = format_suffix_patterns(
    [
        path(
            "prompt-document/",
            prompt_studio_documents_list,
            name="prompt-studio-documents-list",
        ),
    ]
)
31 changes: 31 additions & 0 deletions backend/prompt_studio/prompt_studio_document_manager_v2/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import Optional

from django.db.models import QuerySet
from prompt_studio.prompt_studio_document_manager_v2.serializers import (
PromptStudioDocumentManagerSerializer,
)
from prompt_studio.prompt_studio_output_manager_v2.constants import (
PromptStudioOutputManagerKeys,
)
from rest_framework import viewsets
from rest_framework.versioning import URLPathVersioning
from utils.filtering import FilterHelper

from .models import DocumentManager


class PromptStudioDocumentManagerView(viewsets.ModelViewSet):
    """Model viewset for ``DocumentManager`` records scoped by a tool filter."""

    versioning_class = URLPathVersioning
    queryset = DocumentManager.objects.all()
    serializer_class = PromptStudioDocumentManagerSerializer

    def get_queryset(self) -> Optional[QuerySet]:
        """Return the documents matching the filter built from the request.

        Filter arguments are produced by ``FilterHelper.build_filter_args``
        from the request and the ``TOOL_ID`` key. When no filter arguments
        are produced an empty queryset is returned instead of ``None``: the
        result is still "no documents", but code that needs a real queryset
        (object lookup, pagination) no longer fails on ``None``.
        """
        filter_args = FilterHelper.build_filter_args(
            self.request,
            PromptStudioOutputManagerKeys.TOOL_ID,
        )
        if not filter_args:
            return DocumentManager.objects.none()
        return DocumentManager.objects.filter(**filter_args)
Empty file.
5 changes: 5 additions & 0 deletions backend/prompt_studio/prompt_studio_index_manager_v2/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.contrib import admin

from .models import IndexManager

# Register IndexManager with the Django admin site (no custom ModelAdmin is supplied).
admin.site.register(IndexManager)
5 changes: 5 additions & 0 deletions backend/prompt_studio/prompt_studio_index_manager_v2/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class PromptStudioIndexManagerConfig(AppConfig):
    """Django app configuration for the prompt studio index manager (v2)."""

    name = "prompt_studio.prompt_studio_index_manager_v2"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class IndexManagerKeys:
    """String constants naming the ``IndexManager`` relation fields."""

    DOCUMENT_MANAGER = "document_manager"
    PROFILE_MANAGER = "profile_manager"
130 changes: 130 additions & 0 deletions backend/prompt_studio/prompt_studio_index_manager_v2/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import json
import logging
import uuid

from account_v2.models import User
from django.db import models
from django.db.models.signals import pre_delete
from django.dispatch import receiver
from prompt_studio.prompt_profile_manager_v2.models import ProfileManager
from prompt_studio.prompt_studio_core_v2.prompt_ide_base_tool import PromptIdeBaseTool
from prompt_studio.prompt_studio_document_manager_v2.models import DocumentManager
from unstract.sdk.constants import LogLevel
from unstract.sdk.vector_db import VectorDB
from utils.models.base_model import BaseModel
from utils.user_context import UserContext

logger = logging.getLogger(__name__)


class IndexManager(BaseModel):
    """Index bookkeeping for a document/profile pair.

    Stores the raw and summarize index ids plus a history of index ids for
    one ``DocumentManager`` under one ``ProfileManager``. The
    ``unique_document_manager_profile_manager_index`` constraint covers that
    pair of columns.
    """

    # Primary key, generated with uuid4 when no value is supplied.
    index_manager_id = models.UUIDField(
        default=uuid.uuid4, editable=False, primary_key=True
    )

    # Deleting the document cascades to its index records.
    document_manager = models.ForeignKey(
        DocumentManager,
        related_name="index_managers",
        on_delete=models.CASCADE,
        blank=False,
        null=False,
        editable=False,
    )

    # Set to NULL (record kept) when the profile is deleted.
    profile_manager = models.ForeignKey(
        ProfileManager,
        related_name="index_managers",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        editable=False,
    )

    raw_index_id = models.CharField(
        blank=True,
        null=True,
        editable=False,
        db_comment="Field to store the raw index id",
    )

    summarize_index_id = models.CharField(
        blank=True,
        null=True,
        editable=False,
        db_comment="Field to store the summarize index id",
    )

    index_ids_history = models.JSONField(
        blank=False,
        null=False,
        default=list,
        db_comment="List of index ids",
    )

    # Audit references; they are set to NULL when the referenced user is deleted.
    created_by = models.ForeignKey(
        User,
        related_name="index_managers_created",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        editable=False,
    )

    modified_by = models.ForeignKey(
        User,
        related_name="index_managers_modified",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        editable=False,
    )

    class Meta:
        db_table = "index_manager_v2"
        verbose_name = "Index Manager"
        verbose_name_plural = "Index Managers"
        constraints = [
            models.UniqueConstraint(
                name="unique_document_manager_profile_manager_index",
                fields=["document_manager", "profile_manager"],
            ),
        ]


def delete_from_vector_db(index_ids_history, vector_db_instance_id):
    """Delete every index id in ``index_ids_history`` from a vector DB.

    A ``VectorDB`` is built for the adapter instance ``vector_db_instance_id``
    using a ``PromptIdeBaseTool`` bound to the current organization. Deletion
    is best-effort: a failure for one id is logged and the remaining ids are
    still processed.
    """
    org_id = UserContext.get_organization_identifier()
    vector_db = VectorDB(
        tool=PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id),
        adapter_instance_id=vector_db_instance_id,
    )
    for index_id in index_ids_history:
        logger.debug(f"Deleting from VectorDB - index id: {index_id}")
        try:
            vector_db.delete(ref_doc_id=index_id)
        except Exception as e:
            # Best-effort cleanup: record the failure and move on to the next id.
            logger.error(f"Error deleting index: {index_id} - {e}")


# Function will be executed every time an instance of IndexManager is deleted.
@receiver(pre_delete, sender=IndexManager)
def perform_vector_db_cleanup(sender, instance, **kwargs):
    """Remove an index record's ids from the vector DB before it is deleted.

    Runs as a ``pre_delete`` receiver for ``IndexManager``. The cleanup is
    best-effort: any error is logged as a warning (with stack trace) and is
    not re-raised, so it does not block the deletion.

    Args:
        sender: The model class sending the signal.
        instance: The ``IndexManager`` instance about to be deleted.
        **kwargs: Remaining signal arguments (unused).
    """
    logger.info("Performing vector db cleanup")
    logger.debug(f"Document tool id: {instance.document_manager.tool_id}")
    try:
        # Get the index_ids_history to clean up from the vector db.
        # The field defaults to a real list but may also hold a JSON-encoded
        # string, so decode only when the stored value is text.
        index_ids_history = instance.index_ids_history
        if isinstance(index_ids_history, (str, bytes, bytearray)):
            index_ids_history = json.loads(index_ids_history)
        if not index_ids_history:
            # Nothing was indexed for this record; skip the vector DB work.
            return
        vector_db_instance_id = str(instance.profile_manager.vector_store.id)
        delete_from_vector_db(index_ids_history, vector_db_instance_id)
    except Exception as e:
        logger.warning(
            "Error during vector DB cleanup for deleted document "
            "in prompt studio tool %s: %s",
            instance.document_manager.tool_id,
            e,
            exc_info=True,  # For additional stack trace
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import json
import logging

from django.db import transaction
from prompt_studio.prompt_profile_manager_v2.models import ProfileManager
from prompt_studio.prompt_studio_core_v2.exceptions import IndexingAPIError
from prompt_studio.prompt_studio_document_manager_v2.models import DocumentManager

from .models import IndexManager

logger = logging.getLogger(__name__)


class PromptStudioIndexHelper:
    """Static helpers for recording index ids on ``IndexManager`` records."""

    @staticmethod
    def handle_index_manager(
        document_id: str,
        is_summary: bool,
        profile_manager: ProfileManager,
        doc_id: str,
    ) -> IndexManager:
        """Create or update the index record for a document and profile.

        The record for the (document, profile) pair is fetched or created,
        ``doc_id`` is stored as its raw or summarize index id, and ``doc_id``
        is appended to the record's index id history if not already present.
        All database work happens inside one atomic transaction.

        Args:
            document_id: Primary key of the ``DocumentManager`` to index.
            is_summary: When true ``doc_id`` is stored in
                ``summarize_index_id``, otherwise in ``raw_index_id``.
            profile_manager: Profile the index belongs to.
            doc_id: Index id to record.

        Returns:
            The updated ``IndexManager`` record.

        Raises:
            IndexingAPIError: If any step fails; the original exception is
                chained as the cause.
        """
        try:
            with transaction.atomic():
                document: DocumentManager = DocumentManager.objects.get(pk=document_id)

                index_id = "summarize_index_id" if is_summary else "raw_index_id"

                args: dict[str, object] = {
                    "document_manager": document,
                    "profile_manager": profile_manager,
                }

                # Create or get the existing record for this document and
                # profile combo
                index_manager, success = IndexManager.objects.get_or_create(**args)

                if success:
                    logger.info(
                        f"Index manager doc_id: {doc_id} for "
                        f"profile {profile_manager.profile_id} created"
                    )
                else:
                    logger.info(
                        f"Index manager doc_id: {doc_id} for "
                        f"profile {profile_manager.profile_id} updated"
                    )

                # The history is a plain list on a fresh record (field
                # default) and a JSON-encoded string once written below.
                index_ids = index_manager.index_ids_history
                if not index_ids:
                    index_ids_list = []
                elif isinstance(index_ids, str):
                    index_ids_list = json.loads(index_ids)
                else:
                    index_ids_list = list(index_ids)

                # Test membership on the decoded list; testing the raw JSON
                # string would match substrings of other ids.
                if doc_id not in index_ids_list:
                    index_ids_list.append(doc_id)

                args[index_id] = doc_id
                args["index_ids_history"] = json.dumps(index_ids_list)

                # Update the record with the index id
                IndexManager.objects.filter(
                    index_manager_id=index_manager.index_manager_id
                ).update(**args)

                # ``update()`` only reports a row count; reload so the
                # returned instance reflects the stored values.
                index_manager.refresh_from_db()
                return index_manager
        except Exception as e:
            # ``transaction.atomic()`` has already rolled back on the way out;
            # a manual rollback is unnecessary and is rejected by Django when
            # an outer atomic block is active.
            raise IndexingAPIError("Error updating indexing status") from e
Loading
Loading