diff --git a/backend/api_v2/api_deployment_views.py b/backend/api_v2/api_deployment_views.py
index c820363cf..580c7494f 100644
--- a/backend/api_v2/api_deployment_views.py
+++ b/backend/api_v2/api_deployment_views.py
@@ -54,6 +54,7 @@ def post(
         include_metadata = serializer.validated_data.get(ApiExecution.INCLUDE_METADATA)
         include_metrics = serializer.validated_data.get(ApiExecution.INCLUDE_METRICS)
         use_file_history = serializer.validated_data.get(ApiExecution.USE_FILE_HISTORY)
+        tag_names = serializer.validated_data.get(ApiExecution.TAGS)
         if not file_objs or len(file_objs) == 0:
             raise InvalidAPIRequest("File shouldn't be empty")
         response = DeploymentHelper.execute_workflow(
@@ -64,6 +65,7 @@ def post(
             include_metadata=include_metadata,
             include_metrics=include_metrics,
             use_file_history=use_file_history,
+            tag_names=tag_names,
         )
         if "error" in response and response["error"]:
             return Response(
diff --git a/backend/api_v2/constants.py b/backend/api_v2/constants.py
index f6a980303..ec7dfd0ea 100644
--- a/backend/api_v2/constants.py
+++ b/backend/api_v2/constants.py
@@ -7,3 +7,4 @@ class ApiExecution:
    INCLUDE_METRICS: str = "include_metrics"
    USE_FILE_HISTORY: str = "use_file_history"  # Undocumented parameter
    EXECUTION_ID: str = "execution_id"
+    TAGS: str = "tags"
diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py
index acdb6f673..95bda00e1 100644
--- a/backend/api_v2/deployment_helper.py
+++ b/backend/api_v2/deployment_helper.py
@@ -17,6 +17,7 @@
 from rest_framework.request import Request
 from rest_framework.serializers import Serializer
 from rest_framework.utils.serializer_helpers import ReturnDict
+from tags.models import Tag
 from utils.constants import Account, CeleryQueue
 from utils.local_context import StateStore
 from workflow_manager.endpoint_v2.destination import DestinationConnector
@@ -138,6 +139,7 @@ def execute_workflow(
        include_metadata: bool = False,
        include_metrics: bool = False,
        use_file_history: bool = False,
+        tag_names: Optional[list[str]] = None,
    ) -> ReturnDict:
        """Execute workflow by api.
@@ -147,16 +149,19 @@
            file_obj (UploadedFile): input file
            use_file_history (bool): Use FileHistory table to return results on
                already processed files. Defaults to False
+            tag_names (Optional[list[str]]): List of tag names to associate
+                with the execution. Defaults to None.

        Returns:
            ReturnDict: execution status/ result
        """
        workflow_id = api.workflow.id
        pipeline_id = api.id
+        tags = Tag.bulk_get_or_create(tag_names=tag_names or [])
        workflow_execution = WorkflowExecutionServiceHelper.create_workflow_execution(
            workflow_id=workflow_id,
            pipeline_id=pipeline_id,
            mode=WorkflowExecution.Mode.QUEUE,
+            tags=tags,
        )
        execution_id = workflow_execution.id
diff --git a/backend/api_v2/serializers.py b/backend/api_v2/serializers.py
index 9fffc748a..24bba28cc 100644
--- a/backend/api_v2/serializers.py
+++ b/backend/api_v2/serializers.py
@@ -15,6 +15,7 @@
    Serializer,
    ValidationError,
 )
+from tags.serializers import TagParamsSerializer
 from utils.serializer.integrity_error_mixin import IntegrityErrorMixin
 from workflow_manager.workflow_v2.exceptions import ExecutionDoesNotExistError
 from workflow_manager.workflow_v2.models.execution import WorkflowExecution
@@ -99,7 +100,7 @@ def to_representation(self, instance: APIKey) -> OrderedDict[str, Any]:
        return representation


-class ExecutionRequestSerializer(Serializer):
+class ExecutionRequestSerializer(TagParamsSerializer):
    """Execution request serializer.

    Attributes:
@@ -110,6 +111,8 @@ class ExecutionRequestSerializer(Serializer):
        use_file_history (bool): Flag to use FileHistory to save and retrieve
            responses quickly. This is undocumented to the user and can be
            helpful for demos.
+        tags (str): Comma-separated list of tags to associate with the
+            execution, e.g. 'tag1,tag2-name,tag3_name'
    """

    timeout = IntegerField(
diff --git a/backend/backend/settings/base.py b/backend/backend/settings/base.py
index c9614c1dc..940808699 100644
--- a/backend/backend/settings/base.py
+++ b/backend/backend/settings/base.py
@@ -210,6 +210,7 @@ def get_required_setting(
    "django.contrib.messages",
    "django.contrib.staticfiles",
    "django.contrib.admindocs",
    # Third party apps should go below this line,
+    "django_filters",
    "rest_framework",
    # Connector OAuth
@@ -226,8 +227,6 @@ def get_required_setting(
    "commands",
    # health checks
    "health",
-)
-v2_apps = (
    "migrating.v2",
    "connector_auth_v2",
    "tenant_account_v2",
@@ -250,8 +249,8 @@ def get_required_setting(
    "prompt_studio.prompt_studio_output_manager_v2",
    "prompt_studio.prompt_studio_document_manager_v2",
    "prompt_studio.prompt_studio_index_manager_v2",
+    "tags",
 )
-SHARED_APPS += v2_apps

 TENANT_APPS = []

 INSTALLED_APPS = list(SHARED_APPS) + [
@@ -432,6 +431,10 @@ def get_required_setting(
    "DEFAULT_PERMISSION_CLASSES": [],  # TODO: Update once auth is figured
    "TEST_REQUEST_DEFAULT_FORMAT": "json",
    "EXCEPTION_HANDLER": "middleware.exception.drf_logging_exc_handler",
+    "DEFAULT_FILTER_BACKENDS": [
+        "django_filters.rest_framework.DjangoFilterBackend",
+        "rest_framework.filters.OrderingFilter",
+    ],
 }

 # These paths will work without authentication
diff --git a/backend/backend/urls_v2.py b/backend/backend/urls_v2.py
index fa222aecb..0a8bd0f95 100644
--- a/backend/backend/urls_v2.py
+++ b/backend/backend/urls_v2.py
@@ -58,4 +58,5 @@
        UrlPathConstants.PROMPT_STUDIO,
        include("prompt_studio.prompt_studio_index_manager_v2.urls"),
    ),
+    path("tags/", include("tags.urls")),
 ]
diff --git a/backend/pdm.lock b/backend/pdm.lock
index c43b55cef..4afb0d994 100644
--- a/backend/pdm.lock
+++ b/backend/pdm.lock
@@ -2,10 +2,10 @@
 # It is not intended for manual editing.
[metadata] -groups = ["default", "dev", "deploy", "test"] +groups = ["default", "deploy", "dev", "test"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.2" -content_hash = "sha256:f51602a26a2213a6f6c221c02cce491ccf4d15bc84a5775421f081ce5dc65512" +content_hash = "sha256:8840d46b34ffa521b5a4d19a7e22636a1e34669bfb52dac91d8bf6ffa2dce6f3" [[package]] name = "adlfs" @@ -190,7 +190,7 @@ files = [ [[package]] name = "anthropic" -version = "0.43.0" +version = "0.45.0" requires_python = ">=3.8" summary = "The official Python library for the anthropic API" groups = ["default", "dev"] @@ -204,26 +204,26 @@ dependencies = [ "typing-extensions<5,>=4.10", ] files = [ - {file = "anthropic-0.43.0-py3-none-any.whl", hash = "sha256:f748a703f77b3244975e1aace3a935840dc653a4714fb6bba644f97cc76847b4"}, - {file = "anthropic-0.43.0.tar.gz", hash = "sha256:06801f01d317a431d883230024318d48981758058bf7e079f33fb11f64b5a5c1"}, + {file = "anthropic-0.45.0-py3-none-any.whl", hash = "sha256:f36aff71d2c232945e64d1970be68a91b05a2ef5e3afa6c1ff195c3303a95ad3"}, + {file = "anthropic-0.45.0.tar.gz", hash = "sha256:4e8541dc355332090bfc51b84549c19b649a13a23dbd6bd68e1d012e08551025"}, ] [[package]] name = "anthropic" -version = "0.43.0" +version = "0.45.0" extras = ["bedrock", "vertex"] requires_python = ">=3.8" summary = "The official Python library for the anthropic API" groups = ["default", "dev"] dependencies = [ - "anthropic==0.43.0", + "anthropic==0.45.0", "boto3>=1.28.57", "botocore>=1.31.57", "google-auth<3,>=2", ] files = [ - {file = "anthropic-0.43.0-py3-none-any.whl", hash = "sha256:f748a703f77b3244975e1aace3a935840dc653a4714fb6bba644f97cc76847b4"}, - {file = "anthropic-0.43.0.tar.gz", hash = "sha256:06801f01d317a431d883230024318d48981758058bf7e079f33fb11f64b5a5c1"}, + {file = "anthropic-0.45.0-py3-none-any.whl", hash = "sha256:f36aff71d2c232945e64d1970be68a91b05a2ef5e3afa6c1ff195c3303a95ad3"}, + {file = "anthropic-0.45.0.tar.gz", hash = "sha256:4e8541dc355332090bfc51b84549c19b649a13a23dbd6bd68e1d012e08551025"}, ] [[package]] @@ -440,7 +440,7 @@ files = [ [[package]] name = "azure-storage-blob" -version = "12.24.0" +version = "12.24.1" requires_python = ">=3.8" summary = "Microsoft Azure Blob Storage Client Library for Python" groups = ["default", "dev"] @@ -451,8 +451,8 @@ dependencies = [ "typing-extensions>=4.6.0", ] files = [ - {file = "azure_storage_blob-12.24.0-py3-none-any.whl", hash = "sha256:4f0bb4592ea79a2d986063696514c781c9e62be240f09f6397986e01755bc071"}, - {file = "azure_storage_blob-12.24.0.tar.gz", hash = "sha256:eaaaa1507c8c363d6e1d1342bd549938fdf1adec9b1ada8658c8f5bf3aea844e"}, + {file = "azure_storage_blob-12.24.1-py3-none-any.whl", hash = "sha256:77fb823fdbac7f3c11f7d86a5892e2f85e161e8440a7489babe2195bf248f09e"}, + {file = "azure_storage_blob-12.24.1.tar.gz", hash = "sha256:052b2a1ea41725ba12e2f4f17be85a54df1129e13ea0321f5a2fcc851cbf47d4"}, ] [[package]] @@ -608,13 +608,13 @@ files = [ [[package]] name = "cachetools" -version = "5.5.0" +version = "5.5.1" requires_python = ">=3.7" summary = "Extensible memoizing collections and decorators" groups = ["default", "dev"] files = [ - {file = "cachetools-5.5.0-py3-none-any.whl", hash = "sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292"}, - {file = "cachetools-5.5.0.tar.gz", hash = "sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a"}, + {file = "cachetools-5.5.1-py3-none-any.whl", hash = "sha256:b76651fdc3b24ead3c648bbdeeb940c1b04d365b38b4af66788f9ec4a81d42bb"}, + {file = 
"cachetools-5.5.1.tar.gz", hash = "sha256:70f238fbba50383ef62e55c6aff6d9673175fe59f7c6782c7a0b9e38f4a9df95"}, ] [[package]] @@ -997,6 +997,20 @@ files = [ {file = "django_cors_headers-4.3.1-py3-none-any.whl", hash = "sha256:0b1fd19297e37417fc9f835d39e45c8c642938ddba1acce0c1753d3edef04f36"}, ] +[[package]] +name = "django-filter" +version = "24.3" +requires_python = ">=3.8" +summary = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +groups = ["default"] +dependencies = [ + "Django>=4.2", +] +files = [ + {file = "django_filter-24.3-py3-none-any.whl", hash = "sha256:c4852822928ce17fb699bcfccd644b3574f1a2d80aeb2b4ff4f16b02dd49dc64"}, + {file = "django_filter-24.3.tar.gz", hash = "sha256:d8ccaf6732afd21ca0542f6733b11591030fa98669f8d15599b358e24a2cd9c3"}, +] + [[package]] name = "django-log-request-id" version = "2.1.0" @@ -1200,13 +1214,13 @@ files = [ [[package]] name = "filelock" -version = "3.16.1" -requires_python = ">=3.8" +version = "3.17.0" +requires_python = ">=3.9" summary = "A platform independent file lock." groups = ["default", "dev"] files = [ - {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, - {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, + {file = "filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338"}, + {file = "filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e"}, ] [[package]] @@ -1369,7 +1383,7 @@ files = [ [[package]] name = "google-api-core" -version = "2.24.0" +version = "2.24.1rc1" requires_python = ">=3.7" summary = "Google API client core library" groups = ["default", "dev"] @@ -1381,27 +1395,27 @@ dependencies = [ "requests<3.0.0.dev0,>=2.18.0", ] files = [ - {file = "google_api_core-2.24.0-py3-none-any.whl", hash = "sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9"}, - {file = "google_api_core-2.24.0.tar.gz", hash = "sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf"}, + {file = "google_api_core-2.24.1rc1-py3-none-any.whl", hash = "sha256:92ee3eed90a397a9f4dd13c034a36cbe7dba2a58e01e5668619847b68a527b73"}, + {file = "google_api_core-2.24.1rc1.tar.gz", hash = "sha256:d1cf8265c8b0b171a87d84adc8709a5e48147ca529d6f96d6a2be613a195eb78"}, ] [[package]] name = "google-api-core" -version = "2.24.0" +version = "2.24.1rc1" extras = ["grpc"] requires_python = ">=3.7" summary = "Google API client core library" groups = ["default", "dev"] dependencies = [ - "google-api-core==2.24.0", + "google-api-core==2.24.1rc1", "grpcio-status<2.0.dev0,>=1.33.2", "grpcio-status<2.0.dev0,>=1.49.1; python_version >= \"3.11\"", "grpcio<2.0dev,>=1.33.2", "grpcio<2.0dev,>=1.49.1; python_version >= \"3.11\"", ] files = [ - {file = "google_api_core-2.24.0-py3-none-any.whl", hash = "sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9"}, - {file = "google_api_core-2.24.0.tar.gz", hash = "sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf"}, + {file = "google_api_core-2.24.1rc1-py3-none-any.whl", hash = "sha256:92ee3eed90a397a9f4dd13c034a36cbe7dba2a58e01e5668619847b68a527b73"}, + {file = "google_api_core-2.24.1rc1.tar.gz", hash = "sha256:d1cf8265c8b0b171a87d84adc8709a5e48147ca529d6f96d6a2be613a195eb78"}, ] [[package]] @@ -1471,7 +1485,7 @@ files = [ [[package]] name = "google-cloud-aiplatform" -version = 
"1.77.0" +version = "1.78.0" requires_python = ">=3.8" summary = "Vertex AI API client library" groups = ["default", "dev"] @@ -1490,8 +1504,8 @@ dependencies = [ "typing-extensions", ] files = [ - {file = "google_cloud_aiplatform-1.77.0-py2.py3-none-any.whl", hash = "sha256:e9dd1bcb1b9a85eddd452916cd6ad1d9ce2d487772a9e45b1814aa0ac5633689"}, - {file = "google_cloud_aiplatform-1.77.0.tar.gz", hash = "sha256:1e5b77fe6c7f276d7aae65bcf08a273122a71f6c4af1f43cf45821f603a74080"}, + {file = "google_cloud_aiplatform-1.78.0-py2.py3-none-any.whl", hash = "sha256:e2663b715bdeb5f4c9bf72defc5bd9abdb182048b012b83231dd0708dbc8b7ba"}, + {file = "google_cloud_aiplatform-1.78.0.tar.gz", hash = "sha256:c42a8e9981afb7964d14c3109e1eae0892785c746235acb1f990cdfd40ce9d13"}, ] [[package]] @@ -1887,13 +1901,13 @@ files = [ [[package]] name = "hpack" -version = "4.0.0" -requires_python = ">=3.6.1" -summary = "Pure-Python HPACK header compression" +version = "4.1.0" +requires_python = ">=3.9" +summary = "Pure-Python HPACK header encoding" groups = ["default", "dev"] files = [ - {file = "hpack-4.0.0-py3-none-any.whl", hash = "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c"}, - {file = "hpack-4.0.0.tar.gz", hash = "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095"}, + {file = "hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496"}, + {file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"}, ] [[package]] @@ -1992,13 +2006,13 @@ files = [ [[package]] name = "hyperframe" -version = "6.0.1" -requires_python = ">=3.6.1" -summary = "HTTP/2 framing layer for Python" +version = "6.1.0" +requires_python = ">=3.9" +summary = "Pure-Python HTTP/2 framing" groups = ["default", "dev"] files = [ - {file = "hyperframe-6.0.1-py3-none-any.whl", hash = "sha256:0ec6bafd80d8ad2195c4f03aacba3a8265e57bc4cff261e802bf39970ed02a15"}, - {file = "hyperframe-6.0.1.tar.gz", hash = "sha256:ae510046231dc8e9ecb1a6586f63d2347bf4c8905914aa84ba585ae85f28a914"}, + {file = "hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5"}, + {file = "hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08"}, ] [[package]] @@ -2173,7 +2187,7 @@ files = [ [[package]] name = "llama-cloud" -version = "0.1.9" +version = "0.1.10" requires_python = "<4,>=3.8" summary = "" groups = ["default", "dev"] @@ -2183,8 +2197,8 @@ dependencies = [ "pydantic>=1.10", ] files = [ - {file = "llama_cloud-0.1.9-py3-none-any.whl", hash = "sha256:792ee316985bbf4dd0294007105a100489d4baba0bcc4f3e16284f0c01d832d4"}, - {file = "llama_cloud-0.1.9.tar.gz", hash = "sha256:fc03bd338a1da04b7607a44d82a62b3eb178d80af05a53653e801d6f8bb67df7"}, + {file = "llama_cloud-0.1.10-py3-none-any.whl", hash = "sha256:d91198ad92ea6c3a25757e5d6cb565b4bd6db385dc4fa596a725c0fb81a68f4e"}, + {file = "llama_cloud-0.1.10.tar.gz", hash = "sha256:56ffe8f2910c2047dd4eb1b13da31ee5f67321a000794eee559e0b56954d2f76"}, ] [[package]] @@ -2246,7 +2260,7 @@ files = [ [[package]] name = "llama-index-core" -version = "0.12.11" +version = "0.12.13" requires_python = "<4.0,>=3.9" summary = "Interface between LLMs and your data" groups = ["default", "dev"] @@ -2276,8 +2290,8 @@ dependencies = [ "wrapt", ] files = [ - {file = "llama_index_core-0.12.11-py3-none-any.whl", hash = "sha256:3b1e019c899e9e011dfa01c96b7e3f666e0c161035fbca6cb787b4c61e0c94db"}, - {file = 
"llama_index_core-0.12.11.tar.gz", hash = "sha256:9a41ca91167ea5eec9ebaac7f5e958b7feddbd8af3bfbf7c393a5edfb994d566"}, + {file = "llama_index_core-0.12.13-py3-none-any.whl", hash = "sha256:9708bb594bbddffd6ff0767242e49d8978d1ba60a2e62e071d9d123ad2f17e6f"}, + {file = "llama_index_core-0.12.13.tar.gz", hash = "sha256:77af0161246ce1de38efc17cb6438dfff9e9558af00bcfac7dd4d0b7325efa4b"}, ] [[package]] @@ -2343,17 +2357,17 @@ files = [ [[package]] name = "llama-index-indices-managed-llama-cloud" -version = "0.6.3" +version = "0.6.4" requires_python = "<4.0,>=3.9" summary = "llama-index indices llama-cloud integration" groups = ["default", "dev"] dependencies = [ - "llama-cloud>=0.1.5", + "llama-cloud<0.2.0,>=0.1.8", "llama-index-core<0.13.0,>=0.12.0", ] files = [ - {file = "llama_index_indices_managed_llama_cloud-0.6.3-py3-none-any.whl", hash = "sha256:7f125602f624a2d321b6a4130cd98df35eb8c15818a159390755b2c13068f4ce"}, - {file = "llama_index_indices_managed_llama_cloud-0.6.3.tar.gz", hash = "sha256:f09e4182cbc2a2bd75ae85cebb1681075247f0d91b931b094cac4315386ce87a"}, + {file = "llama_index_indices_managed_llama_cloud-0.6.4-py3-none-any.whl", hash = "sha256:d7e85844a2e343dacebdef424decab3f5fd6361e25b3ff2bdcfb18607c1a49c5"}, + {file = "llama_index_indices_managed_llama_cloud-0.6.4.tar.gz", hash = "sha256:0b45973cb2dc9702122006019bfb556dcabba31b0bdf79afc7b376ca8143df03"}, ] [[package]] @@ -2557,7 +2571,7 @@ files = [ [[package]] name = "llama-index-readers-file" -version = "0.4.3" +version = "0.4.4" requires_python = "<4.0,>=3.9" summary = "llama-index readers file integration" groups = ["default", "dev"] @@ -2569,8 +2583,8 @@ dependencies = [ "striprtf<0.0.27,>=0.0.26", ] files = [ - {file = "llama_index_readers_file-0.4.3-py3-none-any.whl", hash = "sha256:c669da967ea534e3af3660f9fd730c71c725288f5c57906bcce338414ebeee5c"}, - {file = "llama_index_readers_file-0.4.3.tar.gz", hash = "sha256:07514bebed7ce431c1b3ef9279d09aa3d1bba8e342d661860a033355b98fb33a"}, + {file = "llama_index_readers_file-0.4.4-py3-none-any.whl", hash = "sha256:01589a4895e2d4abad30294c9b0d2813520ee1f5164922ad92f11e64a1d65d6c"}, + {file = "llama_index_readers_file-0.4.4.tar.gz", hash = "sha256:e076b3fa1e68eea1594d47cec1f64b384fb6067f2697ca8aae22b4a21ad27ca7"}, ] [[package]] @@ -2685,7 +2699,7 @@ files = [ [[package]] name = "marshmallow" -version = "3.25.1" +version = "3.26.0" requires_python = ">=3.9" summary = "A lightweight library for converting complex datatypes to and from native Python datatypes." groups = ["default", "dev"] @@ -2693,8 +2707,8 @@ dependencies = [ "packaging>=17.0", ] files = [ - {file = "marshmallow-3.25.1-py3-none-any.whl", hash = "sha256:ec5d00d873ce473b7f2ffcb7104286a376c354cab0c2fa12f5573dab03e87210"}, - {file = "marshmallow-3.25.1.tar.gz", hash = "sha256:f4debda3bb11153d81ac34b0d582bf23053055ee11e791b54b4b35493468040a"}, + {file = "marshmallow-3.26.0-py3-none-any.whl", hash = "sha256:1287bca04e6a5f4094822ac153c03da5e214a0a60bcd557b140f3e66991b8ca1"}, + {file = "marshmallow-3.26.0.tar.gz", hash = "sha256:eb36762a1cc76d7abf831e18a3a1b26d3d481bbc74581b8e532a3d3a8115e1cb"}, ] [[package]] @@ -2958,22 +2972,22 @@ files = [ [[package]] name = "ollama" -version = "0.4.6" +version = "0.4.7" requires_python = "<4.0,>=3.8" summary = "The official Python client for Ollama." 
groups = ["default", "dev"] dependencies = [ - "httpx<0.28.0,>=0.27.0", + "httpx<0.29,>=0.27", "pydantic<3.0.0,>=2.9.0", ] files = [ - {file = "ollama-0.4.6-py3-none-any.whl", hash = "sha256:cbb4ebe009e10dd12bdd82508ab415fd131945e185753d728a7747c9ebe762e9"}, - {file = "ollama-0.4.6.tar.gz", hash = "sha256:b00717651c829f96094ed4231b9f0d87e33cc92dc235aca50aeb5a2a4e6e95b7"}, + {file = "ollama-0.4.7-py3-none-any.whl", hash = "sha256:85505663cca67a83707be5fb3aeff0ea72e67846cea5985529d8eca4366564a1"}, + {file = "ollama-0.4.7.tar.gz", hash = "sha256:891dcbe54f55397d82d289c459de0ea897e103b86a3f1fad0fdb1895922a75ff"}, ] [[package]] name = "openai" -version = "1.59.7" +version = "1.60.0" requires_python = ">=3.8" summary = "The official Python library for the openai API" groups = ["default", "dev"] @@ -2988,8 +3002,8 @@ dependencies = [ "typing-extensions<5,>=4.11", ] files = [ - {file = "openai-1.59.7-py3-none-any.whl", hash = "sha256:cfa806556226fa96df7380ab2e29814181d56fea44738c2b0e581b462c268692"}, - {file = "openai-1.59.7.tar.gz", hash = "sha256:043603def78c00befb857df9f0a16ee76a3af5984ba40cb7ee5e2f40db4646bf"}, + {file = "openai-1.60.0-py3-none-any.whl", hash = "sha256:df06c43be8018274980ac363da07d4b417bd835ead1c66e14396f6f15a0d5dda"}, + {file = "openai-1.60.0.tar.gz", hash = "sha256:7fa536cd4b644718645b874d2706e36dbbef38b327e42ca0623275da347ee1a9"}, ] [[package]] @@ -3280,16 +3294,16 @@ files = [ [[package]] name = "prompt-toolkit" -version = "3.0.48" -requires_python = ">=3.7.0" +version = "3.0.50" +requires_python = ">=3.8.0" summary = "Library for building powerful interactive command lines in Python" groups = ["default"] dependencies = [ "wcwidth", ] files = [ - {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, - {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, + {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, + {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, ] [[package]] @@ -3353,16 +3367,16 @@ files = [ [[package]] name = "proto-plus" -version = "1.25.0" +version = "1.26.0rc1" requires_python = ">=3.7" -summary = "Beautiful, Pythonic protocol buffers." 
+summary = "Beautiful, Pythonic protocol buffers" groups = ["default", "dev"] dependencies = [ "protobuf<6.0.0dev,>=3.19.0", ] files = [ - {file = "proto_plus-1.25.0-py3-none-any.whl", hash = "sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961"}, - {file = "proto_plus-1.25.0.tar.gz", hash = "sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91"}, + {file = "proto_plus-1.26.0rc1-py3-none-any.whl", hash = "sha256:a0ad6fbc2e194dbbb813edc22ee2e509a7c38df7ecea2fd2803bce0536eaf0f4"}, + {file = "proto_plus-1.26.0rc1.tar.gz", hash = "sha256:04eeceecd6a038285e2aa8996b53c045d04a568c5c48b7eaa79c097a4984a4c7"}, ] [[package]] @@ -3498,7 +3512,7 @@ files = [ [[package]] name = "pydantic" -version = "2.10.5" +version = "2.10.6" requires_python = ">=3.8" summary = "Data validation using Python type hints" groups = ["default", "dev"] @@ -3508,8 +3522,8 @@ dependencies = [ "typing-extensions>=4.12.2", ] files = [ - {file = "pydantic-2.10.5-py3-none-any.whl", hash = "sha256:4dd4e322dbe55472cb7ca7e73f4b63574eecccf2835ffa2af9021ce113c83c53"}, - {file = "pydantic-2.10.5.tar.gz", hash = "sha256:278b38dbbaec562011d659ee05f63346951b3a248a6f3642e1bc68894ea2b4ff"}, + {file = "pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584"}, + {file = "pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236"}, ] [[package]] @@ -3975,7 +3989,7 @@ files = [ [[package]] name = "qdrant-client" -version = "1.12.2" +version = "1.13.2" requires_python = ">=3.9" summary = "Client library for the Qdrant vector search engine" groups = ["default", "dev"] @@ -3990,8 +4004,8 @@ dependencies = [ "urllib3<3,>=1.26.14", ] files = [ - {file = "qdrant_client-1.12.2-py3-none-any.whl", hash = "sha256:a0ae500a46a679ff3521ba3f1f1cf3d72b57090a768cec65fc317066bcbac1e6"}, - {file = "qdrant_client-1.12.2.tar.gz", hash = "sha256:2777e09b3e89bb22bb490384d8b1fa8140f3915287884f18984f7031a346aba5"}, + {file = "qdrant_client-1.13.2-py3-none-any.whl", hash = "sha256:db97e759bd3f8d483a383984ba4c2a158eef56f2188d83df7771591d43de2201"}, + {file = "qdrant_client-1.13.2.tar.gz", hash = "sha256:c8cce87ce67b006f49430a050a35c85b78e3b896c0c756dafc13bdeca543ec13"}, ] [[package]] @@ -4010,17 +4024,18 @@ files = [ [[package]] name = "referencing" -version = "0.35.1" -requires_python = ">=3.8" +version = "0.36.1" +requires_python = ">=3.9" summary = "JSON Referencing + Python" groups = ["default", "dev"] dependencies = [ "attrs>=22.2.0", "rpds-py>=0.7.0", + "typing-extensions>=4.4.0; python_version < \"3.13\"", ] files = [ - {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, - {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, + {file = "referencing-0.36.1-py3-none-any.whl", hash = "sha256:363d9c65f080d0d70bc41c721dce3c7f3e77fc09f269cd5c8813da18069a6794"}, + {file = "referencing-0.36.1.tar.gz", hash = "sha256:ca2e6492769e3602957e9b831b94211599d2aade9477f5d44110d2530cf9aade"}, ] [[package]] @@ -4854,13 +4869,13 @@ files = [ [[package]] name = "tzdata" -version = "2024.2" +version = "2025.1" requires_python = ">=2" summary = "Provider of IANA time zone data" groups = ["default", "dev"] files = [ - {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, - {file = "tzdata-2024.2.tar.gz", hash = 
"sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, + {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, + {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, ] [[package]] diff --git a/backend/permissions/permission.py b/backend/permissions/permission.py index 02d62acb7..f6085e142 100644 --- a/backend/permissions/permission.py +++ b/backend/permissions/permission.py @@ -4,6 +4,7 @@ from rest_framework import permissions from rest_framework.request import Request from rest_framework.views import APIView +from utils.user_context import UserContext class IsOwner(permissions.BasePermission): @@ -14,6 +15,14 @@ def has_object_permission(self, request: Request, view: APIView, obj: Any) -> bo return True if obj.created_by == request.user else False +class IsOrganizationMember(permissions.BasePermission): + def has_object_permission(self, request: Request, view: APIView, obj: Any) -> bool: + user_organization = UserContext.get_organization() + if user_organization is None: + return False + return True if obj.organization == user_organization else False + + class IsOwnerOrSharedUser(permissions.BasePermission): """Custom permission to only allow owners and shared users of an object.""" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index f87d7b415..8db5dbdf6 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -52,6 +52,7 @@ dependencies = [ "azure-identity==1.16.0", "azure-mgmt-apimanagement==3.0.0", "croniter>=3.0.3", + "django-filter>=24.3", ] # <3.11.1 due to resolution error from Unstract SDK requires-python = ">=3.9,<3.11.1" diff --git a/backend/tags/__init__.py b/backend/tags/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/tags/apps.py b/backend/tags/apps.py new file mode 100644 index 000000000..1169a97e9 --- /dev/null +++ b/backend/tags/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class TagsConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "tags" diff --git a/backend/tags/helper.py b/backend/tags/helper.py new file mode 100644 index 000000000..d111d19ad --- /dev/null +++ b/backend/tags/helper.py @@ -0,0 +1,31 @@ +from tags.models import Tag +from workflow_manager.file_execution.models import WorkflowFileExecution +from workflow_manager.workflow_v2.models import WorkflowExecution + + +class TagHelper: + @staticmethod + def list_workflow_executions(tag: Tag): + """ + Lists all workflow executions that are tagged with the given tag. + + Args: + tag (str): The tag to filter workflow executions by. + + Returns: + QuerySet: A QuerySet containing the filtered WorkflowExecution objects. + """ + return WorkflowExecution.objects.filter(tags=tag) + + @staticmethod + def list_workflow_file_executions(tag: Tag): + """ + Lists all workflow file executions that are tagged with the given tag. + + Args: + tag (str): The tag to filter workflow file executions by. + + Returns: + QuerySet: A QuerySet containing the filtered WorkflowFileExecution objects. 
+ """ + return WorkflowFileExecution.objects.filter(workflow_execution__tags=tag) diff --git a/backend/tags/migrations/0001_initial.py b/backend/tags/migrations/0001_initial.py new file mode 100644 index 000000000..bad9070fa --- /dev/null +++ b/backend/tags/migrations/0001_initial.py @@ -0,0 +1,68 @@ +# Generated by Django 4.2.1 on 2025-01-16 10:19 + +import uuid + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ("account_v2", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="Tag", + fields=[ + ("created_at", models.DateTimeField(auto_now_add=True)), + ("modified_at", models.DateTimeField(auto_now=True)), + ( + "id", + models.UUIDField( + default=uuid.uuid4, + editable=False, + primary_key=True, + serialize=False, + ), + ), + ( + "name", + models.CharField( + db_comment="Unique name of the tag", max_length=50 + ), + ), + ( + "description", + models.TextField( + blank=True, db_comment="Description of the tag", null=True + ), + ), + ( + "organization", + models.ForeignKey( + blank=True, + db_comment="Foreign key reference to the Organization model.", + default=None, + null=True, + on_delete=django.db.models.deletion.CASCADE, + to="account_v2.organization", + ), + ), + ], + options={ + "verbose_name": "Tag", + "verbose_name_plural": "Tags", + "db_table": "tag", + }, + ), + migrations.AddConstraint( + model_name="tag", + constraint=models.UniqueConstraint( + fields=("name", "organization"), name="unique_tag_name_organization" + ), + ), + ] diff --git a/backend/tags/migrations/__init__.py b/backend/tags/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/tags/models.py b/backend/tags/models.py new file mode 100644 index 000000000..9154add7f --- /dev/null +++ b/backend/tags/models.py @@ -0,0 +1,79 @@ +import uuid + +from django.db import models +from utils.models.base_model import BaseModel +from utils.models.organization_mixin import ( + DefaultOrganizationManagerMixin, + DefaultOrganizationMixin, +) +from utils.user_context import UserContext + + +class TagModelManager(DefaultOrganizationManagerMixin, models.Manager): + def get_or_create_tags(self, tag_names: list[str]) -> list["Tag"]: + """ + Retrieves or creates tags based on a list of tag names. + + Args: + tag_names (list): A list of tag names to retrieve or create. + + Returns: + list: A list of Tag instances. + """ + organization = UserContext.get_organization() + if not organization: + raise ValueError( + "Organization context is required to retrieve or create tags." + ) + + tags: list[Tag] = [] + for tag_name in tag_names: + tag, _ = self.get_or_create( + name=tag_name, + organization=organization, + defaults={"description": f"Tag for {tag_name}"}, + ) + tags.append(tag) + return tags + + +class Tag(DefaultOrganizationMixin, BaseModel): + TAG_NAME_LENGTH = 50 + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + name = models.CharField( + max_length=TAG_NAME_LENGTH, db_comment="Unique name of the tag" + ) + description = models.TextField( + blank=True, null=True, db_comment="Description of the tag" + ) + + # Manager + objects = TagModelManager() + + @classmethod + def bulk_get_or_create(cls, tag_names: list[str]) -> list["Tag"]: + """ + Class method to retrieve or create multiple tags for the current organization. + + Args: + tag_names (list): A list of tag names to retrieve or create. 
+
+        Returns:
+            list: A list of Tag instances associated with the current organization.
+        """
+        return cls.objects.get_or_create_tags(tag_names)
+
+    def __str__(self):
+        return self.name
+
+    class Meta:
+        verbose_name = "Tag"
+        verbose_name_plural = "Tags"
+        db_table = "tag"
+        constraints = [
+            models.UniqueConstraint(
+                fields=["name", "organization"],
+                name="unique_tag_name_organization",
+            ),
+        ]
diff --git a/backend/tags/serializers.py b/backend/tags/serializers.py
new file mode 100644
index 000000000..0b96b95eb
--- /dev/null
+++ b/backend/tags/serializers.py
@@ -0,0 +1,61 @@
+import re
+
+from rest_framework import serializers
+from rest_framework.serializers import CharField, ValidationError
+from tags.models import Tag
+
+
+class TagSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = Tag
+        fields = ["id", "name", "description"]
+
+
+class TagParamsSerializer(serializers.Serializer):
+    # Currently limited to 1 tag per request to maintain compatibility with
+    # LLM Whisperer integration. Consider increasing the limit once LLM Whisperer
+    # supports multiple tags
+    MAX_TAGS_ALLOWED = 1
+    tags = CharField(
+        required=False,
+        allow_blank=True,
+        default="",
+        help_text="Comma-separated list of tag names (e.g. 'tag1,tag2-name,tag3_name')",
+    )
+
+    def validate_tags(self, value):
+        if not value:
+            return []
+
+        # Pattern allows letters, numbers, underscores, and hyphens
+        # Must start with a letter
+        tag_pattern = re.compile(r"^[a-zA-Z][a-zA-Z0-9_-]*$")
+
+        # Ensure value is a string
+        if not isinstance(value, str):
+            raise ValidationError("Tags must be a comma-separated string.")
+
+        tags = [tag.strip() for tag in value.split(",") if tag.strip()]
+
+        # Check maximum number of tags
+        if len(tags) > self.MAX_TAGS_ALLOWED:
+            raise ValidationError(
+                f"Maximum of {self.MAX_TAGS_ALLOWED} tag(s) allowed. "
+                f"You provided {len(tags)} tags."
+            )
+
+        # Validate individual tags
+        for tag in tags:
+            if len(tag) > Tag.TAG_NAME_LENGTH:
+                raise ValidationError(
+                    f"Tag '{tag}' exceeds the maximum length of {Tag.TAG_NAME_LENGTH}."
+                )
+
+            if not tag_pattern.match(tag):
+                raise ValidationError(
+                    f"Tag '{tag}' is invalid. Tags must start with a letter and "
+                    "can only contain letters, numbers, underscores, and hyphens."
+                )
+
+        return tags
diff --git a/backend/tags/urls.py b/backend/tags/urls.py
new file mode 100644
index 000000000..39a230294
--- /dev/null
+++ b/backend/tags/urls.py
@@ -0,0 +1,42 @@
+from django.urls import path
+from tags.views import TagViewSet
+
+tag_list = TagViewSet.as_view(
+    {
+        "get": TagViewSet.list.__name__,
+    }
+)
+
+tag_detail = TagViewSet.as_view(
+    {
+        "get": TagViewSet.retrieve.__name__,
+    }
+)
+
+# Map the custom action for workflow executions
+tag_workflow_executions = TagViewSet.as_view(
+    {
+        "get": TagViewSet.workflow_executions.__name__,
+    }
+)
+
+tag_workflow_file_executions = TagViewSet.as_view(
+    {
+        "get": TagViewSet.workflow_file_executions.__name__,
+    }
+)
+
+urlpatterns = [
+    path("", tag_list, name="tag_list"),
+    path("<uuid:pk>/", tag_detail, name="tag_detail"),
+    path(
+        "<uuid:pk>/workflow-executions/",
+        tag_workflow_executions,
+        name="tag_workflow_executions",
+    ),
+    path(
+        "<uuid:pk>/workflow-file-executions/",
+        tag_workflow_file_executions,
+        name="tag_workflow_file_executions",
+    ),
+]
diff --git a/backend/tags/views.py b/backend/tags/views.py
new file mode 100644
index 000000000..aae6819fe
--- /dev/null
+++ b/backend/tags/views.py
@@ -0,0 +1,76 @@
+from django_filters.rest_framework import DjangoFilterBackend
+from permissions.permission import IsOrganizationMember
+from rest_framework import status, viewsets
+from rest_framework.decorators import action
+from rest_framework.filters import OrderingFilter
+from rest_framework.permissions import IsAuthenticated
+from rest_framework.response import Response
+from tags.helper import TagHelper
+from tags.models import Tag
+from tags.serializers import TagSerializer
+from utils.pagination import CustomPagination
+from workflow_manager.file_execution.serializers import WorkflowFileExecutionSerializer
+from workflow_manager.workflow_v2.serializers import WorkflowExecutionSerializer
+
+
+class TagViewSet(viewsets.ModelViewSet):
+    permission_classes = [IsAuthenticated, IsOrganizationMember]
+    serializer_class = TagSerializer
+    pagination_class = CustomPagination
+    ordering_fields = ["created_at"]
+    filter_backends = [DjangoFilterBackend, OrderingFilter]
+
+    def get_queryset(self):
+        """
+        Retrieve the base queryset for the Tag model, allowing additional
+        filtering or customization if needed. Defaults to using the manager's
+        get_queryset method.
+        """
+        return Tag.objects.get_queryset()
+
+    @action(detail=True, methods=["get"], url_path="workflow-executions")
+    def workflow_executions(self, request, pk=None):
+        """
+        Custom action to list all WorkflowExecution instances associated
+        with a specific Tag.
+        """
+        try:
+            tag = self.get_object()  # Fetch the tag based on the primary key
+            workflow_executions = TagHelper.list_workflow_executions(tag=tag)
+
+            # Apply pagination
+            page = self.paginate_queryset(workflow_executions)
+            if page is not None:
+                serializer = WorkflowExecutionSerializer(page, many=True)
+                return self.get_paginated_response(serializer.data)
+
+            serializer = WorkflowExecutionSerializer(workflow_executions, many=True)
+            return Response(serializer.data, status=status.HTTP_200_OK)
+        except Tag.DoesNotExist:
+            return Response(
+                {"detail": "Tag not found."}, status=status.HTTP_404_NOT_FOUND
+            )
+
+    @action(detail=True, methods=["get"], url_path="workflow-file-executions")
+    def workflow_file_executions(self, request, pk=None):
+        """
+        Custom action to list all WorkflowFileExecution instances associated
+        with a specific Tag.
+ """ + try: + tag = self.get_object() # Get the tag based on the primary key + workflow_file_executions = TagHelper.list_workflow_file_executions(tag=tag) + # Apply pagination + page = self.paginate_queryset(workflow_file_executions) + if page is not None: + serializer = WorkflowFileExecutionSerializer(page, many=True) + return self.get_paginated_response(serializer.data) + serializer = WorkflowFileExecutionSerializer( + workflow_file_executions, many=True + ) + return Response(serializer.data, status=status.HTTP_200_OK) + except Tag.DoesNotExist: + return Response( + {"detail": "Tag not found."}, status=status.HTTP_404_NOT_FOUND + ) diff --git a/backend/usage_v2/constants.py b/backend/usage_v2/constants.py index 8da54da05..37e6d3393 100644 --- a/backend/usage_v2/constants.py +++ b/backend/usage_v2/constants.py @@ -5,3 +5,22 @@ class UsageKeys: COMPLETION_TOKENS = "completion_tokens" TOTAL_TOKENS = "total_tokens" COST_IN_DOLLARS = "cost_in_dollars" + ADAPTER_INSTANCE_ID = "adapter_instance_id" + MODEL_NAME = "model_name" + + +class UsageRequestParams: + START_DATE = "start_date" + END_DATE = "end_date" + MODEL_NAME = "model_name" + ADAPTER_INSTANCE_ID = "adapter_instance_id" + EXECUTION_ID = "execution_id" + FILE_EXECUTION_ID = "file_execution_id" + EXECUTION_ID = "execution_id" + TAG = "tag" + TAGS = "tags" + RUN_ID = "run_id" + LIMIT = "limit" + OFFSET = "offset" + ORDER_BY = "order_by" + ORDER = "order" diff --git a/backend/usage_v2/dto.py b/backend/usage_v2/dto.py new file mode 100644 index 000000000..67170ada2 --- /dev/null +++ b/backend/usage_v2/dto.py @@ -0,0 +1,16 @@ +import datetime +from dataclasses import dataclass + + +@dataclass +class DateRange: + """ + Represents a validated date range with start and end dates. + + Attributes: + start_date: Beginning of the date range + end_date: End of the date range + """ + + start_date: datetime + end_date: datetime diff --git a/backend/usage_v2/enums.py b/backend/usage_v2/enums.py new file mode 100644 index 000000000..1e9274af5 --- /dev/null +++ b/backend/usage_v2/enums.py @@ -0,0 +1,35 @@ +from datetime import timedelta +from enum import Enum + +from django.utils import timezone +from usage_v2.exceptions import InvalidDateRange + + +class DateRangePresets(Enum): + LAST_7_DAYS = ("last_7_days", 7, "Last 7 Days") + LAST_30_DAYS = ("last_30_days", 30, "Last 30 Days") + + def __init__(self, key: str, days: int, display_name: str): + self.key = key + self.days = days + self.display_name = display_name + + def get_start_date(self): + return timezone.now() - timedelta(days=self.days) + + def get_end_date(self): + return timezone.now() + + def get_date_range(self): + return self.get_start_date(), self.get_end_date() + + @classmethod + def from_value(cls, value: str): + try: + return next(preset for preset in cls if preset.key == value) + except StopIteration: + valid_values = [preset.key for preset in cls] + raise InvalidDateRange( + f"Invalid date range value: '{value}'. 
" + f"Valid values are: {', '.join(valid_values)}" + ) diff --git a/backend/usage_v2/exceptions.py b/backend/usage_v2/exceptions.py new file mode 100644 index 000000000..80444989f --- /dev/null +++ b/backend/usage_v2/exceptions.py @@ -0,0 +1,11 @@ +from rest_framework.exceptions import APIException + + +class InvalidDatetime(APIException): + status_code = 400 + default_detail = "Invalid datetime format" + + +class InvalidDateRange(APIException): + status_code = 400 + default_detail = "Invalid date range" diff --git a/backend/usage_v2/filter.py b/backend/usage_v2/filter.py new file mode 100644 index 000000000..72e7ff37a --- /dev/null +++ b/backend/usage_v2/filter.py @@ -0,0 +1,82 @@ +from typing import Any + +from django.db.models import Q, QuerySet +from django_filters import rest_framework as filters +from usage_v2.models import Usage, UsageType +from usage_v2.utils import DateTimeProcessor +from workflow_manager.file_execution.models import WorkflowFileExecution +from workflow_manager.workflow_v2.models.execution import WorkflowExecution + + +class UsageFilter(filters.FilterSet): + created_at_gte = filters.DateTimeFilter(field_name="created_at", lookup_expr="gte") + created_at_lte = filters.DateTimeFilter(field_name="created_at", lookup_expr="lte") + date_range = filters.CharFilter(method="filter_date_range") + usage_type = filters.ChoiceFilter(choices=UsageType.choices) + tag = filters.CharFilter(method="filter_by_tag") + workflow_execution_id = filters.CharFilter(method="filter_by_execution_id") + adapter_instance_id = filters.CharFilter( + field_name="adapter_instance_id", lookup_expr="exact" + ) + + class Meta: + model = Usage + fields = { + "created_at": ["exact", "lt", "lte", "gt", "gte"], + "usage_type": ["exact"], + "adapter_instance_id": ["exact"], + } + + def filter_queryset(self, queryset: QuerySet) -> Any: + """ + Apply all filters to the queryset, including smart date handling. + """ + # First apply parent's filtering + queryset = super().filter_queryset(queryset) + + start_date = self.form.cleaned_data.get("created_at_gte") + end_date = self.form.cleaned_data.get("created_at_lte") + if start_date or end_date: + date_span = DateTimeProcessor.process_date_range(start_date, end_date) + queryset = queryset.filter( + created_at__range=[date_span.start_date, date_span.end_date] + ) + return queryset + + def filter_date_range(self, queryset: QuerySet, name: str, value: str) -> QuerySet: + """ + Filters Usages based on the provided date range. + """ + date_span = DateTimeProcessor.filter_date_range(value) + if date_span: + queryset = queryset.filter(created_at__gte=date_span.start_date) + return queryset + + def filter_by_tag(self, queryset: QuerySet, name: str, value: str) -> Any: + """ + Filters Usages based on the Tag ID or name. + """ + queryset: QuerySet = queryset.filter( + Q( + run_id__in=WorkflowFileExecution.objects.filter( + workflow_execution__in=WorkflowExecution.objects.filter( + tags__name=value + ) + ).values_list("id", flat=True) + ) + ) + return queryset + + def filter_by_execution_id( + self, queryset: QuerySet, name: str, value: Any + ) -> QuerySet: + """ + Filters Usages based on the execution ID. 
+ """ + return queryset.filter( + Q( + run_id__in=WorkflowFileExecution.objects.filter( + workflow_execution__id=value + ).values_list("id", flat=True) + ) + ) diff --git a/backend/usage_v2/helper.py b/backend/usage_v2/helper.py index fd217cd6f..e77668d1a 100644 --- a/backend/usage_v2/helper.py +++ b/backend/usage_v2/helper.py @@ -1,6 +1,8 @@ import logging +from datetime import datetime +from typing import Any -from django.db.models import Sum +from django.db.models import QuerySet, Sum from rest_framework.exceptions import APIException from .constants import UsageKeys @@ -62,3 +64,41 @@ def get_aggregated_token_count(run_id: str) -> dict: # Handle any other exceptions that might occur during the execution logger.error(f"An unexpected error occurred for run_id {run_id}: {str(e)}") raise APIException("Error while aggregating token counts") + + @staticmethod + def aggregate_usage_metrics(queryset: QuerySet) -> dict[str, Any]: + """ + Aggregate usage metrics from a queryset of Usage objects. + + Args: + queryset (QuerySet): A queryset of Usage objects. + + Returns: + dict: A dictionary containing aggregated usage metrics. + """ + return queryset.aggregate( + total_prompt_tokens=Sum("prompt_tokens"), + total_completion_tokens=Sum("completion_tokens"), + total_tokens=Sum("total_tokens"), + total_cost=Sum("cost_in_dollars"), + ) + + @staticmethod + def format_usage_response( + aggregated_data: dict[str, Any], start_date: datetime, end_date: datetime + ) -> dict[str, Any]: + """ + Format aggregated usage data into a structured response. + + Args: + aggregated_data (dict): Aggregated usage metrics. + start_date (datetime): Start date of the usage period. + end_date (datetime): End date of the usage period. + + Returns: + dict: Formatted response containing aggregated data and date range. 
+ """ + return { + "aggregated_data": aggregated_data, + "date_range": {"start_date": start_date, "end_date": end_date}, + } diff --git a/backend/usage_v2/migrations/0002_alter_usage_run_id.py b/backend/usage_v2/migrations/0002_alter_usage_run_id.py new file mode 100644 index 000000000..1aa9c0472 --- /dev/null +++ b/backend/usage_v2/migrations/0002_alter_usage_run_id.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.1 on 2025-01-20 06:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("usage_v2", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="usage", + name="run_id", + field=models.UUIDField( + blank=True, db_comment="Identifier for the run", null=True + ), + ), + ] diff --git a/backend/usage_v2/models.py b/backend/usage_v2/models.py index 8e836024e..a8db04490 100644 --- a/backend/usage_v2/models.py +++ b/backend/usage_v2/models.py @@ -42,7 +42,7 @@ class Usage(DefaultOrganizationMixin, BaseModel): adapter_instance_id = models.CharField( max_length=255, db_comment="Identifier for the adapter instance" ) - run_id = models.CharField( + run_id = models.UUIDField( max_length=255, null=True, blank=True, db_comment="Identifier for the run" ) usage_type = models.CharField( diff --git a/backend/usage_v2/serializers.py b/backend/usage_v2/serializers.py index eb1f2c326..a82329708 100644 --- a/backend/usage_v2/serializers.py +++ b/backend/usage_v2/serializers.py @@ -1,5 +1,23 @@ from rest_framework import serializers +from tags.serializers import TagParamsSerializer + +from .models import Usage class GetUsageSerializer(serializers.Serializer): run_id = serializers.CharField(required=True) + + +class UsageMetricsSerializer(TagParamsSerializer): + execution_id = serializers.CharField(required=False) + file_execution_id = serializers.CharField(required=False) + + +class UsageSerializer(serializers.ModelSerializer): + workflow_execution_id = serializers.UUIDField(read_only=True) + tag = serializers.CharField(read_only=True) + executed_at = serializers.DateTimeField(read_only=True) + + class Meta: + model = Usage + fields = "__all__" diff --git a/backend/usage_v2/urls.py b/backend/usage_v2/urls.py index a0fa0bfa8..f1a3182aa 100644 --- a/backend/usage_v2/urls.py +++ b/backend/usage_v2/urls.py @@ -4,6 +4,13 @@ from .views import UsageView get_token_usage = UsageView.as_view({"get": "get_token_usage"}) +aggregate = UsageView.as_view({"get": UsageView.aggregate.__name__}) +usage_list = UsageView.as_view({"get": UsageView.list.__name__}) +usage_detail = UsageView.as_view( + { + "get": UsageView.retrieve.__name__, + } +) # TODO: Refactor URL to avoid using action-specific verbs like get. 
@@ -14,5 +21,12 @@
            get_token_usage,
            name="get-token-usage",
        ),
+        path("", usage_list, name="usage_list"),
+        path(
+            "aggregate/",
+            aggregate,
+            name="aggregate",
+        ),
+        path("<uuid:pk>/", usage_detail, name="usage_detail"),
    ]
 )
diff --git a/backend/usage_v2/utils.py b/backend/usage_v2/utils.py
new file mode 100644
index 000000000..7bdd6ce6f
--- /dev/null
+++ b/backend/usage_v2/utils.py
@@ -0,0 +1,163 @@
+import logging
+from datetime import datetime, timedelta
+from typing import Optional, Union
+
+from dateutil.parser import parse
+from django.utils import timezone
+from isodate import parse_datetime
+from usage_v2.dto import DateRange
+from usage_v2.enums import DateRangePresets
+from usage_v2.exceptions import InvalidDatetime
+
+logger = logging.getLogger(__name__)
+
+
+class DateTimeProcessor:
+    DEFAULT_DAYS_RANGE = 1
+    MAX_DAYS_RANGE = 60
+
+    @staticmethod
+    def normalize_datetime(date_str: str) -> str:
+        """
+        Converts various datetime string formats to ISO 8601 format.
+
+        Args:
+            date_str: A string representing a date/time in any recognizable format
+
+        Returns:
+            str: Normalized datetime string in ISO 8601 format (YYYY-MM-DDTHH:MM:SS)
+
+        Raises:
+            InvalidDatetime: If the input string cannot be parsed as a valid datetime
+        """
+        try:
+            dt = parse(date_str)
+            return dt.strftime("%Y-%m-%dT%H:%M:%S")
+        except ValueError as e:
+            logger.error(f"Error parsing datetime: {e}")
+            raise InvalidDatetime(f"Invalid datetime format: {date_str}")
+
+    @classmethod
+    def parse_date_parameter(
+        cls,
+        date_param: Optional[Union[str, datetime]],
+        default_date: Optional[datetime] = None,
+    ) -> Optional[datetime]:
+        """
+        Parses and converts date parameters to datetime objects.
+
+        Args:
+            date_param: Date parameter that can be either a string or datetime object
+            default_date: Fallback datetime value if date_param is None
+
+        Returns:
+            Optional[datetime]: Parsed datetime object or default_date if no valid input
+
+        Examples:
+            >>> DateTimeProcessor.parse_date_parameter("2023-12-01T10:00:00")
+            datetime(2023, 12, 1, 10, 0)
+        """
+        if isinstance(date_param, str):
+            normalized_date = cls.normalize_datetime(date_param)
+            return parse_datetime(normalized_date)
+        return date_param or default_date
+
+    @classmethod
+    def process_date_range(
+        cls,
+        start_date_param: Optional[Union[str, datetime]] = None,
+        end_date_param: Optional[Union[str, datetime]] = None,
+    ) -> DateRange:
+        """
+        Processes and validates start and end dates with smart defaults.
+
+        Logic:
+        1. If no end_date: use current time
+        2. If no start_date: use end_date minus DEFAULT_DAYS_RANGE
+        3. Validates the resulting range
+
+        Args:
+            start_date_param: Optional start date
+            end_date_param: Optional end date
+
+        Returns:
+            DateRange: Object containing processed dates and validation status
+
+        Examples:
+            >>> # No dates provided - defaults to last 24 hours
+            >>> range = DateTimeProcessor.process_date_range()
+            >>> # Only end date - starts 24 hours before
+            >>> range = DateTimeProcessor.process_date_range(
+            ...     end_date_param="2023-12-01T00:00:00"
+            ... )
+            >>> # Both dates provided
+            >>> range = DateTimeProcessor.process_date_range(
+            ...     start_date_param="2023-11-01",
+            ...     end_date_param="2023-12-01"
) + """ + # Process end date first + end_date = cls._process_end_date(end_date_param) + + # Then process start date based on end date + start_date = cls._process_start_date(start_date_param, end_date) + + if timezone.is_naive(end_date): + end_date = timezone.make_aware(end_date) + if timezone.is_naive(start_date): + start_date = timezone.make_aware(start_date) + + # Validate the resulting range + return cls._validate_date_range(start_date, end_date) + + @classmethod + def filter_date_range(cls, value: str) -> Optional[DateRange]: + preset = DateRangePresets.from_value(value) + if not preset: + return None + start_date, end_date = preset.get_date_range() + print(start_date, end_date) + return cls._validate_date_range(start_date=start_date, end_date=end_date) + + @classmethod + def _process_end_date( + cls, end_date_param: Optional[Union[str, datetime]] + ) -> datetime: + """ + Processes end date with default to current time. + """ + if end_date_param: + return cls.parse_date_parameter(end_date_param) + return timezone.now() + + @classmethod + def _process_start_date( + cls, start_date_param: Optional[Union[str, datetime]], end_date: datetime + ) -> datetime: + """ + Processes start date with default to end_date minus DEFAULT_DAYS_RANGE. + """ + if start_date_param: + return cls.parse_date_parameter(start_date_param) + return end_date - timedelta(days=cls.DEFAULT_DAYS_RANGE) + + @classmethod + def _validate_date_range( + cls, start_date: datetime, end_date: datetime + ) -> DateRange: + """ + Validates the date range and returns a DateRange object. + """ + if start_date > timezone.now(): + raise InvalidDatetime("Start date cannot be in the future") + if start_date > end_date: + raise InvalidDatetime("Start date cannot be after end date") + date_diff = end_date - start_date + if date_diff.days > cls.MAX_DAYS_RANGE: + raise InvalidDatetime(f"Date range cannot exceed {cls.MAX_DAYS_RANGE} days") + if date_diff.days < 0: + raise InvalidDatetime("Invalid date range") + return DateRange( + start_date=start_date, + end_date=end_date, + ) diff --git a/backend/usage_v2/views.py b/backend/usage_v2/views.py index ce964abb8..03cffc800 100644 --- a/backend/usage_v2/views.py +++ b/backend/usage_v2/views.py @@ -1,13 +1,22 @@ import logging from django.http import HttpRequest +from django_filters.rest_framework import DjangoFilterBackend +from permissions.permission import IsOrganizationMember from rest_framework import status, viewsets from rest_framework.decorators import action +from rest_framework.filters import OrderingFilter +from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response +from usage_v2.filter import UsageFilter +from usage_v2.utils import DateTimeProcessor +from utils.pagination import CustomPagination +from utils.user_context import UserContext from .constants import UsageKeys from .helper import UsageHelper -from .serializers import GetUsageSerializer +from .models import Usage +from .serializers import GetUsageSerializer, UsageSerializer logger = logging.getLogger(__name__) @@ -15,6 +24,48 @@ class UsageView(viewsets.ModelViewSet): """Viewset for managing Usage-related operations.""" + permission_classes = [IsAuthenticated, IsOrganizationMember] + serializer_class = UsageSerializer + pagination_class = CustomPagination + filter_backends = [DjangoFilterBackend, OrderingFilter] + filterset_class = UsageFilter + ordering_fields = ["created_at"] + + def get_queryset(self): + """ + Returns a queryset filtered by the current user's organization. 
+ """ + user_organization = UserContext.get_organization() + queryset = Usage.objects.filter(organization=user_organization) + return queryset + + @action(detail=True, methods=["get"], url_path="aggregate") + def aggregate(self, request: HttpRequest) -> Response: + """ + Custom action to list Usage data for a given Tag, grouped by + WorkflowFileExecution. + """ + + date_range = DateTimeProcessor.process_date_range( + start_date_param=request.query_params.get("created_at_gte"), + end_date_param=request.query_params.get("created_at_lte"), + ) + date_range_param = request.query_params.get("date_range") + if date_range_param: + date_range = DateTimeProcessor.filter_date_range(date_range_param) + # Get filtered queryset + queryset = self.filter_queryset(self.get_queryset()).filter( + created_at__range=[date_range.start_date, date_range.end_date] + ) + + # Aggregate and prepare response + aggregated_data = UsageHelper.aggregate_usage_metrics(queryset) + response_data = UsageHelper.format_usage_response( + aggregated_data, date_range.start_date, date_range.end_date + ) + + return Response(status=status.HTTP_200_OK, data=response_data) + @action(detail=True, methods=["get"]) def get_token_usage(self, request: HttpRequest) -> Response: """Retrieves the aggregated token usage for a given run_id. diff --git a/backend/workflow_manager/endpoint_v2/source.py b/backend/workflow_manager/endpoint_v2/source.py index a030d7c70..194b321ce 100644 --- a/backend/workflow_manager/endpoint_v2/source.py +++ b/backend/workflow_manager/endpoint_v2/source.py @@ -598,13 +598,17 @@ def _copy_file_to_destination( seek_position += len(chunk) def add_file_to_volume( - self, input_file_path: str, workflow_file_execution: WorkflowFileExecution + self, + input_file_path: str, + workflow_file_execution: WorkflowFileExecution, + tags=list[str], ) -> str: """Add input file to execution directory. Args: input_file_path (str): source file workflow_file_execution: WorkflowFileExecution model + tags (list[str]): Tag names associated with the workflow execution. 
 
         Raises:
             InvalidSource: _description_
@@ -632,6 +636,7 @@ def add_file_to_volume(
             input_file_path=input_file_path,
             file_execution_id=workflow_file_execution.id,
             source_hash=file_content_hash,
+            tags=tags,
         )
         return file_name
 
diff --git a/backend/workflow_manager/file_execution/serializers.py b/backend/workflow_manager/file_execution/serializers.py
new file mode 100644
index 000000000..9232d7f25
--- /dev/null
+++ b/backend/workflow_manager/file_execution/serializers.py
@@ -0,0 +1,9 @@
+from rest_framework import serializers
+
+from .models import WorkflowFileExecution
+
+
+class WorkflowFileExecutionSerializer(serializers.ModelSerializer):
+    class Meta:
+        model = WorkflowFileExecution
+        fields = "__all__"
diff --git a/backend/workflow_manager/workflow_v2/execution.py b/backend/workflow_manager/workflow_v2/execution.py
index b3afceb03..5b1787690 100644
--- a/backend/workflow_manager/workflow_v2/execution.py
+++ b/backend/workflow_manager/workflow_v2/execution.py
@@ -4,6 +4,7 @@
 
 from account_v2.constants import Common
 from platform_settings_v2.platform_auth_service import PlatformAuthenticationService
+from tags.models import Tag
 from tool_instance_v2.models import ToolInstance
 from tool_instance_v2.tool_processor import ToolProcessor
 from unstract.tool_registry.dto import Tool
@@ -99,6 +100,7 @@ def __init__(
         self.pipeline_id = pipeline_id
         self.execution_id = str(workflow_execution.id)
         self.use_file_history = use_file_history
+        self.tags = workflow_execution.tag_names
         logger.info(
             f"Executing for Pipeline ID: {pipeline_id}, "
             f"workflow ID: {self.workflow_id}, execution ID: {self.execution_id}, "
@@ -117,6 +119,7 @@ def create_workflow_execution(
         log_events_id: Optional[str] = None,
         execution_id: Optional[str] = None,
         mode: tuple[str, str] = WorkflowExecution.Mode.INSTANT,
+        tags: Optional[list[Tag]] = None,
     ) -> WorkflowExecution:
         # Validating with existing execution
         existing_execution = cls.get_execution_instance_by_id(execution_id)
@@ -147,6 +150,8 @@ def create_workflow_execution(
         if execution_id:
             workflow_execution.id = execution_id
         workflow_execution.save()
+        if tags:
+            workflow_execution.tags.set(tags)
         return workflow_execution
 
     def update_execution(
diff --git a/backend/workflow_manager/workflow_v2/migrations/0005_workflowexecution_tags.py b/backend/workflow_manager/workflow_v2/migrations/0005_workflowexecution_tags.py
new file mode 100644
index 000000000..8845bbe3b
--- /dev/null
+++ b/backend/workflow_manager/workflow_v2/migrations/0005_workflowexecution_tags.py
@@ -0,0 +1,21 @@
+# Generated by Django 4.2.1 on 2025-01-16 10:23
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("tags", "0001_initial"),
+        ("workflow_v2", "0004_executionlog_file_execution"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="workflowexecution",
+            name="tags",
+            field=models.ManyToManyField(
+                blank=True, related_name="workflow_executions", to="tags.tag"
+            ),
+        ),
+    ]
diff --git a/backend/workflow_manager/workflow_v2/models/execution.py b/backend/workflow_manager/workflow_v2/models/execution.py
index c86a2a081..1f040d654 100644
--- a/backend/workflow_manager/workflow_v2/models/execution.py
+++ b/backend/workflow_manager/workflow_v2/models/execution.py
@@ -1,6 +1,7 @@
 import uuid
 
 from django.db import models
+from tags.models import Tag
 from utils.models.base_model import BaseModel
 
 EXECUTION_ERROR_LENGTH = 256
@@ -61,6 +62,17 @@ class Type(models.TextChoices):
     execution_time = models.FloatField(
         default=0, db_comment="execution time in seconds"
seconds" ) + tags = models.ManyToManyField(Tag, related_name="workflow_executions", blank=True) + + class Meta: + verbose_name = "Workflow Execution" + verbose_name_plural = "Workflow Executions" + db_table = "workflow_execution" + + @property + def tag_names(self) -> list[str]: + """Return a list of tag names associated with the workflow execution.""" + return list(self.tags.values_list("name", flat=True)) def __str__(self) -> str: return ( @@ -71,8 +83,3 @@ def __str__(self) -> str: f"status: {self.status}, " f"error message: {self.error_message})" ) - - class Meta: - verbose_name = "Workflow Execution" - verbose_name_plural = "Workflow Executions" - db_table = "workflow_execution" diff --git a/backend/workflow_manager/workflow_v2/workflow_helper.py b/backend/workflow_manager/workflow_v2/workflow_helper.py index 1afba1c03..4478ad359 100644 --- a/backend/workflow_manager/workflow_v2/workflow_helper.py +++ b/backend/workflow_manager/workflow_v2/workflow_helper.py @@ -236,7 +236,9 @@ def _process_file( # It should e uuid of workflow_file_execution file_execution_id = str(workflow_file_execution.id) file_name = source.add_file_to_volume( - input_file_path=input_file, workflow_file_execution=workflow_file_execution + input_file_path=input_file, + workflow_file_execution=workflow_file_execution, + tags=execution_service.tags, ) try: execution_service.file_execution_id = file_execution_id diff --git a/unstract/workflow-execution/src/unstract/workflow_execution/constants.py b/unstract/workflow-execution/src/unstract/workflow_execution/constants.py index af767f356..f7e309c98 100644 --- a/unstract/workflow-execution/src/unstract/workflow_execution/constants.py +++ b/unstract/workflow-execution/src/unstract/workflow_execution/constants.py @@ -43,6 +43,7 @@ class MetaDataKey: FILE_EXECUTION_ID = "file_execution_id" ORGANIZATION_ID = "organization_id" TOOL_METADATA = "tool_metadata" + TAGS = "tags" class ToolMetadataKey: diff --git a/unstract/workflow-execution/src/unstract/workflow_execution/execution_file_handler.py b/unstract/workflow-execution/src/unstract/workflow_execution/execution_file_handler.py index e9d565cf1..2c699b94d 100644 --- a/unstract/workflow-execution/src/unstract/workflow_execution/execution_file_handler.py +++ b/unstract/workflow-execution/src/unstract/workflow_execution/execution_file_handler.py @@ -104,7 +104,11 @@ def get_last_tool_metadata(self, metadata: dict[str, Any]) -> dict[str, Any]: return tool_metadata[-1] def add_metadata_to_volume( - self, input_file_path: str, file_execution_id: str, source_hash: str + self, + input_file_path: str, + file_execution_id: str, + source_hash: str, + tags: list[str], ) -> None: """Creating metadata for workflow. This method is responsible for creating metadata for the workflow. It takes the input file path and @@ -115,6 +119,7 @@ def add_metadata_to_volume( input_file_path (str): The path of the input file. file_execution_id (str): Unique execution id for the file. source_hash (str): The hash value of the source/input file. + tags (list[str]): Tag names associated with the workflow execution. Returns: None @@ -131,6 +136,7 @@ def add_metadata_to_volume( MetaDataKey.WORKFLOW_ID: str(self.workflow_id), MetaDataKey.EXECUTION_ID: str(self.execution_id), MetaDataKey.FILE_EXECUTION_ID: str(file_execution_id), + MetaDataKey.TAGS: tags, } if check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE): file_system = FileSystem(FileStorageType.WORKFLOW_EXECUTION)