From 5476ac3acbc71aa8bc8669dbced45668bb5556d0 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Tue, 27 Aug 2024 14:23:03 -0700 Subject: [PATCH] Python: Update model diagnostics (#8346) ### Motivation and Context We have a module that performs tracing for model invocations. However, that module only works with OpenAI. We need it to work for all AI connectors. This PR does the refactoring necessary for the module to become more generic. ### Description 1. Rename the tracing module to `model_diagnostics`. 2. Restructure and optimize code. 3. Add tracing to all AI connectors except the HuggingFace connector, because the HuggingFace connector will need some refactoring in the near future. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- python/.cspell.json | 4 +- .../.env.example | 4 +- .../services/anthropic_chat_completion.py | 82 +++--- .../services/azure_ai_inference_base.py | 3 + .../azure_ai_inference_chat_completion.py | 3 + .../google_ai/services/google_ai_base.py | 3 + .../services/google_ai_chat_completion.py | 2 + .../services/google_ai_text_completion.py | 2 + .../vertex_ai/services/vertex_ai_base.py | 3 + .../services/vertex_ai_chat_completion.py | 2 + .../services/vertex_ai_text_completion.py | 2 + .../ai/mistral_ai/services/mistral_ai_base.py | 16 ++ .../services/mistral_ai_chat_completion.py | 60 ++-- .../services/mistral_ai_text_embedding.py | 34 +-- .../ollama_prompt_execution_settings.py | 3 + .../ai/ollama/services/ollama_base.py | 3 + .../ollama/services/ollama_chat_completion.py | 5 + .../ollama/services/ollama_text_completion.py | 3 + .../services/open_ai_chat_completion_base.py | 2 +- .../services/open_ai_text_completion_base.py | 2 +- .../utils/telemetry/decorators.py | 265 ------------------ .../telemetry/model_diagnostics/__init__.py | 8 + .../telemetry/model_diagnostics/decorators.py | 232 +++++++++++++++ .../gen_ai_attributes.py} | 7 +- .../model_diagnostics_settings.py | 31 ++ python/tests/conftest.py | 10 +- .../test_mistralai_text_embeddings.py | 22 +- .../unit/utils/model_diagnostics/conftest.py | 91 ++++++ .../test_trace_chat_completion.py | 172 ++++++++++++ .../test_trace_text_completion.py | 150 ++++++++++ python/tests/unit/utils/test_tracing.py | 241 ---------------- 31 files changed, 854 insertions(+), 613 deletions(-) create mode 100644 python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py delete mode 100644 python/semantic_kernel/utils/telemetry/decorators.py create mode 100644 python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py create mode 100644 python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py rename python/semantic_kernel/utils/telemetry/{const.py => model_diagnostics/gen_ai_attributes.py} (83%) create mode 100644 python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py create mode 100644 python/tests/unit/utils/model_diagnostics/conftest.py create mode 100644 python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py create mode 100644 python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py delete mode 100644 
python/tests/unit/utils/test_tracing.py diff --git a/python/.cspell.json b/python/.cspell.json index 804c4ebfa4c6..949cce6e3c9a 100644 --- a/python/.cspell.json +++ b/python/.cspell.json @@ -55,6 +55,8 @@ "huggingface", "pytestmark", "contoso", - "opentelemetry" + "opentelemetry", + "SEMANTICKERNEL", + "OTEL" ] } \ No newline at end of file diff --git a/python/samples/demos/telemetry_with_application_insights/.env.example b/python/samples/demos/telemetry_with_application_insights/.env.example index 3ee18ae9e6b0..be404f15d7ff 100644 --- a/python/samples/demos/telemetry_with_application_insights/.env.example +++ b/python/samples/demos/telemetry_with_application_insights/.env.example @@ -1 +1,3 @@ -TELEMETRY_SAMPLE_CONNECTION_STRING="..." \ No newline at end of file +TELEMETRY_SAMPLE_CONNECTION_STRING="..." +SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS=true +SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE=true \ No newline at end of file diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index 3b9a7de99182..94c4c77d98ea 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -1,8 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. import logging +import sys from collections.abc import AsyncGenerator -from typing import Any +from typing import Any, ClassVar + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover from anthropic import AsyncAnthropic from anthropic.types import ( @@ -29,11 +35,9 @@ from semantic_kernel.contents.text_content import TextContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.contents.utils.finish_reason import FinishReason as SemanticKernelFinishReason -from semantic_kernel.exceptions.service_exceptions import ( - ServiceInitializationError, - ServiceResponseException, -) +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion # map finish reasons from Anthropic to Semantic Kernel ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP = { @@ -49,8 +53,10 @@ class AnthropicChatCompletion(ChatCompletionClientBase): """Antropic ChatCompletion class.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "anthropic" + async_client: AsyncAnthropic - + def __init__( self, ai_model_id: str | None = None, @@ -68,10 +74,10 @@ def __init__( service_id: Service ID tied to the execution settings. api_key: The optional API key to use. If provided will override, the env vars or .env file value. - async_client: An existing client to use. + async_client: An existing client to use. env_file_path: Use the environment settings file as a fallback - to environment variables. - env_file_encoding: The encoding of the environment settings file. + to environment variables. + env_file_encoding: The encoding of the environment settings file. 
""" try: anthropic_settings = AnthropicSettings.create( @@ -82,7 +88,7 @@ def __init__( ) except ValidationError as ex: raise ServiceInitializationError("Failed to create Anthropic settings.", ex) from ex - + if not anthropic_settings.chat_model_id: raise ServiceInitializationError("The Anthropic chat model ID is required.") @@ -97,12 +103,14 @@ def __init__( ai_model_id=anthropic_settings.chat_model_id, ) + @override + @trace_chat_completion(MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", **kwargs: Any, - ) -> list["ChatMessageContent"]: + ) -> list["ChatMessageContent"]: """Executes a chat completion request and returns the result. Args: @@ -127,22 +135,23 @@ async def get_chat_message_contents( raise ServiceResponseException( f"{type(self)} service failed to complete the prompt", ex, - ) from ex - + ) from ex + metadata: dict[str, Any] = {"id": response.id} # Check if usage exists and has a value, then add it to the metadata if hasattr(response, "usage") and response.usage is not None: metadata["usage"] = response.usage - return [self._create_chat_message_content(response, content_block, metadata) - for content_block in response.content] - + return [ + self._create_chat_message_content(response, content_block, metadata) for content_block in response.content + ] + async def get_streaming_chat_message_contents( self, chat_history: ChatHistory, - settings: PromptExecutionSettings, + settings: PromptExecutionSettings, **kwargs: Any, - ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: + ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: """Executes a streaming chat completion request and returns the result. Args: @@ -166,17 +175,18 @@ async def get_streaming_chat_message_contents( author_role = None metadata: dict[str, Any] = {"usage": {}, "id": None} content_block_idx = 0 - + async for stream_event in stream: if isinstance(stream_event, RawMessageStartEvent): author_role = stream_event.message.role metadata["usage"]["input_tokens"] = stream_event.message.usage.input_tokens metadata["id"] = stream_event.message.id elif isinstance(stream_event, (RawContentBlockDeltaEvent, RawMessageDeltaEvent)): - yield [self._create_streaming_chat_message_content(stream_event, - content_block_idx, - author_role, - metadata)] + yield [ + self._create_streaming_chat_message_content( + stream_event, content_block_idx, author_role, metadata + ) + ] elif isinstance(stream_event, ContentBlockStopEvent): content_block_idx += 1 @@ -187,21 +197,18 @@ async def get_streaming_chat_message_contents( ) from ex def _create_chat_message_content( - self, - response: Message, - content: TextBlock, - response_metadata: dict[str, Any] + self, response: Message, content: TextBlock, response_metadata: dict[str, Any] ) -> "ChatMessageContent": """Create a chat message content object.""" items: list[ITEM_TYPES] = [] - + if content.text: items.append(TextContent(text=content.text)) finish_reason = None if response.stop_reason: finish_reason = ANTHROPIC_TO_SEMANTIC_KERNEL_FINISH_REASON_MAP[response.stop_reason] - + return ChatMessageContent( inner_content=response, ai_model_id=self.ai_model_id, @@ -212,20 +219,20 @@ def _create_chat_message_content( ) def _create_streaming_chat_message_content( - self, - stream_event: RawContentBlockDeltaEvent | RawMessageDeltaEvent, - content_block_idx: int, - role: str | None = None, - metadata: dict[str, Any] = {} + self, + stream_event: RawContentBlockDeltaEvent | RawMessageDeltaEvent, + 
content_block_idx: int, + role: str | None = None, + metadata: dict[str, Any] = {}, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a choice.""" text_content = "" - + if stream_event.delta and hasattr(stream_event.delta, "text"): text_content = stream_event.delta.text - + items: list[STREAMING_ITEM_TYPES] = [StreamingTextContent(choice_index=content_block_idx, text=text_content)] - + finish_reason = None if isinstance(stream_event, RawMessageDeltaEvent): if stream_event.delta.stop_reason: @@ -246,4 +253,3 @@ def _create_streaming_chat_message_content( def get_prompt_execution_settings_class(self) -> "type[AnthropicChatPromptExecutionSettings]": """Create a request settings object.""" return AnthropicChatPromptExecutionSettings - diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py index 32550fa71697..3d64c38ce5bc 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_base.py @@ -3,6 +3,7 @@ import asyncio import contextlib from abc import ABC +from typing import ClassVar from azure.ai.inference.aio import ChatCompletionsClient, EmbeddingsClient @@ -14,6 +15,8 @@ class AzureAIInferenceBase(KernelBaseModel, ABC): """Azure AI Inference Chat Completion Service.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "azureai" + client: ChatCompletionsClient | EmbeddingsClient def __del__(self) -> None: diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index 92fe5bb2af71..b56562fc8a35 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -7,6 +7,7 @@ from functools import reduce from typing import TYPE_CHECKING, Any +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT if sys.version_info >= (3, 12): @@ -119,6 +120,8 @@ def __init__( ) # region Non-streaming + @override + @trace_chat_completion(AzureAIInferenceBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py index 91446835302d..5bbc19568bc1 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
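The Anthropic and Azure AI Inference hunks above repeat the adoption pattern used throughout this PR: the connector (or its base class) exposes a `MODEL_PROVIDER_NAME` class variable, and the non-streaming completion override is wrapped with `trace_chat_completion`. A minimal sketch of that pattern; `MyChatCompletion` and the provider name `"myprovider"` are hypothetical stand-ins, not part of this changeset:

```python
from typing import Any, ClassVar

from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion


class MyChatCompletion(ChatCompletionClientBase):
    """Hypothetical connector showing the ClassVar + decorator pattern."""

    MODEL_PROVIDER_NAME: ClassVar[str] = "myprovider"

    @trace_chat_completion(MODEL_PROVIDER_NAME)
    async def get_chat_message_contents(
        self,
        chat_history: ChatHistory,
        settings: PromptExecutionSettings,
        **kwargs: Any,
    ) -> list[ChatMessageContent]:
        # The decorator opens a span around this body and records request
        # and response attributes; the method only calls the underlying SDK.
        return []
```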
from abc import ABC +from typing import ClassVar from semantic_kernel.connectors.ai.google.google_ai.google_ai_settings import GoogleAISettings from semantic_kernel.kernel_pydantic import KernelBaseModel @@ -9,4 +10,6 @@ class GoogleAIBase(KernelBaseModel, ABC): """Google AI Service.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "googleai" + service_settings: GoogleAISettings diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index 2b65dc298111..c33affe047cb 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -40,6 +40,7 @@ from semantic_kernel.contents.utils.finish_reason import FinishReason from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -109,6 +110,7 @@ def __init__( # region Non-streaming @override + @trace_chat_completion(GoogleAIBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py index a38201db6b67..3590b8b4d51c 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_completion.py @@ -15,6 +15,7 @@ ) from semantic_kernel.connectors.ai.google.google_ai.services.google_ai_base import GoogleAIBase from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_text_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -78,6 +79,7 @@ def __init__( # region Non-streaming @override + @trace_text_completion(GoogleAIBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py index e17b1994424d..29e5d2502b63 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
from abc import ABC +from typing import ClassVar from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_settings import VertexAISettings from semantic_kernel.kernel_pydantic import KernelBaseModel @@ -9,4 +10,6 @@ class VertexAIBase(KernelBaseModel, ABC): """Vertex AI Service.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "vertexai" + service_settings: VertexAISettings diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index b2519d1e5edc..53116630b632 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -45,6 +45,7 @@ ) from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.kernel import Kernel +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -103,6 +104,7 @@ def __init__( # region Non-streaming @override + @trace_chat_completion(VertexAIBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py index 6919b6ba521e..e874ba21f254 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_text_completion.py @@ -19,6 +19,7 @@ from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_text_completion if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -74,6 +75,7 @@ def __init__( # region Non-streaming @override + @trace_text_completion(VertexAIBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py new file mode 100644 index 000000000000..0e18409f9e08 --- /dev/null +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_base.py @@ -0,0 +1,16 @@ +# Copyright (c) Microsoft. All rights reserved. + +from abc import ABC +from typing import ClassVar + +from mistralai.async_client import MistralAsyncClient + +from semantic_kernel.kernel_pydantic import KernelBaseModel + + +class MistralAIBase(KernelBaseModel, ABC): + """Mistral AI service base.""" + + MODEL_PROVIDER_NAME: ClassVar[str] = "mistralai" + + async_client: MistralAsyncClient diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index ffd6bc2594ad..fc23b451d253 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -1,9 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. 
import logging +import sys from collections.abc import AsyncGenerator from typing import Any +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + from mistralai.async_client import MistralAsyncClient from mistralai.models.chat_completion import ( ChatCompletionResponse, @@ -19,6 +25,7 @@ from semantic_kernel.connectors.ai.mistral_ai.prompt_execution_settings.mistral_ai_prompt_execution_settings import ( MistralAIChatPromptExecutionSettings, ) +from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents.chat_history import ChatHistory @@ -29,23 +36,20 @@ from semantic_kernel.contents.text_content import TextContent from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.contents.utils.finish_reason import FinishReason -from semantic_kernel.exceptions.service_exceptions import ( - ServiceInitializationError, - ServiceResponseException, -) +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion logger: logging.Logger = logging.getLogger(__name__) @experimental_class -class MistralAIChatCompletion(ChatCompletionClientBase): +class MistralAIChatCompletion(MistralAIBase, ChatCompletionClientBase): """Mistral Chat completion class.""" prompt_tokens: int = 0 completion_tokens: int = 0 total_tokens: int = 0 - async_client: MistralAsyncClient def __init__( self, @@ -64,10 +68,10 @@ def __init__( service_id (str | None): Service ID tied to the execution settings. api_key (str | None): The optional API key to use. If provided will override, the env vars or .env file value. - async_client (MistralAsyncClient | None) : An existing client to use. + async_client (MistralAsyncClient | None) : An existing client to use. env_file_path (str | None): Use the environment settings file as a fallback - to environment variables. - env_file_encoding (str | None): The encoding of the environment settings file. + to environment variables. + env_file_encoding (str | None): The encoding of the environment settings file. """ try: mistralai_settings = MistralAISettings.create( @@ -78,7 +82,7 @@ def __init__( ) except ValidationError as ex: raise ServiceInitializationError("Failed to create MistralAI settings.", ex) from ex - + if not mistralai_settings.chat_model_id: raise ServiceInitializationError("The MistralAI chat model ID is required.") @@ -93,12 +97,14 @@ def __init__( ai_model_id=ai_model_id or mistralai_settings.chat_model_id, ) + @override + @trace_chat_completion(MistralAIBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", **kwargs: Any, - ) -> list["ChatMessageContent"]: + ) -> list["ChatMessageContent"]: """Executes a chat completion request and returns the result. 
Args: @@ -124,18 +130,18 @@ async def get_chat_message_contents( raise ServiceResponseException( f"{type(self)} service failed to complete the prompt", ex, - ) from ex - + ) from ex + self.store_usage(response) response_metadata = self._get_metadata_from_response(response) return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices] - + async def get_streaming_chat_message_contents( self, chat_history: ChatHistory, - settings: PromptExecutionSettings, + settings: PromptExecutionSettings, **kwargs: Any, - ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: + ) -> AsyncGenerator[list[StreamingChatMessageContent], Any]: """Executes a streaming chat completion request and returns the result. Args: @@ -181,7 +187,7 @@ def _create_chat_message_content( metadata.update(response_metadata) items: list[Any] = self._get_tool_calls_from_chat_choice(choice) - + if choice.message.content: items.append(TextContent(text=choice.message.content)) @@ -220,8 +226,7 @@ def _create_streaming_chat_message_content( ) def _get_metadata_from_response( - self, - response: ChatCompletionResponse | ChatCompletionStreamResponse + self, response: ChatCompletionResponse | ChatCompletionStreamResponse ) -> dict[str, Any]: """Get metadata from a chat response.""" metadata: dict[str, Any] = { @@ -231,27 +236,26 @@ def _get_metadata_from_response( # Check if usage exists and has a value, then add it to the metadata if hasattr(response, "usage") and response.usage is not None: metadata["usage"] = response.usage - + return metadata def _get_metadata_from_chat_choice( - self, - choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice + self, choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice ) -> dict[str, Any]: """Get metadata from a chat choice.""" return { "logprobs": getattr(choice, "logprobs", None), } - - def _get_tool_calls_from_chat_choice(self, - choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice + + def _get_tool_calls_from_chat_choice( + self, choice: ChatCompletionResponseChoice | ChatCompletionResponseStreamChoice ) -> list[FunctionCallContent]: """Get tool calls from a chat choice.""" - content: ChatMessage | DeltaMessage + content: ChatMessage | DeltaMessage content = choice.message if isinstance(choice, ChatCompletionResponseChoice) else choice.delta if content.tool_calls is None: return [] - + return [ FunctionCallContent( id=tool.id, @@ -267,7 +271,7 @@ def _get_tool_calls_from_chat_choice(self, def get_prompt_execution_settings_class(self) -> "type[MistralAIChatPromptExecutionSettings]": """Create a request settings object.""" return MistralAIChatPromptExecutionSettings - + def store_usage(self, response): """Store the usage information from the response.""" if not isinstance(response, AsyncGenerator): diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py index 24b2905b1587..8bf76e5303b1 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py @@ -6,6 +6,7 @@ from typing import Any, override # pragma: no cover else: from typing_extensions import Any, override # pragma: no cover + import logging from mistralai.async_client import MistralAsyncClient @@ -14,6 +15,7 @@ from pydantic import ValidationError from 
semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase +from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_base import MistralAIBase from semantic_kernel.connectors.ai.mistral_ai.settings.mistral_ai_settings import MistralAISettings from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceResponseException @@ -23,19 +25,17 @@ @experimental_class -class MistralAITextEmbedding(EmbeddingGeneratorBase): +class MistralAITextEmbedding(MistralAIBase, EmbeddingGeneratorBase): """Mistral AI Inference Text Embedding Service.""" - client: MistralAsyncClient - def __init__( self, ai_model_id: str | None = None, api_key: str | None = None, service_id: str | None = None, + async_client: MistralAsyncClient | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, - client: MistralAsyncClient | None = None, ) -> None: """Initialize the Mistral AI Text Embedding service. @@ -45,12 +45,12 @@ def __init__( - MISTRALAI_EMBEDDING_MODEL_ID Args: - ai_model_id: (str | None): A string that is used to identify the model such as the model name. - api_key (str | None): The API key for the Mistral AI service deployment. - service_id (str | None): Service ID for the embedding completion service. - env_file_path (str | None): The path to the environment file. - env_file_encoding (str | None): The encoding of the environment file. - client (MistralAsyncClient | None): The Mistral AI client to use. + ai_model_id (str | None): A string that is used to identify the model such as the model name. + api_key (str | None): The API key for the Mistral AI service deployment. + service_id (str | None): Service ID for the embedding completion service. + async_client (MistralAsyncClient | None): The Mistral AI client to use. + env_file_path (str | None): The path to the environment file. + env_file_encoding (str | None): The encoding of the environment file. Raises: ServiceInitializationError: If an error occurs during initialization. 
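Because the `client` parameter is renamed to `async_client`, call sites change accordingly. A usage sketch; the API key and model ID below are placeholders (normally supplied via `MISTRALAI_API_KEY` and `MISTRALAI_EMBEDDING_MODEL_ID`):

```python
from mistralai.async_client import MistralAsyncClient

from semantic_kernel.connectors.ai.mistral_ai.services.mistral_ai_text_embedding import (
    MistralAITextEmbedding,
)

# Reuse an existing SDK client; before this PR it was passed as `client=`.
existing_client = MistralAsyncClient(api_key="placeholder-key")
embedding_service = MistralAITextEmbedding(
    ai_model_id="mistral-embed",  # placeholder model ID
    async_client=existing_client,
)
```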
@@ -68,15 +68,13 @@ def __init__( if not mistralai_settings.embedding_model_id: raise ServiceInitializationError("The MistralAI embedding model ID is required.") - if not client: - client = MistralAsyncClient( - api_key=mistralai_settings.api_key.get_secret_value() - ) + if not async_client: + async_client = MistralAsyncClient(api_key=mistralai_settings.api_key.get_secret_value()) super().__init__( service_id=service_id or mistralai_settings.embedding_model_id, ai_model_id=ai_model_id or mistralai_settings.embedding_model_id, - client=client, + async_client=async_client, ) @override @@ -98,10 +96,8 @@ async def generate_raw_embeddings( ) -> Any: """Generate embeddings from the Mistral AI service.""" try: - - embedding_response: EmbeddingResponse = await self.client.embeddings( - model=self.ai_model_id, - input=texts + embedding_response: EmbeddingResponse = await self.async_client.embeddings( + model=self.ai_model_id, input=texts ) except Exception as ex: raise ServiceResponseException( diff --git a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py index 7e365bea3d5c..6ff69be7dc12 100644 --- a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py @@ -11,6 +11,9 @@ class OllamaPromptExecutionSettings(PromptExecutionSettings): format: Literal["json"] | None = None options: dict[str, Any] | None = None + # TODO(@taochen): Add individual properties for execution settings and + # convert them to the appropriate types in the options dictionary. + class OllamaTextPromptExecutionSettings(OllamaPromptExecutionSettings): """Settings for Ollama text prompt execution.""" diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py index ceffb48d9dbf..f03ad0e994d1 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_base.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. from abc import ABC +from typing import ClassVar from ollama import AsyncClient @@ -14,4 +15,6 @@ class OllamaBase(KernelBaseModel, ABC): client [AsyncClient]: An Ollama client to use for the service. 
""" + MODEL_PROVIDER_NAME: ClassVar[str] = "ollama" + client: AsyncClient diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index 1c3ffe3080b7..2e9adb09fddb 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -25,6 +25,7 @@ from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion, trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -74,6 +75,8 @@ def __init__( client=client or AsyncClient(host=ollama_settings.host), ) + @override + @trace_chat_completion(OllamaBase.MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, @@ -162,6 +165,8 @@ async def get_streaming_chat_message_contents( ) ] + @override + @trace_text_completion(OllamaBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py index 351c4e768fea..e02f98723d96 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_text_completion.py @@ -20,6 +20,7 @@ from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -69,6 +70,8 @@ def __init__( client=client or AsyncClient(host=ollama_settings.host), ) + @override + @trace_text_completion(OllamaBase.MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index e23cd9799e61..786be4efb996 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -43,7 +43,7 @@ from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, ) -from semantic_kernel.utils.telemetry.decorators import trace_chat_completion +from semantic_kernel.utils.telemetry.model_diagnostics import trace_chat_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py index fbcb90767e46..40b445cce480 100644 --- 
a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py @@ -26,7 +26,7 @@ from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent -from semantic_kernel.utils.telemetry.decorators import trace_text_completion +from semantic_kernel.utils.telemetry.model_diagnostics import trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings diff --git a/python/semantic_kernel/utils/telemetry/decorators.py b/python/semantic_kernel/utils/telemetry/decorators.py deleted file mode 100644 index 366168ae3938..000000000000 --- a/python/semantic_kernel/utils/telemetry/decorators.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. -# -# Code to trace model activities with the OTel semantic conventions. -# This code contains experimental features and may change in the future. -# To enable these features, set one of the following senvironment variables to true: -# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS -# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE - -import functools -import json -import os -from collections.abc import Callable -from typing import Any - -from opentelemetry.trace import Span, StatusCode, get_tracer, use_span - -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.text_content import TextContent -from semantic_kernel.utils.telemetry.const import ( - CHAT_COMPLETION_OPERATION, - COMPLETION_EVENT, - COMPLETION_EVENT_COMPLETION, - COMPLETION_TOKENS, - ERROR_TYPE, - FINISH_REASON, - MAX_TOKENS, - MODEL, - OPERATION, - PROMPT_EVENT, - PROMPT_EVENT_PROMPT, - PROMPT_TOKENS, - RESPONSE_ID, - SYSTEM, - TEMPERATURE, - TEXT_COMPLETION_OPERATION, - TOP_P, -) - -OTEL_ENABLED_ENV_VAR = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS" -OTEL_SENSITIVE_ENABLED_ENV_VAR = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE" - - -_enable_diagnostics = os.getenv(OTEL_ENABLED_ENV_VAR, "false").lower() in ("true", "1", "t") -_enable_sensitive_events = os.getenv(OTEL_SENSITIVE_ENABLED_ENV_VAR, "false").lower() in ("true", "1", "t") - -# Creates a tracer from the global tracer provider -tracer = get_tracer(__name__) - - -def are_model_diagnostics_enabled() -> bool: - """Check if model diagnostics are enabled. - - Model diagnostics are enabled if either _enable_diagnostics or _enable_sensitive_events is set. - """ - return _enable_diagnostics or _enable_sensitive_events - - -def are_sensitive_events_enabled() -> bool: - """Check if sensitive events are enabled. - - Sensitive events are enabled if _enable_sensitive_events is set. 
- """ - return _enable_sensitive_events - - -def trace_chat_completion(model_provider: str) -> Callable: - """Decorator to trace chat completion activities.""" - - def inner_trace_chat_completion(completion_func: Callable) -> Callable: - @functools.wraps(completion_func) - async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageContent]: - chat_history: ChatHistory = kwargs["chat_history"] - settings: PromptExecutionSettings = kwargs["settings"] - - model_name = getattr(settings, "ai_model_id", None) or getattr(args[0], "ai_model_id", None) or "unknown" - - formatted_messages = ( - _messages_to_openai_format(chat_history.messages) if are_sensitive_events_enabled() else None - ) - span = _start_completion_activity( - CHAT_COMPLETION_OPERATION, model_name, model_provider, formatted_messages, settings - ) - - try: - completions: list[ChatMessageContent] = await completion_func(*args, **kwargs) - except Exception as exception: - if span: - _set_completion_error(span, exception) - span.end() - raise - - if span and completions: - with use_span(span, end_on_exit=True): - first_completion = completions[0] - response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get( - "id" - ) - usage = first_completion.metadata.get("usage", None) - prompt_tokens = getattr(usage, "prompt_tokens", None) - completion_tokens = getattr(usage, "completion_tokens", None) - - completion_text: str | None = ( - _messages_to_openai_format(completions) if are_sensitive_events_enabled() else None - ) - - finish_reasons: list[str] = [str(completion.finish_reason) for completion in completions] - - _set_completion_response( - span, - completion_text, - finish_reasons, - response_id or "unknown", - prompt_tokens, - completion_tokens, - ) - - return completions - - return wrapper_decorator - - return inner_trace_chat_completion - - -def trace_text_completion(model_provider: str) -> Callable: - """Decorator to trace text completion activities.""" - - def inner_trace_text_completion(completion_func: Callable) -> Callable: - @functools.wraps(completion_func) - async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]: - prompt: str = kwargs["prompt"] - settings: PromptExecutionSettings = kwargs["settings"] - - model_name = getattr(settings, "ai_model_id", None) or getattr(args[0], "ai_model_id", None) or "unknown" - - span = _start_completion_activity(TEXT_COMPLETION_OPERATION, model_name, model_provider, prompt, settings) - - try: - completions: list[TextContent] = await completion_func(*args, **kwargs) - except Exception as exception: - if span: - _set_completion_error(span, exception) - span.end() - raise - - if span and completions: - with use_span(span, end_on_exit=True): - first_completion = completions[0] - response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get( - "id" - ) - usage = first_completion.metadata.get("usage", None) - prompt_tokens = getattr(usage, "prompt_tokens", None) - completion_tokens = getattr(usage, "completion_tokens", None) - - completion_text: str | None = ( - json.dumps([completion.text for completion in completions]) - if are_sensitive_events_enabled() - else None - ) - - _set_completion_response( - span, - completion_text, - None, - response_id or "unknown", - prompt_tokens, - completion_tokens, - ) - - return completions - - return wrapper_decorator - - return inner_trace_text_completion - - -def _start_completion_activity( - operation_name: str, - model_name: str, - model_provider: str, 
- prompt: str | None, - execution_settings: PromptExecutionSettings | None, -) -> Span | None: - """Start a text or chat completion activity for a given model.""" - if not are_model_diagnostics_enabled(): - return None - - span = tracer.start_span(f"{operation_name} {model_name}") - - # Set attributes on the span - span.set_attributes( - { - OPERATION: operation_name, - SYSTEM: model_provider, - MODEL: model_name, - } - ) - - # TODO(@glahaye): we'll need to have a way to get these attributes from model - # providers other than OpenAI (for example if the attributes are named differently) - if execution_settings: - attribute = execution_settings.extension_data.get("max_tokens") - if attribute: - span.set_attribute(MAX_TOKENS, attribute) - - attribute = execution_settings.extension_data.get("temperature") - if attribute: - span.set_attribute(TEMPERATURE, attribute) - - attribute = execution_settings.extension_data.get("top_p") - if attribute: - span.set_attribute(TOP_P, attribute) - - if are_sensitive_events_enabled() and prompt: - span.add_event(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: prompt}) - - return span - - -def _set_completion_response( - span: Span, - completion_text: str | None, - finish_reasons: list[str] | None, - response_id: str, - prompt_tokens: int | None = None, - completion_tokens: int | None = None, -) -> None: - """Set the a text or chat completion response for a given activity.""" - if not are_model_diagnostics_enabled(): - return - - span.set_attribute(RESPONSE_ID, response_id) - - if finish_reasons: - span.set_attribute(FINISH_REASON, ",".join(finish_reasons)) - - if prompt_tokens: - span.set_attribute(PROMPT_TOKENS, prompt_tokens) - - if completion_tokens: - span.set_attribute(COMPLETION_TOKENS, completion_tokens) - - if are_sensitive_events_enabled() and completion_text: - span.add_event(COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: completion_text}) - - -def _set_completion_error(span: Span, error: Exception) -> None: - """Set an error for a text or chat completion .""" - if not are_model_diagnostics_enabled(): - return - - span.set_attribute(ERROR_TYPE, str(type(error))) - - span.set_status(StatusCode.ERROR, repr(error)) - - -def _messages_to_openai_format(messages: list[ChatMessageContent]) -> str: - """Convert a list of ChatMessageContent to a string in the OpenAI format. - - OpenTelemetry recommends formatting the messages in the OpenAI format - regardless of the actual model being used. - """ - return json.dumps([message.to_dict() for message in messages]) diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py new file mode 100644 index 000000000000..c873a5770a80 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft. All rights reserved. + +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion, trace_text_completion + +__all__ = [ + "trace_chat_completion", + "trace_text_completion", +] diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py new file mode 100644 index 000000000000..b3dd0faf5f82 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/decorators.py @@ -0,0 +1,232 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import functools +import json +from collections.abc import Callable +from typing import Any + +from opentelemetry.trace import Span, StatusCode, get_tracer, use_span + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.utils.experimental_decorator import experimental_function +from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes +from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import ModelDiagnosticSettings + +# Module to instrument GenAI models using OpenTelemetry and OpenTelemetry Semantic Conventions. +# These are experimental features and may change in the future. + +# To enable these features, set one of the following environment variables to true: +# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS +# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE +MODEL_DIAGNOSTICS_SETTINGS = ModelDiagnosticSettings.create() + +# Operation names +CHAT_COMPLETION_OPERATION = "chat.completions" +TEXT_COMPLETION_OPERATION = "text.completions" + +# Creates a tracer from the global tracer provider +tracer = get_tracer(__name__) + + +@experimental_function +def are_model_diagnostics_enabled() -> bool: + """Check if model diagnostics are enabled. + + Model diagnostics are enabled if either the diagnostics flag or the sensitive-events flag is set. + """ + return ( + MODEL_DIAGNOSTICS_SETTINGS.enable_otel_diagnostics + or MODEL_DIAGNOSTICS_SETTINGS.enable_otel_diagnostics_sensitive + ) + + +@experimental_function +def are_sensitive_events_enabled() -> bool: + """Check if sensitive events are enabled. + + Sensitive events are enabled if the sensitive-events flag is set. + """ + return MODEL_DIAGNOSTICS_SETTINGS.enable_otel_diagnostics_sensitive + + +@experimental_function +def trace_chat_completion(model_provider: str) -> Callable: + """Decorator to trace chat completion activities. + + Args: + model_provider (str): The model provider should describe a family of + GenAI models, with the specific model identified by ai_model_id. For example, + model_provider could be "openai" and ai_model_id could be "gpt-3.5-turbo". + Sometimes the model provider is unknown at runtime, in which case it can be + set to the most specific known provider. For example, while using local models + hosted by Ollama, the model provider could be set to "ollama". 
+ """ + + def inner_trace_chat_completion(completion_func: Callable) -> Callable: + @functools.wraps(completion_func) + async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageContent]: + if not are_model_diagnostics_enabled(): + # If model diagnostics are not enabled, just return the completion + return await completion_func(*args, **kwargs) + + completion_service: ChatCompletionClientBase = args[0] + chat_history: ChatHistory = kwargs["chat_history"] + settings: PromptExecutionSettings = kwargs["settings"] + + with use_span( + _start_completion_activity( + CHAT_COMPLETION_OPERATION, + completion_service.ai_model_id, + model_provider, + chat_history, + settings, + ), + end_on_exit=True, + ) as current_span: + try: + completions: list[ChatMessageContent] = await completion_func(*args, **kwargs) + _set_completion_response(current_span, completions) + return completions + except Exception as exception: + _set_completion_error(current_span, exception) + raise + + return wrapper_decorator + + return inner_trace_chat_completion + + +@experimental_function +def trace_text_completion(model_provider: str) -> Callable: + """Decorator to trace text completion activities.""" + + def inner_trace_text_completion(completion_func: Callable) -> Callable: + @functools.wraps(completion_func) + async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]: + if not are_model_diagnostics_enabled(): + # If model diagnostics are not enabled, just return the completion + return await completion_func(*args, **kwargs) + + completion_service: TextCompletionClientBase = args[0] + prompt: str = kwargs["prompt"] + settings: PromptExecutionSettings = kwargs["settings"] + + with use_span( + _start_completion_activity( + TEXT_COMPLETION_OPERATION, + completion_service.ai_model_id, + model_provider, + prompt, + settings, + ), + end_on_exit=True, + ) as current_span: + try: + completions: list[TextContent] = await completion_func(*args, **kwargs) + _set_completion_response(current_span, completions) + return completions + except Exception as exception: + _set_completion_error(current_span, exception) + raise + + return wrapper_decorator + + return inner_trace_text_completion + + +def _start_completion_activity( + operation_name: str, + model_name: str, + model_provider: str, + prompt: str | ChatHistory, + execution_settings: PromptExecutionSettings | None, +) -> Span: + """Start a text or chat completion activity for a given model.""" + span = tracer.start_span(f"{operation_name} {model_name}") + + # Set attributes on the span + span.set_attributes({ + gen_ai_attributes.OPERATION: operation_name, + gen_ai_attributes.SYSTEM: model_provider, + gen_ai_attributes.MODEL: model_name, + }) + + # TODO(@glahaye): we'll need to have a way to get these attributes from model + # providers other than OpenAI (for example if the attributes are named differently) + if execution_settings: + attribute = execution_settings.extension_data.get("max_tokens") + if attribute: + span.set_attribute(gen_ai_attributes.MAX_TOKENS, attribute) + + attribute = execution_settings.extension_data.get("temperature") + if attribute: + span.set_attribute(gen_ai_attributes.TEMPERATURE, attribute) + + attribute = execution_settings.extension_data.get("top_p") + if attribute: + span.set_attribute(gen_ai_attributes.TOP_P, attribute) + + if are_sensitive_events_enabled(): + if isinstance(prompt, ChatHistory): + prompt = _messages_to_openai_format(prompt.messages) + span.add_event(gen_ai_attributes.PROMPT_EVENT, 
{gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt}) + + return span + + +def _set_completion_response( + current_span: Span, + completions: list[ChatMessageContent] | list[TextContent], +) -> None: + """Set the text or chat completion response for a given activity.""" + first_completion = completions[0] + + # Set the response ID + response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get("id") + if response_id: + current_span.set_attribute(gen_ai_attributes.RESPONSE_ID, response_id) + + # Set the finish reason + finish_reasons = [ + str(completion.finish_reason) for completion in completions if isinstance(completion, ChatMessageContent) + ] + if finish_reasons: + current_span.set_attribute(gen_ai_attributes.FINISH_REASON, ",".join(finish_reasons)) + + # Set usage attributes + usage = first_completion.metadata.get("usage", None) + + prompt_tokens = getattr(usage, "prompt_tokens", None) + if prompt_tokens: + current_span.set_attribute(gen_ai_attributes.PROMPT_TOKENS, prompt_tokens) + + completion_tokens = getattr(usage, "completion_tokens", None) + if completion_tokens: + current_span.set_attribute(gen_ai_attributes.COMPLETION_TOKENS, completion_tokens) + + # Set the completion event + if are_sensitive_events_enabled(): + completion_text: str = _messages_to_openai_format(completions) + current_span.add_event( + gen_ai_attributes.COMPLETION_EVENT, {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: completion_text} + ) + + +def _set_completion_error(span: Span, error: Exception) -> None: + """Set an error for a text or chat completion.""" + span.set_attribute(gen_ai_attributes.ERROR_TYPE, str(type(error))) + span.set_status(StatusCode.ERROR, repr(error)) + + +def _messages_to_openai_format(messages: list[ChatMessageContent] | list[TextContent]) -> str: + """Convert a list of ChatMessageContent or TextContent to a string in the OpenAI format. + + OpenTelemetry recommends formatting the messages in the OpenAI format + regardless of the actual model being used. + """ + return json.dumps([message.to_dict() for message in messages])
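As the docstring above notes, prompt and completion events are serialized in the OpenAI message format regardless of the provider. A rough sketch of that payload (the printed shape is approximate and depends on `ChatMessageContent.to_dict`):

```python
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.utils.telemetry.model_diagnostics.decorators import _messages_to_openai_format

messages = [
    ChatMessageContent(role=AuthorRole.USER, content="What is 2 + 2?"),
    ChatMessageContent(role=AuthorRole.ASSISTANT, content="4"),
]
# Emits a JSON array, roughly:
# [{"role": "user", "content": "What is 2 + 2?"}, {"role": "assistant", "content": "4"}]
print(_messages_to_openai_format(messages))
```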
diff --git a/python/semantic_kernel/utils/telemetry/const.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py similarity index 83% rename from python/semantic_kernel/utils/telemetry/const.py rename to python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py index 5c74f708b986..cca37908e466 100644 --- a/python/semantic_kernel/utils/telemetry/const.py +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/gen_ai_attributes.py @@ -1,12 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. -# + # Constants for tracing activities with semantic conventions. +# Ideally, we should use the attributes from the semcov package. +# However, many of the attributes are not yet available in the package, +# so we define them here for now. # Activity tags SYSTEM = "gen_ai.system" OPERATION = "gen_ai.operation.name" -CHAT_COMPLETION_OPERATION = "chat.completions" -TEXT_COMPLETION_OPERATION = "text.completions" MODEL = "gen_ai.request.model" MAX_TOKENS = "gen_ai.request.max_tokens" # nosec TEMPERATURE = "gen_ai.request.temperature" diff --git a/python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py b/python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py new file mode 100644 index 000000000000..f7e509a21b26 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/model_diagnostics/model_diagnostics_settings.py @@ -0,0 +1,31 @@ +# Copyright (c) Microsoft. All rights reserved. + +from typing import ClassVar + +from semantic_kernel.kernel_pydantic import KernelBaseSettings +from semantic_kernel.utils.experimental_decorator import experimental_class + + +@experimental_class +class ModelDiagnosticSettings(KernelBaseSettings): + """Settings for model diagnostics. + + The settings are first loaded from environment variables with + the prefix 'SEMANTICKERNEL_EXPERIMENTAL_GENAI_'. + If the environment variables are not found, the settings can + be loaded from a .env file with the encoding 'utf-8'. + If the settings are not found in the .env file either, the + defaults below are used, since both settings have + default values. + + Optional settings for prefix 'SEMANTICKERNEL_EXPERIMENTAL_GENAI_' are: + - enable_otel_diagnostics: bool - Enable OpenTelemetry diagnostics. Default is False. + (Env var SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS) + - enable_otel_diagnostics_sensitive: bool - Enable OpenTelemetry sensitive events. Default is False. + (Env var SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE) + """ + + env_prefix: ClassVar[str] = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_" + + enable_otel_diagnostics: bool = False + enable_otel_diagnostics_sensitive: bool = False diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 692c9c759ab1..3d8d263e7a45 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -161,6 +161,11 @@ def chat_history() -> "ChatHistory": return ChatHistory() +@fixture(scope="function") +def prompt() -> str: + return "test prompt" + + # @fixture(autouse=True) # def enable_debug_mode(): # """Set `autouse=True` to enable easy debugging for tests. 
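`ModelDiagnosticSettings` is read once at import time into `MODEL_DIAGNOSTICS_SETTINGS`, which is why the model_diagnostics test fixture further down reloads it after patching the environment. A minimal sketch of loading the flags, assuming the two experimental variables shown in `.env.example`:

```python
import os

from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import (
    ModelDiagnosticSettings,
)

# Both flags default to False; set the env vars before calling create().
os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS"] = "true"
os.environ["SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE"] = "true"

settings = ModelDiagnosticSettings.create()
assert settings.enable_otel_diagnostics
assert settings.enable_otel_diagnostics_sensitive
```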
@@ -306,10 +311,7 @@ def anthropic_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): if override_env_param_dict is None: override_env_param_dict = {} - env_vars = { - "ANTHROPIC_CHAT_MODEL_ID": "test_chat_model_id", - "ANTHROPIC_API_KEY": "test_api_key" - } + env_vars = {"ANTHROPIC_CHAT_MODEL_ID": "test_chat_model_id", "ANTHROPIC_API_KEY": "test_api_key"} env_vars.update(override_env_param_dict) diff --git a/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py b/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py index 98550ca6f1ad..61c960b4810f 100644 --- a/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py +++ b/python/tests/unit/connectors/mistral_ai/services/test_mistralai_text_embeddings.py @@ -13,7 +13,7 @@ def test_embedding_with_env_variables(mistralai_unit_test_env): text_embedding = MistralAITextEmbedding() assert text_embedding.ai_model_id == "test_embedding_model_id" - assert text_embedding.client._api_key == "test_api_key" + assert text_embedding.async_client._api_key == "test_api_key" @pytest.mark.parametrize("exclude_list", [["MISTRALAI_API_KEY", "MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True) @@ -23,33 +23,33 @@ def test_embedding_with_constructor(mistralai_unit_test_env): ai_model_id="overwrite-model", ) assert text_embedding.ai_model_id == "overwrite-model" - assert text_embedding.client._api_key == "overwrite-api-key" + assert text_embedding.async_client._api_key == "overwrite-api-key" def test_embedding_with_client(mistralai_unit_test_env): client = MagicMock(spec=MistralAsyncClient) - text_embedding = MistralAITextEmbedding(client=client) - assert text_embedding.client == client + text_embedding = MistralAITextEmbedding(async_client=client) + assert text_embedding.async_client == client assert text_embedding.ai_model_id == "test_embedding_model_id" def test_embedding_with_api_key(mistralai_unit_test_env): text_embedding = MistralAITextEmbedding(api_key="overwrite-api-key") - assert text_embedding.client._api_key == "overwrite-api-key" + assert text_embedding.async_client._api_key == "overwrite-api-key" assert text_embedding.ai_model_id == "test_embedding_model_id" def test_embedding_with_model(mistralai_unit_test_env): text_embedding = MistralAITextEmbedding(ai_model_id="overwrite-model") assert text_embedding.ai_model_id == "overwrite-model" - assert text_embedding.client._api_key == "test_api_key" + assert text_embedding.async_client._api_key == "test_api_key" -@pytest.mark.parametrize("exclude_list", [["MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True) +@pytest.mark.parametrize("exclude_list", [["MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True) def test_embedding_with_model_without_env(mistralai_unit_test_env): text_embedding = MistralAITextEmbedding(ai_model_id="overwrite-model") assert text_embedding.ai_model_id == "overwrite-model" - assert text_embedding.client._api_key == "test_api_key" + assert text_embedding.async_client._api_key == "test_api_key" @pytest.mark.parametrize("exclude_list", [["MISTRALAI_EMBEDDING_MODEL_ID"]], indirect=True) @@ -90,7 +90,7 @@ async def test_embedding_generate_raw_embedding(mistralai_unit_test_env): mock_client = AsyncMock(spec=MistralAsyncClient) mock_embedding_response = MagicMock(spec=EmbeddingResponse, data=[MagicMock(embedding=[1, 2, 3, 4, 5])]) mock_client.embeddings.return_value = mock_embedding_response - text_embedding = MistralAITextEmbedding(client=mock_client) + text_embedding = 
MistralAITextEmbedding(async_client=mock_client) embedding = await text_embedding.generate_raw_embeddings(["test"]) assert embedding == [[1, 2, 3, 4, 5]] @@ -100,7 +100,7 @@ async def test_embedding_generate_embedding(mistralai_unit_test_env): mock_client = AsyncMock(spec=MistralAsyncClient) mock_embedding_response = MagicMock(spec=EmbeddingResponse, data=[MagicMock(embedding=[1, 2, 3, 4, 5])]) mock_client.embeddings.return_value = mock_embedding_response - text_embedding = MistralAITextEmbedding(client=mock_client) + text_embedding = MistralAITextEmbedding(async_client=mock_client) embedding = await text_embedding.generate_embeddings(["test"]) assert embedding.tolist() == [[1, 2, 3, 4, 5]] @@ -109,6 +109,6 @@ async def test_embedding_generate_embedding(mistralai_unit_test_env): async def test_embedding_generate_embedding_exception(mistralai_unit_test_env): mock_client = AsyncMock(spec=MistralAsyncClient) mock_client.embeddings.side_effect = Exception("Test Exception") - text_embedding = MistralAITextEmbedding(client=mock_client) + text_embedding = MistralAITextEmbedding(async_client=mock_client) with pytest.raises(ServiceResponseException): await text_embedding.generate_embeddings(["test"]) diff --git a/python/tests/unit/utils/model_diagnostics/conftest.py b/python/tests/unit/utils/model_diagnostics/conftest.py new file mode 100644 index 000000000000..ab7528af2bad --- /dev/null +++ b/python/tests/unit/utils/model_diagnostics/conftest.py @@ -0,0 +1,91 @@ +# Copyright (c) Microsoft. All rights reserved. + + +import sys +from collections.abc import AsyncGenerator +from typing import Any, ClassVar + +import pytest + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +import semantic_kernel +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.streaming_text_content import StreamingTextContent +from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.utils.telemetry.model_diagnostics.model_diagnostics_settings import ModelDiagnosticSettings + + +@pytest.fixture() +def model_diagnostics_unit_test_env(monkeypatch): + """Fixture to set environment variables for Model Diagnostics Unit Tests.""" + env_vars = { + "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS": "true", + "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE": "true", + } + + for key, value in env_vars.items(): + monkeypatch.setenv(key, value) + + # Need to reload the settings to pick up the new environment variables since the + # settings are loaded at import time and this fixture is called after the import + semantic_kernel.utils.telemetry.model_diagnostics.decorators.MODEL_DIAGNOSTICS_SETTINGS = ( + ModelDiagnosticSettings.create() + ) + + +@pytest.fixture() +def service_env_vars(monkeypatch, request): + """Fixture to set environment variables for AI Service Unit Tests.""" + for key, value in request.param.items(): + monkeypatch.setenv(key, value) + + +class 
MockChatCompletion(ChatCompletionClientBase): + MODEL_PROVIDER_NAME: ClassVar[str] = "mock" + + @override + async def get_chat_message_contents( + self, + chat_history: "ChatHistory", + settings: "PromptExecutionSettings", + **kwargs: Any, + ) -> list["ChatMessageContent"]: + return [] + + @override + async def get_streaming_chat_message_contents( + self, + chat_history: "ChatHistory", + settings: "PromptExecutionSettings", + **kwargs: Any, + ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: + yield [] + + +class MockTextCompletion(TextCompletionClientBase): + MODEL_PROVIDER_NAME: ClassVar[str] = "mock" + + @override + async def get_text_contents( + self, + prompt: str, + settings: "PromptExecutionSettings", + ) -> list["TextContent"]: + return [] + + @override + async def get_streaming_text_contents( + self, + prompt: str, + settings: "PromptExecutionSettings", + ) -> AsyncGenerator[list["StreamingTextContent"], Any]: + yield [] diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py new file mode 100644 index 000000000000..95de327818e7 --- /dev/null +++ b/python/tests/unit/utils/model_diagnostics/test_trace_chat_completion.py @@ -0,0 +1,172 @@ +# Copyright (c) Microsoft. All rights reserved. + +from unittest.mock import patch + +import pytest +from opentelemetry.trace import StatusCode + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.contents.utils.finish_reason import FinishReason +from semantic_kernel.exceptions.service_exceptions import ServiceResponseException +from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( + CHAT_COMPLETION_OPERATION, + _messages_to_openai_format, + trace_chat_completion, +) +from tests.unit.utils.model_diagnostics.conftest import MockChatCompletion + +pytestmark = pytest.mark.parametrize( + "execution_settings, mock_response", + [ + pytest.param( + PromptExecutionSettings( + extension_data={ + "max_tokens": 1000, + "temperature": 0.5, + "top_p": 0.9, + } + ), + [ + ChatMessageContent( + role=AuthorRole.ASSISTANT, + ai_model_id="ai_model_id", + content="Test content", + metadata={"id": "test_id"}, + finish_reason=FinishReason.STOP, + ) + ], + id="test_execution_settings_with_extension_data", + ), + pytest.param( + PromptExecutionSettings(), + [ + ChatMessageContent( + role=AuthorRole.ASSISTANT, + ai_model_id="ai_model_id", + metadata={"id": "test_id"}, + finish_reason=FinishReason.STOP, + ) + ], + id="test_execution_settings_no_extension_data", + ), + pytest.param( + PromptExecutionSettings(), + [ + ChatMessageContent( + role=AuthorRole.ASSISTANT, + ai_model_id="ai_model_id", + metadata={}, + finish_reason=FinishReason.STOP, + ) + ], + id="test_chat_message_content_no_metadata", + ), + pytest.param( + PromptExecutionSettings(), + [ + ChatMessageContent( + role=AuthorRole.ASSISTANT, + ai_model_id="ai_model_id", + metadata={"id": "test_id"}, + ) + ], + id="test_chat_message_content_no_finish_reason", + ), + ], +) + + +@pytest.mark.asyncio +@patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an 
INVALID_SPAN
+async def test_trace_chat_completion(
+    mock_span,
+    execution_settings,
+    mock_response,
+    chat_history,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    chat_completion: ChatCompletionClientBase = MockChatCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockChatCompletion, "get_chat_message_contents", return_value=mock_response):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockChatCompletion.get_chat_message_contents = trace_chat_completion(MockChatCompletion.MODEL_PROVIDER_NAME)(
+            chat_completion.get_chat_message_contents
+        )
+
+        results: list[ChatMessageContent] = await chat_completion.get_chat_message_contents(
+            chat_history=chat_history, settings=execution_settings
+        )
+
+        assert results == mock_response
+
+        # Before the call to the model
+        mock_span.set_attributes.assert_called_with({
+            gen_ai_attributes.OPERATION: CHAT_COMPLETION_OPERATION,
+            gen_ai_attributes.SYSTEM: MockChatCompletion.MODEL_PROVIDER_NAME,
+            gen_ai_attributes.MODEL: chat_completion.ai_model_id,
+        })
+
+        # Not all connectors take the same parameters
+        if execution_settings.extension_data.get("max_tokens") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.MAX_TOKENS, execution_settings.extension_data["max_tokens"]
+            )
+        if execution_settings.extension_data.get("temperature") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.TEMPERATURE, execution_settings.extension_data["temperature"]
+            )
+        if execution_settings.extension_data.get("top_p") is not None:
+            mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"])
+
+        mock_span.add_event.assert_any_call(
+            gen_ai_attributes.PROMPT_EVENT,
+            {gen_ai_attributes.PROMPT_EVENT_PROMPT: _messages_to_openai_format(chat_history)},
+        )
+
+        # After the call to the model
+        # Not all connectors return the same metadata
+        if mock_response[0].metadata.get("id") is not None:
+            mock_span.set_attribute.assert_any_call(gen_ai_attributes.RESPONSE_ID, mock_response[0].metadata["id"])
+        if any(completion.finish_reason is not None for completion in mock_response):
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.FINISH_REASON,
+                ",".join([str(completion.finish_reason) for completion in mock_response]),
+            )
+
+        mock_span.add_event.assert_any_call(
+            gen_ai_attributes.COMPLETION_EVENT,
+            {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)},
+        )
+
+
+@pytest.mark.asyncio
+@patch("opentelemetry.trace.INVALID_SPAN")  # When no tracer provider is available, the span will be an INVALID_SPAN
+async def test_trace_chat_completion_exception(
+    mock_span,
+    execution_settings,
+    mock_response,
+    chat_history,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    chat_completion: ChatCompletionClientBase = MockChatCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockChatCompletion, "get_chat_message_contents", side_effect=ServiceResponseException()):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockChatCompletion.get_chat_message_contents = trace_chat_completion(MockChatCompletion.MODEL_PROVIDER_NAME)(
+            chat_completion.get_chat_message_contents
+        )
+
+        with pytest.raises(ServiceResponseException):
+            await chat_completion.get_chat_message_contents(chat_history=chat_history, settings=execution_settings)
+
+        exception = ServiceResponseException()
+        mock_span.set_attribute.assert_any_call(gen_ai_attributes.ERROR_TYPE, str(type(exception)))
+        mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception))
+
+        mock_span.end.assert_any_call()
diff --git a/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py b/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py
new file mode 100644
index 000000000000..f6b4d47e1b97
--- /dev/null
+++ b/python/tests/unit/utils/model_diagnostics/test_trace_text_completion.py
@@ -0,0 +1,149 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import patch
+
+import pytest
+from opentelemetry.trace import StatusCode
+
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase
+from semantic_kernel.contents.text_content import TextContent
+from semantic_kernel.exceptions.service_exceptions import ServiceResponseException
+from semantic_kernel.utils.telemetry.model_diagnostics import gen_ai_attributes
+from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
+    TEXT_COMPLETION_OPERATION,
+    _messages_to_openai_format,
+    trace_text_completion,
+)
+from tests.unit.utils.model_diagnostics.conftest import MockTextCompletion
+
+pytestmark = pytest.mark.parametrize(
+    "execution_settings, mock_response",
+    [
+        pytest.param(
+            PromptExecutionSettings(
+                extension_data={
+                    "max_tokens": 1000,
+                    "temperature": 0.5,
+                    "top_p": 0.9,
+                }
+            ),
+            [
+                TextContent(
+                    ai_model_id="ai_model_id",
+                    text="Test content",
+                    metadata={"id": "test_id"},
+                )
+            ],
+            id="test_execution_settings_with_extension_data",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                TextContent(
+                    ai_model_id="ai_model_id",
+                    text="Test content",
+                    metadata={"id": "test_id"},
+                )
+            ],
+            id="test_execution_settings_no_extension_data",
+        ),
+        pytest.param(
+            PromptExecutionSettings(),
+            [
+                TextContent(
+                    ai_model_id="ai_model_id",
+                    text="Test content",
+                    metadata={},
+                )
+            ],
+            id="test_text_content_no_metadata",
+        ),
+    ],
+)
+
+
+@pytest.mark.asyncio
+@patch("opentelemetry.trace.INVALID_SPAN")  # When no tracer provider is available, the span will be an INVALID_SPAN
+async def test_trace_text_completion(
+    mock_span,
+    execution_settings,
+    mock_response,
+    prompt,
+    model_diagnostics_unit_test_env,
+):
+    # Setup
+    text_completion: TextCompletionClientBase = MockTextCompletion(ai_model_id="ai_model_id")
+
+    with patch.object(MockTextCompletion, "get_text_contents", return_value=mock_response):
+        # We need to reapply the decorator to the method since the mock will not have the decorator applied
+        MockTextCompletion.get_text_contents = trace_text_completion(MockTextCompletion.MODEL_PROVIDER_NAME)(
+            text_completion.get_text_contents
+        )
+
+        results: list[TextContent] = await text_completion.get_text_contents(
+            prompt=prompt, settings=execution_settings
+        )
+
+        assert results == mock_response
+
+        # Before the call to the model
+        mock_span.set_attributes.assert_called_with({
+            gen_ai_attributes.OPERATION: TEXT_COMPLETION_OPERATION,
+            gen_ai_attributes.SYSTEM: MockTextCompletion.MODEL_PROVIDER_NAME,
+            gen_ai_attributes.MODEL: text_completion.ai_model_id,
+        })
+
+        # Not all connectors take the same parameters
+        if execution_settings.extension_data.get("max_tokens") is not None:
+            mock_span.set_attribute.assert_any_call(
+                gen_ai_attributes.MAX_TOKENS,
execution_settings.extension_data["max_tokens"] + ) + if execution_settings.extension_data.get("temperature") is not None: + mock_span.set_attribute.assert_any_call( + gen_ai_attributes.TEMPERATURE, execution_settings.extension_data["temperature"] + ) + if execution_settings.extension_data.get("top_p") is not None: + mock_span.set_attribute.assert_any_call(gen_ai_attributes.TOP_P, execution_settings.extension_data["top_p"]) + + mock_span.add_event.assert_any_call( + gen_ai_attributes.PROMPT_EVENT, {gen_ai_attributes.PROMPT_EVENT_PROMPT: prompt} + ) + + # After the call to the model + # Not all connectors return the same metadata + if mock_response[0].metadata.get("id") is not None: + mock_span.set_attribute.assert_any_call(gen_ai_attributes.RESPONSE_ID, mock_response[0].metadata["id"]) + + mock_span.add_event.assert_any_call( + gen_ai_attributes.COMPLETION_EVENT, + {gen_ai_attributes.COMPLETION_EVENT_COMPLETION: _messages_to_openai_format(mock_response)}, + ) + + +@pytest.mark.asyncio +@patch("opentelemetry.trace.INVALID_SPAN") # When no tracer provider is available, the span will be an INVALID_SPAN +async def test_trace_text_completion_exception( + mock_span, + execution_settings, + mock_response, + prompt, + model_diagnostics_unit_test_env, +): + # Setup + text_completion: TextCompletionClientBase = MockTextCompletion(ai_model_id="ai_model_id") + + with patch.object(MockTextCompletion, "get_text_contents", side_effect=ServiceResponseException()): + # We need to reapply the decorator to the method since the mock will not have the decorator applied + MockTextCompletion.get_text_contents = trace_text_completion(MockTextCompletion.MODEL_PROVIDER_NAME)( + text_completion.get_text_contents + ) + + with pytest.raises(ServiceResponseException): + await text_completion.get_text_contents(prompt=prompt, settings=execution_settings) + + exception = ServiceResponseException() + mock_span.set_attribute.assert_any_call(gen_ai_attributes.ERROR_TYPE, str(type(exception))) + mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) + + mock_span.end.assert_any_call() diff --git a/python/tests/unit/utils/test_tracing.py b/python/tests/unit/utils/test_tracing.py deleted file mode 100644 index 5d2c2f9e4bf6..000000000000 --- a/python/tests/unit/utils/test_tracing.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. 
- -from unittest.mock import patch - -import pytest -from openai.types import Completion as TextCompletion -from openai.types import CompletionChoice -from opentelemetry.trace import StatusCode - -from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion -from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base import OpenAIChatCompletionBase -from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion import OpenAITextCompletion -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.text_content import TextContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.contents.utils.finish_reason import FinishReason -from semantic_kernel.exceptions.service_exceptions import ServiceResponseException -from semantic_kernel.utils.telemetry.const import ( - CHAT_COMPLETION_OPERATION, - COMPLETION_EVENT, - COMPLETION_EVENT_COMPLETION, - ERROR_TYPE, - FINISH_REASON, - MAX_TOKENS, - MODEL, - OPERATION, - PROMPT_EVENT, - PROMPT_EVENT_PROMPT, - RESPONSE_ID, - SYSTEM, - TEMPERATURE, - TEXT_COMPLETION_OPERATION, - TOP_P, -) - -TEST_CONTENT = "Test content" -TEST_RESPONSE_ID = "dummy_id" -TEST_MAX_TOKENS = "1000" -TEST_MODEL = "dummy_model" -TEST_TEMPERATURE = "0.5" -TEST_TOP_P = "0.9" -TEST_CREATED_AT = 1 -TEST_TEXT_PROMPT = "Test prompt" -EXPECTED_CHAT_COMPLETION_EVENT_PAYLOAD = f'[{{"role": "assistant", "content": "{TEST_CONTENT}"}}]' -EXPECTED_TEXT_COMPLETION_EVENT_PAYLOAD = f'["{TEST_CONTENT}"]' - -TEST_CHAT_RESPONSE = [ - ChatMessageContent( - role=AuthorRole.ASSISTANT, - ai_model_id=TEST_MODEL, - content=TEST_CONTENT, - metadata={"id": TEST_RESPONSE_ID}, - finish_reason=FinishReason.STOP, - ) -] - -TEST_TEXT_RESPONSE = TextCompletion( - model=TEST_MODEL, - text=TEST_CONTENT, - id=TEST_RESPONSE_ID, - choices=[CompletionChoice(index=0, text=TEST_CONTENT, finish_reason="stop")], - created=TEST_CREATED_AT, - object="text_completion", -) - -TEST_TEXT_RESPONSE_METADATA = { - "id": TEST_RESPONSE_ID, - "created": TEST_CREATED_AT, - "system_fingerprint": None, - "logprobs": None, - "usage": None, -} - -EXPECTED_TEXT_CONTENT = [ - TextContent( - ai_model_id=TEST_MODEL, - text=TEST_CONTENT, - encoding=None, - metadata=TEST_TEXT_RESPONSE_METADATA, - inner_content=TEST_TEXT_RESPONSE, - ) -] - - -@pytest.mark.asyncio -@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) -@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) -@patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._send_chat_request", - return_value=TEST_CHAT_RESPONSE, -) -@patch("opentelemetry.trace.INVALID_SPAN") -async def test_trace_chat_completion( - mock_span, - mock_send_chat_request, - mock_sensitive_events_enabled, - mock_model_diagnostics_enabled, - openai_unit_test_env, -): - chat_completion = OpenAIChatCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") - extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} - - results: list[ChatMessageContent] = await chat_completion.get_chat_message_contents( - chat_history=ChatHistory(), settings=PromptExecutionSettings(extension_data=extension_data) - ) - - 
assert results == TEST_CHAT_RESPONSE - - mock_span.set_attributes.assert_called_with( - { - OPERATION: CHAT_COMPLETION_OPERATION, - SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, - MODEL: TEST_MODEL, - } - ) - mock_span.set_attribute.assert_any_call(MAX_TOKENS, TEST_MAX_TOKENS) - mock_span.set_attribute.assert_any_call(TEMPERATURE, TEST_TEMPERATURE) - mock_span.set_attribute.assert_any_call(TOP_P, TEST_TOP_P) - mock_span.add_event.assert_any_call(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: "[]"}) - - mock_span.set_attribute.assert_any_call(RESPONSE_ID, TEST_RESPONSE_ID) - mock_span.set_attribute.assert_any_call(FINISH_REASON, str(FinishReason.STOP)) - mock_span.add_event.assert_any_call( - COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: EXPECTED_CHAT_COMPLETION_EVENT_PAYLOAD} - ) - - -@pytest.mark.asyncio -@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) -@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) -@patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base.OpenAITextCompletionBase._send_request", - return_value=TEST_TEXT_RESPONSE, -) -@patch("opentelemetry.trace.INVALID_SPAN") -async def test_trace_text_completion( - mock_span, mock_send_request, mock_sensitive_events_enabled, mock_model_diagnostics_enabled, openai_unit_test_env -): - chat_completion = OpenAITextCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") - extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} - - results: list[TextContent] = await chat_completion.get_text_contents( - prompt=TEST_TEXT_PROMPT, settings=PromptExecutionSettings(extension_data=extension_data) - ) - - assert results == EXPECTED_TEXT_CONTENT - - mock_span.set_attributes.assert_called_with( - { - OPERATION: TEXT_COMPLETION_OPERATION, - SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, - MODEL: TEST_MODEL, - } - ) - mock_span.set_attribute.assert_any_call(MAX_TOKENS, TEST_MAX_TOKENS) - mock_span.set_attribute.assert_any_call(TEMPERATURE, TEST_TEMPERATURE) - mock_span.set_attribute.assert_any_call(TOP_P, TEST_TOP_P) - mock_span.add_event.assert_any_call(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: TEST_TEXT_PROMPT}) - - mock_span.set_attribute.assert_any_call(RESPONSE_ID, TEST_RESPONSE_ID) - mock_span.add_event.assert_any_call( - COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: EXPECTED_TEXT_COMPLETION_EVENT_PAYLOAD} - ) - - -@pytest.mark.asyncio -@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) -@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) -@patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._send_chat_request", - side_effect=ServiceResponseException, -) -@patch("opentelemetry.trace.INVALID_SPAN") -async def test_trace_chat_completion_exception( - mock_span, - mock_send_chat_request, - mock_sensitive_events_enabled, - mock_model_diagnostics_enabled, - openai_unit_test_env, -): - chat_completion = OpenAIChatCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") - extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} - - with pytest.raises(ServiceResponseException): - await chat_completion.get_chat_message_contents( - chat_history=ChatHistory(), settings=PromptExecutionSettings(extension_data=extension_data) - ) - - 
mock_span.set_attributes.assert_called_with( - { - OPERATION: CHAT_COMPLETION_OPERATION, - SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, - MODEL: TEST_MODEL, - } - ) - - exception = ServiceResponseException() - mock_span.set_attribute.assert_any_call(ERROR_TYPE, str(type(exception))) - mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) - - mock_span.end.assert_any_call() - - -@pytest.mark.asyncio -@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) -@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) -@patch( - "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base.OpenAITextCompletionBase._send_request", - side_effect=ServiceResponseException, -) -@patch("opentelemetry.trace.INVALID_SPAN") -async def test_trace_text_completion_exception( - mock_span, - mock_send_chat_request, - mock_sensitive_events_enabled, - mock_model_diagnostics_enabled, - openai_unit_test_env, -): - chat_completion = OpenAITextCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") - extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} - - with pytest.raises(ServiceResponseException): - await chat_completion.get_text_contents( - prompt=TEST_TEXT_PROMPT, settings=PromptExecutionSettings(extension_data=extension_data) - ) - - mock_span.set_attributes.assert_called_with( - { - OPERATION: TEXT_COMPLETION_OPERATION, - SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, - MODEL: TEST_MODEL, - } - ) - - exception = ServiceResponseException() - mock_span.set_attribute.assert_any_call(ERROR_TYPE, str(type(exception))) - mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) - - mock_span.end.assert_any_call()
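[Reviewer note] To close out: a minimal sketch of the decorator usage the new tests exercise, run against the MockChatCompletion test double from conftest.py. This assumes it is executed from the repo's python/ directory so the tests package is importable, and that the two SEMANTICKERNEL_EXPERIMENTAL_GENAI_* variables shown earlier are set; without them the decorator is expected to be a pass-through:

    import asyncio

    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
    from semantic_kernel.contents.chat_history import ChatHistory
    from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_chat_completion
    from tests.unit.utils.model_diagnostics.conftest import MockChatCompletion

    # Apply the decorator to the undecorated method, the same way the tests reapply it
    # and the same way the connectors in this patch decorate their own methods.
    MockChatCompletion.get_chat_message_contents = trace_chat_completion(
        MockChatCompletion.MODEL_PROVIDER_NAME
    )(MockChatCompletion.get_chat_message_contents)

    async def main() -> None:
        service = MockChatCompletion(ai_model_id="ai_model_id")
        chat_history = ChatHistory()
        chat_history.add_user_message("Hello")
        # With diagnostics enabled, this call runs inside a span carrying the
        # gen_ai.* attributes and prompt/completion events asserted in the tests above.
        await service.get_chat_message_contents(
            chat_history=chat_history, settings=PromptExecutionSettings()
        )

    asyncio.run(main())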