From ca78ff73e0a85e5c385e42bbe4c486880d6bc620 Mon Sep 17 00:00:00 2001 From: Gil LaHaye Date: Thu, 25 Jul 2024 13:18:41 -0700 Subject: [PATCH] Python: Add OpenTelemetry to Python SK (#6914) ### Motivation and Context We want observability into usage of SK ### Description Add OpenTelemetry to Python SK ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations --------- Co-authored-by: Tao Chen --- python/poetry.lock | 3 +- python/pyproject.toml | 5 + .../azure_ai_inference_chat_completion.py | 2 +- .../azure_ai_inference_text_embedding.py | 2 +- .../ai/chat_completion_client_base.py | 2 +- .../ai/open_ai/services/azure_config_base.py | 2 +- .../services/open_ai_chat_completion_base.py | 6 +- .../open_ai/services/open_ai_config_base.py | 2 +- .../services/open_ai_text_completion_base.py | 6 +- .../ai/text_completion_client_base.py | 2 +- .../connectors/memory/astradb/astra_client.py | 2 +- .../openapi_function_execution_parameters.py | 2 +- .../openapi_plugin/openapi_runner.py | 2 +- .../connectors/utils/document_loader.py | 2 +- .../sessions_python_plugin.py | 2 +- .../functions/kernel_function_from_prompt.py | 3 +- .../utils/telemetry/__init__.py | 0 .../semantic_kernel/utils/telemetry/const.py | 28 ++ .../utils/telemetry/decorators.py | 265 ++++++++++++++++++ .../telemetry/user_agent.py} | 0 ...test_azure_ai_inference_chat_completion.py | 24 +- .../test_azure_ai_inference_text_embedding.py | 2 +- .../test_mistralai_chat_completion.py | 50 ++-- .../services/test_ollama_chat_completion.py | 8 +- .../services/test_ollama_text_completion.py | 4 +- .../services/test_azure_chat_completion.py | 14 +- .../services/test_azure_text_completion.py | 4 +- 
.../connectors/utils/test_document_loader.py | 2 +- .../unit/functions/test_kernel_plugins.py | 2 +- .../tests/unit/telemetry/test_user_agent.py | 34 +-- python/tests/unit/utils/test_tracing.py | 241 ++++++++++++++++ 31 files changed, 634 insertions(+), 89 deletions(-) create mode 100644 python/semantic_kernel/utils/telemetry/__init__.py create mode 100644 python/semantic_kernel/utils/telemetry/const.py create mode 100644 python/semantic_kernel/utils/telemetry/decorators.py rename python/semantic_kernel/{connectors/telemetry.py => utils/telemetry/user_agent.py} (100%) create mode 100644 python/tests/unit/utils/test_tracing.py diff --git a/python/poetry.lock b/python/poetry.lock index 83c52ee44f02..fa66ec67ab43 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -3248,6 +3248,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ + {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, ] @@ -7059,4 +7060,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "034b0c11304ba2e3e6668e7529c7b38fb74361aab5580077fd4ce24199095c3c" +content-hash = "c29fb1fca8d1da50daf3538331cce8f45bbdc9949d0699feaced0fe049787251" diff --git a/python/pyproject.toml b/python/pyproject.toml index c3ccf9fe1256..96557cf8aa7a 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -26,6 +26,11 @@ openai = ">=1.0" # openapi and swagger openapi_core = ">=0.18,<0.20" + +# OpenTelemetry +opentelemetry-api = "^1.24.0" +opentelemetry-sdk = "^1.24.0" + prance = "^23.6.21.0" # 
templating diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index 754f4752137a..f8a1adf295db 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -7,7 +7,7 @@ from functools import reduce from typing import TYPE_CHECKING, Any -from semantic_kernel.connectors.telemetry import SEMANTIC_KERNEL_USER_AGENT +from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_text_embedding.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_text_embedding.py index a1bb0a2c9fd9..882449da7dfe 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_text_embedding.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_text_embedding.py @@ -20,9 +20,9 @@ from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_settings import AzureAIInferenceSettings from semantic_kernel.connectors.ai.azure_ai_inference.services.azure_ai_inference_base import AzureAIInferenceBase from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase -from semantic_kernel.connectors.telemetry import SEMANTIC_KERNEL_USER_AGENT from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT if TYPE_CHECKING: from 
semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index e1823d571756..ae24136838c1 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -50,7 +50,7 @@ async def get_chat_message_content( Returns: A string representing the response from the LLM. """ - results = await self.get_chat_message_contents(chat_history, settings, **kwargs) + results = await self.get_chat_message_contents(chat_history=chat_history, settings=settings, **kwargs) if results: return results[0] # this should not happen, should error out before returning an empty list diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py index 6b6aa86d1c2c..980f39c51bab 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_config_base.py @@ -9,10 +9,10 @@ from semantic_kernel.connectors.ai.open_ai.const import DEFAULT_AZURE_API_VERSION from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler, OpenAIModelTypes -from semantic_kernel.connectors.telemetry import APP_INFO, prepend_semantic_kernel_to_user_agent from semantic_kernel.const import USER_AGENT from semantic_kernel.exceptions import ServiceInitializationError from semantic_kernel.kernel_pydantic import HttpsUrl +from semantic_kernel.utils.telemetry.user_agent import APP_INFO, prepend_semantic_kernel_to_user_agent logger: logging.Logger = logging.getLogger(__name__) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py 
b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index e5f4f5a81357..e71fd85ef265 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -5,7 +5,7 @@ import sys from collections.abc import AsyncGenerator from functools import reduce -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ClassVar if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -38,6 +38,7 @@ from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import ( AutoFunctionInvocationContext, ) +from semantic_kernel.utils.telemetry.decorators import trace_chat_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -56,6 +57,8 @@ class InvokeTermination(Exception): class OpenAIChatCompletionBase(OpenAIHandler, ChatCompletionClientBase): """OpenAI Chat completion class.""" + MODEL_PROVIDER_NAME: ClassVar[str] = "openai" + # region Overriding base class methods # most of the methods are overridden from the ChatCompletionClientBase class, otherwise it is mentioned @@ -64,6 +67,7 @@ def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"] return OpenAIChatPromptExecutionSettings @override + @trace_chat_completion(MODEL_PROVIDER_NAME) async def get_chat_message_contents( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py index b2463a1633d8..7ead64865445 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_config_base.py @@ -9,9 +9,9 @@ from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler 
import OpenAIHandler from semantic_kernel.connectors.ai.open_ai.services.open_ai_model_types import OpenAIModelTypes -from semantic_kernel.connectors.telemetry import APP_INFO, prepend_semantic_kernel_to_user_agent from semantic_kernel.const import USER_AGENT from semantic_kernel.exceptions import ServiceInitializationError +from semantic_kernel.utils.telemetry.user_agent import APP_INFO, prepend_semantic_kernel_to_user_agent logger: logging.Logger = logging.getLogger(__name__) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py index 29968b329ee2..8adc7d5d1fb6 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_completion_base.py @@ -3,7 +3,7 @@ import logging import sys from collections.abc import AsyncGenerator -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ClassVar if sys.version_info >= (3, 12): from typing import override # pragma: no cover @@ -26,6 +26,7 @@ from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase from semantic_kernel.contents.streaming_text_content import StreamingTextContent from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.utils.telemetry.decorators import trace_text_completion if TYPE_CHECKING: from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -34,11 +35,14 @@ class OpenAITextCompletionBase(OpenAIHandler, TextCompletionClientBase): + MODEL_PROVIDER_NAME: ClassVar[str] = "openai" + @override def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]: return OpenAITextPromptExecutionSettings @override + @trace_text_completion(MODEL_PROVIDER_NAME) async def get_text_contents( self, prompt: str, diff --git 
a/python/semantic_kernel/connectors/ai/text_completion_client_base.py b/python/semantic_kernel/connectors/ai/text_completion_client_base.py index 2b81baf9cd66..c03d30a6d2e3 100644 --- a/python/semantic_kernel/connectors/ai/text_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/text_completion_client_base.py @@ -40,7 +40,7 @@ async def get_text_content(self, prompt: str, settings: "PromptExecutionSettings Returns: TextContent: A string or list of strings representing the response(s) from the LLM. """ - result = await self.get_text_contents(prompt, settings) + result = await self.get_text_contents(prompt=prompt, settings=settings) if result: return result[0] # this should not happen, should error out before returning an empty list diff --git a/python/semantic_kernel/connectors/memory/astradb/astra_client.py b/python/semantic_kernel/connectors/memory/astradb/astra_client.py index aff8ce5da30f..8129b2b4f552 100644 --- a/python/semantic_kernel/connectors/memory/astradb/astra_client.py +++ b/python/semantic_kernel/connectors/memory/astradb/astra_client.py @@ -5,9 +5,9 @@ import aiohttp from semantic_kernel.connectors.memory.astradb.utils import AsyncSession -from semantic_kernel.connectors.telemetry import APP_INFO from semantic_kernel.exceptions import ServiceResponseException from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.user_agent import APP_INFO ASTRA_CALLER_IDENTITY: str SEMANTIC_KERNEL_VERSION = APP_INFO.get("Semantic-Kernel-Version") diff --git a/python/semantic_kernel/connectors/openapi_plugin/openapi_function_execution_parameters.py b/python/semantic_kernel/connectors/openapi_plugin/openapi_function_execution_parameters.py index 1468e200ab45..dd014a97e55c 100644 --- a/python/semantic_kernel/connectors/openapi_plugin/openapi_function_execution_parameters.py +++ b/python/semantic_kernel/connectors/openapi_plugin/openapi_function_execution_parameters.py @@ -28,7 +28,7 @@ class 
OpenAPIFunctionExecutionParameters(KernelBaseModel): def model_post_init(self, __context: Any) -> None: """Post initialization method for the model.""" - from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT + from semantic_kernel.utils.telemetry.user_agent import HTTP_USER_AGENT if self.server_url_override: parsed_url = urlparse(self.server_url_override) diff --git a/python/semantic_kernel/connectors/openapi_plugin/openapi_runner.py b/python/semantic_kernel/connectors/openapi_plugin/openapi_runner.py index 951a2c4d69fc..09869445dc05 100644 --- a/python/semantic_kernel/connectors/openapi_plugin/openapi_runner.py +++ b/python/semantic_kernel/connectors/openapi_plugin/openapi_runner.py @@ -17,10 +17,10 @@ ) from semantic_kernel.connectors.openapi_plugin.models.rest_api_operation_payload import RestApiOperationPayload from semantic_kernel.connectors.openapi_plugin.models.rest_api_operation_run_options import RestApiOperationRunOptions -from semantic_kernel.connectors.telemetry import APP_INFO, prepend_semantic_kernel_to_user_agent from semantic_kernel.exceptions.function_exceptions import FunctionExecutionException from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.utils.experimental_decorator import experimental_class +from semantic_kernel.utils.telemetry.user_agent import APP_INFO, prepend_semantic_kernel_to_user_agent logger: logging.Logger = logging.getLogger(__name__) diff --git a/python/semantic_kernel/connectors/utils/document_loader.py b/python/semantic_kernel/connectors/utils/document_loader.py index 74a0190b8bb1..5984c3b9bfce 100644 --- a/python/semantic_kernel/connectors/utils/document_loader.py +++ b/python/semantic_kernel/connectors/utils/document_loader.py @@ -6,8 +6,8 @@ from httpx import AsyncClient, HTTPStatusError, RequestError -from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT from semantic_kernel.exceptions import ServiceInvalidRequestError +from 
semantic_kernel.utils.telemetry.user_agent import HTTP_USER_AGENT logger: logging.Logger = logging.getLogger(__name__) diff --git a/python/semantic_kernel/core_plugins/sessions_python_tool/sessions_python_plugin.py b/python/semantic_kernel/core_plugins/sessions_python_tool/sessions_python_plugin.py index 63cf86a27c08..70849ce6827d 100644 --- a/python/semantic_kernel/core_plugins/sessions_python_tool/sessions_python_plugin.py +++ b/python/semantic_kernel/core_plugins/sessions_python_tool/sessions_python_plugin.py @@ -10,7 +10,6 @@ from httpx import AsyncClient, HTTPStatusError from pydantic import ValidationError -from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT, version_info from semantic_kernel.const import USER_AGENT from semantic_kernel.core_plugins.sessions_python_tool.sessions_python_settings import ( ACASessionsSettings, @@ -20,6 +19,7 @@ from semantic_kernel.exceptions.function_exceptions import FunctionExecutionException, FunctionInitializationError from semantic_kernel.functions.kernel_function_decorator import kernel_function from semantic_kernel.kernel_pydantic import HttpsUrl, KernelBaseModel +from semantic_kernel.utils.telemetry.user_agent import HTTP_USER_AGENT, version_info logger = logging.getLogger(__name__) diff --git a/python/semantic_kernel/functions/kernel_function_from_prompt.py b/python/semantic_kernel/functions/kernel_function_from_prompt.py index c83fba398eae..fb96ab5f3b71 100644 --- a/python/semantic_kernel/functions/kernel_function_from_prompt.py +++ b/python/semantic_kernel/functions/kernel_function_from_prompt.py @@ -187,7 +187,8 @@ async def _invoke_internal(self, context: FunctionInvocationContext) -> None: if isinstance(prompt_render_result.ai_service, TextCompletionClientBase): try: texts = await prompt_render_result.ai_service.get_text_contents( - unescape(prompt_render_result.rendered_prompt), prompt_render_result.execution_settings + prompt=unescape(prompt_render_result.rendered_prompt), + 
settings=prompt_render_result.execution_settings, ) except Exception as exc: raise FunctionExecutionException(f"Error occurred while invoking function {self.name}: {exc}") from exc diff --git a/python/semantic_kernel/utils/telemetry/__init__.py b/python/semantic_kernel/utils/telemetry/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/semantic_kernel/utils/telemetry/const.py b/python/semantic_kernel/utils/telemetry/const.py new file mode 100644 index 000000000000..5c74f708b986 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/const.py @@ -0,0 +1,28 @@ +# Copyright (c) Microsoft. All rights reserved. +# +# Constants for tracing activities with semantic conventions. + +# Activity tags +SYSTEM = "gen_ai.system" +OPERATION = "gen_ai.operation.name" +CHAT_COMPLETION_OPERATION = "chat.completions" +TEXT_COMPLETION_OPERATION = "text.completions" +MODEL = "gen_ai.request.model" +MAX_TOKENS = "gen_ai.request.max_tokens" # nosec +TEMPERATURE = "gen_ai.request.temperature" +TOP_P = "gen_ai.request.top_p" +RESPONSE_ID = "gen_ai.response.id" +FINISH_REASON = "gen_ai.response.finish_reason" +PROMPT_TOKENS = "gen_ai.response.prompt_tokens" # nosec +COMPLETION_TOKENS = "gen_ai.response.completion_tokens" # nosec +ADDRESS = "server.address" +PORT = "server.port" +ERROR_TYPE = "error.type" + +# Activity events +PROMPT_EVENT = "gen_ai.content.prompt" +COMPLETION_EVENT = "gen_ai.content.completion" + +# Activity event attributes +PROMPT_EVENT_PROMPT = "gen_ai.prompt" +COMPLETION_EVENT_COMPLETION = "gen_ai.completion" diff --git a/python/semantic_kernel/utils/telemetry/decorators.py b/python/semantic_kernel/utils/telemetry/decorators.py new file mode 100644 index 000000000000..366168ae3938 --- /dev/null +++ b/python/semantic_kernel/utils/telemetry/decorators.py @@ -0,0 +1,265 @@ +# Copyright (c) Microsoft. All rights reserved. +# +# Code to trace model activities with the OTel semantic conventions. 
+# This code contains experimental features and may change in the future. +# To enable these features, set one of the following environment variables to true: +# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS +# SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE + +import functools +import json +import os +from collections.abc import Callable +from typing import Any + +from opentelemetry.trace import Span, StatusCode, get_tracer, use_span + +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.utils.telemetry.const import ( + CHAT_COMPLETION_OPERATION, + COMPLETION_EVENT, + COMPLETION_EVENT_COMPLETION, + COMPLETION_TOKENS, + ERROR_TYPE, + FINISH_REASON, + MAX_TOKENS, + MODEL, + OPERATION, + PROMPT_EVENT, + PROMPT_EVENT_PROMPT, + PROMPT_TOKENS, + RESPONSE_ID, + SYSTEM, + TEMPERATURE, + TEXT_COMPLETION_OPERATION, + TOP_P, +) + +OTEL_ENABLED_ENV_VAR = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS" +OTEL_SENSITIVE_ENABLED_ENV_VAR = "SEMANTICKERNEL_EXPERIMENTAL_GENAI_ENABLE_OTEL_DIAGNOSTICS_SENSITIVE" + + +_enable_diagnostics = os.getenv(OTEL_ENABLED_ENV_VAR, "false").lower() in ("true", "1", "t") +_enable_sensitive_events = os.getenv(OTEL_SENSITIVE_ENABLED_ENV_VAR, "false").lower() in ("true", "1", "t") + +# Creates a tracer from the global tracer provider +tracer = get_tracer(__name__) + + +def are_model_diagnostics_enabled() -> bool: + """Check if model diagnostics are enabled. + + Model diagnostics are enabled if either _enable_diagnostics or _enable_sensitive_events is set. + """ + return _enable_diagnostics or _enable_sensitive_events + + +def are_sensitive_events_enabled() -> bool: + """Check if sensitive events are enabled. 
+ + Sensitive events are enabled if _enable_sensitive_events is set. + """ + return _enable_sensitive_events + + +def trace_chat_completion(model_provider: str) -> Callable: + """Decorator to trace chat completion activities.""" + + def inner_trace_chat_completion(completion_func: Callable) -> Callable: + @functools.wraps(completion_func) + async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[ChatMessageContent]: + chat_history: ChatHistory = kwargs["chat_history"] + settings: PromptExecutionSettings = kwargs["settings"] + + model_name = getattr(settings, "ai_model_id", None) or getattr(args[0], "ai_model_id", None) or "unknown" + + formatted_messages = ( + _messages_to_openai_format(chat_history.messages) if are_sensitive_events_enabled() else None + ) + span = _start_completion_activity( + CHAT_COMPLETION_OPERATION, model_name, model_provider, formatted_messages, settings + ) + + try: + completions: list[ChatMessageContent] = await completion_func(*args, **kwargs) + except Exception as exception: + if span: + _set_completion_error(span, exception) + span.end() + raise + + if span and completions: + with use_span(span, end_on_exit=True): + first_completion = completions[0] + response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get( + "id" + ) + usage = first_completion.metadata.get("usage", None) + prompt_tokens = getattr(usage, "prompt_tokens", None) + completion_tokens = getattr(usage, "completion_tokens", None) + + completion_text: str | None = ( + _messages_to_openai_format(completions) if are_sensitive_events_enabled() else None + ) + + finish_reasons: list[str] = [str(completion.finish_reason) for completion in completions] + + _set_completion_response( + span, + completion_text, + finish_reasons, + response_id or "unknown", + prompt_tokens, + completion_tokens, + ) + + return completions + + return wrapper_decorator + + return inner_trace_chat_completion + + +def trace_text_completion(model_provider: str) -> 
Callable: + """Decorator to trace text completion activities.""" + + def inner_trace_text_completion(completion_func: Callable) -> Callable: + @functools.wraps(completion_func) + async def wrapper_decorator(*args: Any, **kwargs: Any) -> list[TextContent]: + prompt: str = kwargs["prompt"] + settings: PromptExecutionSettings = kwargs["settings"] + + model_name = getattr(settings, "ai_model_id", None) or getattr(args[0], "ai_model_id", None) or "unknown" + + span = _start_completion_activity(TEXT_COMPLETION_OPERATION, model_name, model_provider, prompt, settings) + + try: + completions: list[TextContent] = await completion_func(*args, **kwargs) + except Exception as exception: + if span: + _set_completion_error(span, exception) + span.end() + raise + + if span and completions: + with use_span(span, end_on_exit=True): + first_completion = completions[0] + response_id = first_completion.metadata.get("id") or (first_completion.inner_content or {}).get( + "id" + ) + usage = first_completion.metadata.get("usage", None) + prompt_tokens = getattr(usage, "prompt_tokens", None) + completion_tokens = getattr(usage, "completion_tokens", None) + + completion_text: str | None = ( + json.dumps([completion.text for completion in completions]) + if are_sensitive_events_enabled() + else None + ) + + _set_completion_response( + span, + completion_text, + None, + response_id or "unknown", + prompt_tokens, + completion_tokens, + ) + + return completions + + return wrapper_decorator + + return inner_trace_text_completion + + +def _start_completion_activity( + operation_name: str, + model_name: str, + model_provider: str, + prompt: str | None, + execution_settings: PromptExecutionSettings | None, +) -> Span | None: + """Start a text or chat completion activity for a given model.""" + if not are_model_diagnostics_enabled(): + return None + + span = tracer.start_span(f"{operation_name} {model_name}") + + # Set attributes on the span + span.set_attributes( + { + OPERATION: operation_name, + 
+ SYSTEM: model_provider, + MODEL: model_name, + } + ) + + # TODO(@glahaye): we'll need to have a way to get these attributes from model + # providers other than OpenAI (for example if the attributes are named differently) + if execution_settings: + attribute = execution_settings.extension_data.get("max_tokens") + if attribute: + span.set_attribute(MAX_TOKENS, attribute) + + attribute = execution_settings.extension_data.get("temperature") + if attribute: + span.set_attribute(TEMPERATURE, attribute) + + attribute = execution_settings.extension_data.get("top_p") + if attribute: + span.set_attribute(TOP_P, attribute) + + if are_sensitive_events_enabled() and prompt: + span.add_event(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: prompt}) + + return span + + +def _set_completion_response( + span: Span, + completion_text: str | None, + finish_reasons: list[str] | None, + response_id: str, + prompt_tokens: int | None = None, + completion_tokens: int | None = None, +) -> None: + """Set a text or chat completion response for a given activity.""" + if not are_model_diagnostics_enabled(): + return + + span.set_attribute(RESPONSE_ID, response_id) + + if finish_reasons: + span.set_attribute(FINISH_REASON, ",".join(finish_reasons)) + + if prompt_tokens: + span.set_attribute(PROMPT_TOKENS, prompt_tokens) + + if completion_tokens: + span.set_attribute(COMPLETION_TOKENS, completion_tokens) + + if are_sensitive_events_enabled() and completion_text: + span.add_event(COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: completion_text}) + + +def _set_completion_error(span: Span, error: Exception) -> None: + """Set an error for a text or chat completion.""" + if not are_model_diagnostics_enabled(): + return + + span.set_attribute(ERROR_TYPE, str(type(error))) + + span.set_status(StatusCode.ERROR, repr(error)) + + +def _messages_to_openai_format(messages: list[ChatMessageContent]) -> str: + """Convert a list of ChatMessageContent to a string in the OpenAI format. 
+ + OpenTelemetry recommends formatting the messages in the OpenAI format + regardless of the actual model being used. + """ + return json.dumps([message.to_dict() for message in messages]) diff --git a/python/semantic_kernel/connectors/telemetry.py b/python/semantic_kernel/utils/telemetry/user_agent.py similarity index 100% rename from python/semantic_kernel/connectors/telemetry.py rename to python/semantic_kernel/utils/telemetry/user_agent.py diff --git a/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py b/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py index 7487ad631b41..4a9426c611b7 100644 --- a/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py +++ b/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_chat_completion.py @@ -13,7 +13,6 @@ ) from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_settings import AzureAIInferenceSettings from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.telemetry import SEMANTIC_KERNEL_USER_AGENT from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.utils.finish_reason import FinishReason from semantic_kernel.exceptions.service_exceptions import ( @@ -21,6 +20,7 @@ ServiceInvalidExecutionSettingsError, ) from semantic_kernel.functions.kernel_arguments import KernelArguments +from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT # region init @@ -123,7 +123,7 @@ async def test_azure_ai_inference_chat_completion( mock_complete.return_value = mock_azure_ai_inference_chat_completion_response - responses = await azure_ai_inference_service.get_chat_message_contents(chat_history, settings) + responses = await azure_ai_inference_service.get_chat_message_contents(chat_history=chat_history, settings=settings) 
mock_complete.assert_awaited_once_with( messages=[UserMessage(content=user_message_content)], @@ -164,7 +164,7 @@ async def test_azure_ai_inference_chat_completion_with_standard_parameters( mock_complete.return_value = mock_azure_ai_inference_chat_completion_response - responses = await azure_ai_inference_service.get_chat_message_contents(chat_history, settings) + responses = await azure_ai_inference_service.get_chat_message_contents(chat_history=chat_history, settings=settings) mock_complete.assert_awaited_once_with( messages=[UserMessage(content=user_message_content)], @@ -204,7 +204,7 @@ async def test_azure_ai_inference_chat_completion_with_extra_parameters( mock_complete.return_value = mock_azure_ai_inference_chat_completion_response - responses = await azure_ai_inference_service.get_chat_message_contents(chat_history, settings) + responses = await azure_ai_inference_service.get_chat_message_contents(chat_history=chat_history, settings=settings) mock_complete.assert_awaited_once_with( messages=[UserMessage(content=user_message_content)], @@ -235,8 +235,8 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior_ function_choice_behavior=FunctionChoiceBehavior.Auto(), ) await azure_ai_inference_service.get_chat_message_contents( - chat_history, - settings, + chat_history=chat_history, + settings=settings, arguments=KernelArguments(), ) @@ -247,8 +247,8 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior_ extra_parameters={"n": 2}, ) await azure_ai_inference_service.get_chat_message_contents( - chat_history, - settings, + chat_history=chat_history, + settings=settings, kernel=kernel, arguments=KernelArguments(), ) @@ -280,8 +280,8 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior( mock_complete.return_value = mock_azure_ai_inference_chat_completion_response_with_tool_call responses = await azure_ai_inference_service.get_chat_message_contents( - chat_history, - settings, + 
chat_history=chat_history, + settings=settings, kernel=kernel, arguments=KernelArguments(), ) @@ -320,8 +320,8 @@ async def test_azure_ai_inference_chat_completion_with_function_choice_behavior_ mock_complete.return_value = mock_azure_ai_inference_chat_completion_response responses = await azure_ai_inference_service.get_chat_message_contents( - chat_history, - settings, + chat_history=chat_history, + settings=settings, kernel=kernel, arguments=KernelArguments(), ) diff --git a/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_text_embedding.py b/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_text_embedding.py index 11f70ce3a2da..f399e7d2a6ee 100644 --- a/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_text_embedding.py +++ b/python/tests/unit/connectors/azure_ai_inference/services/test_azure_ai_inference_text_embedding.py @@ -11,8 +11,8 @@ AzureAIInferenceTextEmbedding, ) from semantic_kernel.connectors.ai.azure_ai_inference.azure_ai_inference_settings import AzureAIInferenceSettings -from semantic_kernel.connectors.telemetry import SEMANTIC_KERNEL_USER_AGENT from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError +from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT def test_azure_ai_inference_text_embedding_init(azure_ai_inference_unit_test_env, model_id) -> None: diff --git a/python/tests/unit/connectors/mistral_ai/services/test_mistralai_chat_completion.py b/python/tests/unit/connectors/mistral_ai/services/test_mistralai_chat_completion.py index ba1b0b51aa7b..1fe0a868a9ff 100644 --- a/python/tests/unit/connectors/mistral_ai/services/test_mistralai_chat_completion.py +++ b/python/tests/unit/connectors/mistral_ai/services/test_mistralai_chat_completion.py @@ -27,9 +27,7 @@ def mock_settings() -> MistralAIChatPromptExecutionSettings: def mock_mistral_ai_client_completion() -> MistralAsyncClient: client = 
MagicMock(spec=MistralAsyncClient) chat_completion_response = AsyncMock() - choices = [ - MagicMock(finish_reason="stop", message=MagicMock(role="assistant", content="Test")) - ] + choices = [MagicMock(finish_reason="stop", message=MagicMock(role="assistant", content="Test"))] chat_completion_response.choices = choices client.chat.return_value = chat_completion_response return client @@ -41,8 +39,8 @@ def mock_mistral_ai_client_completion_stream() -> MistralAsyncClient: chat_completion_response = MagicMock() choices = [ MagicMock(finish_reason="stop", delta=MagicMock(role="assistant", content="Test")), - MagicMock(finish_reason="stop", delta=MagicMock(role="assistant", content="Test", tool_calls=None)) - ] + MagicMock(finish_reason="stop", delta=MagicMock(role="assistant", content="Test", tool_calls=None)), + ] chat_completion_response.choices = choices chat_completion_response_empty = MagicMock() chat_completion_response_empty.choices = [] @@ -54,9 +52,9 @@ def mock_mistral_ai_client_completion_stream() -> MistralAsyncClient: @pytest.mark.asyncio async def test_complete_chat_contents( - kernel: Kernel, + kernel: Kernel, mock_settings: MistralAIChatPromptExecutionSettings, - mock_mistral_ai_client_completion: MistralAsyncClient + mock_mistral_ai_client_completion: MistralAsyncClient, ): chat_history = MagicMock() arguments = KernelArguments() @@ -65,7 +63,7 @@ async def test_complete_chat_contents( ) content: list[ChatMessageContent] = await chat_completion_base.get_chat_message_contents( - chat_history, mock_settings, kernel=kernel, arguments=arguments + chat_history=chat_history, settings=mock_settings, kernel=kernel, arguments=arguments ) assert content is not None @@ -74,15 +72,16 @@ async def test_complete_chat_contents( async def test_complete_chat_stream_contents( kernel: Kernel, mock_settings: MistralAIChatPromptExecutionSettings, - mock_mistral_ai_client_completion_stream: MistralAsyncClient + mock_mistral_ai_client_completion_stream: MistralAsyncClient, 
): chat_history = MagicMock() arguments = KernelArguments() chat_completion_base = MistralAIChatCompletion( - ai_model_id="test_model_id", - service_id="test", api_key="", - async_client=mock_mistral_ai_client_completion_stream + ai_model_id="test_model_id", + service_id="test", + api_key="", + async_client=mock_mistral_ai_client_completion_stream, ) async for content in chat_completion_base.get_streaming_chat_message_contents( @@ -99,14 +98,12 @@ async def test_mistral_ai_sdk_exception(kernel: Kernel, mock_settings: MistralAI client.chat.side_effect = Exception("Test Exception") chat_completion_base = MistralAIChatCompletion( - ai_model_id="test_model_id", - service_id="test", api_key="", - async_client=client + ai_model_id="test_model_id", service_id="test", api_key="", async_client=client ) with pytest.raises(ServiceResponseException): await chat_completion_base.get_chat_message_contents( - chat_history, mock_settings, kernel=kernel, arguments=arguments + chat_history=chat_history, settings=mock_settings, kernel=kernel, arguments=arguments ) @@ -126,7 +123,7 @@ async def test_mistral_ai_sdk_exception_streaming(kernel: Kernel, mock_settings: chat_history, mock_settings, kernel=kernel, arguments=arguments ): assert content is not None - + def test_mistral_ai_chat_completion_init(mistralai_unit_test_env) -> None: # Test successful initialization @@ -162,21 +159,16 @@ def test_prompt_execution_settings_class(mistralai_unit_test_env): @pytest.mark.asyncio -async def test_with_different_execution_settings( - kernel: Kernel, - mock_mistral_ai_client_completion: MagicMock -): +async def test_with_different_execution_settings(kernel: Kernel, mock_mistral_ai_client_completion: MagicMock): chat_history = MagicMock() settings = OpenAIChatPromptExecutionSettings(temperature=0.2, seed=2) arguments = KernelArguments() chat_completion_base = MistralAIChatCompletion( - ai_model_id="test_model_id", - service_id="test", api_key="", - async_client=mock_mistral_ai_client_completion 
+ ai_model_id="test_model_id", service_id="test", api_key="", async_client=mock_mistral_ai_client_completion ) await chat_completion_base.get_chat_message_contents( - chat_history, settings, kernel=kernel, arguments=arguments + chat_history=chat_history, settings=settings, kernel=kernel, arguments=arguments ) assert mock_mistral_ai_client_completion.chat.call_args.kwargs["temperature"] == 0.2 assert mock_mistral_ai_client_completion.chat.call_args.kwargs["seed"] == 2 @@ -184,16 +176,16 @@ async def test_with_different_execution_settings( @pytest.mark.asyncio async def test_with_different_execution_settings_stream( - kernel: Kernel, - mock_mistral_ai_client_completion_stream: MagicMock + kernel: Kernel, mock_mistral_ai_client_completion_stream: MagicMock ): chat_history = MagicMock() settings = OpenAIChatPromptExecutionSettings(temperature=0.2, seed=2) arguments = KernelArguments() chat_completion_base = MistralAIChatCompletion( ai_model_id="test_model_id", - service_id="test", api_key="", - async_client=mock_mistral_ai_client_completion_stream + service_id="test", + api_key="", + async_client=mock_mistral_ai_client_completion_stream, ) async for chunk in chat_completion_base.get_streaming_chat_message_contents( diff --git a/python/tests/unit/connectors/ollama/services/test_ollama_chat_completion.py b/python/tests/unit/connectors/ollama/services/test_ollama_chat_completion.py index 7e78e6d8a315..05f850ff7e8b 100644 --- a/python/tests/unit/connectors/ollama/services/test_ollama_chat_completion.py +++ b/python/tests/unit/connectors/ollama/services/test_ollama_chat_completion.py @@ -126,8 +126,8 @@ async def test_chat_completion(mock_chat_client, model_id, service_id, chat_hist ollama = OllamaChatCompletion(ai_model_id=model_id) response = await ollama.get_chat_message_contents( - chat_history, - OllamaChatPromptExecutionSettings(service_id=service_id, options=default_options), + chat_history=chat_history, + 
settings=OllamaChatPromptExecutionSettings(service_id=service_id, options=default_options), ) assert len(response) == 1 @@ -168,8 +168,8 @@ async def test_text_completion(mock_chat_client, model_id, service_id, prompt, d mock_chat_client.return_value = {"message": {"content": "test_response"}} ollama = OllamaChatCompletion(ai_model_id=model_id) response = await ollama.get_text_contents( - prompt, - OllamaTextPromptExecutionSettings(service_id=service_id, options=default_options), + prompt=prompt, + settings=OllamaTextPromptExecutionSettings(service_id=service_id, options=default_options), ) assert len(response) == 1 diff --git a/python/tests/unit/connectors/ollama/services/test_ollama_text_completion.py b/python/tests/unit/connectors/ollama/services/test_ollama_text_completion.py index 3992517d5cc4..eb2521dcc9fa 100644 --- a/python/tests/unit/connectors/ollama/services/test_ollama_text_completion.py +++ b/python/tests/unit/connectors/ollama/services/test_ollama_text_completion.py @@ -80,8 +80,8 @@ async def test_completion(mock_completion_client, model_id, service_id, prompt, ollama = OllamaTextCompletion(ai_model_id=model_id) response = await ollama.get_text_contents( - prompt, - OllamaTextPromptExecutionSettings(service_id=service_id, options=default_options), + prompt=prompt, + settings=OllamaTextPromptExecutionSettings(service_id=service_id, options=default_options), ) assert response[0].text == "test_response" diff --git a/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py index e18d223f6453..cfe96401ae9a 100644 --- a/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py +++ b/python/tests/unit/connectors/open_ai/services/test_azure_chat_completion.py @@ -653,7 +653,7 @@ async def test_content_filtering_raises_correct_exception( with pytest.raises(ContentFilterAIException, match="service encountered a content error") as exc_info: await 
azure_chat_completion.get_chat_message_contents( - chat_history, complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) content_filter_exc = exc_info.value @@ -696,7 +696,7 @@ async def test_content_filtering_without_response_code_raises_with_default_code( with pytest.raises(ContentFilterAIException, match="service encountered a content error"): await azure_chat_completion.get_chat_message_contents( - chat_history, complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) @@ -718,7 +718,7 @@ async def test_bad_request_non_content_filter( with pytest.raises(ServiceResponseException, match="service failed to complete the prompt"): await azure_chat_completion.get_chat_message_contents( - chat_history, complete_prompt_execution_settings, kernel=kernel + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel ) @@ -744,7 +744,9 @@ async def test_no_kernel_provided_throws_error( ServiceInvalidExecutionSettingsError, match="The kernel is required for OpenAI tool calls.", ): - await azure_chat_completion.get_chat_message_contents(chat_history, complete_prompt_execution_settings) + await azure_chat_completion.get_chat_message_contents( + chat_history=chat_history, settings=complete_prompt_execution_settings + ) @pytest.mark.asyncio @@ -769,7 +771,9 @@ async def test_auto_invoke_false_no_kernel_provided_throws_error( ServiceInvalidExecutionSettingsError, match="The kernel is required for OpenAI tool calls.", ): - await azure_chat_completion.get_chat_message_contents(chat_history, complete_prompt_execution_settings) + await azure_chat_completion.get_chat_message_contents( + chat_history=chat_history, settings=complete_prompt_execution_settings + ) @pytest.mark.asyncio diff --git a/python/tests/unit/connectors/open_ai/services/test_azure_text_completion.py 
b/python/tests/unit/connectors/open_ai/services/test_azure_text_completion.py index d188ac4416e5..f4fb67fe3ed4 100644 --- a/python/tests/unit/connectors/open_ai/services/test_azure_text_completion.py +++ b/python/tests/unit/connectors/open_ai/services/test_azure_text_completion.py @@ -103,7 +103,7 @@ async def test_call_with_parameters( complete_prompt_execution_settings = OpenAITextPromptExecutionSettings() azure_text_completion = AzureTextCompletion() - await azure_text_completion.get_text_contents(prompt, complete_prompt_execution_settings) + await azure_text_completion.get_text_contents(prompt=prompt, settings=complete_prompt_execution_settings) mock_create.assert_awaited_once_with( model=azure_openai_unit_test_env["AZURE_OPENAI_TEXT_DEPLOYMENT_NAME"], @@ -135,7 +135,7 @@ async def test_call_with_parameters_logit_bias_not_none( azure_text_completion = AzureTextCompletion() - await azure_text_completion.get_text_contents(prompt, complete_prompt_execution_settings) + await azure_text_completion.get_text_contents(prompt=prompt, settings=complete_prompt_execution_settings) mock_create.assert_awaited_once_with( model=azure_openai_unit_test_env["AZURE_OPENAI_TEXT_DEPLOYMENT_NAME"], diff --git a/python/tests/unit/connectors/utils/test_document_loader.py b/python/tests/unit/connectors/utils/test_document_loader.py index a7ca87e6cd18..349f4c697483 100644 --- a/python/tests/unit/connectors/utils/test_document_loader.py +++ b/python/tests/unit/connectors/utils/test_document_loader.py @@ -5,9 +5,9 @@ import pytest from httpx import AsyncClient, HTTPStatusError, RequestError -from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT from semantic_kernel.connectors.utils.document_loader import DocumentLoader from semantic_kernel.exceptions import ServiceInvalidRequestError +from semantic_kernel.utils.telemetry.user_agent import HTTP_USER_AGENT @pytest.fixture diff --git a/python/tests/unit/functions/test_kernel_plugins.py 
b/python/tests/unit/functions/test_kernel_plugins.py index 627357c23526..a30f16f6b2c4 100644 --- a/python/tests/unit/functions/test_kernel_plugins.py +++ b/python/tests/unit/functions/test_kernel_plugins.py @@ -13,7 +13,6 @@ from semantic_kernel.connectors.openai_plugin.openai_function_execution_parameters import ( OpenAIFunctionExecutionParameters, ) -from semantic_kernel.connectors.telemetry import HTTP_USER_AGENT from semantic_kernel.exceptions.function_exceptions import PluginInitializationError from semantic_kernel.functions import kernel_function from semantic_kernel.functions.kernel_function import KernelFunction @@ -22,6 +21,7 @@ from semantic_kernel.functions.kernel_plugin import KernelPlugin from semantic_kernel.prompt_template.input_variable import InputVariable from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig +from semantic_kernel.utils.telemetry.user_agent import HTTP_USER_AGENT @pytest.fixture diff --git a/python/tests/unit/telemetry/test_user_agent.py b/python/tests/unit/telemetry/test_user_agent.py index 8572ca01c3d0..72059dac2fdc 100644 --- a/python/tests/unit/telemetry/test_user_agent.py +++ b/python/tests/unit/telemetry/test_user_agent.py @@ -2,23 +2,23 @@ import importlib -from semantic_kernel.connectors.telemetry import ( +from semantic_kernel.const import USER_AGENT +from semantic_kernel.utils.telemetry.user_agent import ( HTTP_USER_AGENT, TELEMETRY_DISABLED_ENV_VAR, prepend_semantic_kernel_to_user_agent, ) -from semantic_kernel.const import USER_AGENT def test_append_to_existing_user_agent(monkeypatch): monkeypatch.setenv(TELEMETRY_DISABLED_ENV_VAR, "false") monkeypatch.setattr("importlib.metadata.version", lambda _: "1.0.0") - monkeypatch.setattr("semantic_kernel.connectors.telemetry.version_info", "1.0.0") + monkeypatch.setattr("semantic_kernel.utils.telemetry.user_agent.version_info", "1.0.0") # need to reload the module to get the updated version number - import semantic_kernel.connectors.telemetry + 
import semantic_kernel.utils.telemetry.user_agent - importlib.reload(semantic_kernel.connectors.telemetry) + importlib.reload(semantic_kernel.utils.telemetry.user_agent) headers = {USER_AGENT: "existing-agent"} expected = {USER_AGENT: f"{HTTP_USER_AGENT}/1.0.0 existing-agent"} @@ -29,12 +29,12 @@ def test_append_to_existing_user_agent(monkeypatch): def test_create_new_user_agent(monkeypatch): monkeypatch.setenv(TELEMETRY_DISABLED_ENV_VAR, "false") monkeypatch.setattr("importlib.metadata.version", lambda _: "1.0.0") - monkeypatch.setattr("semantic_kernel.connectors.telemetry.version_info", "1.0.0") + monkeypatch.setattr("semantic_kernel.utils.telemetry.user_agent.version_info", "1.0.0") # need to reload the module to get the updated version number - import semantic_kernel.connectors.telemetry + import semantic_kernel.utils.telemetry.user_agent - importlib.reload(semantic_kernel.connectors.telemetry) + importlib.reload(semantic_kernel.utils.telemetry.user_agent) headers = {} expected = {USER_AGENT: f"{HTTP_USER_AGENT}/1.0.0"} @@ -45,7 +45,7 @@ def test_create_new_user_agent(monkeypatch): def test_telemetry_disabled(monkeypatch): monkeypatch.setenv(TELEMETRY_DISABLED_ENV_VAR, "true") monkeypatch.setattr("importlib.metadata.version", lambda _: "1.0.0") - monkeypatch.setattr("semantic_kernel.connectors.telemetry.version_info", "1.0.0") + monkeypatch.setattr("semantic_kernel.utils.telemetry.user_agent.version_info", "1.0.0") headers = {} result = prepend_semantic_kernel_to_user_agent(headers) @@ -55,25 +55,25 @@ def test_telemetry_disabled(monkeypatch): def test_app_info_when_telemetry_enabled(monkeypatch): monkeypatch.setenv(TELEMETRY_DISABLED_ENV_VAR, "false") monkeypatch.setattr("importlib.metadata.version", lambda _: "1.0.0") - monkeypatch.setattr("semantic_kernel.connectors.telemetry.version_info", "1.0.0") + monkeypatch.setattr("semantic_kernel.utils.telemetry.user_agent.version_info", "1.0.0") # need to reload the module to get the updated APP_INFO - import 
semantic_kernel.connectors.telemetry + import semantic_kernel.utils.telemetry.user_agent - importlib.reload(semantic_kernel.connectors.telemetry) + importlib.reload(semantic_kernel.utils.telemetry.user_agent) expected = {"semantic-kernel-version": "python/1.0.0"} - assert expected == semantic_kernel.connectors.telemetry.APP_INFO + assert expected == semantic_kernel.utils.telemetry.user_agent.APP_INFO def test_app_info_when_telemetry_disabled(monkeypatch): monkeypatch.setenv(TELEMETRY_DISABLED_ENV_VAR, "true") monkeypatch.setattr("importlib.metadata.version", lambda _: "1.0.0") - monkeypatch.setattr("semantic_kernel.connectors.telemetry.version_info", "1.0.0") + monkeypatch.setattr("semantic_kernel.utils.telemetry.user_agent.version_info", "1.0.0") # need to reload the module to get the updated APP_INFO - import semantic_kernel.connectors.telemetry + import semantic_kernel.utils.telemetry.user_agent - importlib.reload(semantic_kernel.connectors.telemetry) + importlib.reload(semantic_kernel.utils.telemetry.user_agent) - assert semantic_kernel.connectors.telemetry.APP_INFO is None + assert semantic_kernel.utils.telemetry.user_agent.APP_INFO is None diff --git a/python/tests/unit/utils/test_tracing.py b/python/tests/unit/utils/test_tracing.py new file mode 100644 index 000000000000..5d2c2f9e4bf6 --- /dev/null +++ b/python/tests/unit/utils/test_tracing.py @@ -0,0 +1,241 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from unittest.mock import patch + +import pytest +from openai.types import Completion as TextCompletion +from openai.types import CompletionChoice +from opentelemetry.trace import StatusCode + +from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion +from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base import OpenAIChatCompletionBase +from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion import OpenAITextCompletion +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.contents.utils.finish_reason import FinishReason +from semantic_kernel.exceptions.service_exceptions import ServiceResponseException +from semantic_kernel.utils.telemetry.const import ( + CHAT_COMPLETION_OPERATION, + COMPLETION_EVENT, + COMPLETION_EVENT_COMPLETION, + ERROR_TYPE, + FINISH_REASON, + MAX_TOKENS, + MODEL, + OPERATION, + PROMPT_EVENT, + PROMPT_EVENT_PROMPT, + RESPONSE_ID, + SYSTEM, + TEMPERATURE, + TEXT_COMPLETION_OPERATION, + TOP_P, +) + +TEST_CONTENT = "Test content" +TEST_RESPONSE_ID = "dummy_id" +TEST_MAX_TOKENS = "1000" +TEST_MODEL = "dummy_model" +TEST_TEMPERATURE = "0.5" +TEST_TOP_P = "0.9" +TEST_CREATED_AT = 1 +TEST_TEXT_PROMPT = "Test prompt" +EXPECTED_CHAT_COMPLETION_EVENT_PAYLOAD = f'[{{"role": "assistant", "content": "{TEST_CONTENT}"}}]' +EXPECTED_TEXT_COMPLETION_EVENT_PAYLOAD = f'["{TEST_CONTENT}"]' + +TEST_CHAT_RESPONSE = [ + ChatMessageContent( + role=AuthorRole.ASSISTANT, + ai_model_id=TEST_MODEL, + content=TEST_CONTENT, + metadata={"id": TEST_RESPONSE_ID}, + finish_reason=FinishReason.STOP, + ) +] + +TEST_TEXT_RESPONSE = TextCompletion( + 
model=TEST_MODEL, + text=TEST_CONTENT, + id=TEST_RESPONSE_ID, + choices=[CompletionChoice(index=0, text=TEST_CONTENT, finish_reason="stop")], + created=TEST_CREATED_AT, + object="text_completion", +) + +TEST_TEXT_RESPONSE_METADATA = { + "id": TEST_RESPONSE_ID, + "created": TEST_CREATED_AT, + "system_fingerprint": None, + "logprobs": None, + "usage": None, +} + +EXPECTED_TEXT_CONTENT = [ + TextContent( + ai_model_id=TEST_MODEL, + text=TEST_CONTENT, + encoding=None, + metadata=TEST_TEXT_RESPONSE_METADATA, + inner_content=TEST_TEXT_RESPONSE, + ) +] + + +@pytest.mark.asyncio +@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) +@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) +@patch( + "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._send_chat_request", + return_value=TEST_CHAT_RESPONSE, +) +@patch("opentelemetry.trace.INVALID_SPAN") +async def test_trace_chat_completion( + mock_span, + mock_send_chat_request, + mock_sensitive_events_enabled, + mock_model_diagnostics_enabled, + openai_unit_test_env, +): + chat_completion = OpenAIChatCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") + extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} + + results: list[ChatMessageContent] = await chat_completion.get_chat_message_contents( + chat_history=ChatHistory(), settings=PromptExecutionSettings(extension_data=extension_data) + ) + + assert results == TEST_CHAT_RESPONSE + + mock_span.set_attributes.assert_called_with( + { + OPERATION: CHAT_COMPLETION_OPERATION, + SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, + MODEL: TEST_MODEL, + } + ) + mock_span.set_attribute.assert_any_call(MAX_TOKENS, TEST_MAX_TOKENS) + mock_span.set_attribute.assert_any_call(TEMPERATURE, TEST_TEMPERATURE) + mock_span.set_attribute.assert_any_call(TOP_P, TEST_TOP_P) + 
mock_span.add_event.assert_any_call(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: "[]"}) + + mock_span.set_attribute.assert_any_call(RESPONSE_ID, TEST_RESPONSE_ID) + mock_span.set_attribute.assert_any_call(FINISH_REASON, str(FinishReason.STOP)) + mock_span.add_event.assert_any_call( + COMPLETION_EVENT, {COMPLETION_EVENT_COMPLETION: EXPECTED_CHAT_COMPLETION_EVENT_PAYLOAD} + ) + + +@pytest.mark.asyncio +@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) +@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) +@patch( + "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base.OpenAITextCompletionBase._send_request", + return_value=TEST_TEXT_RESPONSE, +) +@patch("opentelemetry.trace.INVALID_SPAN") +async def test_trace_text_completion( + mock_span, mock_send_request, mock_sensitive_events_enabled, mock_model_diagnostics_enabled, openai_unit_test_env +): + chat_completion = OpenAITextCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") + extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} + + results: list[TextContent] = await chat_completion.get_text_contents( + prompt=TEST_TEXT_PROMPT, settings=PromptExecutionSettings(extension_data=extension_data) + ) + + assert results == EXPECTED_TEXT_CONTENT + + mock_span.set_attributes.assert_called_with( + { + OPERATION: TEXT_COMPLETION_OPERATION, + SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, + MODEL: TEST_MODEL, + } + ) + mock_span.set_attribute.assert_any_call(MAX_TOKENS, TEST_MAX_TOKENS) + mock_span.set_attribute.assert_any_call(TEMPERATURE, TEST_TEMPERATURE) + mock_span.set_attribute.assert_any_call(TOP_P, TEST_TOP_P) + mock_span.add_event.assert_any_call(PROMPT_EVENT, {PROMPT_EVENT_PROMPT: TEST_TEXT_PROMPT}) + + mock_span.set_attribute.assert_any_call(RESPONSE_ID, TEST_RESPONSE_ID) + mock_span.add_event.assert_any_call( + COMPLETION_EVENT, 
{COMPLETION_EVENT_COMPLETION: EXPECTED_TEXT_COMPLETION_EVENT_PAYLOAD} + ) + + +@pytest.mark.asyncio +@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) +@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) +@patch( + "semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base.OpenAIChatCompletionBase._send_chat_request", + side_effect=ServiceResponseException, +) +@patch("opentelemetry.trace.INVALID_SPAN") +async def test_trace_chat_completion_exception( + mock_span, + mock_send_chat_request, + mock_sensitive_events_enabled, + mock_model_diagnostics_enabled, + openai_unit_test_env, +): + chat_completion = OpenAIChatCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") + extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} + + with pytest.raises(ServiceResponseException): + await chat_completion.get_chat_message_contents( + chat_history=ChatHistory(), settings=PromptExecutionSettings(extension_data=extension_data) + ) + + mock_span.set_attributes.assert_called_with( + { + OPERATION: CHAT_COMPLETION_OPERATION, + SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, + MODEL: TEST_MODEL, + } + ) + + exception = ServiceResponseException() + mock_span.set_attribute.assert_any_call(ERROR_TYPE, str(type(exception))) + mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) + + mock_span.end.assert_any_call() + + +@pytest.mark.asyncio +@patch("semantic_kernel.utils.telemetry.decorators.are_model_diagnostics_enabled", return_value=True) +@patch("semantic_kernel.utils.telemetry.decorators.are_sensitive_events_enabled", return_value=True) +@patch( + "semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base.OpenAITextCompletionBase._send_request", + side_effect=ServiceResponseException, +) +@patch("opentelemetry.trace.INVALID_SPAN") +async def 
test_trace_text_completion_exception( + mock_span, + mock_send_chat_request, + mock_sensitive_events_enabled, + mock_model_diagnostics_enabled, + openai_unit_test_env, +): + chat_completion = OpenAITextCompletion(ai_model_id=TEST_MODEL, env_file_path="test.env") + extension_data = {"max_tokens": TEST_MAX_TOKENS, "temperature": TEST_TEMPERATURE, "top_p": TEST_TOP_P} + + with pytest.raises(ServiceResponseException): + await chat_completion.get_text_contents( + prompt=TEST_TEXT_PROMPT, settings=PromptExecutionSettings(extension_data=extension_data) + ) + + mock_span.set_attributes.assert_called_with( + { + OPERATION: TEXT_COMPLETION_OPERATION, + SYSTEM: OpenAIChatCompletionBase.MODEL_PROVIDER_NAME, + MODEL: TEST_MODEL, + } + ) + + exception = ServiceResponseException() + mock_span.set_attribute.assert_any_call(ERROR_TYPE, str(type(exception))) + mock_span.set_status.assert_any_call(StatusCode.ERROR, repr(exception)) + + mock_span.end.assert_any_call()