From 3d0a890e711a1381bcf695c3c9b7ca2ec6b38333 Mon Sep 17 00:00:00 2001 From: Evan Mattson <35585003+moonbox3@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:12:32 -0400 Subject: [PATCH] Python: Support OpenAI json_schema response format (#8958) ### Motivation and Context OpenAI's August 6th model release added the ability to specify a json_schema response format. The caller supplies either a Pydantic model or a JSON schema representing a "strict" model/class, and the service ensures that the model responds with exactly the schema that was provided. Note that strict tool calling will be handled via a separate ADR/work item once agreed upon by all three SK languages. ### Description This PR introduces the ability to specify either a Pydantic model or a non-Pydantic model as the json_schema response format on OpenAIChatPromptExecutionSettings. - Two new concept code samples show how to do this with both a Pydantic and a non-Pydantic model. The second sample additionally shows how to configure a json_schema response format while using function calling. - If the model is unable to respond based on the provided schema, the chat completion message contents will contain a `refusal` attribute. This is included in the response as a TextContent item explaining why the request couldn't be fulfilled. - Unit tests added for this new functionality. - Handles #7946 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- python/pyproject.toml | 1 - python/samples/concepts/README.md | 1 + ...penai_function_calling_stepwise_planner.py | 4 +- .../concepts/structured_output/README.md | 16 ++ .../json_structured_output.py | 135 +++++++++++++++ ...json_structured_output_function_calling.py | 163 ++++++++++++++++++ .../open_ai_prompt_execution_settings.py | 38 +++- .../services/open_ai_chat_completion_base.py | 2 + .../ai/open_ai/services/open_ai_handler.py | 31 +++- .../ai/prompt_execution_settings.py | 1 + .../utils/structured_output_schema.py | 11 ++ .../functions/kernel_parameter_metadata.py | 9 +- .../schema/kernel_json_schema_builder.py | 58 +++++-- .../utils/authentication/__init__.py | 5 + .../completions/test_chat_completions.py | 24 ++- .../test_open_ai_chat_completion_base.py | 70 ++++++++ .../open_ai/test_openai_request_settings.py | 62 +++++++ .../tests/unit/schema/test_schema_builder.py | 87 ++++++++++ python/uv.lock | 8 +- 19 files changed, 694 insertions(+), 32 deletions(-) create mode 100644 python/samples/concepts/structured_output/README.md create mode 100644 python/samples/concepts/structured_output/json_structured_output.py create mode 100644 python/samples/concepts/structured_output/json_structured_output_function_calling.py create mode 100644 python/semantic_kernel/connectors/utils/structured_output_schema.py create mode 100644 python/semantic_kernel/utils/authentication/__init__.py diff --git a/python/pyproject.toml b/python/pyproject.toml index af8ecd8600ac..75d2181a1d50 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -41,7 +41,6 @@ dependencies = [ # OpenTelemetry "opentelemetry-api ~= 1.24", "opentelemetry-sdk ~= 
1.24", - "prance ~= 23.6.21.0", # templating diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md index 0ef6120ad285..06c660b1cde1 100644 --- a/python/samples/concepts/README.md +++ b/python/samples/concepts/README.md @@ -22,6 +22,7 @@ This section contains code snippets that demonstrate the usage of Semantic Kerne | Search | Using search services information | | Service Selector | Shows how to create and use a custom service selector class. | | Setup | How to setup environment variables for Semantic Kernel | +| Structured Output | How to leverage OpenAI's json_schema structured output functionality. | | TextGeneration | Using [`TextGeneration`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/connectors/ai/text_completion_client_base.py) capable service with models | # Configuring the Kernel diff --git a/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py b/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py index c98efa3a5a5c..6627a2a7fb26 100644 --- a/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py +++ b/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py @@ -20,8 +20,8 @@ async def main(): ) plugin_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.realpath(__file__))), - "resources", + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), + "resources", ) kernel.add_plugin(parent_directory=plugin_path, plugin_name="email_plugin") diff --git a/python/samples/concepts/structured_output/README.md b/python/samples/concepts/structured_output/README.md new file mode 100644 index 000000000000..50d20964f881 --- /dev/null +++ b/python/samples/concepts/structured_output/README.md @@ -0,0 +1,16 @@ +# OpenAI Structured Outputs + +## Supported Models + +### Azure OpenAI: + +- Access to `gpt-4o-2024-08-06` or later +- The `2024-08-01-preview` API version +- If using a token instead of an API key, you must have the `Cognitive Services OpenAI Contributor` role assigned to your Azure AD user. +- See more information [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/structured-outputs?tabs=python-secure) + +### OpenAI: + +- The OpenAI models supported are: + - `gpt-4o-mini-2024-07-18` and later + - `gpt-4o-2024-08-06` and later diff --git a/python/samples/concepts/structured_output/json_structured_output.py b/python/samples/concepts/structured_output/json_structured_output.py new file mode 100644 index 000000000000..f6ea600cd56f --- /dev/null +++ b/python/samples/concepts/structured_output/json_structured_output.py @@ -0,0 +1,135 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion +from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent + +################################################################### +# The following sample demonstrates how to create a chat # +# completion call that assists users in solving math problems. 
# +# The bot guides the user step-by-step through the solution # +# process using a structured output format based on either a # +# Pydantic model or a non-Pydantic model. # +################################################################### + + +################################################################### +# NOTE: If using Azure OpenAI, the following is required: +# - access to gpt-4o-2024-08-06 +# - the 2024-08-01-preview API version +# - if using a token instead of an API key, you must have the +# `Cognitive Services OpenAI Contributor` role assigned to your +# Azure AD user. +# - flip the `use_azure_openai` flag to `True` +################################################################### +use_azure_openai = False + +system_message = """ +You are a helpful math tutor. Guide the user through the solution step by step. +""" + + +################################################################### +# OPTION 1: Define the Pydantic model that represents the +# structured output from the OpenAI service. This model will be +# used to parse the structured output, ensuring that the service +# outputs the exact schema defined by the Pydantic model. +from semantic_kernel.kernel_pydantic import KernelBaseModel # noqa: E402 + + +class Step(KernelBaseModel): + explanation: str + output: str + + +class Reasoning(KernelBaseModel): + steps: list[Step] + final_answer: str + + +################################################################### + + +# OPTION 2: Define a non-Pydantic model that represents the +# structured output from the OpenAI service. This model will be +# converted to the proper JSON Schema and sent to the LLM. +# Uncomment the following lines and comment out the Pydantic model +# above to use this option. +# class Step: +# explanation: str +# output: str + + +# class Reasoning: +# steps: list[Step] +# final_answer: str + + +################################################################### + +kernel = Kernel() + +service_id = "structured-output" +if use_azure_openai: + chat_service = AzureChatCompletion( + service_id=service_id, + ) +else: + chat_service = OpenAIChatCompletion( + service_id=service_id, + ) +kernel.add_service(chat_service) + +req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) +req_settings.max_tokens = 2000 +req_settings.temperature = 0.7 +req_settings.top_p = 0.8 +req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": ["chat"]}) + +# NOTE: This is the key setting in this example that tells the OpenAI service +# to return structured output based on the Pydantic model Reasoning. 
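+# A response_format dictionary in OpenAI's json_schema shape is also accepted +# and is passed through to the service without modification, for example +# (full schema elided here; see the unit tests for a complete example): +# req_settings.response_format = { +#     "type": "json_schema", +#     "json_schema": {"name": "Reasoning", "strict": True, "schema": {...}}, +# }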
+req_settings.response_format = Reasoning + + +chat_function = kernel.add_function( + prompt=system_message + """{{$chat_history}}""", + function_name="chat", + plugin_name="chat", + prompt_execution_settings=req_settings, +) + +history = ChatHistory() +history.add_user_message("how can I solve 8x + 7y = -23, and 4x=12?") + + +async def main(): + stream = True + if stream: + answer = kernel.invoke_stream( + chat_function, + chat_history=history, + ) + print("Mosscap:> ", end="") + result_content: list[StreamingChatMessageContent] = [] + async for message in answer: + result_content.append(message[0]) + print(str(message[0]), end="", flush=True) + if result_content: + result = "".join([str(content) for content in result_content]) + else: + result = await kernel.invoke( + chat_function, + chat_history=history, + ) + print(f"Mosscap:> {result}") + history.add_assistant_message(str(result)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/structured_output/json_structured_output_function_calling.py b/python/samples/concepts/structured_output/json_structured_output_function_calling.py new file mode 100644 index 000000000000..d2091dbf5f73 --- /dev/null +++ b/python/samples/concepts/structured_output/json_structured_output_function_calling.py @@ -0,0 +1,163 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import Annotated + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion +from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.functions.kernel_function_decorator import kernel_function + +################################################################### +# The following sample demonstrates how to create a chat # +# completion call that assists users in answering a question # +# using a Semantic Kernel plugin and function calling. # +# The chat function guides the user step-by-step through the # +# solution process using a structured output format based on # +# either a Pydantic model or a non-Pydantic model. # +################################################################### + + +################################################################### +# NOTE: If using Azure OpenAI, the following is required: +# - access to gpt-4o-2024-08-06 +# - the 2024-08-01-preview API version +# - if using a token instead of an API key, you must have the +# `Cognitive Services OpenAI Contributor` role assigned to your +# Azure AD user. +# - flip the `use_azure_openai` flag to `True` +################################################################### +use_azure_openai = True + +system_message = """ +You are a helpful assistant. Use the available plugins to answer the user's question, and explain your reasoning step by step. 
+""" + + +# Define a sample plugin to use for function calling +class WeatherPlugin: + """A sample plugin that provides weather information for cities.""" + + @kernel_function(name="get_weather_for_city", description="Get the weather for a city") + def get_weather_for_city(self, city: Annotated[str, "The input city"]) -> Annotated[str, "The output is a string"]: + if city == "Boston": + return "61 and rainy" + if city == "London": + return "55 and cloudy" + if city == "Miami": + return "80 and sunny" + if city == "Paris": + return "60 and rainy" + if city == "Tokyo": + return "50 and sunny" + if city == "Sydney": + return "75 and sunny" + if city == "Tel Aviv": + return "80 and sunny" + return "31 and snowing" + + +################################################################### +# OPTION 1: Define the Pydantic model that represents the +# structured output from the OpenAI service. This model will be +# used to parse the structured output from the OpenAI service, +# and ensure that the model correctly outputs the schema based +# on the Pydantic model. +from semantic_kernel.kernel_pydantic import KernelBaseModel # noqa: E402 + + +class Step(KernelBaseModel): + explanation: str + output: str + + +class Reasoning(KernelBaseModel): + steps: list[Step] + final_answer: str + + +################################################################### + + +# OPTION 2: Define a non-Pydantic model that should represent the +# structured output from the OpenAI service. This model will be +# converted to the proper JSON Schema and sent to the LLM. +# Uncomment the follow lines and comment out the Pydantic model +# above to use this option. +# class Step: +# explanation: str +# output: str + + +# class Reasoning: +# steps: list[Step] +# final_answer: str + + +################################################################### + +kernel = Kernel() + +service_id = "structured-output" +if use_azure_openai: + chat_service = AzureChatCompletion( + service_id=service_id, + ) +else: + chat_service = OpenAIChatCompletion( + service_id=service_id, + ) +kernel.add_service(chat_service) + +kernel.add_plugin(WeatherPlugin(), plugin_name="weather") + +req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) +req_settings.max_tokens = 2000 +req_settings.temperature = 0.7 +req_settings.top_p = 0.8 +req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": ["chat"]}) + +# NOTE: This is the key setting in this example that tells the OpenAI service +# to return structured output based on the Pydantic model Reasoning. 
+req_settings.response_format = Reasoning + + +chat_function = kernel.add_function( + prompt=system_message + """{{$chat_history}}""", + function_name="chat", + plugin_name="chat", + prompt_execution_settings=req_settings, +) + +history = ChatHistory() +history.add_user_message("Using the available plugin, what is the weather in Paris?") + + +async def main(): + stream = True + if stream: + answer = kernel.invoke_stream( + chat_function, + chat_history=history, + ) + print("Mosscap:> ", end="") + result_content: list[StreamingChatMessageContent] = [] + async for message in answer: + result_content.append(message[0]) + print(str(message[0]), end="", flush=True) + if result_content: + result = "".join([str(content) for content in result_content]) + else: + result = await kernel.invoke( + chat_function, + chat_history=history, + ) + print(f"Mosscap:> {result}") + history.add_assistant_message(str(result)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py index f4aa7868f5fc..c8db7dd0b2cc 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py @@ -9,7 +9,7 @@ else: from typing_extensions import Self # pragma: no cover -from pydantic import Field, field_validator, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -61,7 +61,9 @@ def check_best_of_and_n(self) -> "OpenAITextPromptExecutionSettings": class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): """Specific settings for the Chat Completion endpoint.""" - response_format: dict[Literal["type"], Literal["text", "json_object"]] | None = None + response_format: ( + dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None + ) = None function_call: str | None = None functions: list[dict[str, Any]] | None = None messages: list[dict[str, Any]] | None = None @@ -75,6 +77,7 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): None, description="Do not set this manually. It is set by the service based on the function choice configuration.", ) + structured_json_response: bool = Field(False, description="Do not set this manually. 
It is set by the service.") @field_validator("functions", "function_call", mode="after") @classmethod @@ -86,6 +89,37 @@ def validate_function_call(cls, v: str | list[dict[str, Any]] | None = None): ) return v + @model_validator(mode="before") + def validate_response_format_and_set_flag(cls, values) -> Any: + """Validate the response_format and set structured_json_response accordingly.""" + response_format = values.get("response_format", None) + + if response_format is None: + return values + + if isinstance(response_format, dict): + if response_format.get("type") == "json_object": + return values + if response_format.get("type") == "json_schema": + json_schema = response_format.get("json_schema") + if isinstance(json_schema, dict): + values["structured_json_response"] = True + return values + raise ServiceInvalidExecutionSettingsError( + "If response_format has type 'json_schema', 'json_schema' must be a valid dictionary." + ) + if isinstance(response_format, type): + if issubclass(response_format, BaseModel): + values["structured_json_response"] = True + else: + values["structured_json_response"] = True + else: + raise ServiceInvalidExecutionSettingsError( + "response_format must be a dictionary, a subclass of BaseModel, a Python class/type, or None" + ) + + return values + @model_validator(mode="before") @classmethod def validate_function_calling_behaviors(cls, data) -> Any: diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index 3d74094e342c..ada65b07784c 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -155,6 +155,8 @@ def _create_chat_message_content( items.extend(self._get_function_call_from_chat_choice(choice)) if choice.message.content: items.append(TextContent(text=choice.message.content)) + elif hasattr(choice.message, "refusal") and choice.message.refusal: + items.append(TextContent(text=choice.message.refusal)) return ChatMessageContent( inner_content=response, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py index c65e0bc01989..0af2cbd44c75 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py @@ -5,17 +5,22 @@ from typing import Any from openai import AsyncOpenAI, AsyncStream, BadRequestError +from openai.lib._parsing._completions import type_to_response_format_param from openai.types import Completion, CreateEmbeddingResponse from openai.types.chat import ChatCompletion, ChatCompletionChunk +from pydantic import BaseModel from semantic_kernel.connectors.ai.open_ai.exceptions.content_filter_ai_exception import ContentFilterAIException from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( + OpenAIChatPromptExecutionSettings, OpenAIEmbeddingPromptExecutionSettings, OpenAIPromptExecutionSettings, ) from semantic_kernel.connectors.ai.open_ai.services.open_ai_model_types import OpenAIModelTypes +from semantic_kernel.connectors.utils.structured_output_schema import generate_structured_output_response_format_schema from semantic_kernel.exceptions import ServiceResponseException from semantic_kernel.kernel_pydantic import 
KernelBaseModel +from semantic_kernel.schema.kernel_json_schema_builder import KernelJsonSchemaBuilder logger: logging.Logger = logging.getLogger(__name__) @@ -35,10 +40,13 @@ async def _send_request( ) -> ChatCompletion | Completion | AsyncStream[ChatCompletionChunk] | AsyncStream[Completion]: """Execute the appropriate call to OpenAI models.""" try: + settings = request_settings.prepare_settings_dict() if self.ai_model_type == OpenAIModelTypes.CHAT: - response = await self.client.chat.completions.create(**request_settings.prepare_settings_dict()) + assert isinstance(request_settings, OpenAIChatPromptExecutionSettings) # nosec + self._handle_structured_output(request_settings, settings) + response = await self.client.chat.completions.create(**settings) else: - response = await self.client.completions.create(**request_settings.prepare_settings_dict()) + response = await self.client.completions.create(**settings) self.store_usage(response) return response except BadRequestError as ex: @@ -68,6 +76,25 @@ async def _send_embedding_request(self, settings: OpenAIEmbeddingPromptExecution ex, ) from ex + def _handle_structured_output( + self, request_settings: OpenAIChatPromptExecutionSettings, settings: dict[str, Any] + ) -> None: + response_format = getattr(request_settings, "response_format", None) + if getattr(request_settings, "structured_json_response", False) and response_format: + # Case 1: response_format is a type and subclass of BaseModel + if isinstance(response_format, type) and issubclass(response_format, BaseModel): + settings["response_format"] = type_to_response_format_param(response_format) + # Case 2: response_format is a type but not a subclass of BaseModel + elif isinstance(response_format, type): + generated_schema = KernelJsonSchemaBuilder.build(parameter_type=response_format, structured_output=True) + assert generated_schema is not None # nosec + settings["response_format"] = generate_structured_output_response_format_schema( + name=response_format.__name__, schema=generated_schema + ) + # Case 3: response_format is a dictionary, pass it without modification + elif isinstance(response_format, dict): + settings["response_format"] = response_format + def store_usage( self, response: ChatCompletion diff --git a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py index b683a58e17fa..4c3abc8f5419 100644 --- a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py @@ -79,6 +79,7 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: exclude={ "service_id", "extension_data", + "structured_json_response", }, exclude_none=True, by_alias=True, diff --git a/python/semantic_kernel/connectors/utils/structured_output_schema.py b/python/semantic_kernel/connectors/utils/structured_output_schema.py new file mode 100644 index 000000000000..d274ed8e7fc5 --- /dev/null +++ b/python/semantic_kernel/connectors/utils/structured_output_schema.py @@ -0,0 +1,11 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from typing import Any + + +def generate_structured_output_response_format_schema(name: str, schema: dict[str, Any]) -> dict[str, Any]: + """Generate the structured output response format schema.""" + return { + "type": "json_schema", + "json_schema": {"name": name, "strict": True, "schema": schema}, + } diff --git a/python/semantic_kernel/functions/kernel_parameter_metadata.py b/python/semantic_kernel/functions/kernel_parameter_metadata.py index 20d6151bbb0b..7ddf115e7d88 100644 --- a/python/semantic_kernel/functions/kernel_parameter_metadata.py +++ b/python/semantic_kernel/functions/kernel_parameter_metadata.py @@ -36,13 +36,18 @@ def form_schema(cls, data: Any) -> Any: @classmethod def infer_schema( - cls, type_object: type | None, parameter_type: str | None, default_value: Any, description: str | None + cls, + type_object: type | None = None, + parameter_type: str | None = None, + default_value: Any | None = None, + description: str | None = None, + structured_output: bool = False, ) -> dict[str, Any] | None: """Infer the schema for the parameter metadata.""" schema = None if type_object is not None: - schema = KernelJsonSchemaBuilder.build(type_object, description) + schema = KernelJsonSchemaBuilder.build(type_object, description, structured_output) elif parameter_type is not None: string_default = str(default_value) if default_value is not None else None if string_default and string_default.strip(): diff --git a/python/semantic_kernel/schema/kernel_json_schema_builder.py b/python/semantic_kernel/schema/kernel_json_schema_builder.py index 438833fda6dc..9d19644fc25a 100644 --- a/python/semantic_kernel/schema/kernel_json_schema_builder.py +++ b/python/semantic_kernel/schema/kernel_json_schema_builder.py @@ -34,12 +34,15 @@ class KernelJsonSchemaBuilder: """Kernel JSON schema builder.""" @classmethod - def build(cls, parameter_type: type | str, description: str | None = None) -> dict[str, Any]: + def build( + cls, parameter_type: type | str | Any, description: str | None = None, structured_output: bool = False + ) -> dict[str, Any]: """Builds the JSON schema for a given parameter type and description. Args: - parameter_type (type | str): The parameter type. - description (str, optional): The description of the parameter. Defaults to None. + parameter_type: The parameter type. + description: The description of the parameter. Defaults to None. + structured_output: Whether the outputs are structured. Defaults to False. Returns: dict[str, Any]: The JSON schema for the parameter type. 
@@ -47,25 +50,28 @@ def build(cls, parameter_type: type | str, description: str | None = None) -> di if isinstance(parameter_type, str): return cls.build_from_type_name(parameter_type, description) if isinstance(parameter_type, KernelBaseModel): - return cls.build_model_schema(parameter_type, description) + return cls.build_model_schema(parameter_type, description, structured_output) if isinstance(parameter_type, type) and issubclass(parameter_type, Enum): return cls.build_enum_schema(parameter_type, description) if hasattr(parameter_type, "__annotations__"): - return cls.build_model_schema(parameter_type, description) + return cls.build_model_schema(parameter_type, description, structured_output) if hasattr(parameter_type, "__args__"): - return cls.handle_complex_type(parameter_type, description) + return cls.handle_complex_type(parameter_type, description, structured_output) schema = cls.get_json_schema(parameter_type) if description: schema["description"] = description return schema @classmethod - def build_model_schema(cls, model: type, description: str | None = None) -> dict[str, Any]: + def build_model_schema( + cls, model: type | KernelBaseModel, description: str | None = None, structured_output: bool = False + ) -> dict[str, Any]: """Builds the JSON schema for a given model and description. Args: - model (type): The model type. - description (str, optional): The description of the model. Defaults to None. + model: The model type. + description: The description of the model. Defaults to None. + structured_output: Whether the outputs are structured. Defaults to False. Returns: dict[str, Any]: The JSON schema for the model. @@ -88,11 +94,13 @@ def build_model_schema(cls, model: type, description: str | None = None) -> dict field_description = field_info.description if not cls._is_optional(field_type): required.append(field_name) - properties[field_name] = cls.build(field_type, field_description) + properties[field_name] = cls.build(field_type, field_description, structured_output) schema = {"type": "object", "properties": properties} if required: schema["required"] = required + if structured_output: + schema["additionalProperties"] = False # type: ignore if description: schema["description"] = description @@ -140,12 +148,15 @@ def get_json_schema(cls, parameter_type: type) -> dict[str, Any]: return {"type": type_name} @classmethod - def handle_complex_type(cls, parameter_type: type, description: str | None = None) -> dict[str, Any]: + def handle_complex_type( + cls, parameter_type: type, description: str | None = None, structured_output: bool = False + ) -> dict[str, Any]: """Handles building the JSON schema for complex types. Args: - parameter_type (type): The parameter type. - description (str, optional): The description of the parameter. Defaults to None. + parameter_type: The parameter type. + description: The description of the parameter. Defaults to None. + structured_output: Whether the outputs are structured. Defaults to False. Returns: dict[str, Any]: The JSON schema for the parameter type. 
@@ -156,39 +167,50 @@ def handle_complex_type(cls, parameter_type: type, description: str | None = Non schema: dict[str, Any] = {} if origin is list or origin is set: item_type = args[0] - schema = {"type": "array", "items": cls.build(item_type)} + schema = { + "type": "array", + "items": cls.build(item_type, structured_output=structured_output), + } if description: schema["description"] = description return schema if origin is dict: _, value_type = args - additional_properties = cls.build(value_type) + additional_properties = cls.build(value_type, structured_output=structured_output) if additional_properties == {"type": "object"}: additional_properties["properties"] = {} # Account for differences in Python 3.10 dict schema = {"type": "object", "additionalProperties": additional_properties} if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema if origin is tuple: - items = [cls.build(arg) for arg in args] + items = [cls.build(arg, structured_output=structured_output) for arg in args] schema = {"type": "array", "items": items} if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema if origin in {Union, types.UnionType}: # Handle Optional[T] (Union[T, None]) by making schema nullable if len(args) == 2 and type(None) in args: non_none_type = args[0] if args[1] is type(None) else args[1] - schema = cls.build(non_none_type) + schema = cls.build(non_none_type, structured_output=structured_output) schema["type"] = [schema["type"], "null"] if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema - schemas = [cls.build(arg, description) for arg in args] + schemas = [cls.build(arg, description, structured_output=structured_output) for arg in args] return {"anyOf": schemas} schema = cls.get_json_schema(parameter_type) if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema @classmethod diff --git a/python/semantic_kernel/utils/authentication/__init__.py b/python/semantic_kernel/utils/authentication/__init__.py new file mode 100644 index 000000000000..646090a8113b --- /dev/null +++ b/python/semantic_kernel/utils/authentication/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from semantic_kernel.utils.authentication.entra_id_authentication import get_entra_auth_token + +__all__ = ["get_entra_auth_token"] diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py index c9d12b939828..505f7100a2a9 100644 --- a/python/tests/integration/completions/test_chat_completions.py +++ b/python/tests/integration/completions/test_chat_completions.py @@ -11,6 +11,7 @@ from semantic_kernel.contents import ChatMessageContent, TextContent from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.kernel_pydantic import KernelBaseModel from tests.integration.completions.chat_completion_test_base import ( ChatCompletionTestBase, anthropic_setup, @@ -26,6 +27,17 @@ else: from typing_extensions import override # pragma: no cover + +class Step(KernelBaseModel): + explanation: str + output: str + + +class Reasoning(KernelBaseModel): + steps: list[Step] + final_answer: str + + pytestmark = pytest.mark.parametrize( "service_id, execution_settings_kwargs, inputs, kwargs", [ @@ -39,6 +51,16 @@ {}, id="openai_text_input", ), + pytest.param( + "openai", + {"response_format": Reasoning}, + [ + ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="Hello")]), + ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="How are you today?")]), + ], + {}, + id="openai_json_schema_response_format", + ), pytest.param( "azure", {}, @@ -91,7 +113,7 @@ marks=pytest.mark.skipif(not ollama_setup, reason="Need local Ollama setup"), id="ollama_text_input", ), - pytest.param( + pytest.param( "anthropic", {}, [ diff --git a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py index b8bf2635012a..abf3f6325c18 100644 --- a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py +++ b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py @@ -321,6 +321,32 @@ async def test_cmc_no_fcc_in_response( ) +@pytest.mark.asyncio +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def test_cmc_structured_output_no_fcc( + mock_create, + kernel: Kernel, + chat_history: ChatHistory, + mock_chat_completion_response: ChatCompletion, + openai_unit_test_env, +): + mock_create.return_value = mock_chat_completion_response + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id") + + # Define a mock response format + class Test: + name: str + + complete_prompt_execution_settings.response_format = Test + + openai_chat_completion = OpenAIChatCompletion() + await openai_chat_completion.get_chat_message_contents( + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel + ) + mock_create.assert_awaited_once() + + @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_cmc_run_out_of_auto_invoke_loop( @@ -502,6 +528,50 @@ async def test_scmc_singular( ) +@pytest.mark.asyncio +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def test_scmc_structured_output_no_fcc( + mock_create, + kernel: Kernel, + chat_history: ChatHistory, + openai_unit_test_env, +): + content1 = ChatCompletionChunk( + id="test_id", + choices=[], + created=0, + model="test", + 
object="chat.completion.chunk", + ) + content2 = ChatCompletionChunk( + id="test_id", + choices=[ChunkChoice(index=0, delta=ChunkChoiceDelta(content="test", role="assistant"), finish_reason="stop")], + created=0, + model="test", + object="chat.completion.chunk", + ) + stream = MagicMock(spec=AsyncStream) + stream.__aiter__.return_value = [content1, content2] + mock_create.return_value = stream + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id") + + # Define a mock response format + class Test: + name: str + + complete_prompt_execution_settings.response_format = Test + openai_chat_completion = OpenAIChatCompletion() + async for msg in openai_chat_completion.get_streaming_chat_message_content( + chat_history=chat_history, + settings=complete_prompt_execution_settings, + kernel=kernel, + arguments=KernelArguments(), + ): + assert isinstance(msg, StreamingChatMessageContent) + mock_create.assert_awaited_once() + + @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_scmc_function_call_behavior( diff --git a/python/tests/unit/connectors/open_ai/test_openai_request_settings.py b/python/tests/unit/connectors/open_ai/test_openai_request_settings.py index e5e3b9333372..db39285f39a6 100644 --- a/python/tests/unit/connectors/open_ai/test_openai_request_settings.py +++ b/python/tests/unit/connectors/open_ai/test_openai_request_settings.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import pytest +from pydantic import BaseModel from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( AzureAISearchDataSource, @@ -15,6 +16,20 @@ from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.connectors.memory.azure_cognitive_search.azure_ai_search_settings import AzureAISearchSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError +from semantic_kernel.kernel_pydantic import KernelBaseModel + + +############################################ +# Test classes for structured output +class TestClass: + attribute: str + + +class TestClassPydantic(KernelBaseModel): + attribute: str + + +############################################ def test_default_openai_chat_prompt_execution_settings(): @@ -306,3 +321,50 @@ def test_azure_open_ai_chat_prompt_execution_settings_with_response_format_json( settings = AzureChatPromptExecutionSettings(response_format=response_format) options = settings.prepare_settings_dict() assert options["response_format"] == response_format + + +def test_openai_chat_prompt_execution_settings_with_json_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + settings.response_format = { + "type": "json_schema", + "json_schema": { + "name": "math_response", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + } + assert isinstance(settings.response_format, dict) + + +def test_openai_chat_prompt_execution_settings_with_nonpydantic_type_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + 
settings.response_format = TestClass + assert isinstance(settings.response_format, type) + + +def test_openai_chat_prompt_execution_settings_with_pydantic_type_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + settings.response_format = TestClassPydantic + assert issubclass(settings.response_format, BaseModel) + + +def test_openai_chat_prompt_execution_settings_with_invalid_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + with pytest.raises(ServiceInvalidExecutionSettingsError): + settings.response_format = "invalid" diff --git a/python/tests/unit/schema/test_schema_builder.py b/python/tests/unit/schema/test_schema_builder.py index 015ad12a337c..f6f2b4071983 100644 --- a/python/tests/unit/schema/test_schema_builder.py +++ b/python/tests/unit/schema/test_schema_builder.py @@ -7,6 +7,7 @@ import pytest +from semantic_kernel.connectors.utils.structured_output_schema import generate_structured_output_response_format_schema from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.schema.kernel_json_schema_builder import KernelJsonSchemaBuilder @@ -65,6 +66,26 @@ class MockModel: } +class PydanticStep(KernelBaseModel): + explanation: str + output: str + + +class PydanticReasoning(KernelBaseModel): + steps: list[PydanticStep] + final_answer: str + + +class NonPydanticStep: + explanation: str + output: str + + +class NonPydanticReasoning: + steps: list[NonPydanticStep] + final_answer: str + + def test_build_with_kernel_base_model(): expected_schema = { "type": "object", @@ -368,3 +389,69 @@ def test_handle_complex_type(): schema = KernelJsonSchemaBuilder.handle_complex_type(str, "Description") expected_schema = {"type": "string", "description": "Description"} assert schema == expected_schema + + +def test_build_schema_with_pydantic_structured_output(): + schema = KernelJsonSchemaBuilder.build(parameter_type=PydanticReasoning, structured_output=True) + structured_output_schema = generate_structured_output_response_format_schema(name="Reasoning", schema=schema) + + expected_schema = { + "type": "json_schema", + "json_schema": { + "name": "Reasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + } + + assert structured_output_schema == expected_schema + + +def test_build_schema_with_nonpydantic_structured_output(): + schema = KernelJsonSchemaBuilder.build(parameter_type=NonPydanticReasoning, structured_output=True) + structured_output_schema = generate_structured_output_response_format_schema( + name="NonPydanticReasoning", schema=schema + ) + + expected_schema = { + "type": "json_schema", + "json_schema": { + "name": "NonPydanticReasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + } + + assert structured_output_schema == expected_schema diff --git a/python/uv.lock 
b/python/uv.lock index f9e28c9ee04a..0693a0bac72e 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2545,7 +2545,7 @@ wheels = [ [[package]] name = "openai" -version = "1.42.0" +version = "1.47.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, @@ -2557,9 +2557,9 @@ dependencies = [ { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8c/1f/310b0b5efb6178ad9f9ca4a80b2ead3cb7cbc16a1b843941bcf1c52dd884/openai-1.42.0.tar.gz", hash = "sha256:c9d31853b4e0bc2dc8bd08003b462a006035655a701471695d0bfdc08529cde3", size = 290549 } +sdist = { url = "https://files.pythonhosted.org/packages/e9/80/4c366e9113527894584a6404f105d134fae83a314dc04a6a99bd0e2459bb/openai-1.47.0.tar.gz", hash = "sha256:6e14d6f77c8cf546646afcd87a2ef752505b3710d2564a2e433e17307dfa86a0", size = 297886 } wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/9e/d77569d06e365f093977d94f305a395b7ac5ccd746016a2e8dd34c4e20c1/openai-1.42.0-py3-none-any.whl", hash = "sha256:dc91e0307033a4f94931e5d03cc3b29b9717014ad5e73f9f2051b6cb5eda4d80", size = 362858 }, + { url = "https://files.pythonhosted.org/packages/a2/3c/28d7cf1a3292c93e5def8acd89535ba380cb8dd888c26cdbfe420249e143/openai-1.47.0-py3-none-any.whl", hash = "sha256:9ccc8737dfa791f7bd903db4758c176b8544a8cd89d3a3d2add3cea02a34c3a0", size = 375576 }, ] [[package]] @@ -4188,7 +4188,7 @@ wheels = [ [[package]] name = "semantic-kernel" -version = "1.8.3" +version = "1.9.0" source = { editable = "." } dependencies = [ { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },