From 3d0a890e711a1381bcf695c3c9b7ca2ec6b38333 Mon Sep 17 00:00:00 2001 From: Evan Mattson <35585003+moonbox3@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:12:32 -0400 Subject: [PATCH] Python: Support OpenAI json_schema response format (#8958) ### Motivation and Context OpenAI's August 6th model release added the ability to specify a json_schema response format. The caller supplies either a Pydantic model or a JSON schema representing a "strict" model/class, and the service ensures that the model responds with exactly the schema that was provided. Note that strict tool calling will be handled via a separate ADR/work item once agreed upon by all three SK languages. ### Description This PR introduces the ability to specify either a Pydantic model or a non-Pydantic model as the json_schema response format on OpenAIChatPromptExecutionSettings. - Two new concept code samples show how to do this with both a Pydantic and a non-Pydantic model. The second sample additionally shows how to configure a json_schema response format while using function calling. - If the model is unable to respond based on the provided schema, the chat completion message contents will contain a `refusal` attribute. This is included in the response as a TextContent item explaining why the request couldn't be fulfilled. - Unit tests added for this new functionality. - Handles #7946 ### Contribution Checklist - [X] The code builds clean without any errors or warnings - [X] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [X] All unit tests pass, and I have added new tests where possible - [X] I didn't break anyone :smile: --- python/pyproject.toml | 1 - python/samples/concepts/README.md | 1 + ...penai_function_calling_stepwise_planner.py | 4 +- .../concepts/structured_output/README.md | 16 ++ .../json_structured_output.py | 135 +++++++++++++++ ...json_structured_output_function_calling.py | 163 ++++++++++++++++++ .../open_ai_prompt_execution_settings.py | 38 +++- .../services/open_ai_chat_completion_base.py | 2 + .../ai/open_ai/services/open_ai_handler.py | 31 +++- .../ai/prompt_execution_settings.py | 1 + .../utils/structured_output_schema.py | 11 ++ .../functions/kernel_parameter_metadata.py | 9 +- .../schema/kernel_json_schema_builder.py | 58 +++++-- .../utils/authentication/__init__.py | 5 + .../completions/test_chat_completions.py | 24 ++- .../test_open_ai_chat_completion_base.py | 70 ++++++++ .../open_ai/test_openai_request_settings.py | 62 +++++++ .../tests/unit/schema/test_schema_builder.py | 87 ++++++++++ python/uv.lock | 8 +- 19 files changed, 694 insertions(+), 32 deletions(-) create mode 100644 python/samples/concepts/structured_output/README.md create mode 100644 python/samples/concepts/structured_output/json_structured_output.py create mode 100644 python/samples/concepts/structured_output/json_structured_output_function_calling.py create mode 100644 python/semantic_kernel/connectors/utils/structured_output_schema.py create mode 100644 python/semantic_kernel/utils/authentication/__init__.py diff --git a/python/pyproject.toml b/python/pyproject.toml index af8ecd8600ac..75d2181a1d50 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -41,7 +41,6 @@ dependencies = [ # OpenTelemetry "opentelemetry-api ~= 1.24", "opentelemetry-sdk ~= 
1.24", - "prance ~= 23.6.21.0", # templating diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md index 0ef6120ad285..06c660b1cde1 100644 --- a/python/samples/concepts/README.md +++ b/python/samples/concepts/README.md @@ -22,6 +22,7 @@ This section contains code snippets that demonstrate the usage of Semantic Kerne | Search | Using search services information | | Service Selector | Shows how to create and use a custom service selector class. | | Setup | How to setup environment variables for Semantic Kernel | +| Structured Output | How to leverage OpenAI's json_schema structured output functionality. | | TextGeneration | Using [`TextGeneration`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/connectors/ai/text_completion_client_base.py) capable service with models | # Configuring the Kernel diff --git a/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py b/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py index c98efa3a5a5c..6627a2a7fb26 100644 --- a/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py +++ b/python/samples/concepts/planners/azure_openai_function_calling_stepwise_planner.py @@ -20,8 +20,8 @@ async def main(): ) plugin_path = os.path.join( - os.path.dirname(os.path.dirname(os.path.realpath(__file__))), - "resources", + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), + "resources", ) kernel.add_plugin(parent_directory=plugin_path, plugin_name="email_plugin") diff --git a/python/samples/concepts/structured_output/README.md b/python/samples/concepts/structured_output/README.md new file mode 100644 index 000000000000..50d20964f881 --- /dev/null +++ b/python/samples/concepts/structured_output/README.md @@ -0,0 +1,16 @@ +# OpenAI Structured Outputs + +## Supported Models + +### Azure OpenAI: + +- Access to `gpt-4o-2024-08-06` or later +- The `2024-08-01-preview` API version +- If using a token instead of an API key, you must have the `Cognitive Services OpenAI Contributor` role assigned to your Azure AD user. +- See more information [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/structured-outputs?tabs=python-secure) + +### OpenAI: + +- The OpenAI models supported are: + - `gpt-4o-mini-2024-07-18` and later + - `gpt-4o-2024-08-06` and later diff --git a/python/samples/concepts/structured_output/json_structured_output.py b/python/samples/concepts/structured_output/json_structured_output.py new file mode 100644 index 000000000000..f6ea600cd56f --- /dev/null +++ b/python/samples/concepts/structured_output/json_structured_output.py @@ -0,0 +1,135 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion +from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent + +################################################################### +# The following sample demonstrates how to create a chat # +# completion call that assists users in solving math problems. 
# +# The bot guides the user step-by-step through the solution # +# process using a structured output format based on either a # +# Pydantic model or a non-Pydantic model. # +################################################################### + + +################################################################### +# NOTE: If using Azure OpenAI, the following is required: +# - access to gpt-4o-2024-08-06 +# - the 2024-08-01-preview API version +# - if using a token instead of an API key, you must have the +# `Cognitive Services OpenAI Contributor` role assigned to your +# Azure AD user. +# - flip the `use_azure_openai` flag to `True` +################################################################### +use_azure_openai = False + +system_message = """ +You are a helpful math tutor. Guide the user through the solution step by step. +""" + + +################################################################### +# OPTION 1: Define the Pydantic model that represents the +# structured output from the OpenAI service. This model will be +# used to parse the structured output, ensuring that the service +# outputs the exact schema defined by the Pydantic model. +from semantic_kernel.kernel_pydantic import KernelBaseModel # noqa: E402 + + +class Step(KernelBaseModel): + explanation: str + output: str + + +class Reasoning(KernelBaseModel): + steps: list[Step] + final_answer: str + + +################################################################### + + +# OPTION 2: Define a non-Pydantic model that represents the +# structured output from the OpenAI service. This model will be +# converted to the proper JSON Schema and sent to the LLM. +# Uncomment the following lines and comment out the Pydantic model +# above to use this option. +# class Step: +# explanation: str +# output: str + + +# class Reasoning: +# steps: list[Step] +# final_answer: str + + +################################################################### + +kernel = Kernel() + +service_id = "structured-output" +if use_azure_openai: + chat_service = AzureChatCompletion( + service_id=service_id, + ) +else: + chat_service = OpenAIChatCompletion( + service_id=service_id, + ) +kernel.add_service(chat_service) + +req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) +req_settings.max_tokens = 2000 +req_settings.temperature = 0.7 +req_settings.top_p = 0.8 +req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": ["chat"]}) + +# NOTE: This is the key setting in this example that tells the OpenAI service +# to return structured output based on the Pydantic model Reasoning. 
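+# A response_format dictionary in OpenAI's json_schema shape is also accepted +# and is passed through to the service without modification, for example +# (full schema elided here; see the unit tests for a complete example): +# req_settings.response_format = { +#     "type": "json_schema", +#     "json_schema": {"name": "Reasoning", "strict": True, "schema": {...}}, +# }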
+req_settings.response_format = Reasoning + + +chat_function = kernel.add_function( + prompt=system_message + """{{$chat_history}}""", + function_name="chat", + plugin_name="chat", + prompt_execution_settings=req_settings, +) + +history = ChatHistory() +history.add_user_message("how can I solve 8x + 7y = -23, and 4x=12?") + + +async def main(): + stream = True + if stream: + answer = kernel.invoke_stream( + chat_function, + chat_history=history, + ) + print("Mosscap:> ", end="") + result_content: list[StreamingChatMessageContent] = [] + async for message in answer: + result_content.append(message[0]) + print(str(message[0]), end="", flush=True) + if result_content: + result = "".join([str(content) for content in result_content]) + else: + result = await kernel.invoke( + chat_function, + chat_history=history, + ) + print(f"Mosscap:> {result}") + history.add_assistant_message(str(result)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/structured_output/json_structured_output_function_calling.py b/python/samples/concepts/structured_output/json_structured_output_function_calling.py new file mode 100644 index 000000000000..d2091dbf5f73 --- /dev/null +++ b/python/samples/concepts/structured_output/json_structured_output_function_calling.py @@ -0,0 +1,163 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import Annotated + +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion +from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.functions.kernel_function_decorator import kernel_function + +################################################################### +# The following sample demonstrates how to create a chat # +# completion call that assists users in answering a question # +# using a Semantic Kernel plugin and function calling. # +# The chat function guides the user step-by-step through the # +# solution process using a structured output format based on # +# either a Pydantic model or a non-Pydantic model. # +################################################################### + + +################################################################### +# NOTE: If using Azure OpenAI, the following is required: +# - access to gpt-4o-2024-08-06 +# - the 2024-08-01-preview API version +# - if using a token instead of an API key, you must have the +# `Cognitive Services OpenAI Contributor` role assigned to your +# Azure AD user. +# - flip the `use_azure_openai` flag to `True` +################################################################### +use_azure_openai = True + +system_message = """ +You are a helpful assistant. Use the available plugins to answer the user's question, and explain your reasoning step by step. 
+""" + + +# Define a sample plugin to use for function calling +class WeatherPlugin: + """A sample plugin that provides weather information for cities.""" + + @kernel_function(name="get_weather_for_city", description="Get the weather for a city") + def get_weather_for_city(self, city: Annotated[str, "The input city"]) -> Annotated[str, "The output is a string"]: + if city == "Boston": + return "61 and rainy" + if city == "London": + return "55 and cloudy" + if city == "Miami": + return "80 and sunny" + if city == "Paris": + return "60 and rainy" + if city == "Tokyo": + return "50 and sunny" + if city == "Sydney": + return "75 and sunny" + if city == "Tel Aviv": + return "80 and sunny" + return "31 and snowing" + + +################################################################### +# OPTION 1: Define the Pydantic model that represents the +# structured output from the OpenAI service. This model will be +# used to parse the structured output from the OpenAI service, +# and ensure that the model correctly outputs the schema based +# on the Pydantic model. +from semantic_kernel.kernel_pydantic import KernelBaseModel # noqa: E402 + + +class Step(KernelBaseModel): + explanation: str + output: str + + +class Reasoning(KernelBaseModel): + steps: list[Step] + final_answer: str + + +################################################################### + + +# OPTION 2: Define a non-Pydantic model that should represent the +# structured output from the OpenAI service. This model will be +# converted to the proper JSON Schema and sent to the LLM. +# Uncomment the follow lines and comment out the Pydantic model +# above to use this option. +# class Step: +# explanation: str +# output: str + + +# class Reasoning: +# steps: list[Step] +# final_answer: str + + +################################################################### + +kernel = Kernel() + +service_id = "structured-output" +if use_azure_openai: + chat_service = AzureChatCompletion( + service_id=service_id, + ) +else: + chat_service = OpenAIChatCompletion( + service_id=service_id, + ) +kernel.add_service(chat_service) + +kernel.add_plugin(WeatherPlugin(), plugin_name="weather") + +req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) +req_settings.max_tokens = 2000 +req_settings.temperature = 0.7 +req_settings.top_p = 0.8 +req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": ["chat"]}) + +# NOTE: This is the key setting in this example that tells the OpenAI service +# to return structured output based on the Pydantic model Reasoning. 
+req_settings.response_format = Reasoning + + +chat_function = kernel.add_function( + prompt=system_message + """{{$chat_history}}""", + function_name="chat", + plugin_name="chat", + prompt_execution_settings=req_settings, +) + +history = ChatHistory() +history.add_user_message("Using the available plugin, what is the weather in Paris?") + + +async def main(): + stream = True + if stream: + answer = kernel.invoke_stream( + chat_function, + chat_history=history, + ) + print("Mosscap:> ", end="") + result_content: list[StreamingChatMessageContent] = [] + async for message in answer: + result_content.append(message[0]) + print(str(message[0]), end="", flush=True) + if result_content: + result = "".join([str(content) for content in result_content]) + else: + result = await kernel.invoke( + chat_function, + chat_history=history, + ) + print(f"Mosscap:> {result}") + history.add_assistant_message(str(result)) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py index f4aa7868f5fc..c8db7dd0b2cc 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py @@ -9,7 +9,7 @@ else: from typing_extensions import Self # pragma: no cover -from pydantic import Field, field_validator, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -61,7 +61,9 @@ def check_best_of_and_n(self) -> "OpenAITextPromptExecutionSettings": class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): """Specific settings for the Chat Completion endpoint.""" - response_format: dict[Literal["type"], Literal["text", "json_object"]] | None = None + response_format: ( + dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None + ) = None function_call: str | None = None functions: list[dict[str, Any]] | None = None messages: list[dict[str, Any]] | None = None @@ -75,6 +77,7 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): None, description="Do not set this manually. It is set by the service based on the function choice configuration.", ) + structured_json_response: bool = Field(False, description="Do not set this manually. 
It is set by the service.") @field_validator("functions", "function_call", mode="after") @classmethod @@ -86,6 +89,37 @@ def validate_function_call(cls, v: str | list[dict[str, Any]] | None = None): ) return v + @model_validator(mode="before") + def validate_response_format_and_set_flag(cls, values) -> Any: + """Validate the response_format and set structured_json_response accordingly.""" + response_format = values.get("response_format", None) + + if response_format is None: + return values + + if isinstance(response_format, dict): + if response_format.get("type") == "json_object": + return values + if response_format.get("type") == "json_schema": + json_schema = response_format.get("json_schema") + if isinstance(json_schema, dict): + values["structured_json_response"] = True + return values + raise ServiceInvalidExecutionSettingsError( + "If response_format has type 'json_schema', 'json_schema' must be a valid dictionary." + ) + if isinstance(response_format, type): + if issubclass(response_format, BaseModel): + values["structured_json_response"] = True + else: + values["structured_json_response"] = True + else: + raise ServiceInvalidExecutionSettingsError( + "response_format must be a dictionary, a subclass of BaseModel, a Python class/type, or None" + ) + + return values + @model_validator(mode="before") @classmethod def validate_function_calling_behaviors(cls, data) -> Any: diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index 3d74094e342c..ada65b07784c 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -155,6 +155,8 @@ def _create_chat_message_content( items.extend(self._get_function_call_from_chat_choice(choice)) if choice.message.content: items.append(TextContent(text=choice.message.content)) + elif hasattr(choice.message, "refusal") and choice.message.refusal: + items.append(TextContent(text=choice.message.refusal)) return ChatMessageContent( inner_content=response, diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py index c65e0bc01989..0af2cbd44c75 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_handler.py @@ -5,17 +5,22 @@ from typing import Any from openai import AsyncOpenAI, AsyncStream, BadRequestError +from openai.lib._parsing._completions import type_to_response_format_param from openai.types import Completion, CreateEmbeddingResponse from openai.types.chat import ChatCompletion, ChatCompletionChunk +from pydantic import BaseModel from semantic_kernel.connectors.ai.open_ai.exceptions.content_filter_ai_exception import ContentFilterAIException from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( + OpenAIChatPromptExecutionSettings, OpenAIEmbeddingPromptExecutionSettings, OpenAIPromptExecutionSettings, ) from semantic_kernel.connectors.ai.open_ai.services.open_ai_model_types import OpenAIModelTypes +from semantic_kernel.connectors.utils.structured_output_schema import generate_structured_output_response_format_schema from semantic_kernel.exceptions import ServiceResponseException from semantic_kernel.kernel_pydantic import 
KernelBaseModel +from semantic_kernel.schema.kernel_json_schema_builder import KernelJsonSchemaBuilder logger: logging.Logger = logging.getLogger(__name__) @@ -35,10 +40,13 @@ async def _send_request( ) -> ChatCompletion | Completion | AsyncStream[ChatCompletionChunk] | AsyncStream[Completion]: """Execute the appropriate call to OpenAI models.""" try: + settings = request_settings.prepare_settings_dict() if self.ai_model_type == OpenAIModelTypes.CHAT: - response = await self.client.chat.completions.create(**request_settings.prepare_settings_dict()) + assert isinstance(request_settings, OpenAIChatPromptExecutionSettings) # nosec + self._handle_structured_output(request_settings, settings) + response = await self.client.chat.completions.create(**settings) else: - response = await self.client.completions.create(**request_settings.prepare_settings_dict()) + response = await self.client.completions.create(**settings) self.store_usage(response) return response except BadRequestError as ex: @@ -68,6 +76,25 @@ async def _send_embedding_request(self, settings: OpenAIEmbeddingPromptExecution ex, ) from ex + def _handle_structured_output( + self, request_settings: OpenAIChatPromptExecutionSettings, settings: dict[str, Any] + ) -> None: + response_format = getattr(request_settings, "response_format", None) + if getattr(request_settings, "structured_json_response", False) and response_format: + # Case 1: response_format is a type and subclass of BaseModel + if isinstance(response_format, type) and issubclass(response_format, BaseModel): + settings["response_format"] = type_to_response_format_param(response_format) + # Case 2: response_format is a type but not a subclass of BaseModel + elif isinstance(response_format, type): + generated_schema = KernelJsonSchemaBuilder.build(parameter_type=response_format, structured_output=True) + assert generated_schema is not None # nosec + settings["response_format"] = generate_structured_output_response_format_schema( + name=response_format.__name__, schema=generated_schema + ) + # Case 3: response_format is a dictionary, pass it without modification + elif isinstance(response_format, dict): + settings["response_format"] = response_format + def store_usage( self, response: ChatCompletion diff --git a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py index b683a58e17fa..4c3abc8f5419 100644 --- a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py @@ -79,6 +79,7 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: exclude={ "service_id", "extension_data", + "structured_json_response", }, exclude_none=True, by_alias=True, diff --git a/python/semantic_kernel/connectors/utils/structured_output_schema.py b/python/semantic_kernel/connectors/utils/structured_output_schema.py new file mode 100644 index 000000000000..d274ed8e7fc5 --- /dev/null +++ b/python/semantic_kernel/connectors/utils/structured_output_schema.py @@ -0,0 +1,11 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from typing import Any + + +def generate_structured_output_response_format_schema(name: str, schema: dict[str, Any]) -> dict[str, Any]: + """Generate the structured output response format schema.""" + return { + "type": "json_schema", + "json_schema": {"name": name, "strict": True, "schema": schema}, + } diff --git a/python/semantic_kernel/functions/kernel_parameter_metadata.py b/python/semantic_kernel/functions/kernel_parameter_metadata.py index 20d6151bbb0b..7ddf115e7d88 100644 --- a/python/semantic_kernel/functions/kernel_parameter_metadata.py +++ b/python/semantic_kernel/functions/kernel_parameter_metadata.py @@ -36,13 +36,18 @@ def form_schema(cls, data: Any) -> Any: @classmethod def infer_schema( - cls, type_object: type | None, parameter_type: str | None, default_value: Any, description: str | None + cls, + type_object: type | None = None, + parameter_type: str | None = None, + default_value: Any | None = None, + description: str | None = None, + structured_output: bool = False, ) -> dict[str, Any] | None: """Infer the schema for the parameter metadata.""" schema = None if type_object is not None: - schema = KernelJsonSchemaBuilder.build(type_object, description) + schema = KernelJsonSchemaBuilder.build(type_object, description, structured_output) elif parameter_type is not None: string_default = str(default_value) if default_value is not None else None if string_default and string_default.strip(): diff --git a/python/semantic_kernel/schema/kernel_json_schema_builder.py b/python/semantic_kernel/schema/kernel_json_schema_builder.py index 438833fda6dc..9d19644fc25a 100644 --- a/python/semantic_kernel/schema/kernel_json_schema_builder.py +++ b/python/semantic_kernel/schema/kernel_json_schema_builder.py @@ -34,12 +34,15 @@ class KernelJsonSchemaBuilder: """Kernel JSON schema builder.""" @classmethod - def build(cls, parameter_type: type | str, description: str | None = None) -> dict[str, Any]: + def build( + cls, parameter_type: type | str | Any, description: str | None = None, structured_output: bool = False + ) -> dict[str, Any]: """Builds the JSON schema for a given parameter type and description. Args: - parameter_type (type | str): The parameter type. - description (str, optional): The description of the parameter. Defaults to None. + parameter_type: The parameter type. + description: The description of the parameter. Defaults to None. + structured_output: Whether the outputs are structured. Defaults to False. Returns: dict[str, Any]: The JSON schema for the parameter type. 
@@ -47,25 +50,28 @@ def build(cls, parameter_type: type | str, description: str | None = None) -> di if isinstance(parameter_type, str): return cls.build_from_type_name(parameter_type, description) if isinstance(parameter_type, KernelBaseModel): - return cls.build_model_schema(parameter_type, description) + return cls.build_model_schema(parameter_type, description, structured_output) if isinstance(parameter_type, type) and issubclass(parameter_type, Enum): return cls.build_enum_schema(parameter_type, description) if hasattr(parameter_type, "__annotations__"): - return cls.build_model_schema(parameter_type, description) + return cls.build_model_schema(parameter_type, description, structured_output) if hasattr(parameter_type, "__args__"): - return cls.handle_complex_type(parameter_type, description) + return cls.handle_complex_type(parameter_type, description, structured_output) schema = cls.get_json_schema(parameter_type) if description: schema["description"] = description return schema @classmethod - def build_model_schema(cls, model: type, description: str | None = None) -> dict[str, Any]: + def build_model_schema( + cls, model: type | KernelBaseModel, description: str | None = None, structured_output: bool = False + ) -> dict[str, Any]: """Builds the JSON schema for a given model and description. Args: - model (type): The model type. - description (str, optional): The description of the model. Defaults to None. + model: The model type. + description: The description of the model. Defaults to None. + structured_output: Whether the outputs are structured. Defaults to False. Returns: dict[str, Any]: The JSON schema for the model. @@ -88,11 +94,13 @@ def build_model_schema(cls, model: type, description: str | None = None) -> dict field_description = field_info.description if not cls._is_optional(field_type): required.append(field_name) - properties[field_name] = cls.build(field_type, field_description) + properties[field_name] = cls.build(field_type, field_description, structured_output) schema = {"type": "object", "properties": properties} if required: schema["required"] = required + if structured_output: + schema["additionalProperties"] = False # type: ignore if description: schema["description"] = description @@ -140,12 +148,15 @@ def get_json_schema(cls, parameter_type: type) -> dict[str, Any]: return {"type": type_name} @classmethod - def handle_complex_type(cls, parameter_type: type, description: str | None = None) -> dict[str, Any]: + def handle_complex_type( + cls, parameter_type: type, description: str | None = None, structured_output: bool = False + ) -> dict[str, Any]: """Handles building the JSON schema for complex types. Args: - parameter_type (type): The parameter type. - description (str, optional): The description of the parameter. Defaults to None. + parameter_type: The parameter type. + description: The description of the parameter. Defaults to None. + structured_output: Whether the outputs are structured. Defaults to False. Returns: dict[str, Any]: The JSON schema for the parameter type. 
@@ -156,39 +167,50 @@ def handle_complex_type(cls, parameter_type: type, description: str | None = Non schema: dict[str, Any] = {} if origin is list or origin is set: item_type = args[0] - schema = {"type": "array", "items": cls.build(item_type)} + schema = { + "type": "array", + "items": cls.build(item_type, structured_output=structured_output), + } if description: schema["description"] = description return schema if origin is dict: _, value_type = args - additional_properties = cls.build(value_type) + additional_properties = cls.build(value_type, structured_output=structured_output) if additional_properties == {"type": "object"}: additional_properties["properties"] = {} # Account for differences in Python 3.10 dict schema = {"type": "object", "additionalProperties": additional_properties} if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema if origin is tuple: - items = [cls.build(arg) for arg in args] + items = [cls.build(arg, structured_output=structured_output) for arg in args] schema = {"type": "array", "items": items} if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema if origin in {Union, types.UnionType}: # Handle Optional[T] (Union[T, None]) by making schema nullable if len(args) == 2 and type(None) in args: non_none_type = args[0] if args[1] is type(None) else args[1] - schema = cls.build(non_none_type) + schema = cls.build(non_none_type, structured_output=structured_output) schema["type"] = [schema["type"], "null"] if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema - schemas = [cls.build(arg, description) for arg in args] + schemas = [cls.build(arg, description, structured_output=structured_output) for arg in args] return {"anyOf": schemas} schema = cls.get_json_schema(parameter_type) if description: schema["description"] = description + if structured_output: + schema["additionalProperties"] = False return schema @classmethod diff --git a/python/semantic_kernel/utils/authentication/__init__.py b/python/semantic_kernel/utils/authentication/__init__.py new file mode 100644 index 000000000000..646090a8113b --- /dev/null +++ b/python/semantic_kernel/utils/authentication/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +from semantic_kernel.utils.authentication.entra_id_authentication import get_entra_auth_token + +__all__ = ["get_entra_auth_token"] diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py index c9d12b939828..505f7100a2a9 100644 --- a/python/tests/integration/completions/test_chat_completions.py +++ b/python/tests/integration/completions/test_chat_completions.py @@ -11,6 +11,7 @@ from semantic_kernel.contents import ChatMessageContent, TextContent from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.kernel_pydantic import KernelBaseModel from tests.integration.completions.chat_completion_test_base import ( ChatCompletionTestBase, anthropic_setup, @@ -26,6 +27,17 @@ else: from typing_extensions import override # pragma: no cover + +class Step(KernelBaseModel): + explanation: str + output: str + + +class Reasoning(KernelBaseModel): + steps: list[Step] + final_answer: str + + pytestmark = pytest.mark.parametrize( "service_id, execution_settings_kwargs, inputs, kwargs", [ @@ -39,6 +51,16 @@ {}, id="openai_text_input", ), + pytest.param( + "openai", + {"response_format": Reasoning}, + [ + ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="Hello")]), + ChatMessageContent(role=AuthorRole.USER, items=[TextContent(text="How are you today?")]), + ], + {}, + id="openai_json_schema_response_format", + ), pytest.param( "azure", {}, @@ -91,7 +113,7 @@ marks=pytest.mark.skipif(not ollama_setup, reason="Need local Ollama setup"), id="ollama_text_input", ), - pytest.param( + pytest.param( "anthropic", {}, [ diff --git a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py index b8bf2635012a..abf3f6325c18 100644 --- a/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py +++ b/python/tests/unit/connectors/open_ai/services/test_open_ai_chat_completion_base.py @@ -321,6 +321,32 @@ async def test_cmc_no_fcc_in_response( ) +@pytest.mark.asyncio +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def test_cmc_structured_output_no_fcc( + mock_create, + kernel: Kernel, + chat_history: ChatHistory, + mock_chat_completion_response: ChatCompletion, + openai_unit_test_env, +): + mock_create.return_value = mock_chat_completion_response + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id") + + # Define a mock response format + class Test: + name: str + + complete_prompt_execution_settings.response_format = Test + + openai_chat_completion = OpenAIChatCompletion() + await openai_chat_completion.get_chat_message_contents( + chat_history=chat_history, settings=complete_prompt_execution_settings, kernel=kernel + ) + mock_create.assert_awaited_once() + + @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_cmc_run_out_of_auto_invoke_loop( @@ -502,6 +528,50 @@ async def test_scmc_singular( ) +@pytest.mark.asyncio +@patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) +async def test_scmc_structured_output_no_fcc( + mock_create, + kernel: Kernel, + chat_history: ChatHistory, + openai_unit_test_env, +): + content1 = ChatCompletionChunk( + id="test_id", + choices=[], + created=0, + model="test", + 
object="chat.completion.chunk", + ) + content2 = ChatCompletionChunk( + id="test_id", + choices=[ChunkChoice(index=0, delta=ChunkChoiceDelta(content="test", role="assistant"), finish_reason="stop")], + created=0, + model="test", + object="chat.completion.chunk", + ) + stream = MagicMock(spec=AsyncStream) + stream.__aiter__.return_value = [content1, content2] + mock_create.return_value = stream + chat_history.add_user_message("hello world") + complete_prompt_execution_settings = OpenAIChatPromptExecutionSettings(service_id="test_service_id") + + # Define a mock response format + class Test: + name: str + + complete_prompt_execution_settings.response_format = Test + openai_chat_completion = OpenAIChatCompletion() + async for msg in openai_chat_completion.get_streaming_chat_message_content( + chat_history=chat_history, + settings=complete_prompt_execution_settings, + kernel=kernel, + arguments=KernelArguments(), + ): + assert isinstance(msg, StreamingChatMessageContent) + mock_create.assert_awaited_once() + + @pytest.mark.asyncio @patch.object(AsyncChatCompletions, "create", new_callable=AsyncMock) async def test_scmc_function_call_behavior( diff --git a/python/tests/unit/connectors/open_ai/test_openai_request_settings.py b/python/tests/unit/connectors/open_ai/test_openai_request_settings.py index e5e3b9333372..db39285f39a6 100644 --- a/python/tests/unit/connectors/open_ai/test_openai_request_settings.py +++ b/python/tests/unit/connectors/open_ai/test_openai_request_settings.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import pytest +from pydantic import BaseModel from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( AzureAISearchDataSource, @@ -15,6 +16,20 @@ from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.connectors.memory.azure_cognitive_search.azure_ai_search_settings import AzureAISearchSettings from semantic_kernel.exceptions import ServiceInvalidExecutionSettingsError +from semantic_kernel.kernel_pydantic import KernelBaseModel + + +############################################ +# Test classes for structured output +class TestClass: + attribute: str + + +class TestClassPydantic(KernelBaseModel): + attribute: str + + +############################################ def test_default_openai_chat_prompt_execution_settings(): @@ -306,3 +321,50 @@ def test_azure_open_ai_chat_prompt_execution_settings_with_response_format_json( settings = AzureChatPromptExecutionSettings(response_format=response_format) options = settings.prepare_settings_dict() assert options["response_format"] == response_format + + +def test_openai_chat_prompt_execution_settings_with_json_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + settings.response_format = { + "type": "json_schema", + "json_schema": { + "name": "math_response", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + } + assert isinstance(settings.response_format, dict) + + +def test_openai_chat_prompt_execution_settings_with_nonpydantic_type_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + 
settings.response_format = TestClass + assert isinstance(settings.response_format, type) + + +def test_openai_chat_prompt_execution_settings_with_pydantic_type_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + settings.response_format = TestClassPydantic + assert issubclass(settings.response_format, BaseModel) + + +def test_openai_chat_prompt_execution_settings_with_invalid_structured_output(): + settings = OpenAIChatPromptExecutionSettings() + with pytest.raises(ServiceInvalidExecutionSettingsError): + settings.response_format = "invalid" diff --git a/python/tests/unit/schema/test_schema_builder.py b/python/tests/unit/schema/test_schema_builder.py index 015ad12a337c..f6f2b4071983 100644 --- a/python/tests/unit/schema/test_schema_builder.py +++ b/python/tests/unit/schema/test_schema_builder.py @@ -7,6 +7,7 @@ import pytest +from semantic_kernel.connectors.utils.structured_output_schema import generate_structured_output_response_format_schema from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.schema.kernel_json_schema_builder import KernelJsonSchemaBuilder @@ -65,6 +66,26 @@ class MockModel: } +class PydanticStep(KernelBaseModel): + explanation: str + output: str + + +class PydanticReasoning(KernelBaseModel): + steps: list[PydanticStep] + final_answer: str + + +class NonPydanticStep: + explanation: str + output: str + + +class NonPydanticReasoning: + steps: list[NonPydanticStep] + final_answer: str + + def test_build_with_kernel_base_model(): expected_schema = { "type": "object", @@ -368,3 +389,69 @@ def test_handle_complex_type(): schema = KernelJsonSchemaBuilder.handle_complex_type(str, "Description") expected_schema = {"type": "string", "description": "Description"} assert schema == expected_schema + + +def test_build_schema_with_pydantic_structured_output(): + schema = KernelJsonSchemaBuilder.build(parameter_type=PydanticReasoning, structured_output=True) + structured_output_schema = generate_structured_output_response_format_schema(name="Reasoning", schema=schema) + + expected_schema = { + "type": "json_schema", + "json_schema": { + "name": "Reasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + } + + assert structured_output_schema == expected_schema + + +def test_build_schema_with_nonpydantic_structured_output(): + schema = KernelJsonSchemaBuilder.build(parameter_type=NonPydanticReasoning, structured_output=True) + structured_output_schema = generate_structured_output_response_format_schema( + name="NonPydanticReasoning", schema=schema + ) + + expected_schema = { + "type": "json_schema", + "json_schema": { + "name": "NonPydanticReasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": {"explanation": {"type": "string"}, "output": {"type": "string"}}, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + } + + assert structured_output_schema == expected_schema diff --git a/python/uv.lock 
b/python/uv.lock index f9e28c9ee04a..0693a0bac72e 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -2545,7 +2545,7 @@ wheels = [ [[package]] name = "openai" -version = "1.42.0" +version = "1.47.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, @@ -2557,9 +2557,9 @@ dependencies = [ { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8c/1f/310b0b5efb6178ad9f9ca4a80b2ead3cb7cbc16a1b843941bcf1c52dd884/openai-1.42.0.tar.gz", hash = "sha256:c9d31853b4e0bc2dc8bd08003b462a006035655a701471695d0bfdc08529cde3", size = 290549 } +sdist = { url = "https://files.pythonhosted.org/packages/e9/80/4c366e9113527894584a6404f105d134fae83a314dc04a6a99bd0e2459bb/openai-1.47.0.tar.gz", hash = "sha256:6e14d6f77c8cf546646afcd87a2ef752505b3710d2564a2e433e17307dfa86a0", size = 297886 } wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/9e/d77569d06e365f093977d94f305a395b7ac5ccd746016a2e8dd34c4e20c1/openai-1.42.0-py3-none-any.whl", hash = "sha256:dc91e0307033a4f94931e5d03cc3b29b9717014ad5e73f9f2051b6cb5eda4d80", size = 362858 }, + { url = "https://files.pythonhosted.org/packages/a2/3c/28d7cf1a3292c93e5def8acd89535ba380cb8dd888c26cdbfe420249e143/openai-1.47.0-py3-none-any.whl", hash = "sha256:9ccc8737dfa791f7bd903db4758c176b8544a8cd89d3a3d2add3cea02a34c3a0", size = 375576 }, ] [[package]] @@ -4188,7 +4188,7 @@ wheels = [ [[package]] name = "semantic-kernel" -version = "1.8.3" +version = "1.9.0" source = { editable = "." } dependencies = [ { name = "aiohttp", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },