From 9d28474c4a5fab533419c5bca6b9328533d7b6fd Mon Sep 17 00:00:00 2001 From: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com> Date: Tue, 17 Sep 2024 08:06:29 -0700 Subject: [PATCH] .Net: OpenAI Structured Outputs ADR and implementation for Option 1 and Option 2 (#8648) ### Motivation and Context Related: https://github.com/microsoft/semantic-kernel/issues/7946 Closes: https://github.com/microsoft/semantic-kernel/issues/8519 [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) is a feature in OpenAI API that ensures the model will always generate responses based on provided JSON Schema. This gives more control over model responses, allows to avoid model hallucinations and write simpler prompts without a need to be specific about response format. This PR contains an ADR which describes several options how to enable this functionality in .NET version of Semantic Kernel and implementation for Option 1 and Option 2. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --------- Co-authored-by: Eirik Tsarpalis --- .../0053-dotnet-structured-outputs.md | 299 ++++++++++++++++++ .../OpenAI_StructuredOutputs.cs | 278 ++++++++++++++++ .../Connectors.OpenAI.UnitTests.csproj | 8 +- .../Core/OpenAIJsonSchemaTransformerTests.cs | 166 ++++++++++ .../OpenAIChatCompletionServiceTests.cs | 211 +++++++++++- ...chat_completion_refusal_test_response.json | 22 ++ ...letion_streaming_refusal_test_response.txt | 5 + .../Core/ClientCore.ChatCompletion.cs | 31 ++ .../Core/OpenAIJsonSchemaTransformer.cs | 71 +++++ 
.../Settings/OpenAIPromptExecutionSettings.cs | 6 +- .../src/Schema/KernelJsonSchemaBuilder.cs | 13 +- 11 files changed, 1102 insertions(+), 8 deletions(-) create mode 100644 docs/decisions/0053-dotnet-structured-outputs.md create mode 100644 dotnet/samples/Concepts/ChatCompletion/OpenAI_StructuredOutputs.cs create mode 100644 dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Core/OpenAIJsonSchemaTransformerTests.cs create mode 100644 dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_refusal_test_response.json create mode 100644 dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_streaming_refusal_test_response.txt create mode 100644 dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAIJsonSchemaTransformer.cs diff --git a/docs/decisions/0053-dotnet-structured-outputs.md b/docs/decisions/0053-dotnet-structured-outputs.md new file mode 100644 index 000000000000..1b028ff58796 --- /dev/null +++ b/docs/decisions/0053-dotnet-structured-outputs.md @@ -0,0 +1,299 @@ +--- +# These are optional elements. Feel free to remove any of them. +status: proposed +contact: dmytrostruk +date: 2024-09-10 +deciders: sergeymenshykh, markwallace, rbarreto, westey-m, dmytrostruk, ben.thomas, evan.mattson, crickman +--- + +# Structured Outputs implementation in .NET version of Semantic Kernel + +## Context and Problem Statement + +[Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) is a feature in OpenAI API that ensures the model will always generate responses based on provided JSON Schema. This gives more control over model responses, allows to avoid model hallucinations and write simpler prompts without a need to be specific about response format. This ADR describes several options how to enable this functionality in .NET version of Semantic Kernel. 
+ +A couple of examples how it's implemented in .NET and Python OpenAI SDKs: + +.NET OpenAI SDK: +```csharp +ChatCompletionOptions options = new() +{ + ResponseFormat = ChatResponseFormat.CreateJsonSchemaFormat( + name: "math_reasoning", + jsonSchema: BinaryData.FromString(""" + { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "output": { "type": "string" } + }, + "required": ["explanation", "output"], + "additionalProperties": false + } + }, + "final_answer": { "type": "string" } + }, + "required": ["steps", "final_answer"], + "additionalProperties": false + } + """), + strictSchemaEnabled: true) +}; + +ChatCompletion chatCompletion = await client.CompleteChatAsync( + ["How can I solve 8x + 7 = -23?"], + options); + +using JsonDocument structuredJson = JsonDocument.Parse(chatCompletion.ToString()); + +Console.WriteLine($"Final answer: {structuredJson.RootElement.GetProperty("final_answer").GetString()}"); +Console.WriteLine("Reasoning steps:"); +``` + +Python OpenAI SDK: + +```python +class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + +completion = client.beta.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "Extract the event information."}, + {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, + ], + response_format=CalendarEvent, +) + +event = completion.choices[0].message.parsed +``` + +## Considered Options + +**Note**: All of the options presented in this ADR are not mutually exclusive - they can be implemented and supported simultaneously. 
+ +### Option #1: Use OpenAI.Chat.ChatResponseFormat object for ResponseFormat property (similar to .NET OpenAI SDK) + +This approach means that `OpenAI.Chat.ChatResponseFormat` object with JSON Schema will be constructed by user and provided to `OpenAIPromptExecutionSettings.ResponseFormat` property, and Semantic Kernel will pass it to .NET OpenAI SDK as it is. + +Usage example: + +```csharp +// Initialize Kernel +Kernel kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: "gpt-4o-2024-08-06", + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + +// Create JSON Schema with desired response type from string. +ChatResponseFormat chatResponseFormat = ChatResponseFormat.CreateJsonSchemaFormat( + name: "math_reasoning", + jsonSchema: BinaryData.FromString(""" + { + "type": "object", + "properties": { + "Steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Explanation": { "type": "string" }, + "Output": { "type": "string" } + }, + "required": ["Explanation", "Output"], + "additionalProperties": false + } + }, + "FinalAnswer": { "type": "string" } + }, + "required": ["Steps", "FinalAnswer"], + "additionalProperties": false + } + """), + strictSchemaEnabled: true); + +// Pass ChatResponseFormat in OpenAIPromptExecutionSettings.ResponseFormat property. +var executionSettings = new OpenAIPromptExecutionSettings +{ + ResponseFormat = chatResponseFormat +}; + +// Get string result. +var result = await kernel.InvokePromptAsync("How can I solve 8x + 7 = -23?", new(executionSettings)); + +Console.WriteLine(result.ToString()); + +// Output: + +// { +// "Steps":[ +// { +// "Explanation":"Start with the equation: (8x + 7 = -23). The goal is to isolate (x) on one side of the equation. 
To begin, we need to remove the constant term from the left side of the equation.", +// "Output":"8x + 7 = -23" +// }, +// { +// "Explanation":"Subtract 7 from both sides of the equation to eliminate the constant from the left side.", +// "Output":"8x + 7 - 7 = -23 - 7" +// }, +// { +// "Explanation":"Simplify both sides: The +7 and -7 on the left will cancel out, while on the right side, -23 - 7 equals -30.", +// "Output":"8x = -30" +// }, +// { +// "Explanation":"Now, solve for (x) by dividing both sides of the equation by 8. This will isolate (x).", +// "Output":"8x / 8 = -30 / 8" +// }, +// { +// "Explanation":"Simplify the right side of the equation by performing the division: -30 divided by 8 equals -3.75.", +// "Output":"x = -3.75" +// } +// ], +// "FinalAnswer":"x = -3.75" +// } +``` + +Pros: +- This approach is already supported in Semantic Kernel without any additional changes, since there is a logic to pass `ChatResponseFormat` object as it is to .NET OpenAI SDK. +- Consistent with .NET OpenAI SDK. + +Cons: +- No type-safety. Information about response type should be manually constructed by user to perform a request. To access each response property, the response should be handled manually as well. It's possible to define a C# type and use JSON deserialization for response, but JSON Schema for request will still be defined separately, which means that information about the type will be stored in 2 places and any modifications to the type should be handled in 2 places. +- Inconsistent with Python version, where response type is defined in a class and passed to `response_format` property by simple assignment. + +### Option #2: Use C# type for ResponseFormat property (similar to Python OpenAI SDK) + +This approach means that `OpenAI.Chat.ChatResponseFormat` object with JSON Schema will be constructed by Semantic Kernel, and user just needs to define C# type and assign it to `OpenAIPromptExecutionSettings.ResponseFormat` property. 
+ +Usage example: + +```csharp +// Define desired response models +private sealed class MathReasoning +{ + public List<MathReasoningStep> Steps { get; set; } + + public string FinalAnswer { get; set; } +} + +private sealed class MathReasoningStep +{ + public string Explanation { get; set; } + + public string Output { get; set; } +} + +// Initialize Kernel +Kernel kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: "gpt-4o-2024-08-06", + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + +// Pass desired response type in OpenAIPromptExecutionSettings.ResponseFormat property. +var executionSettings = new OpenAIPromptExecutionSettings +{ + ResponseFormat = typeof(MathReasoning) +}; + +// Get string result. +var result = await kernel.InvokePromptAsync("How can I solve 8x + 7 = -23?", new(executionSettings)); + +// Deserialize string to desired response type. +var mathReasoning = JsonSerializer.Deserialize<MathReasoning>(result.ToString())!; + +OutputResult(mathReasoning); + +// Output: + +// Step #1 +// Explanation: Start with the given equation. +// Output: 8x + 7 = -23 + +// Step #2 +// Explanation: To isolate the term containing x, subtract 7 from both sides of the equation. +// Output: 8x + 7 - 7 = -23 - 7 + +// Step #3 +// Explanation: To solve for x, divide both sides of the equation by 8, which is the coefficient of x. +// Output: (8x)/8 = (-30)/8 + +// Step #4 +// Explanation: This simplifies to x = -3.75, as dividing -30 by 8 gives -3.75. +// Output: x = -3.75 + +// Final answer: x = -3.75 +``` + +Pros: +- Type safety. Users won't need to define the JSON Schema manually, as it will be generated by Semantic Kernel, so users can focus on defining C# types only. Properties on the C# type can be added or removed to change the format of the desired response. The `Description` attribute is supported to provide more detailed information about a specific property. +- Consistent with Python OpenAI SDK. 
+- Minimal code changes are required, since the Semantic Kernel codebase already has logic to build a JSON Schema from a C# type. + +Cons: +- Desired type should be provided via `ResponseFormat = typeof(MathReasoning)` or `ResponseFormat = object.GetType()` assignment, which can be improved by using C# generics. +- Response coming from Kernel is still a `string`, so it has to be deserialized to the desired type manually by the user. + +### Option #3: Use C# generics + +This approach is similar to Option #2, but instead of providing type information via `ResponseFormat = typeof(MathReasoning)` or `ResponseFormat = object.GetType()` assignment, it will be possible to use C# generics. + +Usage example: + +```csharp +// Define desired response models +private sealed class MathReasoning +{ + public List<MathReasoningStep> Steps { get; set; } + + public string FinalAnswer { get; set; } +} + +private sealed class MathReasoningStep +{ + public string Explanation { get; set; } + + public string Output { get; set; } +} + +// Initialize Kernel +Kernel kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: "gpt-4o-2024-08-06", + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + +// Get MathReasoning result. +var mathReasoning = await kernel.InvokePromptAsync<MathReasoning>("How can I solve 8x + 7 = -23?"); + +OutputResult(mathReasoning); +``` + +Pros: +- Simple usage: no need to define `PromptExecutionSettings` or to deserialize the string response afterwards. + +Cons: +- Implementation complexity compared to Option #1 and Option #2: + 1. Chat completion service returns a string, so deserialization logic should be added somewhere to return a type instead of a string. Potential place: `FunctionResult`, as it already contains the generic `GetValue<T>` method, but it doesn't contain deserialization logic, so it should be added and tested. + 2. `IChatCompletionService` and its methods are not generic, but information about the response type should still be passed to the OpenAI connector. 
One way would be to add a generic version of `IChatCompletionService`, which may introduce a lot of additional code changes. Another way is to pass type information through the `PromptExecutionSettings` object. Taking into account that `IChatCompletionService` uses `PromptExecutionSettings` and not `OpenAIPromptExecutionSettings`, the `ResponseFormat` property would have to be moved to the base execution settings class, so that the response format can be passed without coupling to a specific connector. On the other hand, it's not clear whether a `ResponseFormat` parameter will be useful for other AI connectors. + 3. The streaming scenario won't be supported, because the whole response content must be aggregated before it can be deserialized. If Semantic Kernel does the aggregation, the streaming capability is lost. + +## Out of scope + +Function Calling functionality is out of scope of this ADR, since the Structured Outputs feature is already partially used in the current function calling implementation by providing a JSON schema with information about the function and its arguments. The only remaining parameter to add to this process is the `strict` property, which should be set to `true` to enable Structured Outputs in function calling. This parameter can be exposed through the `PromptExecutionSettings` type. + +By setting the `strict` property to `true` for the function calling process, the model should not create additional non-existent parameters or functions, which could resolve hallucination problems. On the other hand, enabling Structured Outputs for function calling will introduce additional latency during the first request, since the schema is processed first, so it may impact performance, which means that this property should be well-documented. + +More information here: [Function calling with Structured Outputs](https://platform.openai.com/docs/guides/function-calling/function-calling-with-structured-outputs). + +## Decision Outcome + +1.
Support Option #1 and Option #2, create a task for Option #3 to handle it separately. +2. Create a task for Structured Outputs in Function Calling and handle it separately. diff --git a/dotnet/samples/Concepts/ChatCompletion/OpenAI_StructuredOutputs.cs b/dotnet/samples/Concepts/ChatCompletion/OpenAI_StructuredOutputs.cs new file mode 100644 index 000000000000..df9463c69e6f --- /dev/null +++ b/dotnet/samples/Concepts/ChatCompletion/OpenAI_StructuredOutputs.cs @@ -0,0 +1,278 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Text.Json; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using OpenAI.Chat; + +namespace ChatCompletion; + +/// +/// Structured Outputs is a feature in OpenAI API that ensures the model will always generate responses based on provided JSON Schema. +/// This gives more control over model responses, allows to avoid model hallucinations and write simpler prompts without a need to be specific about response format. +/// More information here: . +/// +/// +/// OpenAI Structured Outputs feature is available only in latest large language models, starting with GPT-4o. +/// More information here: . +/// +/// +/// Some keywords from JSON Schema are not supported in OpenAI Structured Outputs yet. For example, "format" keyword for strings is not supported. +/// It means that properties with types , , , , +/// , are not supported. +/// This information should be taken into consideration during response format type design. +/// More information here: . +/// +public class OpenAI_StructuredOutputs(ITestOutputHelper output) : BaseTest(output) +{ + /// + /// This method shows how to enable Structured Outputs feature with object by providing + /// JSON schema of desired response format. + /// + [Fact] + public async Task StructuredOutputsWithChatResponseFormatAsync() + { + // Initialize kernel. 
+ Kernel kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: "gpt-4o-2024-08-06", + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + + // Initialize ChatResponseFormat object with JSON schema of desired response format. + ChatResponseFormat chatResponseFormat = ChatResponseFormat.CreateJsonSchemaFormat( + name: "movie_result", + jsonSchema: BinaryData.FromString(""" + { + "type": "object", + "properties": { + "Movies": { + "type": "array", + "items": { + "type": "object", + "properties": { + "Title": { "type": "string" }, + "Director": { "type": "string" }, + "ReleaseYear": { "type": "integer" }, + "Rating": { "type": "number" }, + "IsAvailableOnStreaming": { "type": "boolean" }, + "Tags": { "type": "array", "items": { "type": "string" } } + }, + "required": ["Title", "Director", "ReleaseYear", "Rating", "IsAvailableOnStreaming", "Tags"], + "additionalProperties": false + } + } + }, + "required": ["Movies"], + "additionalProperties": false + } + """), + strictSchemaEnabled: true); + + // Specify response format by setting ChatResponseFormat object in prompt execution settings. + var executionSettings = new OpenAIPromptExecutionSettings + { + ResponseFormat = chatResponseFormat + }; + + // Send a request and pass prompt execution settings with desired response format. + var result = await kernel.InvokePromptAsync("What are the top 10 movies of all time?", new(executionSettings)); + + // Deserialize string response to a strong type to access type properties. + // At this point, the deserialization logic won't fail, because MovieResult type was described using JSON schema. + // This ensures that response string is a serialized version of MovieResult type. + var movieResult = JsonSerializer.Deserialize(result.ToString())!; + + // Output the result. 
+ this.OutputResult(movieResult); + + // Output: + + // Title: The Lord of the Rings: The Fellowship of the Ring + // Director: Peter Jackson + // Release year: 2001 + // Rating: 8.8 + // Is available on streaming: True + // Tags: Adventure,Drama,Fantasy + + // ...and more... + } + + /// + /// This method shows how to enable Structured Outputs feature with object by providing + /// the type of desired response format. In this scenario, JSON schema will be created automatically based on provided type. + /// + [Fact] + public async Task StructuredOutputsWithTypeInExecutionSettingsAsync() + { + // Initialize kernel. + Kernel kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: "gpt-4o-2024-08-06", + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + + // Specify response format by setting Type object in prompt execution settings. + var executionSettings = new OpenAIPromptExecutionSettings + { + ResponseFormat = typeof(MovieResult) + }; + + // Send a request and pass prompt execution settings with desired response format. + var result = await kernel.InvokePromptAsync("What are the top 10 movies of all time?", new(executionSettings)); + + // Deserialize string response to a strong type to access type properties. + // At this point, the deserialization logic won't fail, because MovieResult type was specified as desired response format. + // This ensures that response string is a serialized version of MovieResult type. + var movieResult = JsonSerializer.Deserialize(result.ToString())!; + + // Output the result. + this.OutputResult(movieResult); + + // Output: + + // Title: The Lord of the Rings: The Fellowship of the Ring + // Director: Peter Jackson + // Release year: 2001 + // Rating: 8.8 + // Is available on streaming: True + // Tags: Adventure,Drama,Fantasy + + // ...and more... + } + + /// + /// This method shows how to use Structured Outputs feature in combination with Function Calling. + /// function returns a of email bodies. 
+ /// As for final result, the desired response format should be , which contains additional property. + /// This shows how the data can be transformed with AI using strong types without additional instructions in the prompt. + /// + [Fact] + public async Task StructuredOutputsWithFunctionCallingAsync() + { + // Initialize kernel. + Kernel kernel = Kernel.CreateBuilder() + .AddOpenAIChatCompletion( + modelId: "gpt-4o-2024-08-06", + apiKey: TestConfiguration.OpenAI.ApiKey) + .Build(); + + kernel.ImportPluginFromType(); + + // Specify response format by setting Type object in prompt execution settings and enable automatic function calling. + var executionSettings = new OpenAIPromptExecutionSettings + { + ResponseFormat = typeof(EmailResult), + ToolCallBehavior = ToolCallBehavior.AutoInvokeKernelFunctions + }; + + // Send a request and pass prompt execution settings with desired response format. + var result = await kernel.InvokePromptAsync("Process the emails.", new(executionSettings)); + + // Deserialize string response to a strong type to access type properties. + // At this point, the deserialization logic won't fail, because EmailResult type was specified as desired response format. + // This ensures that response string is a serialized version of EmailResult type. + var emailResult = JsonSerializer.Deserialize(result.ToString())!; + + // Output the result. + this.OutputResult(emailResult); + + // Output: + + // Email #1 + // Body: Let's catch up over coffee this Saturday. It's been too long! + // Category: Social + + // Email #2 + // Body: Please review the attached document and provide your feedback by EOD. + // Category: Work + + // ...and more... + } + + #region private + + /// Movie result struct that will be used as desired chat completion response format (structured output). + private struct MovieResult + { + public List Movies { get; set; } + } + + /// Movie struct that will be used as desired chat completion response format (structured output). 
+ private struct Movie + { + public string Title { get; set; } + + public string Director { get; set; } + + public int ReleaseYear { get; set; } + + public double Rating { get; set; } + + public bool IsAvailableOnStreaming { get; set; } + + public List Tags { get; set; } + } + + private sealed class EmailResult + { + public List Emails { get; set; } + } + + private sealed class Email + { + public string Body { get; set; } + + public string Category { get; set; } + } + + /// Plugin to simulate RAG scenario and return collection of data. + private sealed class EmailPlugin + { + /// Function to simulate RAG scenario and return collection of data. + [KernelFunction] + private List GetEmails() + { + return + [ + "Hey, just checking in to see how you're doing!", + "Can you pick up some groceries on your way back home? We need milk and bread.", + "Happy Birthday! Wishing you a fantastic day filled with love and joy.", + "Let's catch up over coffee this Saturday. It's been too long!", + "Please review the attached document and provide your feedback by EOD.", + ]; + } + } + + /// Helper method to output object content. + private void OutputResult(MovieResult movieResult) + { + for (var i = 0; i < movieResult.Movies.Count; i++) + { + var movie = movieResult.Movies[i]; + + this.Output.WriteLine($"Movie #{i + 1}"); + this.Output.WriteLine($"Title: {movie.Title}"); + this.Output.WriteLine($"Director: {movie.Director}"); + this.Output.WriteLine($"Release year: {movie.ReleaseYear}"); + this.Output.WriteLine($"Rating: {movie.Rating}"); + this.Output.WriteLine($"Is available on streaming: {movie.IsAvailableOnStreaming}"); + this.Output.WriteLine($"Tags: {string.Join(",", movie.Tags)}"); + } + } + + /// Helper method to output object content. 
+ private void OutputResult(EmailResult emailResult) + { + for (var i = 0; i < emailResult.Emails.Count; i++) + { + var email = emailResult.Emails[i]; + + this.Output.WriteLine($"Email #{i + 1}"); + this.Output.WriteLine($"Body: {email.Body}"); + this.Output.WriteLine($"Category: {email.Category}"); + } + } + + #endregion +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj index e187080a2c35..d80e3bd914de 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Connectors.OpenAI.UnitTests.csproj @@ -7,7 +7,7 @@ true enable false - $(NoWarn);SKEXP0001;SKEXP0070;SKEXP0010;CS1591;IDE1006;RCS1261;CA1031;CA1308;CA1861;CA2007;CA2234;VSTHRD111 + $(NoWarn);SKEXP0001;SKEXP0070;SKEXP0010;CS1591;IDE1006;RCS1261;CA1031;CA1308;CA1861;CA2007;CA2234;VSTHRD111;CA1812 @@ -56,9 +56,15 @@ Always + + Always + Always + + Always + Always diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Core/OpenAIJsonSchemaTransformerTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Core/OpenAIJsonSchemaTransformerTests.cs new file mode 100644 index 000000000000..b6df5ae06354 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Core/OpenAIJsonSchemaTransformerTests.cs @@ -0,0 +1,166 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Collections.Generic; +using System.ComponentModel; +using System.Text.Json; +using JsonSchemaMapper; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using Xunit; + +namespace SemanticKernel.Connectors.OpenAI.UnitTests.Core; + +/// +/// Unit tests for class. 
+/// +public sealed class OpenAIJsonSchemaTransformerTests +{ + private static readonly JsonSchemaMapperConfiguration s_jsonSchemaMapperConfiguration = new() + { + IncludeSchemaVersion = false, + IncludeTypeInEnums = true, + TreatNullObliviousAsNonNullable = true, + TransformSchemaNode = OpenAIJsonSchemaTransformer.Transform, + }; + + private static readonly JsonSerializerOptions s_jsonSerializerOptions = new() + { + WriteIndented = false + }; + + [Fact] + public void ItTransformsJsonSchemaCorrectly() + { + // Arrange + var type = typeof(Parent); + var expectedSchema = """ + { + "type": "object", + "properties": { + "Items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "NumericProperty": { + "description": "Description of numeric property.", + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "NumericProperty" + ] + } + }, + "Item": { + "type": "object", + "properties": { + "NumericProperty": { + "description": "Description of numeric property.", + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "NumericProperty" + ] + }, + "NullableItems": { + "type": [ + "array", + "null" + ], + "items": { + "type": "object", + "properties": { + "TextProperty": { + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false, + "required": [ + "TextProperty" + ] + } + }, + "NullableItem": { + "type": [ + "object", + "null" + ], + "properties": { + "TextProperty": { + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false, + "required": [ + "TextProperty" + ] + }, + "TextProperty": { + "type": [ + "string", + "null" + ] + } + }, + "additionalProperties": false, + "required": [ + "Items", + "Item", + "NullableItems", + "NullableItem", + "TextProperty" + ] + } + """; + + // Act + var schema = KernelJsonSchemaBuilder.Build(options: null, type, configuration: s_jsonSchemaMapperConfiguration); + + // Assert + Assert.Equal(NormalizeJson(expectedSchema), 
NormalizeJson(schema.ToString())); + } + + #region private + + private static string NormalizeJson(string json) + { + using JsonDocument doc = JsonDocument.Parse(json); + return JsonSerializer.Serialize(doc, s_jsonSerializerOptions); + } + + private sealed class Parent + { + public List Items { get; set; } = []; + + public Child Item { get; set; } = new(); + + public List? NullableItems { get; set; } + + public ChildNullable? NullableItem { get; set; } + + public string? TextProperty { get; set; } + } + + private sealed class Child + { + [Description("Description of numeric property.")] + public int NumericProperty { get; set; } + } + + private struct ChildNullable + { + public string? TextProperty { get; set; } + } + + #endregion +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs index 014d9ce4bba9..658709cf5b14 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIChatCompletionServiceTests.cs @@ -1032,6 +1032,192 @@ static void MutateChatHistory(AutoFunctionInvocationContext context, Func(actualRequestContent); + var requestResponseFormat = requestJsonElement.GetProperty("response_format"); + + Assert.Equal("json_schema", requestResponseFormat.GetProperty("type").GetString()); + Assert.Equal("MathReasoning", requestResponseFormat.GetProperty("json_schema").GetProperty("name").GetString()); + Assert.True(requestResponseFormat.GetProperty("json_schema").GetProperty("strict").GetBoolean()); + + var schema = requestResponseFormat.GetProperty("json_schema").GetProperty("schema"); + + Assert.Equal("object", schema.GetProperty("type").GetString()); + Assert.False(schema.GetProperty("additionalProperties").GetBoolean()); + Assert.Equal(2, schema.GetProperty("required").GetArrayLength()); + + 
var requiredParentProperties = new List + { + schema.GetProperty("required")[0].GetString(), + schema.GetProperty("required")[1].GetString(), + }; + + Assert.Contains("Steps", requiredParentProperties); + Assert.Contains("FinalAnswer", requiredParentProperties); + + var schemaProperties = schema.GetProperty("properties"); + + Assert.Equal("string", schemaProperties.GetProperty("FinalAnswer").GetProperty("type").GetString()); + Assert.Equal("array", schemaProperties.GetProperty("Steps").GetProperty("type").GetString()); + + var items = schemaProperties.GetProperty("Steps").GetProperty("items"); + + Assert.Equal("object", items.GetProperty("type").GetString()); + Assert.False(items.GetProperty("additionalProperties").GetBoolean()); + Assert.Equal(2, items.GetProperty("required").GetArrayLength()); + + var requiredChildProperties = new List + { + items.GetProperty("required")[0].GetString(), + items.GetProperty("required")[1].GetString(), + }; + + Assert.Contains("Explanation", requiredChildProperties); + Assert.Contains("Output", requiredChildProperties); + + var itemsProperties = items.GetProperty("properties"); + + Assert.Equal("string", itemsProperties.GetProperty("Explanation").GetProperty("type").GetString()); + Assert.Equal("string", itemsProperties.GetProperty("Output").GetProperty("type").GetString()); + } + + [Theory] + [InlineData(typeof(TestStruct))] + [InlineData(typeof(TestStruct?))] + public async Task GetChatMessageContentsSendsValidJsonSchemaWithStruct(Type responseFormatType) + { + // Arrange + var executionSettings = new OpenAIPromptExecutionSettings { ResponseFormat = responseFormatType }; + + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new StringContent(File.ReadAllText("TestData/chat_completion_test_response.json")) + }; + + var sut = new OpenAIChatCompletionService("model-id", "api-key", httpClient: this._httpClient); + + // Act + await 
sut.GetChatMessageContentsAsync(this._chatHistoryForTest, executionSettings); + + // Assert + var actualRequestContent = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent!); + Assert.NotNull(actualRequestContent); + + var requestJsonElement = JsonSerializer.Deserialize(actualRequestContent); + var requestResponseFormat = requestJsonElement.GetProperty("response_format"); + + Assert.Equal("json_schema", requestResponseFormat.GetProperty("type").GetString()); + Assert.Equal("TestStruct", requestResponseFormat.GetProperty("json_schema").GetProperty("name").GetString()); + Assert.True(requestResponseFormat.GetProperty("json_schema").GetProperty("strict").GetBoolean()); + + var schema = requestResponseFormat.GetProperty("json_schema").GetProperty("schema"); + + Assert.Equal("object", schema.GetProperty("type").GetString()); + Assert.False(schema.GetProperty("additionalProperties").GetBoolean()); + Assert.Equal(2, schema.GetProperty("required").GetArrayLength()); + + var requiredParentProperties = new List + { + schema.GetProperty("required")[0].GetString(), + schema.GetProperty("required")[1].GetString(), + }; + + Assert.Contains("TextProperty", requiredParentProperties); + Assert.Contains("NumericProperty", requiredParentProperties); + } + + [Fact] + public async Task GetChatMessageContentReturnsRefusal() + { + // Arrange + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new StringContent(File.ReadAllText("TestData/chat_completion_refusal_test_response.json")) + }; + + var sut = new OpenAIChatCompletionService("model-id", "api-key", httpClient: this._httpClient); + + // Act + var content = await sut.GetChatMessageContentAsync(this._chatHistoryForTest); + + // Assert + var refusal = content.Metadata?["Refusal"] as string; + + Assert.NotNull(refusal); + Assert.Equal("I'm sorry, I cannot assist with that request.", refusal); + } + + [Fact] + public async Task 
GetStreamingChatMessageContentsReturnsRefusal() + { + // Arrange + var service = new OpenAIChatCompletionService("model-id", "api-key", "organization", this._httpClient); + using var stream = File.OpenRead("TestData/chat_completion_streaming_refusal_test_response.txt"); + + this._messageHandlerStub.ResponseToReturn = new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new StreamContent(stream) + }; + + // Act + var enumerator = service.GetStreamingChatMessageContentsAsync([]).GetAsyncEnumerator(); + + await enumerator.MoveNextAsync(); + + // Assert + var refusalUpdate = enumerator.Current.Metadata?["RefusalUpdate"] as string; + + Assert.NotNull(refusalUpdate); + Assert.Equal("I'm sorry, I cannot assist with that request.", refusalUpdate); + } + [Fact] public async Task ItCreatesCorrectFunctionToolCallsWhenUsingAutoFunctionChoiceBehaviorAsync() { @@ -1055,7 +1241,6 @@ public async Task ItCreatesCorrectFunctionToolCallsWhenUsingAutoFunctionChoiceBe // Act await chatCompletion.GetChatMessageContentsAsync(chatHistory, executionSettings, kernel); - // Assert var actualRequestContent = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent!); Assert.NotNull(actualRequestContent); @@ -1090,7 +1275,6 @@ public async Task ItCreatesCorrectFunctionToolCallsWhenUsingNoneFunctionChoiceBe // Act await chatCompletion.GetChatMessageContentsAsync(chatHistory, executionSettings, kernel); - // Assert var actualRequestContent = Encoding.UTF8.GetString(this._messageHandlerStub.RequestContent!); Assert.NotNull(actualRequestContent); @@ -1226,4 +1410,27 @@ public async Task OnAutoFunctionInvocationAsync(AutoFunctionInvocationContext co } } """; + +#pragma warning disable CS8618, CA1812 + private sealed class MathReasoning + { + public List Steps { get; set; } + + public string FinalAnswer { get; set; } + } + + private sealed class MathReasoningStep + { + public string Explanation { get; set; } + + public string Output { get; set; } + } + + private struct TestStruct + { + 
public string TextProperty { get; set; } + + public int? NumericProperty { get; set; } + } +#pragma warning restore CS8618, CA1812 } diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_refusal_test_response.json b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_refusal_test_response.json new file mode 100644 index 000000000000..7ed7e188feeb --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_refusal_test_response.json @@ -0,0 +1,22 @@ +{ + "id": "response-id", + "object": "chat.completion", + "created": 1704208954, + "model": "gpt-4", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "refusal": "I'm sorry, I cannot assist with that request." + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 55, + "completion_tokens": 100, + "total_tokens": 155 + }, + "system_fingerprint": null +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_streaming_refusal_test_response.txt b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_streaming_refusal_test_response.txt new file mode 100644 index 000000000000..06e07ba459dc --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/TestData/chat_completion_streaming_refusal_test_response.txt @@ -0,0 +1,5 @@ +data: {"id":"chatcmpl-96fqQVHGjG9Yzs4ZMB1K6nfy2oEoo","object":"chat.completion.chunk","created":1711377846,"model":"gpt-4-0125-preview","system_fingerprint":"fp_a7daf7c51e","choices":[{"index":0,"delta":{"refusal":"I'm sorry, I cannot assist with that request."},"logprobs":null,"finish_reason":null}]} + +data: {"id":"chatcmpl-96fqQVHGjG9Yzs4ZMB1K6nfy2oEoo","object":"chat.completion.chunk","created":1711377846,"model":"gpt-4-0125-preview","system_fingerprint":"fp_a7daf7c51e","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]} + +data: [DONE] diff --git 
a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs index a44663435d99..9f1120d7c651 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/ClientCore.ChatCompletion.cs @@ -11,6 +11,7 @@ using System.Text.Json; using System.Threading; using System.Threading.Tasks; +using JsonSchemaMapper; using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Diagnostics; @@ -26,6 +27,17 @@ namespace Microsoft.SemanticKernel.Connectors.OpenAI; /// internal partial class ClientCore { + /// + /// for JSON schema format for structured outputs. + /// + private static readonly JsonSchemaMapperConfiguration s_jsonSchemaMapperConfiguration = new() + { + IncludeSchemaVersion = false, + IncludeTypeInEnums = true, + TreatNullObliviousAsNonNullable = true, + TransformSchemaNode = OpenAIJsonSchemaTransformer.Transform + }; + protected const string ModelProvider = "openai"; protected record ToolCallingConfig(IList? Tools, ChatToolChoice? Choice, bool AutoInvoke, bool AllowAnyRequestedKernelFunction, FunctionChoiceBehaviorOptions? Options); @@ -90,6 +102,7 @@ protected record ToolCallingConfig(IList? Tools, ChatToolChoice? Choic { nameof(completions.CreatedAt), completions.CreatedAt }, { nameof(completions.SystemFingerprint), completions.SystemFingerprint }, { nameof(completions.Usage), completions.Usage }, + { nameof(completions.Refusal), completions.Refusal }, // Serialization of this struct behaves as an empty object {}, need to cast to string to avoid it. { nameof(completions.FinishReason), completions.FinishReason.ToString() }, @@ -104,6 +117,7 @@ protected record ToolCallingConfig(IList? Tools, ChatToolChoice? 
Choic { nameof(completionUpdate.Id), completionUpdate.Id }, { nameof(completionUpdate.CreatedAt), completionUpdate.CreatedAt }, { nameof(completionUpdate.SystemFingerprint), completionUpdate.SystemFingerprint }, + { nameof(completionUpdate.RefusalUpdate), completionUpdate.RefusalUpdate }, // Serialization of this struct behaves as an empty object {}, need to cast to string to avoid it. { nameof(completionUpdate.FinishReason), completionUpdate.FinishReason?.ToString() }, @@ -528,11 +542,28 @@ protected virtual ChatCompletionOptions CreateChatCompletionOptions( } } break; + case Type formatObjectType: + return GetJsonSchemaResponseFormat(formatObjectType); } return null; } + /// + /// Gets instance of object for JSON schema format for structured outputs. + /// + private static ChatResponseFormat GetJsonSchemaResponseFormat(Type formatObjectType) + { + var type = formatObjectType.IsGenericType && formatObjectType.GetGenericTypeDefinition() == typeof(Nullable<>) ? + Nullable.GetUnderlyingType(formatObjectType)! : + formatObjectType; + + var schema = KernelJsonSchemaBuilder.Build(options: null, type, configuration: s_jsonSchemaMapperConfiguration); + var schemaBinaryData = BinaryData.FromString(schema.ToString()); + + return ChatResponseFormat.CreateJsonSchemaFormat(type.Name, schemaBinaryData, strictSchemaEnabled: true); + } + /// Checks if a tool call is for a function that was defined. private static bool IsRequestableTool(IList tools, FunctionCallContent functionCallContent) { diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAIJsonSchemaTransformer.cs b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAIJsonSchemaTransformer.cs new file mode 100644 index 000000000000..73a0fbfb711d --- /dev/null +++ b/dotnet/src/Connectors/Connectors.OpenAI/Core/OpenAIJsonSchemaTransformer.cs @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System.Collections.Generic; +using System.Linq; +using System.Text.Json.Nodes; +using JsonSchemaMapper; + +namespace Microsoft.SemanticKernel.Connectors.OpenAI; + +/// +/// JSON Schema transformer to apply OpenAI conditions for structured outputs. +/// +/// - "additionalProperties" property must always be set to in objects. +/// More information here: . +/// +/// +/// - All fields must be "required". +/// More information here: . +/// +/// +internal static class OpenAIJsonSchemaTransformer +{ + private const string AdditionalPropertiesPropertyName = "additionalProperties"; + private const string TypePropertyName = "type"; + private const string ObjectValueName = "object"; + private const string PropertiesPropertyName = "properties"; + private const string RequiredPropertyName = "required"; + + internal static JsonNode Transform(JsonSchemaGenerationContext context, JsonNode schema) + { + // Transform schema if node is object only. + if (schema is JsonObject jsonSchemaObject) + { + var types = GetTypes(jsonSchemaObject); + + if (types is not null && types.Contains(ObjectValueName)) + { + // Set "additionalProperties" to "false". + jsonSchemaObject[AdditionalPropertiesPropertyName] = false; + + // Specify all properties as "required". + if (jsonSchemaObject.TryGetPropertyValue(PropertiesPropertyName, out var properties) && + properties is JsonObject propertiesObject) + { + var propertyNames = propertiesObject.Select(l => (JsonNode)l.Key).ToArray(); + + jsonSchemaObject[RequiredPropertyName] = new JsonArray(propertyNames); + } + } + } + + return schema; + } + + private static List? GetTypes(JsonObject jsonObject) + { + if (jsonObject.TryGetPropertyValue(TypePropertyName, out var typeProperty) && typeProperty is not null) + { + // For cases when "type" has an array value (e.g "type": "["object", "null"]"). 
+ if (typeProperty is JsonArray nodeArray) + { + return nodeArray.ToArray().Select(element => element?.GetValue()).ToList(); + } + + // Case when "type" has a string value (e.g. "type": "object"). + return [typeProperty.GetValue()]; + } + + return null; + } +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs index c83c653b2423..e6a86b5f1ba2 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. +using System; using System.Collections.Generic; using System.Collections.ObjectModel; using System.Diagnostics.CodeAnalysis; @@ -142,7 +143,10 @@ public long? Seed /// Gets or sets the response format to use for the completion. /// /// - /// Possible values are: "json_object", "text", object. + /// Possible values are: + /// - values: "json_object", "text"; + /// - object; + /// - object, which will be used to automatically create a JSON schema. /// [Experimental("SKEXP0010")] [JsonPropertyName("response_format")] diff --git a/dotnet/src/InternalUtilities/src/Schema/KernelJsonSchemaBuilder.cs b/dotnet/src/InternalUtilities/src/Schema/KernelJsonSchemaBuilder.cs index f5f9bc07bce2..416b77485d2e 100644 --- a/dotnet/src/InternalUtilities/src/Schema/KernelJsonSchemaBuilder.cs +++ b/dotnet/src/InternalUtilities/src/Schema/KernelJsonSchemaBuilder.cs @@ -29,11 +29,16 @@ internal static class KernelJsonSchemaBuilder TreatNullObliviousAsNonNullable = true, }; - public static KernelJsonSchema Build(JsonSerializerOptions? options, Type type, string? description = null) + public static KernelJsonSchema Build( + JsonSerializerOptions? options, + Type type, + string? description = null, + JsonSchemaMapperConfiguration? 
configuration = null) { - options ??= s_options; + var serializerOptions = options ?? s_options; + var mapperConfiguration = configuration ?? s_config; - JsonNode jsonSchema = options.GetJsonSchema(type, s_config); + JsonNode jsonSchema = serializerOptions.GetJsonSchema(type, mapperConfiguration); Debug.Assert(jsonSchema.GetValueKind() is JsonValueKind.Object or JsonValueKind.False or JsonValueKind.True); if (jsonSchema is not JsonObject jsonObj) @@ -49,7 +54,7 @@ public static KernelJsonSchema Build(JsonSerializerOptions? options, Type type, jsonObj["description"] = description; } - return KernelJsonSchema.Parse(jsonObj.ToJsonString(options)); + return KernelJsonSchema.Parse(jsonObj.ToJsonString(serializerOptions)); } private static JsonSerializerOptions CreateDefaultOptions()