From 3b8a7c206776fd849afb5e0d7bf96d82ab7332be Mon Sep 17 00:00:00 2001 From: Adit Sheth Date: Tue, 3 Dec 2024 23:26:55 +0530 Subject: [PATCH] .Net: Add responseMimeType Parameter to GeminiPromptExecutionSettings for Enhanced Output Control. (#9870) ### Motivation and Context **Why is this change required?** The absence of the `responseMimeType` property in .NET prevented users from leveraging structured output formats available in the Gemini API, causing unintended behavior in scenarios requiring JSON or controlled output. **What problem does it solve?** Adds flexibility for developers by enabling control over MIME type responses, particularly for JSON data parsing and classification tasks. **What scenario does it contribute to?** - Requests with structured response needs, such as JSON objects or controlled classification outputs. - Seamless migration and parity with the Python SDK's `responseMimeType` support. Fixes #9863 ### Description This PR adds support for the optional `responseMimeType` parameter to the `GeminiPromptExecutionSettings` class, allowing for better control over the output response format in Gemini API calls. This change addresses the missing ability to specify MIME types for valid JSON, plain text, or enumerated outputs. 
### Contribution Checklist - [Y] The code builds clean without any errors or warnings - [Y] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [Y] All unit tests pass, and I have added new tests where possible - [Y] I didn't break anyone :smile: Co-authored-by: Adit Sheth --- .../Clients/GeminiChatGenerationTests.cs | 4 +++- .../Core/Gemini/GeminiRequestTests.cs | 8 +++++-- .../GeminiPromptExecutionSettingsTests.cs | 5 ++++- .../Core/Gemini/Models/GeminiRequest.cs | 7 +++++- .../GeminiPromptExecutionSettings.cs | 22 ++++++++++++++++++- 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatGenerationTests.cs b/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatGenerationTests.cs index 9c6ee490029a..987e55f703bb 100644 --- a/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatGenerationTests.cs +++ b/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatGenerationTests.cs @@ -237,7 +237,8 @@ public async Task ShouldUsePromptExecutionSettingsAsync() MaxTokens = 102, Temperature = 0.45, TopP = 0.6, - AudioTimestamp = true + AudioTimestamp = true, + ResponseMimeType = "application/json" }; // Act @@ -249,6 +250,7 @@ public async Task ShouldUsePromptExecutionSettingsAsync() Assert.Equal(executionSettings.MaxTokens, geminiRequest.Configuration!.MaxOutputTokens); Assert.Equal(executionSettings.Temperature, geminiRequest.Configuration!.Temperature); Assert.Equal(executionSettings.AudioTimestamp, geminiRequest.Configuration!.AudioTimestamp); + Assert.Equal(executionSettings.ResponseMimeType, geminiRequest.Configuration!.ResponseMimeType); Assert.Equal(executionSettings.TopP, 
geminiRequest.Configuration!.TopP); } diff --git a/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/GeminiRequestTests.cs b/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/GeminiRequestTests.cs index 3c723490dc27..c6701ee09b84 100644 --- a/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/GeminiRequestTests.cs +++ b/dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/GeminiRequestTests.cs @@ -24,7 +24,8 @@ public void FromPromptItReturnsWithConfiguration() Temperature = 1.5, MaxTokens = 10, TopP = 0.9, - AudioTimestamp = true + AudioTimestamp = true, + ResponseMimeType = "application/json" }; // Act @@ -35,6 +36,7 @@ public void FromPromptItReturnsWithConfiguration() Assert.Equal(executionSettings.Temperature, request.Configuration.Temperature); Assert.Equal(executionSettings.MaxTokens, request.Configuration.MaxOutputTokens); Assert.Equal(executionSettings.AudioTimestamp, request.Configuration.AudioTimestamp); + Assert.Equal(executionSettings.ResponseMimeType, request.Configuration.ResponseMimeType); Assert.Equal(executionSettings.TopP, request.Configuration.TopP); } @@ -87,7 +89,8 @@ public void FromChatHistoryItReturnsWithConfiguration() Temperature = 1.5, MaxTokens = 10, TopP = 0.9, - AudioTimestamp = true + AudioTimestamp = true, + ResponseMimeType = "application/json" }; // Act @@ -98,6 +101,7 @@ public void FromChatHistoryItReturnsWithConfiguration() Assert.Equal(executionSettings.Temperature, request.Configuration.Temperature); Assert.Equal(executionSettings.MaxTokens, request.Configuration.MaxOutputTokens); Assert.Equal(executionSettings.AudioTimestamp, request.Configuration.AudioTimestamp); + Assert.Equal(executionSettings.ResponseMimeType, request.Configuration.ResponseMimeType); Assert.Equal(executionSettings.TopP, request.Configuration.TopP); } diff --git a/dotnet/src/Connectors/Connectors.Google.UnitTests/GeminiPromptExecutionSettingsTests.cs 
b/dotnet/src/Connectors/Connectors.Google.UnitTests/GeminiPromptExecutionSettingsTests.cs index f9d679db5830..b13a2e397ec7 100644 --- a/dotnet/src/Connectors/Connectors.Google.UnitTests/GeminiPromptExecutionSettingsTests.cs +++ b/dotnet/src/Connectors/Connectors.Google.UnitTests/GeminiPromptExecutionSettingsTests.cs @@ -27,6 +27,7 @@ public void ItCreatesGeminiExecutionSettingsWithCorrectDefaults() Assert.Null(executionSettings.CandidateCount); Assert.Null(executionSettings.SafetySettings); Assert.Null(executionSettings.AudioTimestamp); + Assert.Null(executionSettings.ResponseMimeType); Assert.Equal(GeminiPromptExecutionSettings.DefaultTextMaxTokens, executionSettings.MaxTokens); } @@ -41,6 +42,7 @@ public void ItUsesExistingGeminiExecutionSettings() TopK = 20, CandidateCount = 3, AudioTimestamp = true, + ResponseMimeType = "application/json", StopSequences = ["foo", "bar"], MaxTokens = 128, SafetySettings = @@ -67,7 +69,8 @@ public void ItCreatesGeminiExecutionSettingsFromExtensionDataSnakeCase() { { "max_tokens", 1000 }, { "temperature", 0 }, - { "audio_timestamp", true } + { "audio_timestamp", true }, + { "response_mimetype", "application/json" } } }; diff --git a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Models/GeminiRequest.cs b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Models/GeminiRequest.cs index d550d6a3e5e2..7787122756be 100644 --- a/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Models/GeminiRequest.cs +++ b/dotnet/src/Connectors/Connectors.Google/Core/Gemini/Models/GeminiRequest.cs @@ -248,7 +248,8 @@ private static void AddConfiguration(GeminiPromptExecutionSettings executionSett MaxOutputTokens = executionSettings.MaxTokens, StopSequences = executionSettings.StopSequences, CandidateCount = executionSettings.CandidateCount, - AudioTimestamp = executionSettings.AudioTimestamp + AudioTimestamp = executionSettings.AudioTimestamp, + ResponseMimeType = executionSettings.ResponseMimeType }; } @@ -287,5 +288,9 @@ internal sealed class 
ConfigurationElement [JsonPropertyName("audioTimestamp")] [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public bool? AudioTimestamp { get; set; } + + [JsonPropertyName("responseMimeType")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? ResponseMimeType { get; set; } } } diff --git a/dotnet/src/Connectors/Connectors.Google/GeminiPromptExecutionSettings.cs b/dotnet/src/Connectors/Connectors.Google/GeminiPromptExecutionSettings.cs index 00821e9a2760..cfb07941a393 100644 --- a/dotnet/src/Connectors/Connectors.Google/GeminiPromptExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.Google/GeminiPromptExecutionSettings.cs @@ -24,6 +24,7 @@ public sealed class GeminiPromptExecutionSettings : PromptExecutionSettings private int? _candidateCount; private IList<string>? _stopSequences; private bool? _audioTimestamp; + private string? _responseMimeType; private IList<GeminiSafetySetting>? _safetySettings; private GeminiToolCallBehavior? _toolCallBehavior; @@ -187,6 +188,24 @@ public bool? AudioTimestamp } } + /// <summary> + /// The output response MIME type of the generated candidate text. + /// The following MIME types are supported: + /// 1. application/json: JSON response in the candidates. + /// 2. text/plain (default): Plain text output. + /// 3. text/x.enum: For classification tasks, output an enum value as defined in the response schema. + /// </summary> + [JsonPropertyName("response_mimetype")] + public string? ResponseMimeType + { + get => this._responseMimeType; + set + { + this.ThrowIfFrozen(); + this._responseMimeType = value; + } + } + /// <inheritdoc /> public override void Freeze() { @@ -223,7 +242,8 @@ public override PromptExecutionSettings Clone() StopSequences = this.StopSequences is not null ? 
new List<string>(this.StopSequences) : null, SafetySettings = this.SafetySettings?.Select(setting => new GeminiSafetySetting(setting)).ToList(), ToolCallBehavior = this.ToolCallBehavior?.Clone(), - AudioTimestamp = this.AudioTimestamp + AudioTimestamp = this.AudioTimestamp, + ResponseMimeType = this.ResponseMimeType }; }