From 33c1de68d89daa61d03a1354a44679a0f1a9a7da Mon Sep 17 00:00:00 2001 From: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:10:51 +0000 Subject: [PATCH 01/23] .Net: Improve the DI sample and function naming (#9794) ### Motivation and Context Create samples that uses a plugin which depends on a service made available using dependency injection - Native function - Open API function Closes #9769 ### Description A frequent ask is how to inject a service using DI which will be used during function execution. ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .../GettingStarted/GettingStarted.csproj | 6 + .../Resources/repair-service.json | 210 ++++++++++++++++++ .../GettingStarted/Step1_Create_Kernel.cs | 2 +- .../GettingStarted/Step2_Add_Plugins.cs | 2 +- .../GettingStarted/Step3_Yaml_Prompt.cs | 2 +- .../Step4_Dependency_Injection.cs | 55 ++++- .../GettingStarted/Step5_Chat_Prompt.cs | 2 +- .../GettingStarted/Step6_Responsible_AI.cs | 2 +- .../GettingStarted/Step8_Pipelining.cs | 2 +- .../GettingStarted/Step9_OpenAPI_Plugins.cs | 142 ++++++++++++ 10 files changed, 417 insertions(+), 8 deletions(-) create mode 100644 dotnet/samples/GettingStarted/Resources/repair-service.json create mode 100644 dotnet/samples/GettingStarted/Step9_OpenAPI_Plugins.cs diff --git a/dotnet/samples/GettingStarted/GettingStarted.csproj b/dotnet/samples/GettingStarted/GettingStarted.csproj index 81581e7b4d57..133c8902a450 100644 --- a/dotnet/samples/GettingStarted/GettingStarted.csproj +++ b/dotnet/samples/GettingStarted/GettingStarted.csproj @@ -14,6 +14,12 @@ + + + + + Never + diff --git a/dotnet/samples/GettingStarted/Resources/repair-service.json b/dotnet/samples/GettingStarted/Resources/repair-service.json new file mode 100644 index 000000000000..5cb848a385d3 --- /dev/null +++ b/dotnet/samples/GettingStarted/Resources/repair-service.json @@ -0,0 +1,210 @@ +{ + "openapi": "3.0.0", + "info": { + "title": "Repair Service", + "description": "A simple service to manage repairs for various items", + "version": "1.0.0" + }, + "servers": [ + { + "url": "https://piercerepairsapi.azurewebsites.net" + } + ], + "paths": { + "/repairs": { + "get": { + "operationId": "listRepairs", + "summary": "List all repairs", + "description": "Returns a list of repairs with their details and images", + "parameters": [ + { + "name": "assignedTo", + "in": "query", + "description": "Filter repairs by who they're assigned to", + "schema": { + "type": "string" + }, + "required": false + } + ], + "responses": { + "200": { + "description": "A successful response", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "integer", + "description": "The unique identifier of the repair" + }, + "title": { + "type": "string", + "description": "The short summary of the repair" + }, + "description": { + "type": "string", + "description": "The detailed description of the repair" + }, + "assignedTo": { + "type": "string", + "description": "The user who is responsible for the repair" + }, + "date": { + 
"type": "string", + "format": "date-time", + "description": "The date and time when the repair is scheduled or completed" + }, + "image": { + "type": "string", + "format": "uri", + "description": "The URL of the image of the item to be repaired or the repair process" + } + } + } + } + } + } + } + } + }, + "post": { + "operationId": "createRepair", + "summary": "Create a new repair", + "description": "Adds a new repair to the list with the given details and image URL", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "The short summary of the repair" + }, + "description": { + "type": "string", + "description": "The detailed description of the repair" + }, + "assignedTo": { + "type": "string", + "description": "The user who is responsible for the repair" + }, + "date": { + "type": "string", + "format": "date-time", + "description": "The optional date and time when the repair is scheduled or completed" + }, + "image": { + "type": "string", + "format": "uri", + "description": "The URL of the image of the item to be repaired or the repair process" + } + }, + "required": [ + "title", + "description" + ] + } + } + } + }, + "responses": { + "201": { + "description": "A successful response indicating that the repair was created" + } + } + }, + "patch": { + "operationId": "updateRepair", + "summary": "Update an existing repair", + "description": "Update an existing repair to the list with the new updated details and image URL", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "description": "The unique identifier of the repair to update" + }, + "title": { + "type": "string", + "description": "The short summary of the repair" + }, + "description": { + "type": "string", + "description": "The detailed description of the repair" + }, + "assignedTo": { + "type": "string", + "description": "The user who is responsible for the repair" + }, + "date": { + "type": "string", + "format": "date-time", + "description": "The date and time when the repair is scheduled or completed" + }, + "image": { + "type": "string", + "format": "uri", + "description": "The URL of the image of the item to be repaired or the repair process" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Repair updated" + }, + "404": { + "description": "Repair not found" + } + } + }, + "delete": { + "operationId": "deleteRepair", + "summary": "Delete an existing repair", + "description": "Delete an existing repair from the list using its ID", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": [ + "id" + ], + "properties": { + "id": { + "type": "integer", + "description": "The unique identifier of the repair to delete" + } + } + } + } + } + }, + "responses": { + "200": { + "description": "Repair deleted" + }, + "404": { + "description": "Repair not found" + } + } + } + } + } +} \ No newline at end of file diff --git a/dotnet/samples/GettingStarted/Step1_Create_Kernel.cs b/dotnet/samples/GettingStarted/Step1_Create_Kernel.cs index faa8811f1c22..e7f2d50462ed 100644 --- a/dotnet/samples/GettingStarted/Step1_Create_Kernel.cs +++ b/dotnet/samples/GettingStarted/Step1_Create_Kernel.cs @@ -14,7 +14,7 @@ public sealed class Step1_Create_Kernel(ITestOutputHelper output) : BaseTest(out /// Show 
how to create a and use it to execute prompts. /// [Fact] - public async Task RunAsync() + public async Task CreateKernelAsync() { // Create a kernel with OpenAI chat completion Kernel kernel = Kernel.CreateBuilder() diff --git a/dotnet/samples/GettingStarted/Step2_Add_Plugins.cs b/dotnet/samples/GettingStarted/Step2_Add_Plugins.cs index bb2fc387aabe..b3294919607f 100644 --- a/dotnet/samples/GettingStarted/Step2_Add_Plugins.cs +++ b/dotnet/samples/GettingStarted/Step2_Add_Plugins.cs @@ -17,7 +17,7 @@ public sealed class Step2_Add_Plugins(ITestOutputHelper output) : BaseTest(outpu /// Shows different ways to load a instances. /// [Fact] - public async Task RunAsync() + public async Task AddPluginsAsync() { // Create a kernel with OpenAI chat completion IKernelBuilder kernelBuilder = Kernel.CreateBuilder(); diff --git a/dotnet/samples/GettingStarted/Step3_Yaml_Prompt.cs b/dotnet/samples/GettingStarted/Step3_Yaml_Prompt.cs index 3fe837bf098e..29d50f7b6da7 100644 --- a/dotnet/samples/GettingStarted/Step3_Yaml_Prompt.cs +++ b/dotnet/samples/GettingStarted/Step3_Yaml_Prompt.cs @@ -15,7 +15,7 @@ public sealed class Step3_Yaml_Prompt(ITestOutputHelper output) : BaseTest(outpu /// Show how to create a prompt from a YAML resource. /// [Fact] - public async Task RunAsync() + public async Task CreatPromptFromYamlAsync() { // Create a kernel with OpenAI chat completion Kernel kernel = Kernel.CreateBuilder() diff --git a/dotnet/samples/GettingStarted/Step4_Dependency_Injection.cs b/dotnet/samples/GettingStarted/Step4_Dependency_Injection.cs index dd39962d627a..b5c31acfd3a8 100644 --- a/dotnet/samples/GettingStarted/Step4_Dependency_Injection.cs +++ b/dotnet/samples/GettingStarted/Step4_Dependency_Injection.cs @@ -16,7 +16,7 @@ public sealed class Step4_Dependency_Injection(ITestOutputHelper output) : BaseT /// Show how to create a that participates in Dependency Injection. /// [Fact] - public async Task RunAsync() + public async Task GetKernelUsingDependencyInjectionAsync() { // If an application follows DI guidelines, the following line is unnecessary because DI will inject an instance of the KernelClient class to a class that references it. // DI container guidelines - https://learn.microsoft.com/en-us/dotnet/core/extensions/dependency-injection-guidelines#recommendations @@ -32,6 +32,22 @@ public async Task RunAsync() } } + /// + /// Show how to use a plugin that participates in Dependency Injection. + /// + [Fact] + public async Task PluginUsingDependencyInjectionAsync() + { + // If an application follows DI guidelines, the following line is unnecessary because DI will inject an instance of the KernelClient class to a class that references it. + // DI container guidelines - https://learn.microsoft.com/en-us/dotnet/core/extensions/dependency-injection-guidelines#recommendations + var serviceProvider = BuildServiceProvider(); + var kernel = serviceProvider.GetRequiredService(); + + // Invoke the prompt which relies on invoking a plugin that depends on a service made available using Dependency Injection. + PromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + Console.WriteLine(await kernel.InvokePromptAsync("Greet the current user by name.", new(settings))); + } + /// /// Build a ServiceProvider that can be used to resolve services. 
/// @@ -39,10 +55,12 @@ private ServiceProvider BuildServiceProvider() { var collection = new ServiceCollection(); collection.AddSingleton(new XunitLogger(this.Output)); + collection.AddSingleton(new FakeUserService()); var kernelBuilder = collection.AddKernel(); kernelBuilder.Services.AddOpenAIChatCompletion(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey); kernelBuilder.Plugins.AddFromType(); + kernelBuilder.Plugins.AddFromType(); return collection.BuildServiceProvider(); } @@ -52,7 +70,7 @@ private ServiceProvider BuildServiceProvider() /// public class TimeInformation(ILoggerFactory loggerFactory) { - private readonly ILogger _logger = loggerFactory.CreateLogger(typeof(TimeInformation)); + private readonly ILogger _logger = loggerFactory.CreateLogger(); [KernelFunction] [Description("Retrieves the current time in UTC.")] @@ -63,4 +81,37 @@ public string GetCurrentUtcTime() return utcNow; } } + + /// + /// A plugin that returns the current time. + /// + public class UserInformation(IUserService userService) + { + [KernelFunction] + [Description("Retrieves the current users name.")] + public string GetUsername() + { + return userService.GetCurrentUsername(); + } + } + + /// + /// Interface for a service to get the current user id. + /// + public interface IUserService + { + /// + /// Return the user id for the current user. + /// + string GetCurrentUsername(); + } + + /// + /// Fake implementation of + /// + public class FakeUserService : IUserService + { + /// + public string GetCurrentUsername() => "Bob"; + } } diff --git a/dotnet/samples/GettingStarted/Step5_Chat_Prompt.cs b/dotnet/samples/GettingStarted/Step5_Chat_Prompt.cs index 41e90085a5ec..5541b1f07838 100644 --- a/dotnet/samples/GettingStarted/Step5_Chat_Prompt.cs +++ b/dotnet/samples/GettingStarted/Step5_Chat_Prompt.cs @@ -10,7 +10,7 @@ public sealed class Step5_Chat_Prompt(ITestOutputHelper output) : BaseTest(outpu /// Show how to construct a chat prompt and invoke it. /// [Fact] - public async Task RunAsync() + public async Task InvokeChatPromptAsync() { // Create a kernel with OpenAI chat completion Kernel kernel = Kernel.CreateBuilder() diff --git a/dotnet/samples/GettingStarted/Step6_Responsible_AI.cs b/dotnet/samples/GettingStarted/Step6_Responsible_AI.cs index 30a0d69c5c14..d1f717aa47e0 100644 --- a/dotnet/samples/GettingStarted/Step6_Responsible_AI.cs +++ b/dotnet/samples/GettingStarted/Step6_Responsible_AI.cs @@ -11,7 +11,7 @@ public sealed class Step6_Responsible_AI(ITestOutputHelper output) : BaseTest(ou /// Show how to use prompt filters to ensure that prompts are rendered in a responsible manner. /// [Fact] - public async Task RunAsync() + public async Task AddPromptFilterAsync() { // Create a kernel with OpenAI chat completion var builder = Kernel.CreateBuilder() diff --git a/dotnet/samples/GettingStarted/Step8_Pipelining.cs b/dotnet/samples/GettingStarted/Step8_Pipelining.cs index 135d7d73d616..a7d554f11cd5 100644 --- a/dotnet/samples/GettingStarted/Step8_Pipelining.cs +++ b/dotnet/samples/GettingStarted/Step8_Pipelining.cs @@ -14,7 +14,7 @@ public sealed class Step8_Pipelining(ITestOutputHelper output) : BaseTest(output /// them in a sequence, passing the output from one as input to the next. 
/// [Fact] - public async Task RunAsync() + public async Task CreateFunctionPipelineAsync() { IKernelBuilder builder = Kernel.CreateBuilder(); builder.AddOpenAIChatCompletion( diff --git a/dotnet/samples/GettingStarted/Step9_OpenAPI_Plugins.cs b/dotnet/samples/GettingStarted/Step9_OpenAPI_Plugins.cs new file mode 100644 index 000000000000..2813bac110ba --- /dev/null +++ b/dotnet/samples/GettingStarted/Step9_OpenAPI_Plugins.cs @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Resources; + +namespace GettingStarted; + +/// +/// This example shows how to load an Open API instance. +/// +public sealed class Step9_OpenAPI_Plugins(ITestOutputHelper output) : BaseTest(output) +{ + /// + /// Shows how to load an Open API instance. + /// + [Fact] + public async Task AddOpenAPIPluginsAsync() + { + // Create a kernel with OpenAI chat completion + IKernelBuilder kernelBuilder = Kernel.CreateBuilder(); + kernelBuilder.AddOpenAIChatCompletion( + modelId: TestConfiguration.OpenAI.ChatModelId, + apiKey: TestConfiguration.OpenAI.ApiKey); + Kernel kernel = kernelBuilder.Build(); + + // Load OpenAPI plugin + var stream = EmbeddedResource.ReadStream("repair-service.json"); + var plugin = await kernel.ImportPluginFromOpenApiAsync("RepairService", stream!); + + PromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + Console.WriteLine(await kernel.InvokePromptAsync("List all of the repairs .", new(settings))); + } + + /// + /// Shows how to transform an Open API instance to support dependency injection. + /// + [Fact] + public async Task TransformOpenAPIPluginsAsync() + { + // Create a kernel with OpenAI chat completion + var serviceProvider = BuildServiceProvider(); + var kernel = serviceProvider.GetRequiredService(); + + // Load OpenAPI plugin + var stream = EmbeddedResource.ReadStream("repair-service.json"); + var plugin = await kernel.CreatePluginFromOpenApiAsync("RepairService", stream!); + + // Transform the plugin to use IMechanicService via dependency injection + kernel.Plugins.Add(TransformPlugin(plugin)); + + PromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + Console.WriteLine(await kernel.InvokePromptAsync("Book an appointment to drain the old engine oil and replace it with fresh oil.", new(settings))); + } + + /// + /// Build a ServiceProvider that can be used to resolve services. + /// + private ServiceProvider BuildServiceProvider() + { + var collection = new ServiceCollection(); + collection.AddSingleton(new FakeMechanicService()); + + var kernelBuilder = collection.AddKernel(); + kernelBuilder.Services.AddOpenAIChatCompletion(TestConfiguration.OpenAI.ChatModelId, TestConfiguration.OpenAI.ApiKey); + + return collection.BuildServiceProvider(); + } + + /// + /// Transform the plugin to change the behavior of the createRepair function. + /// + public static KernelPlugin TransformPlugin(KernelPlugin plugin) + { + List? 
functions = []; + + foreach (KernelFunction function in plugin) + { + if (function.Name == "createRepair") + { + functions.Add(CreateRepairFunction(function)); + } + else + { + functions.Add(function); + } + } + + return KernelPluginFactory.CreateFromFunctions(plugin.Name, plugin.Description, functions); + } + + /// + /// Create a instance for the createRepair operation which only takes + /// the title, description parameters and has a delegate which uses the IMechanicService to get the + /// assignedTo. + /// + private static KernelFunction CreateRepairFunction(KernelFunction function) + { + var method = ( + Kernel kernel, + KernelFunction currentFunction, + KernelArguments arguments, + [FromKernelServices] IMechanicService mechanicService, + CancellationToken cancellationToken) => + { + arguments.Add("assignedTo", mechanicService.GetMechanic()); + arguments.Add("date", DateTime.UtcNow.ToString("R")); + + return function.InvokeAsync(kernel, arguments, cancellationToken); + }; + + var options = new KernelFunctionFromMethodOptions() + { + FunctionName = function.Name, + Description = function.Description, + Parameters = function.Metadata.Parameters.Where(p => p.Name == "title" || p.Name == "description").ToList(), + ReturnParameter = function.Metadata.ReturnParameter, + }; + + return KernelFunctionFactory.CreateFromMethod(method, options); + } + + /// + /// Interface for a service to get the mechanic to assign to the next job. + /// + public interface IMechanicService + { + /// + /// Return the name of the mechanic to assign the next job to. + /// + string GetMechanic(); + } + + /// + /// Fake implementation of + /// + public class FakeMechanicService : IMechanicService + { + /// + public string GetMechanic() => "Bob"; + } +} From ada7ba6bb7e8306d808fa17d53693dc5c9b3f050 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 Nov 2024 08:58:45 -0500 Subject: [PATCH 02/23] .Net: Update to latest M.E.AI (#9795) --- dotnet/Directory.Packages.props | 8 +- ...reAIInferenceChatCompletionServiceTests.cs | 6 +- ...eAIInferenceServiceCollectionExtensions.cs | 112 ++++++++---------- .../AzureAIInferenceChatCompletionService.cs | 77 +++++------- .../OllamaServiceCollectionExtensions.cs | 107 +++++++++-------- .../ChatCompletionServiceChatClient.cs | 10 +- .../EmbeddingGenerationServiceExtensions.cs | 10 +- .../AI/ServiceConversionExtensionsTests.cs | 4 +- 8 files changed, 162 insertions(+), 172 deletions(-) diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 8ea8825027bb..7770e35f415f 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -56,15 +56,15 @@ - + - - - + + + diff --git a/dotnet/src/Connectors/Connectors.AzureAIInference.UnitTests/Services/AzureAIInferenceChatCompletionServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureAIInference.UnitTests/Services/AzureAIInferenceChatCompletionServiceTests.cs index 417f32cc545b..a8447d4838a3 100644 --- a/dotnet/src/Connectors/Connectors.AzureAIInference.UnitTests/Services/AzureAIInferenceChatCompletionServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureAIInference.UnitTests/Services/AzureAIInferenceChatCompletionServiceTests.cs @@ -10,6 +10,7 @@ using Azure; using Azure.AI.Inference; using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.AzureAIInference; @@ -51,7 +52,6 @@ public void ConstructorsWorksAsExpected() { // Arrange 
using var httpClient = new HttpClient() { BaseAddress = this._endpoint }; - var loggerFactoryMock = new Mock(); ChatCompletionsClient client = new(this._endpoint, new AzureKeyCredential("api-key")); // Act & Assert @@ -60,12 +60,12 @@ public void ConstructorsWorksAsExpected() new AzureAIInferenceChatCompletionService(modelId: "model", httpClient: httpClient, apiKey: null); // Only the HttpClient with a BaseClass defined new AzureAIInferenceChatCompletionService(modelId: "model", endpoint: this._endpoint, apiKey: null); // ModelId and endpoint new AzureAIInferenceChatCompletionService(modelId: "model", apiKey: "api-key", endpoint: this._endpoint); // ModelId, apiKey, and endpoint - new AzureAIInferenceChatCompletionService(modelId: "model", endpoint: this._endpoint, apiKey: null, loggerFactory: loggerFactoryMock.Object); // Endpoint and loggerFactory + new AzureAIInferenceChatCompletionService(modelId: "model", endpoint: this._endpoint, apiKey: null, loggerFactory: NullLoggerFactory.Instance); // Endpoint and loggerFactory // Breaking Glass constructor new AzureAIInferenceChatCompletionService(modelId: null, chatClient: client); // Client without model new AzureAIInferenceChatCompletionService(modelId: "model", chatClient: client); // Client - new AzureAIInferenceChatCompletionService(modelId: "model", chatClient: client, loggerFactory: loggerFactoryMock.Object); // Client + new AzureAIInferenceChatCompletionService(modelId: "model", chatClient: client, loggerFactory: NullLoggerFactory.Instance); // Client } [Theory] diff --git a/dotnet/src/Connectors/Connectors.AzureAIInference/Extensions/AzureAIInferenceServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.AzureAIInference/Extensions/AzureAIInferenceServiceCollectionExtensions.cs index 387d9b89a62a..c932c27c3831 100644 --- a/dotnet/src/Connectors/Connectors.AzureAIInference/Extensions/AzureAIInferenceServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.AzureAIInference/Extensions/AzureAIInferenceServiceCollectionExtensions.cs @@ -9,7 +9,6 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.SemanticKernel.ChatCompletion; -using Microsoft.SemanticKernel.Http; namespace Microsoft.SemanticKernel; @@ -38,34 +37,30 @@ public static IServiceCollection AddAzureAIInferenceChatCompletion( { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { - var chatClientBuilder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var options = new AzureAIInferenceClientOptions(); - var logger = serviceProvider.GetService()?.CreateLogger(); - if (logger is not null) + httpClient ??= serviceProvider.GetService(); + if (httpClient is not null) { - chatClientBuilder.UseLogging(logger); + options.Transport = new HttpClientTransport(httpClient); } - var options = new AzureAIInferenceClientOptions(); - if (httpClient is not null) + var loggerFactory = serviceProvider.GetService(); + + var builder = new Azure.AI.Inference.ChatCompletionsClient(endpoint, new Azure.AzureKeyCredential(apiKey ?? 
SingleSpace), options) + .AsChatClient(modelId) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + + if (loggerFactory is not null) { - options.Transport = new HttpClientTransport(HttpClientProvider.GetHttpClient(httpClient, serviceProvider)); + builder.UseLogging(loggerFactory); } - return - chatClientBuilder.Use( - new Microsoft.Extensions.AI.AzureAIInferenceChatClient( - modelId: modelId, - chatCompletionsClient: new Azure.AI.Inference.ChatCompletionsClient(endpoint, new Azure.AzureKeyCredential(apiKey ?? SingleSpace), options) - ) - ).AsChatCompletionService(); + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); }); - - return services; } /// @@ -88,34 +83,30 @@ public static IServiceCollection AddAzureAIInferenceChatCompletion( { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { - var chatClientBuilder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var options = new AzureAIInferenceClientOptions(); - var logger = serviceProvider.GetService()?.CreateLogger(); - if (logger is not null) + httpClient ??= serviceProvider.GetService(); + if (httpClient is not null) { - chatClientBuilder.UseLogging(logger); + options.Transport = new HttpClientTransport(httpClient); } - var options = new AzureAIInferenceClientOptions(); - if (httpClient is not null) + var loggerFactory = serviceProvider.GetService(); + + var builder = new Azure.AI.Inference.ChatCompletionsClient(endpoint, credential, options) + .AsChatClient(modelId) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + + if (loggerFactory is not null) { - options.Transport = new HttpClientTransport(HttpClientProvider.GetHttpClient(httpClient, serviceProvider)); + builder.UseLogging(loggerFactory); } - return - chatClientBuilder.Use( - new Microsoft.Extensions.AI.AzureAIInferenceChatClient( - modelId: modelId, - chatCompletionsClient: new Azure.AI.Inference.ChatCompletionsClient(endpoint, credential, options) - ) - ).AsChatCompletionService(); + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); }); - - return services; } /// @@ -133,26 +124,24 @@ public static IServiceCollection AddAzureAIInferenceChatCompletion(this IService { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { chatClient ??= serviceProvider.GetRequiredService(); - var chatClientBuilder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var loggerFactory = serviceProvider.GetService(); - var logger = serviceProvider.GetService()?.CreateLogger(); - if (logger is not null) + var builder = chatClient + .AsChatClient(modelId) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + + if (loggerFactory is not null) { - chatClientBuilder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - return chatClientBuilder - .Use(new Microsoft.Extensions.AI.AzureAIInferenceChatClient(chatClient, modelId)) - .AsChatCompletionService(); + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); }); - - return services; } /// @@ -168,26 
+157,23 @@ public static IServiceCollection AddAzureAIInferenceChatCompletion(this IService { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { chatClient ??= serviceProvider.GetRequiredService(); - var chatClientBuilder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var loggerFactory = serviceProvider.GetService(); - var logger = serviceProvider.GetService()?.CreateLogger(); - if (logger is not null) + var builder = chatClient + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + + if (loggerFactory is not null) { - chatClientBuilder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - return chatClientBuilder - .Use(chatClient) - .AsChatCompletionService(); + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); }); - - return services; } #region Private diff --git a/dotnet/src/Connectors/Connectors.AzureAIInference/Services/AzureAIInferenceChatCompletionService.cs b/dotnet/src/Connectors/Connectors.AzureAIInference/Services/AzureAIInferenceChatCompletionService.cs index 392f93b47147..a940151e4ec4 100644 --- a/dotnet/src/Connectors/Connectors.AzureAIInference/Services/AzureAIInferenceChatCompletionService.cs +++ b/dotnet/src/Connectors/Connectors.AzureAIInference/Services/AzureAIInferenceChatCompletionService.cs @@ -38,26 +38,19 @@ public AzureAIInferenceChatCompletionService( HttpClient? httpClient = null, ILoggerFactory? loggerFactory = null) { - var logger = loggerFactory?.CreateLogger(typeof(AzureAIInferenceChatCompletionService)); - this._core = new( - modelId, - apiKey, - endpoint, - httpClient, - logger); - - var builder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); - - if (logger is not null) + this._core = new ChatClientCore(modelId, apiKey, endpoint, httpClient); + + var builder = this._core.Client + .AsChatClient(modelId) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + + if (loggerFactory is not null) { - builder = builder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - this._chatService = builder - .Use(this._core.Client.AsChatClient(modelId)) - .AsChatCompletionService(); + this._chatService = builder.Build().AsChatCompletionService(); } /// @@ -75,26 +68,19 @@ public AzureAIInferenceChatCompletionService( HttpClient? httpClient = null, ILoggerFactory? 
loggerFactory = null) { - var logger = loggerFactory?.CreateLogger(typeof(AzureAIInferenceChatCompletionService)); - this._core = new( - modelId, - credential, - endpoint, - httpClient, - logger); - - var builder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); - - if (logger is not null) + this._core = new ChatClientCore(modelId, credential, endpoint, httpClient); + + var builder = this._core.Client + .AsChatClient(modelId) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + + if (loggerFactory is not null) { - builder = builder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - this._chatService = builder - .Use(this._core.Client.AsChatClient(modelId)) - .AsChatCompletionService(); + this._chatService = builder.Build().AsChatCompletionService(); } /// @@ -108,24 +94,21 @@ public AzureAIInferenceChatCompletionService( ChatCompletionsClient chatClient, ILoggerFactory? loggerFactory = null) { - var logger = loggerFactory?.CreateLogger(typeof(AzureAIInferenceChatCompletionService)); - this._core = new( - modelId, - chatClient, - logger); + Verify.NotNull(chatClient); + + this._core = new ChatClientCore(modelId, chatClient); - var builder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var builder = chatClient + .AsChatClient(modelId) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, f => f.MaximumIterationsPerRequest = MaxInflightAutoInvokes); - if (logger is not null) + if (loggerFactory is not null) { - builder = builder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - this._chatService = builder - .Use(this._core.Client.AsChatClient(modelId)) - .AsChatCompletionService(); + this._chatService = builder.Build().AsChatCompletionService(); } /// diff --git a/dotnet/src/Connectors/Connectors.Ollama/Extensions/OllamaServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.Ollama/Extensions/OllamaServiceCollectionExtensions.cs index d53825079721..960466bd9f5d 100644 --- a/dotnet/src/Connectors/Connectors.Ollama/Extensions/OllamaServiceCollectionExtensions.cs +++ b/dotnet/src/Connectors/Connectors.Ollama/Extensions/OllamaServiceCollectionExtensions.cs @@ -110,24 +110,21 @@ public static IServiceCollection AddOllamaChatCompletion( { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { - var ollamaClient = new OllamaApiClient(endpoint, modelId); + var loggerFactory = serviceProvider.GetService(); - var chatClientBuilder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var builder = ((IChatClient)new OllamaApiClient(endpoint, modelId)) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, config => config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); - var logger = serviceProvider.GetService()?.CreateLogger(ollamaClient.GetType()); - if (logger is not null) + if (loggerFactory is not null) { - chatClientBuilder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - return chatClientBuilder.Use(ollamaClient).AsChatCompletionService(serviceProvider); + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); }); - - return services; } /// @@ -146,26 +143,23 @@ public static IServiceCollection AddOllamaChatCompletion( { 
Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { - var ollamaClient = new OllamaApiClient( - client: HttpClientProvider.GetHttpClient(httpClient, serviceProvider), - modelId); + httpClient ??= HttpClientProvider.GetHttpClient(httpClient, serviceProvider); + + var loggerFactory = serviceProvider.GetService(); - var chatClientBuilder = new ChatClientBuilder() - .UseFunctionInvocation(config => - config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); + var builder = ((IChatClient)new OllamaApiClient(httpClient, modelId)) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, config => config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); - var logger = serviceProvider.GetService()?.CreateLogger(ollamaClient.GetType()); - if (logger is not null) + if (loggerFactory is not null) { - chatClientBuilder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - return chatClientBuilder.Use(ollamaClient).AsChatCompletionService(serviceProvider); + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); }); - - return services; } /// @@ -182,10 +176,21 @@ public static IServiceCollection AddOllamaChatCompletion( { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) - => ollamaClient.AsChatCompletionService(serviceProvider)); + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + { + var loggerFactory = serviceProvider.GetService(); + + var builder = ((IChatClient)ollamaClient) + .AsBuilder() + .UseFunctionInvocation(loggerFactory, config => config.MaximumIterationsPerRequest = MaxInflightAutoInvokes); - return services; + if (loggerFactory is not null) + { + builder.UseLogging(loggerFactory); + } + + return builder.Build(serviceProvider).AsChatCompletionService(serviceProvider); + }); } #endregion @@ -208,22 +213,20 @@ public static IServiceCollection AddOllamaTextEmbeddingGeneration( { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { - var ollamaClient = new OllamaApiClient(endpoint, modelId); + var loggerFactory = serviceProvider.GetService(); - var builder = new EmbeddingGeneratorBuilder>(); + var builder = ((IEmbeddingGenerator>)new OllamaApiClient(endpoint, modelId)) + .AsBuilder(); - var logger = serviceProvider.GetService()?.CreateLogger(ollamaClient.GetType()); - if (logger is not null) + if (loggerFactory is not null) { - builder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - return builder.Use(ollamaClient).AsTextEmbeddingGenerationService(serviceProvider); + return builder.Build(serviceProvider).AsTextEmbeddingGenerationService(serviceProvider); }); - - return services; } /// @@ -244,19 +247,19 @@ public static IServiceCollection AddOllamaTextEmbeddingGeneration( services.AddKeyedSingleton(serviceId, (serviceProvider, _) => { - var ollamaClient = new OllamaApiClient( - client: HttpClientProvider.GetHttpClient(httpClient, serviceProvider), - defaultModel: modelId); + httpClient ??= HttpClientProvider.GetHttpClient(httpClient, serviceProvider); - var builder = new EmbeddingGeneratorBuilder>(); + var loggerFactory = serviceProvider.GetService(); - var logger = serviceProvider.GetService()?.CreateLogger(ollamaClient.GetType()); - if (logger is not null) + var builder = ((IEmbeddingGenerator>)new OllamaApiClient(httpClient, modelId)) + .AsBuilder(); + + if 
(loggerFactory is not null) { - builder.UseLogging(logger); + builder.UseLogging(loggerFactory); } - return builder.Use(ollamaClient).AsTextEmbeddingGenerationService(serviceProvider); + return builder.Build(serviceProvider).AsTextEmbeddingGenerationService(serviceProvider); }); return services; @@ -276,10 +279,20 @@ public static IServiceCollection AddOllamaTextEmbeddingGeneration( { Verify.NotNull(services); - services.AddKeyedSingleton(serviceId, (serviceProvider, _) - => ollamaClient.AsTextEmbeddingGenerationService(serviceProvider)); + return services.AddKeyedSingleton(serviceId, (serviceProvider, _) => + { + var loggerFactory = serviceProvider.GetService(); - return services; + var builder = ((IEmbeddingGenerator>)ollamaClient) + .AsBuilder(); + + if (loggerFactory is not null) + { + builder.UseLogging(loggerFactory); + } + + return builder.Build(serviceProvider).AsTextEmbeddingGenerationService(serviceProvider); + }); } #endregion diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatCompletionServiceChatClient.cs b/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatCompletionServiceChatClient.cs index cab0bce50d26..ba9d4e80fc80 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatCompletionServiceChatClient.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/ChatCompletionServiceChatClient.cs @@ -74,11 +74,15 @@ public void Dispose() } /// - public TService? GetService(object? key = null) where TService : class + public object? GetService(Type serviceType, object? serviceKey = null) { + Verify.NotNull(serviceType); + return - typeof(TService) == typeof(IChatClient) ? (TService)(object)this : - this._chatCompletionService as TService; + serviceKey is not null ? null : + serviceType.IsInstanceOfType(this) ? this : + serviceType.IsInstanceOfType(this._chatCompletionService) ? this._chatCompletionService : + null; } /// Converts a to a . diff --git a/dotnet/src/SemanticKernel.Abstractions/AI/Embeddings/EmbeddingGenerationServiceExtensions.cs b/dotnet/src/SemanticKernel.Abstractions/AI/Embeddings/EmbeddingGenerationServiceExtensions.cs index 7ae6593f4d2d..c060c3f0d523 100644 --- a/dotnet/src/SemanticKernel.Abstractions/AI/Embeddings/EmbeddingGenerationServiceExtensions.cs +++ b/dotnet/src/SemanticKernel.Abstractions/AI/Embeddings/EmbeddingGenerationServiceExtensions.cs @@ -124,11 +124,15 @@ public async Task>> GenerateAsync(IEnu } /// - public TService? GetService(object? key = null) where TService : class + public object? GetService(Type serviceType, object? serviceKey = null) { + Verify.NotNull(serviceType); + return - typeof(TService) == typeof(IEmbeddingGenerator>) ? (TService)(object)this : - this._service as TService; + serviceKey is not null ? null : + serviceType.IsInstanceOfType(this) ? this : + serviceType.IsInstanceOfType(this._service) ? this._service : + null; } } diff --git a/dotnet/src/SemanticKernel.UnitTests/AI/ServiceConversionExtensionsTests.cs b/dotnet/src/SemanticKernel.UnitTests/AI/ServiceConversionExtensionsTests.cs index 09f1966e2837..9f8a60b40098 100644 --- a/dotnet/src/SemanticKernel.UnitTests/AI/ServiceConversionExtensionsTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/AI/ServiceConversionExtensionsTests.cs @@ -672,7 +672,7 @@ public IAsyncEnumerable CompleteStreamingAsync(IL public void Dispose() { } - public TService? GetService(object? key = null) where TService : class + public object? GetService(Type serviceType, object? 
serviceKey = null) { return null; } @@ -707,7 +707,7 @@ public Task>> GenerateAsync(IEnumerable(object? key = null) where TService : class + public object? GetService(Type serviceType, object? serviceKey = null) { return null; } From b8680fa599b6b7dacad17e67c94904c1c2dbf5eb Mon Sep 17 00:00:00 2001 From: westey <164392973+westey-m@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:12:16 +0000 Subject: [PATCH 03/23] .Net: Fix bug where filtered tag field throws. (#9804) ### Motivation and Context #9799 ### Description Missed the mapping for enumerable strings for index creation, so needed to add it, and a proper check for unsupported types. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../QdrantVectorStoreRecordCollection.cs | 15 ++++++++++++- .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 18 ++++++++++------ .../QdrantVectorStoreRecordCollectionTests.cs | 21 +++++++++---------- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs index 1706448d7df3..6ddf009391b9 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordCollection.cs @@ -175,7 +175,20 @@ await this.RunOperationAsync( foreach (var dataProperty in dataProperties) { var storageFieldName = this._propertyReader.GetStoragePropertyName(dataProperty.DataModelPropertyName); - var schemaType = QdrantVectorStoreCollectionCreateMapping.s_schemaTypeMap[dataProperty.PropertyType!]; + + if (QdrantVectorStoreCollectionCreateMapping.s_schemaTypeMap.TryGetValue(dataProperty.PropertyType!, out PayloadSchemaType schemaType)) + { + // Do nothing since schemaType is already set. + } + else if (VectorStoreRecordPropertyVerification.IsSupportedEnumerableType(dataProperty.PropertyType) && VectorStoreRecordPropertyVerification.GetCollectionElementType(dataProperty.PropertyType) == typeof(string)) + { + // For enumerable of strings, use keyword schema type, since this allows tag filtering. + schemaType = PayloadSchemaType.Keyword; + } + else + { + throw new InvalidOperationException($"Property {nameof(VectorStoreRecordDataProperty.IsFilterable)} on {nameof(VectorStoreRecordDataProperty)} '{dataProperty.DataModelPropertyName}' is set to true, but the property type is not supported for filtering. 
The Qdrant VectorStore supports filtering on {string.Join(", ", QdrantVectorStoreCollectionCreateMapping.s_schemaTypeMap.Keys.Select(x => x.Name))} properties only."); + } await this.RunOperationAsync( "CreatePayloadIndex", diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index c101ea570597..60d1a68ce4bf 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -57,7 +57,7 @@ public QdrantVectorStoreFixture() new VectorStoreRecordDataProperty("HotelCode", typeof(int)) { IsFilterable = true }, new VectorStoreRecordDataProperty("ParkingIncluded", typeof(bool)) { IsFilterable = true, StoragePropertyName = "parking_is_included" }, new VectorStoreRecordDataProperty("HotelRating", typeof(float)) { IsFilterable = true }, - new VectorStoreRecordDataProperty("Tags", typeof(List)), + new VectorStoreRecordDataProperty("Tags", typeof(List)) { IsFilterable = true }, new VectorStoreRecordDataProperty("Description", typeof(string)), new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance } } @@ -146,11 +146,17 @@ await this.QdrantClient.CreateCollectionAsync( // Create test data common to both named and unnamed vectors. var tags = new ListValue(); - tags.Values.Add("t1"); - tags.Values.Add("t2"); + tags.Values.Add("t11.1"); + tags.Values.Add("t11.2"); var tagsValue = new Value(); tagsValue.ListValue = tags; + var tags2 = new ListValue(); + tags2.Values.Add("t13.1"); + tags2.Values.Add("t13.2"); + var tagsValue2 = new Value(); + tagsValue2.ListValue = tags2; + // Create some test data using named vectors. var embedding = await this.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."); var embeddingArray = embedding.ToArray(); @@ -183,7 +189,7 @@ await this.QdrantClient.CreateCollectionAsync( { Id = 13, Vectors = new Vectors { Vectors_ = namedVectors3 }, - Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Tags"] = tagsValue2, ["Description"] = "This is a great hotel." } }, new PointStruct { @@ -214,7 +220,7 @@ await this.QdrantClient.CreateCollectionAsync( { Id = 13, Vectors = embeddingArray, - Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } + Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Tags"] = tagsValue2, ["Description"] = "This is a great hotel." } }, ]; @@ -327,7 +333,7 @@ public record HotelInfo() [VectorStoreRecordData(IsFilterable = true, StoragePropertyName = "parking_is_included")] public bool ParkingIncluded { get; set; } - [VectorStoreRecordData] + [VectorStoreRecordData(IsFilterable = true)] public List Tags { get; set; } = new List(); /// A data field. 
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index 0b8a4bb0cd22..135d09d025aa 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -66,7 +66,7 @@ public async Task ItCanCreateACollectionUpsertGetAndSearchAsync(bool hasNamedVec var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel"); var actual = await sut.VectorizedSearchAsync( vector, - new VectorSearchOptions { Filter = new VectorSearchFilter().EqualTo("HotelCode", 30) }); + new VectorSearchOptions { Filter = new VectorSearchFilter().EqualTo("HotelCode", 30).AnyTagEqualTo("Tags", "t2") }); // Assert var collectionExistResult = await sut.CollectionExistsAsync(); @@ -221,8 +221,8 @@ public async Task ItCanGetDocumentFromVectorStoreAsync(bool useRecordDefinition, Assert.True(getResult?.ParkingIncluded); Assert.Equal(4.5f, getResult?.HotelRating); Assert.Equal(2, getResult?.Tags.Count); - Assert.Equal("t1", getResult?.Tags[0]); - Assert.Equal("t2", getResult?.Tags[1]); + Assert.Equal("t11.1", getResult?.Tags[0]); + Assert.Equal("t11.2", getResult?.Tags[1]); Assert.Equal("This is a great hotel.", getResult?.Description); if (withEmbeddings) { @@ -389,7 +389,7 @@ public async Task ItCanSearchWithFilterAsync(bool useRecordDefinition, string co // Act. var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel"); - var filter = filterType == "equality" ? new VectorSearchFilter().EqualTo("HotelName", "My Hotel 11") : new VectorSearchFilter().AnyTagEqualTo("Tags", "t1"); + var filter = filterType == "equality" ? 
new VectorSearchFilter().EqualTo("HotelName", "My Hotel 13") : new VectorSearchFilter().AnyTagEqualTo("Tags", "t13.2"); var actual = await sut.VectorizedSearchAsync( vector, new() @@ -402,12 +402,11 @@ public async Task ItCanSearchWithFilterAsync(bool useRecordDefinition, string co Assert.Single(searchResults); var searchResultRecord = searchResults.First().Record; - Assert.Equal(11ul, searchResultRecord?.HotelId); - Assert.Equal("My Hotel 11", searchResultRecord?.HotelName); - Assert.Equal(11, searchResultRecord?.HotelCode); - Assert.Equal(4.5f, searchResultRecord?.HotelRating); - Assert.Equal(true, searchResultRecord?.ParkingIncluded); - Assert.Equal(new string[] { "t1", "t2" }, searchResultRecord?.Tags.ToArray()); + Assert.Equal(13ul, searchResultRecord?.HotelId); + Assert.Equal("My Hotel 13", searchResultRecord?.HotelName); + Assert.Equal(13, searchResultRecord?.HotelCode); + Assert.Equal(false, searchResultRecord?.ParkingIncluded); + Assert.Equal(new string[] { "t13.1", "t13.2" }, searchResultRecord?.Tags.ToArray()); Assert.Equal("This is a great hotel.", searchResultRecord?.Description); } @@ -448,7 +447,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync() Assert.Equal(11, baseSetGetResult.Data["HotelCode"]); Assert.True((bool)baseSetGetResult.Data["ParkingIncluded"]!); Assert.Equal(4.5f, baseSetGetResult.Data["HotelRating"]); - Assert.Equal(new[] { "t1", "t2" }, ((List)baseSetGetResult.Data["Tags"]!).ToArray()); + Assert.Equal(new[] { "t11.1", "t11.2" }, ((List)baseSetGetResult.Data["Tags"]!).ToArray()); Assert.Equal("This is a great hotel.", baseSetGetResult.Data["Description"]); Assert.NotNull(baseSetGetResult.Vectors["DescriptionEmbedding"]); Assert.IsType>(baseSetGetResult.Vectors["DescriptionEmbedding"]); From ec055b55336e0b88c68adaff03127608b7992b06 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Mon, 25 Nov 2024 12:56:29 -0500 Subject: [PATCH 04/23] .Net: Stop setting upper bound on nuget references (#9808) It's an anti-pattern, blocking developers from upgrading. https://github.com/microsoft/semantic-kernel/issues/9802 --- dotnet/Directory.Packages.props | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 7770e35f415f..ed3091b0fe06 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -81,8 +81,8 @@ - - + + @@ -102,13 +102,13 @@ - - + + - + From c5beb8a97634140732b2614918d27c41a4df0b8f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 17:59:04 +0000 Subject: [PATCH 05/23] Bump dawidd6/action-download-artifact from 3 to 6 in /.github/workflows (#9805) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [dawidd6/action-download-artifact](https://github.com/dawidd6/action-download-artifact) from 3 to 6.
Release notes (sourced from dawidd6/action-download-artifact's releases):

- v6: Full Changelog https://github.com/dawidd6/action-download-artifact/compare/v5...v6
- v5: Full Changelog https://github.com/dawidd6/action-download-artifact/compare/v4...v5
- v4: Full Changelog https://github.com/dawidd6/action-download-artifact/compare/v3...v4
- v3.1.4: Full Changelog https://github.com/dawidd6/action-download-artifact/compare/v3...v3.1.4
- v3.1.3: Full Changelog https://github.com/dawidd6/action-download-artifact/compare/v3.1.2...v3.1.3
- v3.1.2: Full Changelog https://github.com/dawidd6/action-download-artifact/compare/v3.1.1...v3.1.2
- v3.1.1: What's Changed ... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=dawidd6/action-download-artifact&package-manager=github_actions&previous-version=3&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:

- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/microsoft/semantic-kernel/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> --- .github/workflows/python-test-coverage.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-test-coverage.yml b/.github/workflows/python-test-coverage.yml index 7d3c14ce783b..4b5f6ea1778c 100644 --- a/.github/workflows/python-test-coverage.yml +++ b/.github/workflows/python-test-coverage.yml @@ -31,7 +31,7 @@ jobs: - name: Setup filename variables run: echo "FILE_ID=${{ github.event.number }}" >> $GITHUB_ENV - name: Download coverage - uses: dawidd6/action-download-artifact@v3 + uses: dawidd6/action-download-artifact@v6 with: name: python-coverage-${{ env.FILE_ID }}.txt github_token: ${{ secrets.GH_ACTIONS_PR_WRITE }} @@ -39,7 +39,7 @@ jobs: search_artifacts: true if_no_artifact_found: warn - name: Download pytest - uses: dawidd6/action-download-artifact@v3 + uses: dawidd6/action-download-artifact@v6 with: name: pytest-${{ env.FILE_ID }}.xml github_token: ${{ secrets.GH_ACTIONS_PR_WRITE }} From 5adf59d466eaaa318d39c4b58d4cafa2bd6a1b3a Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:16:19 +0100 Subject: [PATCH 06/23] .Net: Add new 2024-09-01-preview version (#9809) ### Motivation and Context Add new version `2024-09-01-preview` option available in Azure.AI.OpenAI 2.0.0-beta.2 --- .../Services/AzureOpenAIChatCompletionServiceTests.cs | 5 +++++ .../Connectors.AzureOpenAI/Core/AzureClientCore.cs | 2 ++ 2 files changed, 7 insertions(+) diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs index 0bf81dad8453..336d12036db9 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.AzureOpenAI.UnitTests/Services/AzureOpenAIChatCompletionServiceTests.cs @@ -1541,6 +1541,10 @@ public async Task GetStreamingChatMessageContentsWithFunctionCallAndEmptyArgumen { "V2024_10_01_PREVIEW", "2024-10-01-preview" }, { "2024_10_01_Preview", "2024-10-01-preview" }, { "2024-10-01-preview", "2024-10-01-preview" }, + { "V2024_09_01_preview", "2024-09-01-preview" }, + { "V2024_09_01_PREVIEW", "2024-09-01-preview" }, + { "2024_09_01_Preview", "2024-09-01-preview" }, + { "2024-09-01-preview", "2024-09-01-preview" }, { "V2024_08_01_preview", "2024-08-01-preview" }, { "V2024_08_01_PREVIEW", "2024-08-01-preview" }, { "2024_08_01_Preview", "2024-08-01-preview" }, @@ -1549,6 +1553,7 @@ public async Task GetStreamingChatMessageContentsWithFunctionCallAndEmptyArgumen { "2024_06_01", "2024-06-01" }, { "2024-06-01", "2024-06-01" }, { AzureOpenAIClientOptions.ServiceVersion.V2024_10_01_Preview.ToString(), null }, + { AzureOpenAIClientOptions.ServiceVersion.V2024_09_01_Preview.ToString(), null }, { AzureOpenAIClientOptions.ServiceVersion.V2024_08_01_Preview.ToString(), null }, { AzureOpenAIClientOptions.ServiceVersion.V2024_06_01.ToString(), null } }; diff --git a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs index efb59d3479ef..5ad45701a921 100644 --- a/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs +++ 
b/dotnet/src/Connectors/Connectors.AzureOpenAI/Core/AzureClientCore.cs @@ -136,7 +136,9 @@ internal static AzureOpenAIClientOptions GetAzureOpenAIClientOptions(HttpClient? { "2024-06-01" or "V2024_06_01" or "2024_06_01" => AzureOpenAIClientOptions.ServiceVersion.V2024_06_01, "2024-08-01-PREVIEW" or "V2024_08_01_PREVIEW" or "2024_08_01_PREVIEW" => AzureOpenAIClientOptions.ServiceVersion.V2024_08_01_Preview, + "2024-09-01-PREVIEW" or "V2024_09_01_PREVIEW" or "2024_09_01_PREVIEW" => AzureOpenAIClientOptions.ServiceVersion.V2024_09_01_Preview, "2024-10-01-PREVIEW" or "V2024_10_01_PREVIEW" or "2024_10_01_PREVIEW" => AzureOpenAIClientOptions.ServiceVersion.V2024_10_01_Preview, + _ => throw new NotSupportedException($"The service version '{serviceVersion}' is not supported.") }; } From 27a89bae54590b9d16133b9dedb672616ace5d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=B5=E3=81=81=E3=83=BC?= <47295014+ymuichiro@users.noreply.github.com> Date: Tue, 26 Nov 2024 07:39:07 +0900 Subject: [PATCH 07/23] Python: Introduced a new condition to yield `StreamingChatMessageContent` directly when usage data is available. (#9753) ### Motivation and Context issue: https://github.com/microsoft/semantic-kernel/issues/9751 This pull request addresses a bug where setting `stream_options.include_usage` to `True` does not return token usage, resulting in `None` for the `usage` field. The issue occurs when using Azure OpenAI's GPT-4o and GPT-4omini models. In particular, if the last chunk of the response has an empty `choices` list, the chunk is skipped entirely, and the token usage is not processed correctly. In the Azure OpenAI implementation, if `usage` information is included, the chunk should be processed appropriately. However, the current code skips processing when `choices` is empty. This pull request fixes this behavior so that the chunk is processed when `usage` is present, even if `choices` is empty. ### Description This fix includes the following changes: - Modified the relevant section in `azure_chat_completion.py` to ensure that chunks with empty `choices` are not skipped if `usage` information is present. - Specifically, the condition `if len(chunk.choices) == 0:` was updated to allow chunks with `usage` data to be processed correctly. With these changes, setting `stream_options.include_usage` to `True` will correctly return token usage data, even for chunks where the `choices` list is empty. 
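To illustrate the behavior described above, here is a minimal, hypothetical sketch of the chunk-handling loop; the helper name and surrounding structure are illustrative only and are not the actual code in `azure_chat_completion.py`:

```python
# Hypothetical sketch: skip a streaming chunk only when it carries neither
# choices nor usage data, so the final usage-only chunk is still yielded.
from openai import AsyncStream
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk


async def iter_useful_chunks(response: AsyncStream[ChatCompletionChunk]):
    async for chunk in response:
        if len(chunk.choices) == 0 and chunk.usage is None:
            # Nothing to report in this chunk; skip it.
            continue
        # Chunks with an empty `choices` list but populated `usage` (the last
        # chunk when `stream_options.include_usage` is True) are now processed.
        yield chunk
```

With this shape, a caller that sets `stream_options.include_usage` to `True` receives the usage data carried by the final chunk instead of `None`.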
### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --------- Co-authored-by: Tao Chen --- .../open_ai/services/azure_chat_completion.py | 53 ++----------------- .../services/test_azure_chat_completion.py | 4 ++ 2 files changed, 7 insertions(+), 50 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py index bd2a0ca51bab..73e1a8fe62b7 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py @@ -2,18 +2,12 @@ import json import logging -import sys -from collections.abc import AsyncGenerator, Mapping +from collections.abc import Mapping from copy import deepcopy from typing import Any, TypeVar from uuid import uuid4 -if sys.version_info >= (3, 12): - from typing import override # pragma: no cover -else: - from typing_extensions import override # pragma: no cover - -from openai import AsyncAzureOpenAI, AsyncStream +from openai import AsyncAzureOpenAI from openai.lib.azure import AsyncAzureADTokenProvider from openai.types.chat.chat_completion import ChatCompletion, Choice from openai.types.chat.chat_completion_chunk import ChatCompletionChunk @@ -23,24 +17,19 @@ from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( AzureChatPromptExecutionSettings, ) -from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( - OpenAIChatPromptExecutionSettings, -) from semantic_kernel.connectors.ai.open_ai.services.azure_config_base import AzureOpenAIConfigBase from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion_base import OpenAIChatCompletionBase from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIModelTypes from semantic_kernel.connectors.ai.open_ai.services.open_ai_text_completion_base import OpenAITextCompletionBase from semantic_kernel.connectors.ai.open_ai.settings.azure_open_ai_settings import AzureOpenAISettings from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings -from semantic_kernel.contents.chat_history import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent from semantic_kernel.contents.text_content import TextContent from semantic_kernel.contents.utils.finish_reason import FinishReason -from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError, ServiceInvalidResponseError -from semantic_kernel.utils.telemetry.model_diagnostics.decorators import trace_streaming_chat_completion +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError logger: logging.Logger = logging.getLogger(__name__) 
@@ -121,42 +110,6 @@ def __init__( client=async_client, ) - @override - @trace_streaming_chat_completion(OpenAIChatCompletionBase.MODEL_PROVIDER_NAME) - async def _inner_get_streaming_chat_message_contents( - self, - chat_history: "ChatHistory", - settings: "PromptExecutionSettings", - ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: - """Override the base method. - - This is because the latest Azure OpenAI API GA version doesn't support `stream_option` - yet and it will potentially result in errors if the option is included. - This method will be called instead of the base method. - TODO: Remove this method when the `stream_option` is supported by the Azure OpenAI API. - GitHub Issue: https://github.com/microsoft/semantic-kernel/issues/8996 - """ - if not isinstance(settings, OpenAIChatPromptExecutionSettings): - settings = self.get_prompt_execution_settings_from_settings(settings) - assert isinstance(settings, OpenAIChatPromptExecutionSettings) # nosec - - settings.stream = True - settings.messages = self._prepare_chat_history_for_request(chat_history) - settings.ai_model_id = settings.ai_model_id or self.ai_model_id - - response = await self._send_request(settings) - if not isinstance(response, AsyncStream): - raise ServiceInvalidResponseError("Expected an AsyncStream[ChatCompletionChunk] response.") - async for chunk in response: - if len(chunk.choices) == 0: - continue - - assert isinstance(chunk, ChatCompletionChunk) # nosec - chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk) - yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices - ] - @classmethod def from_dict(cls, settings: dict[str, Any]) -> "AzureChatCompletion": """Initialize an Azure OpenAI service from a dictionary of settings. diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py index eaef9ff64931..a5e8ca638aab 100644 --- a/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py +++ b/python/tests/unit/connectors/ai/open_ai/services/test_azure_chat_completion.py @@ -948,4 +948,8 @@ async def test_cmc_streaming( model=azure_openai_unit_test_env["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"], stream=True, messages=azure_chat_completion._prepare_chat_history_for_request(chat_history), + # NOTE: The `stream_options={"include_usage": True}` is explicitly enforced in + # `OpenAIChatCompletionBase._inner_get_streaming_chat_message_contents`. + # To ensure consistency, we align the arguments here accordingly. + stream_options={"include_usage": True}, ) From 8c481dfb77913fb45d3cac1640d39729482ecf84 Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Mon, 25 Nov 2024 15:45:50 -0800 Subject: [PATCH 08/23] .Net: Fix typo (#9824) ### Motivation and Context A typo is causing the spell check in our pipeline to fail, thus block PRs. ### Description Fix the typo. 
### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .../src/Connectors/Connectors.Memory.Kusto/KustoMemoryStore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/src/Connectors/Connectors.Memory.Kusto/KustoMemoryStore.cs b/dotnet/src/Connectors/Connectors.Memory.Kusto/KustoMemoryStore.cs index 359de4d57a5e..5e28a7a8a016 100644 --- a/dotnet/src/Connectors/Connectors.Memory.Kusto/KustoMemoryStore.cs +++ b/dotnet/src/Connectors/Connectors.Memory.Kusto/KustoMemoryStore.cs @@ -374,7 +374,7 @@ private static string GetCollectionName(string tableName) /// Returns base Kusto query. ///
/// - /// Kusto is an append-only store. Although deletions are possible, they are highly discourged, + /// Kusto is an append-only store. Although deletions are possible, they are highly discouraged, /// and should only be used in rare cases (see: https://learn.microsoft.com/en-us/azure/data-explorer/kusto/concepts/data-soft-delete#use-cases). /// As such, the recommended approach for dealing with row updates is versioning. /// An easy way to achieve this is by using the ingestion time of the record (insertion time). From a0d2f75ca9f13ff8aff3e35b108423fde8761f17 Mon Sep 17 00:00:00 2001 From: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> Date: Tue, 26 Nov 2024 09:56:36 +0000 Subject: [PATCH 09/23] Update codeql actions to v3 (#9826) ### Motivation and Context CodeQL Action v2 will be deprecated on December 5th, 2024. Please update all occurrences of the CodeQL Action in your workflow files to v3. For more information, see https://github.blog/changelog/2024-01-12-code-scanning-deprecation-of-codeql-action-v2/ ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index a7cfd921830b..f679ff830971 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -34,7 +34,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -48,7 +48,7 @@ jobs: # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild if: ${{ matrix.language != 'java' }} - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -61,6 +61,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" From bead9ef49e7166ab4b25cbafbe2c7b9f66850907 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:15:24 +0100 Subject: [PATCH 10/23] Adding issue type support for template (#9792) ### Motivation and Context Automatically sets the new category type for github issues from the template. As described in this post. 
- https://github.com/orgs/community/discussions/139933 --- .github/ISSUE_TEMPLATE/bug_report.md | 1 + .github/ISSUE_TEMPLATE/feature_graduation.md | 1 + .github/ISSUE_TEMPLATE/feature_request.md | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index e2455f84be12..d7f30df96e71 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -2,6 +2,7 @@ name: Bug report about: Create a report to help us improve title: 'Bug: ' +type: 'bug' labels: ["bug"] projects: ["semantic-kernel"] assignees: '' diff --git a/.github/ISSUE_TEMPLATE/feature_graduation.md b/.github/ISSUE_TEMPLATE/feature_graduation.md index 80ad9f4e9167..b1c2aa254f02 100644 --- a/.github/ISSUE_TEMPLATE/feature_graduation.md +++ b/.github/ISSUE_TEMPLATE/feature_graduation.md @@ -3,6 +3,7 @@ name: Feature graduation about: Plan the graduation of an experimental feature title: 'Graduate XXX feature' labels: ["feature_graduation"] +type: 'feature' projects: ["semantic-kernel"] assignees: '' diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 3289535f2120..0529a7c55785 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -3,6 +3,7 @@ name: Feature request about: Suggest an idea for this project title: 'New Feature: ' labels: '' +type: 'feature' projects: ["semantic-kernel"] assignees: '' From 370c89a836145fbee6c239179f8d22471b71339f Mon Sep 17 00:00:00 2001 From: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:52:02 +0000 Subject: [PATCH 11/23] .Net: Improve docs for OpenAI repsonse format property (#9801) ### Motivation and Context Closes #7171 ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --------- Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com> --- .../Settings/OpenAIPromptExecutionSettings.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs index e6a86b5f1ba2..3a5e632b7664 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI/Settings/OpenAIPromptExecutionSettings.cs @@ -143,6 +143,10 @@ public long? Seed /// Gets or sets the response format to use for the completion. ///
/// + /// An object specifying the format that the model must output. + /// Setting to { "type": "json_schema", "json_schema": { ...} } enables Structured Outputs which ensures the model will match your supplied JSON schema. Learn more in the Structured Outputs guide. + /// Setting to { "type": "json_object" } enables JSON mode, which ensures the message the model generates is valid JSON. + /// Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason= "length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length. /// Possible values are: /// - values: "json_object", "text"; /// - object; From 8dadef26b89baca3482dcdae7acf9d74a98ce1fb Mon Sep 17 00:00:00 2001 From: Tao Chen Date: Tue, 26 Nov 2024 21:44:25 -0800 Subject: [PATCH 12/23] Python: Refactor chat completion concept samples (#9822) ### Motivation and Context Our chat completion concept samples are not well organized and over-complicated with prompts and different settings. ### Description This PR refactors the samples such that they only capture the essential concepts to the chat completion services. This PR also adds sample usage for all supported chat completion services in Semantic Kernel Python. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- python/samples/concepts/README.md | 1 + .../chat_completion/azure_chat_gpt_api.py | 110 ------- .../chat_completion/azure_chat_image_input.py | 76 ----- .../chat_completion/chat_anthropic_api.py | 89 ------ .../chat_completion/chat_bedrock_api.py | 94 ------ .../concepts/chat_completion/chat_gpt_api.py | 71 ----- .../chat_completion/chat_mistral_api.py | 89 ------ .../chat_completion/chat_streaming.py | 97 ------ .../chat_completion/openai_logit_bias.py | 235 -------------- .../chat_completion/simple_chatbot.py | 89 ++++++ .../simple_chatbot_kernel_function.py | 127 ++++++++ .../simple_chatbot_logit_bias.py | 111 +++++++ .../simple_chatbot_streaming.py | 102 ++++++ .../simple_chatbot_with_image.py | 132 ++++++++ .../setup/chat_completion_services.py | 299 ++++++++++++++++++ .../concepts/setup/openai_env_setup.py | 65 ++-- .../connectors/ai/onnx/__init__.py | 7 +- python/tests/samples/test_concepts.py | 28 +- 18 files changed, 905 insertions(+), 917 deletions(-) delete mode 100644 python/samples/concepts/chat_completion/azure_chat_gpt_api.py delete mode 100644 python/samples/concepts/chat_completion/azure_chat_image_input.py delete mode 100644 python/samples/concepts/chat_completion/chat_anthropic_api.py delete mode 100644 python/samples/concepts/chat_completion/chat_bedrock_api.py delete mode 100644 python/samples/concepts/chat_completion/chat_gpt_api.py delete mode 100644 python/samples/concepts/chat_completion/chat_mistral_api.py delete mode 100644 python/samples/concepts/chat_completion/chat_streaming.py 
delete mode 100644 python/samples/concepts/chat_completion/openai_logit_bias.py create mode 100644 python/samples/concepts/chat_completion/simple_chatbot.py create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_streaming.py create mode 100644 python/samples/concepts/chat_completion/simple_chatbot_with_image.py create mode 100644 python/samples/concepts/setup/chat_completion_services.py diff --git a/python/samples/concepts/README.md b/python/samples/concepts/README.md index d949d7200559..a7008d4b92af 100644 --- a/python/samples/concepts/README.md +++ b/python/samples/concepts/README.md @@ -5,6 +5,7 @@ This section contains code snippets that demonstrate the usage of Semantic Kerne | Features | Description | | -------- | ----------- | | Agents | Creating and using [agents](../../semantic_kernel/agents/) in Semantic Kernel | +| Audio | Using services that support audio-to-text and text-to-audio conversion | | AutoFunctionCalling | Using `Auto Function Calling` to allow function call capable models to invoke Kernel Functions automatically | | ChatCompletion | Using [`ChatCompletion`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/connectors/ai/chat_completion_client_base.py) messaging capable service with models | | ChatHistory | Using and serializing the [`ChatHistory`](https://github.com/microsoft/semantic-kernel/blob/main/python/semantic_kernel/contents/chat_history.py) | diff --git a/python/samples/concepts/chat_completion/azure_chat_gpt_api.py b/python/samples/concepts/chat_completion/azure_chat_gpt_api.py deleted file mode 100644 index d2f372ec762f..000000000000 --- a/python/samples/concepts/chat_completion/azure_chat_gpt_api.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import logging - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion -from semantic_kernel.contents import ChatHistory - -logging.basicConfig(level=logging.WARNING) - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "chat-gpt" -chat_service = AzureChatCompletion( - service_id=service_id, -) -kernel.add_service(chat_service) - -## there are three ways to create the request settings in code: # noqa: E266 -# Note: the prompt_execution_settings are a dictionary with the service_id as the key and the request settings as the value. # noqa: E501 - -## 1. create the request settings from the base class: # noqa: E266 -# from semantic_kernel.connectors.ai.chat_completion_client_base import PromptExecutionSettings -# req_settings = PromptExecutionSettings(extension_data = { "max_tokens": 2000, "temperature": 0.7, "top_p": 0.8} ) -## This method (using the PromptExecutionSettings base class) is the most generic, and it allows you to store request settings for different services in the same extension_data field. 
There are two downsides to this approach: the specific request setting class will be created dynamically for each call, this is overhead when using just a single service. and the request settings are not type checked, so you will receive error messages once the dynamic creation of the request settings class fails. # noqa: E501 E266 - -## 2. create the request settings directly for the service you are using: # noqa: E266 -# req_settings = sk_oai.AzureChatPromptExecutionSettings(max_tokens=2000, temperature=0.7, top_p=0.8) - -## The second method is useful when you are using a single service, and you want to have type checking on the request settings or when you are using multiple instances of the same type of service, for instance gpt-35-turbo and gpt-4, both in openai and both for chat. # noqa: E501 E266 -## 3. create the request settings from the kernel based on the registered service class: # noqa: E266 -req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) -req_settings.max_tokens = 2000 -req_settings.temperature = 0.7 -req_settings.top_p = 0.8 -req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": []}) -## The third method is the most specific as the returned request settings class is the one that is registered for the service and has some fields already filled in, like the service_id and ai_model_id. # noqa: E501 E266 - - -chat_function = kernel.add_function( - prompt=system_message + """{{$chat_history}}{{$user_input}}""", - function_name="chat", - plugin_name="chat", - prompt_execution_settings=req_settings, -) - -history = ChatHistory() -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="") - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=history, - ) - print(f"Mosscap:> {answer}") - - history.add_user_message(user_input) - history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/azure_chat_image_input.py b/python/samples/concepts/chat_completion/azure_chat_image_input.py deleted file mode 100644 index 5a813ee13eba..000000000000 --- a/python/samples/concepts/chat_completion/azure_chat_image_input.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. 
- -import asyncio -import logging - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion -from semantic_kernel.contents import ChatHistory, ChatMessageContent, ImageContent, TextContent - -logging.basicConfig(level=logging.WARNING) - -system_message = """ -You are an image reviewing chat bot. Your name is Mosscap and you have one goal -critiquing images that are supplied. -""" - -kernel = Kernel() - -service_id = "chat-gpt" -chat_service = AzureChatCompletion(service_id=service_id) -kernel.add_service(chat_service) - -req_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) -req_settings.max_tokens = 2000 -req_settings.temperature = 0.7 -req_settings.top_p = 0.8 -req_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": []}) - -chat_function = kernel.add_function( - prompt=system_message + """{{$chat_history}}""", - function_name="chat", - plugin_name="chat", - prompt_execution_settings=req_settings, -) - - -async def chat(uri: str | None = None, image_path: str | None = None) -> bool: - history = ChatHistory() - if uri: - history.add_message( - ChatMessageContent( - role="user", - items=[TextContent(text="What is in this image?"), ImageContent(uri=uri)], - ) - ) - elif image_path: - history.add_message( - ChatMessageContent( - role="user", - items=[TextContent(text="What is in this image?"), ImageContent.from_image_path(image_path)], - ) - ) - else: - history.add_user_message("Hi there, who are you?") - answer = kernel.invoke_stream( - chat_function, - chat_history=history, - ) - print("Mosscap:> ", end="") - async for message in answer: - print(str(message[0]), end="") - print("\n") - - -async def main() -> None: - print("Get a description of a image from a URL.") - await chat( - uri="https://upload.wikimedia.org/wikipedia/commons/d/d5/Half-timbered_mansion%2C_Zirkel%2C_East_view.jpg" - ) - print("Get a description of the same image but now from a local file!") - await chat(image_path="samples/concepts/resources/sample_image.jpg") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_anthropic_api.py b/python/samples/concepts/chat_completion/chat_anthropic_api.py deleted file mode 100644 index 4494a07362c5..000000000000 --- a/python/samples/concepts/chat_completion/chat_anthropic_api.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion -from semantic_kernel.contents import ChatHistory - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. 
-""" - -kernel = Kernel() - -service_id = "mistral-ai-chat" -kernel.add_service(AnthropicChatCompletion(service_id=service_id, ai_model_id="claude-3-opus-20240229")) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.system = system_message -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory() -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="", flush=True) - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print(f"Mosscap:> {answer}") - - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_bedrock_api.py b/python/samples/concepts/chat_completion/chat_bedrock_api.py deleted file mode 100644 index cd56cefb7a47..000000000000 --- a/python/samples/concepts/chat_completion/chat_bedrock_api.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "bedrock-chat" -kernel.add_service(BedrockChatCompletion(service_id=service_id, model_id="cohere.command-r-v1:0")) - -settings = BedrockChatPromptExecutionSettings( - max_tokens=2000, - temperature=0.7, - top_p=0.8, - # Cohere Command specific settings: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html - extension_data={ - "presence_penalty": 0.5, - "seed": 5, - }, -) - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory() -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="", flush=True) - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print(f"Mosscap:> {answer}") - - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_gpt_api.py b/python/samples/concepts/chat_completion/chat_gpt_api.py deleted file mode 100644 index 66a3839800b8..000000000000 --- a/python/samples/concepts/chat_completion/chat_gpt_api.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion -from semantic_kernel.contents import ChatHistory -from semantic_kernel.functions import KernelArguments - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "chat-gpt" -kernel.add_service(OpenAIChatCompletion(service_id=service_id, ai_model_id="gpt-3.5-turbo")) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory(system_message=system_message) -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") -chat_history.add_user_message("I want to find a hotel in Seattle with free wifi and a pool.") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - answer = await kernel.invoke(chat_function, KernelArguments(user_input=user_input, chat_history=chat_history)) - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - print(f"Mosscap:> {answer}") - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_mistral_api.py b/python/samples/concepts/chat_completion/chat_mistral_api.py deleted file mode 100644 index adada91a944c..000000000000 --- a/python/samples/concepts/chat_completion/chat_mistral_api.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion -from semantic_kernel.contents import ChatHistory - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. -""" - -kernel = Kernel() - -service_id = "mistral-ai-chat" -kernel.add_service(MistralAIChatCompletion(service_id=service_id)) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -chat_function = kernel.add_function( - plugin_name="ChatBot", - function_name="Chat", - prompt="{{$chat_history}}{{$user_input}}", - template_format="semantic-kernel", - prompt_execution_settings=settings, -) - -chat_history = ChatHistory(system_message=system_message) -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") -chat_history.add_user_message("I want to find a hotel in Seattle with free wifi and a pool.") - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - stream = True - if stream: - chunks = kernel.invoke_stream( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print("Mosscap:> ", end="") - answer = "" - async for message in chunks: - print(str(message[0]), end="") - answer += str(message[0]) - print("\n") - else: - answer = await kernel.invoke( - chat_function, - user_input=user_input, - chat_history=chat_history, - ) - print(f"Mosscap:> {answer}") - - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/chat_streaming.py b/python/samples/concepts/chat_completion/chat_streaming.py deleted file mode 100644 index bad6e9ebd09a..000000000000 --- a/python/samples/concepts/chat_completion/chat_streaming.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -from functools import reduce - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig - -prompt = """ -ChatBot can have a conversation with you about any topic. -It can give explicit instructions or say 'I don't know' -when it doesn't know the answer. - -{{$chat_history}} - -User:> {{$user_input}} -ChatBot:> -""" - -kernel = Kernel() - -service_id = "chat" -kernel.add_service(OpenAIChatCompletion(service_id=service_id)) - -settings = kernel.get_prompt_execution_settings_from_service_id(service_id) -settings.max_tokens = 2000 -settings.temperature = 0.7 -settings.top_p = 0.8 - -prompt_template_config = PromptTemplateConfig( - template=prompt, - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable( - name="user_input", - description="The user input", - is_required=True, - default="", - ), - InputVariable( - name="chat_history", - description="The history of the conversation", - is_required=True, - ), - ], - execution_settings=settings, -) - -chat_history = ChatHistory() -chat_history.add_user_message("Hi there, who are you?") -chat_history.add_assistant_message("I am Mosscap, a chat bot. 
I'm trying to figure out what people need") - -chat_function = kernel.add_function( - plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config -) - - -async def chat(chat_history: ChatHistory) -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - - print("ChatBot:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - responses = kernel.invoke_stream(chat_function, user_input=user_input, chat_history=chat_history) - async for message in responses: - streamed_chunks.append(message[0]) - print(str(message[0]), end="") - print("") - chat_history.add_user_message(user_input) - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - chat_history.add_message(streaming_chat_message) - return True - - -async def main() -> None: - chatting = True - while chatting: - chatting = await chat(chat_history) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/openai_logit_bias.py b/python/samples/concepts/chat_completion/openai_logit_bias.py deleted file mode 100644 index f39416d0370e..000000000000 --- a/python/samples/concepts/chat_completion/openai_logit_bias.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -from typing import Any - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai import PromptExecutionSettings -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAITextCompletion -from semantic_kernel.contents import AuthorRole, ChatHistory -from semantic_kernel.functions import KernelArguments -from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig - -""" -Logit bias enables prioritizing certain tokens within a given output. -To utilize the logit bias function, you will need to know the token ids of the words you are using. -See the GPT Tokenizer to obtain token ids: https://platform.openai.com/tokenizer -Read more about logit bias and how to configure output: https://help.openai.com/en/articles/5247780-using-logit-bias-to-define-token-probability -""" - - -def _config_ban_tokens(settings: PromptExecutionSettings, keys: dict[Any, Any]): - if settings.logit_bias is None: - settings.logit_bias = {} - # Map each token in the keys list to a bias value from -100 (a potential ban) to 100 (exclusive selection) - for k in keys: - # -100 to potentially ban all tokens in the list - settings.logit_bias[k] = -100 - return settings - - -def _prepare_input_chat(chat: ChatHistory): - return "".join([f"{msg.role}: {msg.content}\n" for msg in chat]) - - -async def chat_request_example(kernel: Kernel): - service_id = "chat_service" - openai_chat_completion = OpenAIChatCompletion( - service_id=service_id, - ai_model_id="gpt-3.5-turbo", - ) - kernel.add_service(openai_chat_completion) - - # Spaces and capitalization affect the token ids. - # The following is the token ids of basketball related words. 
- keys = [ - 2032, - 680, - 9612, - 26675, - 3438, - 42483, - 21265, - 6057, - 11230, - 1404, - 2484, - 12494, - 35, - 822, - 11108, - ] - banned_words = [ - "swish", - "screen", - "score", - "dominant", - "basketball", - "game", - "GOAT", - "Shooting", - "Dribbling", - ] - - # Model will try its best to avoid using any of the above words - settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) - settings = _config_ban_tokens(settings, keys) - - prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable( - name="user_input", description="The history of the conversation", is_required=True, default="" - ), - ], - execution_settings=settings, - ) - - chat = ChatHistory() - - chat.add_user_message("Hi there, who are you?") - chat.add_assistant_message("I am an AI assistant here to answer your questions.") - - chat_function = kernel.add_function( - plugin_name="ChatBot", function_name="Chat", prompt_template_config=prompt_template_config - ) - - chat.add_system_message("You are a basketball expert") - chat.add_user_message("I love the LA Lakers, tell me an interesting fact about LeBron James.") - - answer = await kernel.invoke(chat_function, KernelArguments(user_input=_prepare_input_chat(chat))) - chat.add_assistant_message(str(answer)) - - chat.add_user_message("What are his best all-time stats?") - answer = await kernel.invoke(chat_function, KernelArguments(user_input=_prepare_input_chat(chat))) - chat.add_assistant_message(str(answer)) - - print(chat) - - kernel.remove_all_services() - - return chat, banned_words - - -async def text_complete_request_example(kernel: Kernel): - service_id = "text_service" - openai_text_completion = OpenAITextCompletion( - service_id=service_id, - ai_model_id="gpt-3.5-turbo-instruct", - ) - kernel.add_service(openai_text_completion) - - # Spaces and capitalization affect the token ids. - # The following is the token ids of pie related words. 
- keys = [ - 18040, - 17180, - 16108, - 4196, - 79, - 931, - 5116, - 30089, - 36724, - 47, - 931, - 5116, - 431, - 5171, - 613, - 5171, - 350, - 721, - 272, - 47, - 721, - 272, - ] - banned_words = [ - "apple", - " apple", - "Apple", - " Apple", - "pumpkin", - " pumpkin", - " Pumpkin", - "pecan", - " pecan", - " Pecan", - "Pecan", - ] - - # Model will try its best to avoid using any of the above words - settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id) - settings = _config_ban_tokens(settings, keys) - - prompt_template_config = PromptTemplateConfig( - template="{{$user_input}}", - name="chat", - template_format="semantic-kernel", - input_variables=[ - InputVariable( - name="user_input", description="The history of the conversation", is_required=True, default="" - ), - ], - execution_settings=settings, - ) - - chat = ChatHistory() - - chat.add_user_message("The best pie flavor to have in autumn is") - - text_function = kernel.add_function( - plugin_name="TextBot", function_name="TextCompletion", prompt_template_config=prompt_template_config - ) - - answer = await kernel.invoke(text_function, KernelArguments(user_input=_prepare_input_chat(chat))) - chat.add_assistant_message(str(answer)) - - print(chat) - - kernel.remove_all_services() - - return chat, banned_words - - -def _check_banned_words(banned_list, actual_list) -> bool: - passed = True - for word in banned_list: - if word in actual_list: - print(f'The banned word "{word}" was found in the answer') - passed = False - return passed - - -def _format_output(chat, banned_words) -> None: - print("--- Checking for banned words ---") - chat_bot_ans_words = [ - word for msg in chat.messages if msg.role == AuthorRole.ASSISTANT for word in msg.content.split() - ] - if _check_banned_words(banned_words, chat_bot_ans_words): - print("None of the banned words were found in the answer") - - -async def main() -> None: - kernel = Kernel() - - print("Chat completion example:") - print("------------------------") - chat, banned_words = await chat_request_example(kernel) - _format_output(chat, banned_words) - - print("------------------------") - - print("\nText completion example:") - print("------------------------") - chat, banned_words = await text_complete_request_example(kernel) - _format_output(chat, banned_words) - - return - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot.py b/python/samples/concepts/chat_completion/simple_chatbot.py new file mode 100644 index 000000000000..a52b52aaace1 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot.py @@ -0,0 +1,89 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory + +# This sample shows how to create a chatbot. This sample uses the following two main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. 
+ + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a chat history object with the system message. +chat_history = ChatHistory(system_message=system_message) + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + response = await chat_completion_service.get_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + print(f"Mosscap:> {response}") + + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_assistant_message(str(response)) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py new file mode 100644 index 000000000000..361e4e706d5d --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py @@ -0,0 +1,127 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.functions.kernel_arguments import KernelArguments +from semantic_kernel.kernel import Kernel + +# This sample shows how to create a chatbot using a kernel function. +# This sample uses the following two main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# - a KernelFunction: This function will be a prompt function, meaning the function is composed of +# a prompt and will be invoked by Semantic Kernel. 
+# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + +# [NOTE] +# The purpose of this sample is to demonstrate how to use a kernel function. +# To build a basic chatbot, it is sufficient to use a ChatCompletionService with a chat history directly. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a chat history object with the system message. +chat_history = ChatHistory(system_message=system_message) + +# Create a kernel and register a prompt function. +# The prompt here contains two variables: chat_history and user_input. +# They will be replaced by the kernel with the actual values when the function is invoked. +# [NOTE] +# The chat_history, which is a ChatHistory object, will be serialized to a string internally +# to create/render the final prompt. +# Since this sample uses a chat completion service, the prompt will be deserialized back to +# a ChatHistory object that gets passed to the chat completion service. This new chat history +# object will contain the original messages and the user input. +kernel = Kernel() +chat_function = kernel.add_function( + plugin_name="ChatBot", + function_name="Chat", + prompt="{{$chat_history}}{{$user_input}}", + template_format="semantic-kernel", + # You can attach the request settings to the function or + # pass the settings to the kernel.invoke method via the kernel arguments. + # If you specify the settings in both places, the settings in the kernel arguments will + # take precedence given the same service id. + # prompt_execution_settings=request_settings, +) + +# Invoking a kernel function requires a service, so we add the chat completion service to the kernel. +kernel.add_service(chat_completion_service) + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Get the chat message content from the chat completion service. + kernel_arguments = KernelArguments( + settings=request_settings, + # Use keyword arguments to pass the chat history and user input to the kernel function. + chat_history=chat_history, + user_input=user_input, + ) + + answer = await kernel.invoke(chat_function, kernel_arguments) + # Alternatively, you can invoke the function directly with the kernel as an argument: + # answer = await chat_function.invoke(kernel, kernel_arguments) + + print(f"Mosscap:> {answer}") + + # Add the chat message to the chat history to keep track of the conversation. 
+ chat_history.add_user_message(user_input) + chat_history.add_assistant_message(str(answer)) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py new file mode 100644 index 000000000000..63fa49e1dc4c --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py @@ -0,0 +1,111 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory + +# This sample shows how to create a chatbot that whose output can be biased using logit bias. +# This sample uses the following three main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# - a list of tokens whose bias value will be reduced, meaning the likelihood of these tokens appearing +# in the output will be reduced. +# The chatbot in this sample is called Mosscap, who is an expert in basketball. + +# To learn more about logit bias, see: https://help.openai.com/en/articles/5247780-using-logit-bias-to-define-token-probability + + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot whose expertise is basketball. +Your name is Mosscap and you have one goal: to answer questions about basketball. +""" + +# Create a chat history object with the system message. +chat_history = ChatHistory(system_message=system_message) + +# Create a list of tokens whose bias value will be reduced. +# The token ids of these words can be obtained using the GPT Tokenizer: https://platform.openai.com/tokenizer +# the targeted model series is GPT-4o & GPT-4o mini +# banned_words = ["basketball", "NBA", "player", "career", "points"] +banned_tokens = [ + # "basketball" + 106622, + 5052, + # "NBA" + 99915, + # " NBA" + 32272, + # "player" + 6450, + # " player" + 5033, + # "career" + 198069, + # " career" + 8461, + # "points" + 14011, + # " points" + 5571, +] +# Configure the logit bias settings to minimize the likelihood of the +# tokens in the banned_tokens list appearing in the output. 
+request_settings.logit_bias = {k: -100 for k in banned_tokens} + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + response = await chat_completion_service.get_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + print(f"Mosscap:> {response}") + + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_assistant_message(str(response)) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Who has the most career points in NBA history? + # Mosscap:> As of October 2023, the all-time leader in total regular-season scoring in the history of the National + # Basketball Association (N.B.A.) is Kareem Abdul-Jabbar, who scored 38,387 total regular-seasonPoints + # during his illustrious 20-year playing Career. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_streaming.py b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py new file mode 100644 index 000000000000..21744ffd53c2 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py @@ -0,0 +1,102 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from functools import reduce + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory + +# This sample shows how to create a chatbot that streams responses. +# This sample uses the following two main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# The chatbot in this sample is called Mosscap, who responds to user messages with long flowery prose. + + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +# Please note that not all models support streaming responses. Make sure to select a model that supports streaming. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. +""" + +# Create a chat history object with the system message. 
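+# [NOTE]
+# The same ChatHistory instance is reused across turns. Because the service streams each reply in
+# chunks, the chunks are combined into a single message further below before being appended to the
+# history, so that later turns still see the complete conversation.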
+chat_history = ChatHistory(system_message=system_message) + + +async def chat() -> bool: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + # The response is an async generator that streams the response in chunks. + response = chat_completion_service.get_streaming_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + + # Capture the chunks of the response and print them as they come in. + chunks = [] + print("Mosscap:> ", end="") + async for chunk in response: + chunks.append(chunk) + print(chunk, end="") + print("") + + # Combine the chunks into a single message to add to the chat history. + full_message = reduce(lambda first, second: first + second, chunks) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(full_message) + + return True + + +async def main() -> None: + # Start the chat loop. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # User:> Why is the sky blue in one sentence? + # Mosscap:> The sky is blue due to the scattering of sunlight by the molecules in the Earth's atmosphere, + # a phenomenon known as Rayleigh scattering, which causes shorter blue wavelengths to become more + # prominent in our visual perception. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_image.py b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py new file mode 100644 index 000000000000..f7fac3448816 --- /dev/null +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py @@ -0,0 +1,132 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio + +from samples.concepts.setup.chat_completion_services import ( + Services, + get_chat_completion_service_and_request_settings, +) +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.image_content import ImageContent +from semantic_kernel.contents.text_content import TextContent + +# This sample shows how to create a chatbot that responds to user messages with image input. +# This sample uses the following three main components: +# - a ChatCompletionService: This component is responsible for generating responses to user messages. +# - a ChatHistory: This component is responsible for keeping track of the chat history. +# - an ImageContent: This component is responsible for representing image content. +# The chatbot in this sample is called Mosscap. + +# You can select from the following chat completion services: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. 
+chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +IMAGE_URI = "https://upload.wikimedia.org/wikipedia/commons/d/d5/Half-timbered_mansion%2C_Zirkel%2C_East_view.jpg" +IMAGE_PATH = "samples/concepts/resources/sample_image.jpg" + +# Create an image content with the image URI. +image_content_remote = ImageContent(uri=IMAGE_URI) +# You can also create an image content with a local image path. +image_content_local = ImageContent.from_image_file(IMAGE_PATH) + + +# [NOTE] +# Not all models support image input. Make sure to select a model that supports image input. +# Not all services support image input from an image URI. If your image is saved in a remote location, +# make sure to use a service that supports image input from a URI. + + +# This is the system message that gives the chatbot its personality. +system_message = """ +You are an image reviewing chat bot. Your name is Mosscap and you have one goal critiquing images that are supplied. +""" + +# Create a chat history object with the system message and an initial user message with an image input. +chat_history = ChatHistory(system_message=system_message) +chat_history.add_message( + ChatMessageContent( + role="user", + items=[TextContent(text="What is in this image?"), image_content_local], + ) +) + + +async def chat(skip_user_input: bool = False) -> bool: + """Chat with the chatbot. + + Args: + skip_user_input (bool): Whether to skip user input. Defaults to False. + """ + if not skip_user_input: + try: + user_input = input("User:> ") + except KeyboardInterrupt: + print("\n\nExiting chat...") + return False + except EOFError: + print("\n\nExiting chat...") + return False + + if user_input == "exit": + print("\n\nExiting chat...") + return False + + # Add the user message to the chat history so that the chatbot can respond to it. + chat_history.add_user_message(user_input) + + # Get the chat message content from the chat completion service. + response = await chat_completion_service.get_chat_message_content( + chat_history=chat_history, + settings=request_settings, + ) + print(f"Mosscap:> {response}") + + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_assistant_message(str(response)) + + return True + + +async def main() -> None: + # Start the chat with the image input. + await chat(skip_user_input=True) + # Continue the chat. The chat loop will continue until the user types "exit". + chatting = True + while chatting: + chatting = await chat() + + # Sample output: + # Mosscap:> The image features a large, historic building that exhibits a traditional half-timbered architectural + # style. The structure is located near a dense forest, characterized by lush green trees. The sky above + # is partly cloudy, suggesting a pleasant day. The building itself appears well-maintained, with distinct + # features such as a turret or spire and decorative wood framing, creating an elegant and charming + # appearance in its natural setting. + # User:> What do you think about the composition of the photo? + # Mosscap:> The composition of the photo is quite effective. Here are a few observations: + # 1. **Framing**: The building is positioned slightly off-center, which can create a more dynamic and + # engaging image. This drawing of attention to the structure, while still showcasing the surrounding + # landscape. + # 2. 
**Foreground and Background**: The green foliage and trees in the foreground provide a nice contrast + # to the building, enhancing its visual appeal. The dense forest in the background adds depth and context + # to the scene. + # 3. **Lighting**: The light appears to be favorable, suggesting a well-lit scene. The clouds add texture + # to the sky without overwhelming the overall brightness. + # 4. **Perspective**: The angle from which the photo is taken allows viewers to appreciate both the + # architecture of the building and its natural environment, creating a harmonious balance. + # Overall, the composition successfully highlights the building while incorporating its natural + # surroundings, inviting viewers to appreciate both elements together. + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py new file mode 100644 index 000000000000..903b59f42928 --- /dev/null +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -0,0 +1,299 @@ +# Copyright (c) Microsoft. All rights reserved. + +from enum import Enum + +from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings +from semantic_kernel.connectors.ai.azure_ai_inference import ( + AzureAIInferenceChatCompletion, + AzureAIInferenceChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings +from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings +from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings +from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings +from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate +from semantic_kernel.connectors.ai.open_ai import ( + AzureChatCompletion, + AzureChatPromptExecutionSettings, + OpenAIChatCompletion, + OpenAIChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + + +class Services(Enum): + """Enum for supported chat completion services. 
+ + For service specific settings, refer to this documentation: + https://github.com/microsoft/semantic-kernel/blob/main/python/samples/concepts/setup/ALL_SETTINGS.md + """ + + OPENAI = "openai" + AZURE_OPENAI = "azure_openai" + AZURE_AI_INFERENCE = "azure_ai_inference" + ANTHROPIC = "anthropic" + BEDROCK = "bedrock" + GOOGLE_AI = "google_ai" + MISTRAL_AI = "mistral_ai" + OLLAMA = "ollama" + ONNX = "onnx" + VERTEX_AI = "vertex_ai" + + +def get_chat_completion_service_and_request_settings( + service_name: str, +) -> tuple[ChatCompletionClientBase, PromptExecutionSettings]: + """Return service and request settings.""" + chat_services = { + Services.OPENAI: get_openai_chat_completion_service_and_request_settings, + Services.AZURE_OPENAI: get_azure_openai_chat_completion_service_and_request_settings, + Services.AZURE_AI_INFERENCE: get_azure_ai_inference_chat_completion_service_and_request_settings, + Services.ANTHROPIC: get_anthropic_chat_completion_service_and_request_settings, + Services.BEDROCK: get_bedrock_chat_completion_service_and_request_settings, + Services.GOOGLE_AI: get_google_ai_chat_completion_service_and_request_settings, + Services.MISTRAL_AI: get_mistral_ai_chat_completion_service_and_request_settings, + Services.OLLAMA: get_ollama_chat_completion_service_and_request_settings, + Services.ONNX: get_onnx_chat_completion_service_and_request_settings, + Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings, + } + + return chat_services[service_name]() + + +def get_openai_chat_completion_service_and_request_settings() -> tuple[ + OpenAIChatCompletion, OpenAIChatPromptExecutionSettings +]: + """Return OpenAI chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python + """ + chat_service = OpenAIChatCompletion() + request_settings = OpenAIChatPromptExecutionSettings(max_tokens=2000, temperature=0.7, top_p=0.8) + + return chat_service, request_settings + + +def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ + AzureChatCompletion, AzureChatPromptExecutionSettings +]: + """Return Azure OpenAI chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. 
+ Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = AzureChatCompletion() + request_settings = AzureChatPromptExecutionSettings() + + return chat_service, request_settings + + +def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[ + AzureAIInferenceChatCompletion, AzureAIInferenceChatPromptExecutionSettings +]: + """Return Azure AI Inference chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = AzureAIInferenceChatCompletion( + ai_model_id="id", # The model ID is simply an identifier as the model id cannot be obtained programmatically. + ) + request_settings = AzureAIInferenceChatPromptExecutionSettings() + + return chat_service, request_settings + + +def get_anthropic_chat_completion_service_and_request_settings() -> tuple[ + AnthropicChatCompletion, AnthropicChatPromptExecutionSettings +]: + """Return Anthropic chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = AnthropicChatCompletion() + request_settings = AnthropicChatPromptExecutionSettings() + + return chat_service, request_settings + + +def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ + BedrockChatCompletion, BedrockChatPromptExecutionSettings +]: + """Return Anthropic chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = BedrockChatCompletion(model_id="cohere.command-r-v1:0") + request_settings = BedrockChatPromptExecutionSettings( + # For model specific settings, specify them in the extension_data dictionary. 
+ # For example, for Cohere Command specific settings, refer to: + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html + extension_data={ + "presence_penalty": 0.5, + "seed": 5, + }, + ) + + return chat_service, request_settings + + +def get_google_ai_chat_completion_service_and_request_settings() -> tuple[ + GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings +]: + """Return Google AI chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = GoogleAIChatCompletion() + request_settings = GoogleAIChatPromptExecutionSettings() + + return chat_service, request_settings + + +def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ + MistralAIChatCompletion, MistralAIChatPromptExecutionSettings +]: + """Return Mistral AI chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = MistralAIChatCompletion() + request_settings = MistralAIChatPromptExecutionSettings() + + return chat_service, request_settings + + +def get_ollama_chat_completion_service_and_request_settings() -> tuple[ + OllamaChatCompletion, OllamaChatPromptExecutionSettings +]: + """Return Ollama chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = OllamaChatCompletion() + request_settings = OllamaChatPromptExecutionSettings( + # For model specific settings, specify them in the options dictionary. + # For more information on the available options, refer to the Ollama API documentation: + # https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values + options={ + "temperature": 0.8, + } + ) + + return chat_service, request_settings + + +def get_onnx_chat_completion_service_and_request_settings() -> tuple[ + OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings +]: + """Return Onnx chat completion service and request settings. 
+ + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3) + request_settings = OnnxGenAIPromptExecutionSettings() + + return chat_service, request_settings + + +def get_vertex_ai_chat_completion_service_and_request_settings() -> tuple[ + VertexAIChatCompletion, VertexAIChatPromptExecutionSettings +]: + """Return Vertex AI chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel + """ + chat_service = VertexAIChatCompletion() + request_settings = VertexAIChatPromptExecutionSettings() + + return chat_service, request_settings diff --git a/python/samples/concepts/setup/openai_env_setup.py b/python/samples/concepts/setup/openai_env_setup.py index b5dd5875629c..390e795a815d 100644 --- a/python/samples/concepts/setup/openai_env_setup.py +++ b/python/samples/concepts/setup/openai_env_setup.py @@ -1,63 +1,44 @@ # Copyright (c) Microsoft. All rights reserved. -# Semantic Kernel allows you multiple ways to setup your connectors. -# this sample shows that for OpenAI Connectors. - -# After installing the semantic-kernel package -# you can use the following code to setup OpenAI Connector - -# From environment settings -# using this method will try to find the required settings in the environment variables -# this is done using pydantic settings, see the full docs of that here: https://docs.pydantic.dev/latest/concepts/pydantic_settings/#usage -# We use a prefix for all the settings and then have names defined in the OpenAISettings class -# for OpenAI that is OPENAI_ as the prefix, with the following settings: -# - api_key (OPENAI_API_KEY): OpenAI API key, see https://platform.openai.com/account/api-keys -# - org_id (OPENAI_ORG_ID): This is usually optional unless your account belongs to multiple organizations. -# - chat_model_id (OPENAI_CHAT_MODEL_ID): The OpenAI chat model ID to use, for example, gpt-3.5-turbo or gpt-4, -# this variable is used in the OpenAIChatCompletion class and get's passed to the ai_model_id there. -# - text_model_id (OPENAI_TEXT_MODEL_ID): The OpenAI text model ID to use, for example, gpt-3.5-turbo-instruct, -# this variable is used in the OpenAITextCompletion class and get's passed to the ai_model_id there. -# - embedding_model_id (OPENAI_EMBEDDING_MODEL_ID): The embedding model ID to use, for example, text-embedding-ada-002, -# this variable is used in the OpenAITextEmbedding class and get's passed to the ai_model_id there. 
- import os from pydantic import ValidationError from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion +# Semantic Kernel allows you multiple ways to setup your connectors. This sample shows that for OpenAI Connectors. +# After installing the semantic-kernel package, you can use the following code to setup OpenAI Connector + +# 1. From environment settings +# Using this method will try to find the required settings in the environment variables. +# This is done using pydantic settings, see the full docs of that here: https://docs.pydantic.dev/latest/concepts/pydantic_settings/#usage +# We use a prefix for all the settings and then have names defined in the OpenAISettings class. +# For OpenAI that is OPENAI_ as the prefix. For a full list of OpenAI settings, refer to: +# https://github.com/microsoft/semantic-kernel/blob/main/python/samples/concepts/setup/ALL_SETTINGS.md try: - # when nothing is passed to the constructor, - # it will use the above environment variable names to find the required settings, - # in this case it will only fail if the OPENAI_CHAT_MODEL_ID and OPENAI_API_KEY are not found + # When nothing is passed to the constructor, it will use the above environment variable names + # to find the required settings. In this case it will only fail if the OPENAI_CHAT_MODEL_ID and + # OPENAI_API_KEY are not found service = OpenAIChatCompletion(service_id="openai_chat_service") except ValidationError as e: print(e) -# From a .env file -# when you want to store and use your settings from a specific file (any file as long as it is in the .env format) -# you can pass the path to the file to the constructor -# this will still look at the same names of the settings as above, but will try to load them from the file - +# 2. From a .env file +# When you want to store and use your settings from a specific file (any file as long as it is in the .env format), +# you can pass the path to the file to the constructor. This will still look at the same names of the settings as above, +# but will try to load them from the file try: - # this will try to load the settings from the file at the given path + # This will try to load the settings from the file at the given path service = OpenAIChatCompletion(service_id="openai_chat_service", env_file_path="path/to/env_file") except ValidationError as e: print(e) -# From a different value -# if you want to pass the settings yourself, you can do that by passing the values to the constructor -# this will ignore the environment variables and the .env file -# in this case our API_KEY is stored in a env variable called MY_API_KEY_VAR_NAME -# if using a file for this value, then we first need to uncomment and -# run the following code to load the .env file from the same folder as this file: -# from dotenv import load_dotenv -# dotenv_path = os.path.join(os.path.dirname(__file__), '.env') -# load_dotenv(dotenv_path) -# and after that pass the value directly to the constructor as shown below -# we can also fix another value, in this case the ai_model_id, -# which becomes chat_model_id in the settings, fixed to gpt-4o - +# 3. From a different value +# If you want to pass the settings yourself, you can do that by passing the values to the constructor. +# This will ignore the environment variables and the .env file. +# In this case our API_KEY is stored in an env variable called MY_API_KEY_VAR_NAME. 
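+# (MY_API_KEY_VAR_NAME is only an illustrative variable name; export it in your shell, or load it
+# from a .env file first, for example with the python-dotenv package, before constructing the service.)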
+# We can also hardcode another value, in this case the ai_model_id, which becomes chat_model_id in the +# settings, to gpt-4o try: # this will use the given values as the settings api_key = os.getenv("MY_API_KEY_VAR_NAME") diff --git a/python/semantic_kernel/connectors/ai/onnx/__init__.py b/python/semantic_kernel/connectors/ai/onnx/__init__.py index 3aa09740beef..ecce90d12615 100644 --- a/python/semantic_kernel/connectors/ai/onnx/__init__.py +++ b/python/semantic_kernel/connectors/ai/onnx/__init__.py @@ -1,9 +1,8 @@ # Copyright (c) Microsoft. All rights reserved. -from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import ( - OnnxGenAIPromptExecutionSettings, -) +from semantic_kernel.connectors.ai.onnx.onnx_gen_ai_prompt_execution_settings import OnnxGenAIPromptExecutionSettings from semantic_kernel.connectors.ai.onnx.services.onnx_gen_ai_chat_completion import OnnxGenAIChatCompletion from semantic_kernel.connectors.ai.onnx.services.onnx_gen_ai_text_completion import OnnxGenAITextCompletion +from semantic_kernel.connectors.ai.onnx.utils import ONNXTemplate -__all__ = ["OnnxGenAIChatCompletion", "OnnxGenAIPromptExecutionSettings", "OnnxGenAITextCompletion"] +__all__ = ["ONNXTemplate", "OnnxGenAIChatCompletion", "OnnxGenAIPromptExecutionSettings", "OnnxGenAITextCompletion"] diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index 23b4e509c363..abce5d4018f8 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -12,11 +12,11 @@ from samples.concepts.auto_function_calling.functions_defined_in_yaml_prompt import ( main as function_defined_in_yaml_prompt, ) -from samples.concepts.chat_completion.azure_chat_gpt_api import main as azure_chat_gpt_api -from samples.concepts.chat_completion.azure_chat_image_input import main as azure_chat_image_input -from samples.concepts.chat_completion.chat_gpt_api import main as chat_gpt_api -from samples.concepts.chat_completion.chat_streaming import main as chat_streaming -from samples.concepts.chat_completion.openai_logit_bias import main as openai_logit_bias +from samples.concepts.chat_completion.simple_chatbot import main as simple_chatbot +from samples.concepts.chat_completion.simple_chatbot_kernel_function import main as simple_chatbot_kernel_function +from samples.concepts.chat_completion.simple_chatbot_logit_bias import main as simple_chatbot_logit_bias +from samples.concepts.chat_completion.simple_chatbot_streaming import main as simple_chatbot_streaming +from samples.concepts.chat_completion.simple_chatbot_with_image import main as simple_chatbot_with_image from samples.concepts.filtering.auto_function_invoke_filters import main as auto_function_invoke_filters from samples.concepts.filtering.function_invocation_filters import main as function_invocation_filters from samples.concepts.filtering.function_invocation_filters_stream import main as function_invocation_filters_stream @@ -58,10 +58,19 @@ concepts = [ param(chat_gpt_api_function_calling, ["What is 3+3?", "exit"], id="chat_gpt_api_function_calling"), - param(azure_chat_gpt_api, ["Why is the sky blue?", "exit"], id="azure_chat_gpt_api"), - param(chat_gpt_api, ["What is life?", "exit"], id="chat_gpt_api"), - param(chat_streaming, ["Why is the sun hot?", "exit"], id="chat_streaming"), - param(openai_logit_bias, [], id="openai_logit_bias"), + param(simple_chatbot, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot"), + param(simple_chatbot_streaming, ["Why is the sky blue in 
one sentence?", "exit"], id="simple_chatbot_streaming"), + param(simple_chatbot_with_image, ["exit"], id="simple_chatbot_with_image"), + param( + simple_chatbot_logit_bias, + ["Who has the most career points in NBA history?", "exit"], + id="simple_chatbot_logit_bias", + ), + param( + simple_chatbot_kernel_function, + ["Why is the sky blue in one sentence?", "exit"], + id="simple_chatbot_kernel_function", + ), param(auto_function_invoke_filters, ["What is 3+3?", "exit"], id="auto_function_invoke_filters"), param(function_invocation_filters, ["What is 3+3?", "exit"], id="function_invocation_filters"), param(function_invocation_filters_stream, ["What is 3+3?", "exit"], id="function_invocation_filters_stream"), @@ -100,7 +109,6 @@ id="bing_search_plugin", marks=pytest.mark.skip(reason="Flaky test due to Azure OpenAI content policy"), ), - param(azure_chat_image_input, [], id="azure_chat_image_input"), param(custom_service_selector, [], id="custom_service_selector"), param(function_defined_in_json_prompt, ["What is 3+3?", "exit"], id="function_defined_in_json_prompt"), param(function_defined_in_yaml_prompt, ["What is 3+3?", "exit"], id="function_defined_in_yaml_prompt"), From e780d7b7fcc97c15a5e197480c666f76fafc26a7 Mon Sep 17 00:00:00 2001 From: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com> Date: Wed, 27 Nov 2024 07:21:09 -0800 Subject: [PATCH 13/23] .Net: Replaced IMemoryStore with IVectorStore in examples (#9833) ### Motivation and Context Replaced `IMemoryStore` usage with `IVectorStore` in Semantic Kernel examples. ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .../Caching/SemanticCachingWithFilters.cs | 149 ++++++++++++------ .../Optimization/FrugalGPTWithFilters.cs | 49 ++++-- .../PluginSelectionWithFilters.cs | 80 ++++++---- dotnet/samples/Demos/OnnxSimpleRAG/README.md | 2 +- 4 files changed, 188 insertions(+), 92 deletions(-) diff --git a/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs b/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs index cd90de3964b4..78c54df49434 100644 --- a/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs +++ b/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs @@ -1,11 +1,11 @@ // Copyright (c) Microsoft. All rights reserved. using System.Diagnostics; +using Azure.Identity; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.VectorData; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.Connectors.AzureCosmosDBMongoDB; -using Microsoft.SemanticKernel.Connectors.Redis; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Embeddings; namespace Caching; @@ -18,12 +18,6 @@ namespace Caching; ///
public class SemanticCachingWithFilters(ITestOutputHelper output) : BaseTest(output) { - /// - /// Similarity/relevance score, from 0 to 1, where 1 means exact match. - /// It's possible to change this value during testing to see how caching logic will behave. - /// - private const double SimilarityScore = 0.9; - /// /// Executing similar requests two times using in-memory caching store to compare execution time and results. /// Second execution is faster, because the result is returned from cache. @@ -31,7 +25,10 @@ public class SemanticCachingWithFilters(ITestOutputHelper output) : BaseTest(out [Fact] public async Task InMemoryCacheAsync() { - var kernel = GetKernelWithCache(_ => new VolatileMemoryStore()); + var kernel = GetKernelWithCache(services => + { + services.AddInMemoryVectorStore(); + }); var result1 = await ExecuteAsync(kernel, "First run", "What's the tallest building in New York?"); var result2 = await ExecuteAsync(kernel, "Second run", "What is the highest building in New York City?"); @@ -53,12 +50,15 @@ public async Task InMemoryCacheAsync() /// /// Executing similar requests two times using Redis caching store to compare execution time and results. /// Second execution is faster, because the result is returned from cache. - /// How to run Redis on Docker locally: https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/docker/ + /// How to run Redis on Docker locally: https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/docker/. /// [Fact] public async Task RedisCacheAsync() { - var kernel = GetKernelWithCache(_ => new RedisMemoryStore("localhost:6379", vectorSize: 1536)); + var kernel = GetKernelWithCache(services => + { + services.AddRedisVectorStore("localhost:6379"); + }); var result1 = await ExecuteAsync(kernel, "First run", "What's the tallest building in New York?"); var result2 = await ExecuteAsync(kernel, "Second run", "What is the highest building in New York City?"); @@ -84,10 +84,12 @@ public async Task RedisCacheAsync() [Fact] public async Task AzureCosmosDBMongoDBCacheAsync() { - var kernel = GetKernelWithCache(_ => new AzureCosmosDBMongoDBMemoryStore( - TestConfiguration.AzureCosmosDbMongoDb.ConnectionString, - TestConfiguration.AzureCosmosDbMongoDb.DatabaseName, - new(dimensions: 1536))); + var kernel = GetKernelWithCache(services => + { + services.AddAzureCosmosDBMongoDBVectorStore( + TestConfiguration.AzureCosmosDbMongoDb.ConnectionString, + TestConfiguration.AzureCosmosDbMongoDb.DatabaseName); + }); var result1 = await ExecuteAsync(kernel, "First run", "What's the tallest building in New York?"); var result2 = await ExecuteAsync(kernel, "Second run", "What is the highest building in New York City?"); @@ -110,27 +112,41 @@ public async Task AzureCosmosDBMongoDBCacheAsync() /// /// Returns instance with required registered services. 
/// - private Kernel GetKernelWithCache(Func cacheFactory) + private Kernel GetKernelWithCache(Action configureVectorStore) { var builder = Kernel.CreateBuilder(); - // Add Azure OpenAI chat completion service - builder.AddAzureOpenAIChatCompletion( - TestConfiguration.AzureOpenAI.ChatDeploymentName, - TestConfiguration.AzureOpenAI.Endpoint, - TestConfiguration.AzureOpenAI.ApiKey); - - // Add Azure OpenAI text embedding generation service - builder.AddAzureOpenAITextEmbeddingGeneration( - TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, - TestConfiguration.AzureOpenAIEmbeddings.Endpoint, - TestConfiguration.AzureOpenAIEmbeddings.ApiKey); - - // Add memory store for caching purposes (e.g. in-memory, Redis, Azure Cosmos DB) - builder.Services.AddSingleton(cacheFactory); + if (!string.IsNullOrWhiteSpace(TestConfiguration.AzureOpenAI.ApiKey)) + { + // Add Azure OpenAI chat completion service + builder.AddAzureOpenAIChatCompletion( + TestConfiguration.AzureOpenAI.ChatDeploymentName, + TestConfiguration.AzureOpenAI.Endpoint, + TestConfiguration.AzureOpenAI.ApiKey); + + // Add Azure OpenAI text embedding generation service + builder.AddAzureOpenAITextEmbeddingGeneration( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + TestConfiguration.AzureOpenAI.ApiKey); + } + else + { + // Add Azure OpenAI chat completion service + builder.AddAzureOpenAIChatCompletion( + TestConfiguration.AzureOpenAI.ChatDeploymentName, + TestConfiguration.AzureOpenAI.Endpoint, + new AzureCliCredential()); + + // Add Azure OpenAI text embedding generation service + builder.AddAzureOpenAITextEmbeddingGeneration( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + new AzureCliCredential()); + } - // Add text memory service that will be used to generate embeddings and query/store data. - builder.Services.AddSingleton(); + // Add vector store for caching purposes (e.g. in-memory, Redis, Azure Cosmos DB) + configureVectorStore(builder.Services); // Add prompt render filter to query cache and check if rendered prompt was already answered. builder.Services.AddSingleton(); @@ -164,7 +180,10 @@ public class CacheBaseFilter /// /// Filter which is executed during prompt rendering operation. /// - public sealed class PromptCacheFilter(ISemanticTextMemory semanticTextMemory) : CacheBaseFilter, IPromptRenderFilter + public sealed class PromptCacheFilter( + ITextEmbeddingGenerationService textEmbeddingGenerationService, + IVectorStore vectorStore) + : CacheBaseFilter, IPromptRenderFilter { public async Task OnPromptRenderAsync(PromptRenderContext context, Func next) { @@ -174,20 +193,22 @@ public async Task OnPromptRenderAsync(PromptRenderContext context, Func(CollectionName); + await collection.CreateCollectionIfNotExistsAsync(); + + // Search for similar prompts in cache. + var searchResults = await collection.VectorizedSearchAsync(promptEmbedding, new() { Top = 1 }, context.CancellationToken); + var searchResult = (await searchResults.Results.FirstOrDefaultAsync())?.Record; // If result exists, return it. if (searchResult is not null) { // Override function result. This will prevent calling LLM and will return result immediately. 
- context.Result = new FunctionResult(context.Function, searchResult.Metadata.AdditionalMetadata) + context.Result = new FunctionResult(context.Function, searchResult.Result) { - Metadata = new Dictionary { [RecordIdKey] = searchResult.Metadata.Id } + Metadata = new Dictionary { [RecordIdKey] = searchResult.Id } }; } } @@ -196,7 +217,10 @@ public async Task OnPromptRenderAsync(PromptRenderContext context, Func /// Filter which is executed during function invocation. /// - public sealed class FunctionCacheFilter(ISemanticTextMemory semanticTextMemory) : CacheBaseFilter, IFunctionInvocationFilter + public sealed class FunctionCacheFilter( + ITextEmbeddingGenerationService textEmbeddingGenerationService, + IVectorStore vectorStore) + : CacheBaseFilter, IFunctionInvocationFilter { public async Task OnFunctionInvocationAsync(FunctionInvocationContext context, Func next) { @@ -212,12 +236,22 @@ public async Task OnFunctionInvocationAsync(FunctionInvocationContext context, F // Get cache record id if result was cached previously or generate new id. var recordId = context.Result.Metadata?.GetValueOrDefault(RecordIdKey, Guid.NewGuid().ToString()) as string; + // Generate prompt embedding. + var promptEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(context.Result.RenderedPrompt); + // Cache rendered prompt and LLM result. - await semanticTextMemory.SaveInformationAsync( - CollectionName, - context.Result.RenderedPrompt, - recordId!, - additionalMetadata: result.ToString()); + var collection = vectorStore.GetCollection(CollectionName); + await collection.CreateCollectionIfNotExistsAsync(); + + var cacheRecord = new CacheRecord + { + Id = recordId!, + Prompt = context.Result.RenderedPrompt, + Result = result.ToString(), + PromptEmbedding = promptEmbedding + }; + + await collection.UpsertAsync(cacheRecord, cancellationToken: context.CancellationToken); } } } @@ -245,4 +279,23 @@ private async Task ExecuteAsync(Kernel kernel, string title, str } #endregion + + #region Vector Store Record + + private sealed class CacheRecord + { + [VectorStoreRecordKey] + public string Id { get; set; } + + [VectorStoreRecordData] + public string Prompt { get; set; } + + [VectorStoreRecordData] + public string Result { get; set; } + + [VectorStoreRecordVector(Dimensions: 1536)] + public ReadOnlyMemory PromptEmbedding { get; set; } + } + + #endregion } diff --git a/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs b/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs index 2ac3fce56b23..a5b9917e6ce0 100644 --- a/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs +++ b/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs @@ -2,10 +2,11 @@ using System.Runtime.CompilerServices; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.VectorData; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.InMemory; using Microsoft.SemanticKernel.Embeddings; -using Microsoft.SemanticKernel.Memory; using Microsoft.SemanticKernel.PromptTemplates.Handlebars; using Microsoft.SemanticKernel.Services; @@ -97,11 +98,11 @@ public async Task ReducePromptSizeAsync() // Add few-shot prompt optimization filter. // The filter uses in-memory store for vector similarity search and text embedding generation service to generate embeddings. 
- var memoryStore = new VolatileMemoryStore(); + var vectorStore = new InMemoryVectorStore(); var textEmbeddingGenerationService = kernel.GetRequiredService(); // Register optimization filter. - kernel.PromptRenderFilters.Add(new FewShotPromptOptimizationFilter(memoryStore, textEmbeddingGenerationService)); + kernel.PromptRenderFilters.Add(new FewShotPromptOptimizationFilter(vectorStore, textEmbeddingGenerationService)); // Get result again and compare the usage. result = await kernel.InvokeAsync(function, arguments); @@ -167,7 +168,7 @@ public async Task LLMCascadeAsync() /// which are similar to original request. ///
private sealed class FewShotPromptOptimizationFilter( - IMemoryStore memoryStore, + IVectorStore vectorStore, ITextEmbeddingGenerationService textEmbeddingGenerationService) : IPromptRenderFilter { /// @@ -176,7 +177,7 @@ private sealed class FewShotPromptOptimizationFilter( private const int TopN = 5; /// - /// Collection name to use in memory store. + /// Collection name to use in vector store. /// private const string CollectionName = "examples"; @@ -188,30 +189,38 @@ public async Task OnPromptRenderAsync(PromptRenderContext context, Func 0 } && !string.IsNullOrEmpty(request)) { - var memoryRecords = new List(); + var exampleRecords = new List(); // Generate embedding for each example. var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(examples); - // Create memory record instances with example text and embedding. + // Create vector store record instances with example text and embedding. for (var i = 0; i < examples.Count; i++) { - memoryRecords.Add(MemoryRecord.LocalRecord(Guid.NewGuid().ToString(), examples[i], "description", embeddings[i])); + exampleRecords.Add(new ExampleRecord + { + Id = Guid.NewGuid().ToString(), + Example = examples[i], + ExampleEmbedding = embeddings[i] + }); } - // Create collection and upsert all memory records for search. + // Create collection and upsert all vector store records for search. // It's possible to do it only once and re-use the same examples for future requests. - await memoryStore.CreateCollectionAsync(CollectionName); - await memoryStore.UpsertBatchAsync(CollectionName, memoryRecords).ToListAsync(); + var collection = vectorStore.GetCollection(CollectionName); + await collection.CreateCollectionIfNotExistsAsync(context.CancellationToken); + + await collection.UpsertBatchAsync(exampleRecords, cancellationToken: context.CancellationToken).ToListAsync(context.CancellationToken); // Generate embedding for original request. - var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request); + var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request, cancellationToken: context.CancellationToken); // Find top N examples which are similar to original request. - var topNExamples = await memoryStore.GetNearestMatchesAsync(CollectionName, requestEmbedding, TopN).ToListAsync(); + var searchResults = await collection.VectorizedSearchAsync(requestEmbedding, new() { Top = TopN }, cancellationToken: context.CancellationToken); + var topNExamples = (await searchResults.Results.ToListAsync(context.CancellationToken)).Select(l => l.Record).ToList(); // Override arguments to use only top N examples, which will be sent to LLM. - context.Arguments["Examples"] = topNExamples.Select(l => l.Item1.Metadata.Text); + context.Arguments["Examples"] = topNExamples.Select(l => l.Example); } // Continue prompt rendering operation. 
@@ -305,4 +314,16 @@ public async IAsyncEnumerable GetStreamingChatMessa yield return new StreamingChatMessageContent(AuthorRole.Assistant, mockResult); } } + + private sealed class ExampleRecord + { + [VectorStoreRecordKey] + public string Id { get; set; } + + [VectorStoreRecordData] + public string Example { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory ExampleEmbedding { get; set; } + } } diff --git a/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs b/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs index 861034b5d336..695ff675e17f 100644 --- a/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs +++ b/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs @@ -3,11 +3,11 @@ using System.ComponentModel; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using Microsoft.Extensions.VectorData; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Embeddings; -using Microsoft.SemanticKernel.Memory; namespace Optimization; @@ -40,8 +40,8 @@ public async Task UsingVectorSearchWithKernelAsync() var logger = this.LoggerFactory.CreateLogger(); builder.Services.AddSingleton(logger); - // Add memory store to keep functions and search for the most relevant ones for specific request. - builder.Services.AddSingleton(); + // Add vector store to keep functions and search for the most relevant ones for specific request. + builder.Services.AddInMemoryVectorStore(); // Add helper components defined in this example. builder.Services.AddSingleton(); @@ -114,8 +114,8 @@ public async Task UsingVectorSearchWithChatCompletionAsync() var logger = this.LoggerFactory.CreateLogger(); builder.Services.AddSingleton(logger); - // Add memory store to keep functions and search for the most relevant ones for specific request. - builder.Services.AddSingleton(); + // Add vector store to keep functions and search for the most relevant ones for specific request. + builder.Services.AddInMemoryVectorStore(); // Add helper components defined in this example. builder.Services.AddSingleton(); @@ -257,7 +257,8 @@ Task> GetBestFunctionsAsync( string collectionName, string request, KernelPluginCollection plugins, - int numberOfBestFunctions); + int numberOfBestFunctions, + CancellationToken cancellationToken = default); } /// @@ -265,7 +266,7 @@ Task> GetBestFunctionsAsync( /// public interface IPluginStore { - Task SaveAsync(string collectionName, KernelPluginCollection plugins); + Task SaveAsync(string collectionName, KernelPluginCollection plugins, CancellationToken cancellationToken = default); } public class FunctionKeyProvider : IFunctionKeyProvider @@ -280,62 +281,67 @@ public string GetFunctionKey(KernelFunction kernelFunction) public class FunctionProvider( ITextEmbeddingGenerationService textEmbeddingGenerationService, - IMemoryStore memoryStore, + IVectorStore vectorStore, IFunctionKeyProvider functionKeyProvider) : IFunctionProvider { public async Task> GetBestFunctionsAsync( string collectionName, string request, KernelPluginCollection plugins, - int numberOfBestFunctions) + int numberOfBestFunctions, + CancellationToken cancellationToken = default) { // Generate embedding for original request. 
- var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request); + var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request, cancellationToken: cancellationToken); + + var collection = vectorStore.GetCollection(collectionName); + await collection.CreateCollectionIfNotExistsAsync(cancellationToken); // Find best functions to call for original request. - var memoryRecordKeys = await memoryStore - .GetNearestMatchesAsync(collectionName, requestEmbedding, limit: numberOfBestFunctions) - .Select(l => l.Item1.Key) - .ToListAsync(); + var searchResults = await collection.VectorizedSearchAsync(requestEmbedding, new() { Top = numberOfBestFunctions }, cancellationToken); + var recordKeys = (await searchResults.Results.ToListAsync(cancellationToken)).Select(l => l.Record.Id); return plugins .SelectMany(plugin => plugin) - .Where(function => memoryRecordKeys.Contains(functionKeyProvider.GetFunctionKey(function))) + .Where(function => recordKeys.Contains(functionKeyProvider.GetFunctionKey(function))) .ToList(); } } public class PluginStore( ITextEmbeddingGenerationService textEmbeddingGenerationService, - IMemoryStore memoryStore, + IVectorStore vectorStore, IFunctionKeyProvider functionKeyProvider) : IPluginStore { - public async Task SaveAsync(string collectionName, KernelPluginCollection plugins) + public async Task SaveAsync(string collectionName, KernelPluginCollection plugins, CancellationToken cancellationToken = default) { // Collect data about imported functions in kernel. - var memoryRecords = new List(); + var functionRecords = new List(); var functionsData = GetFunctionsData(plugins); // Generate embedding for each function. var embeddings = await textEmbeddingGenerationService - .GenerateEmbeddingsAsync(functionsData.Select(l => l.TextToVectorize).ToArray()); + .GenerateEmbeddingsAsync(functionsData.Select(l => l.TextToVectorize).ToArray(), cancellationToken: cancellationToken); - // Create memory record instances with function information and embedding. + // Create vector store record instances with function information and embedding. for (var i = 0; i < functionsData.Count; i++) { - var (function, textToVectorize) = functionsData[i]; + var (function, functionInfo) = functionsData[i]; - memoryRecords.Add(MemoryRecord.LocalRecord( - id: functionKeyProvider.GetFunctionKey(function), - text: textToVectorize, - description: null, - embedding: embeddings[i])); + functionRecords.Add(new FunctionRecord + { + Id = functionKeyProvider.GetFunctionKey(function), + FunctionInfo = functionInfo, + FunctionInfoEmbedding = embeddings[i] + }); } - // Create collection and upsert all memory records for search. + // Create collection and upsert all vector store records for search. // It's possible to do it only once and re-use the same functions for future requests. 
- await memoryStore.CreateCollectionAsync(collectionName); - await memoryStore.UpsertBatchAsync(collectionName, memoryRecords).ToListAsync(); + var collection = vectorStore.GetCollection(collectionName); + await collection.CreateCollectionIfNotExistsAsync(cancellationToken); + + await collection.UpsertBatchAsync(functionRecords, cancellationToken: cancellationToken).ToListAsync(cancellationToken); } private static List<(KernelFunction Function, string TextToVectorize)> GetFunctionsData(KernelPluginCollection plugins) @@ -405,4 +411,20 @@ private sealed class CalendarPlugin } #endregion + + #region Vector Store Record + + private sealed class FunctionRecord + { + [VectorStoreRecordKey] + public string Id { get; set; } + + [VectorStoreRecordData] + public string FunctionInfo { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory FunctionInfoEmbedding { get; set; } + } + + #endregion } diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/README.md b/dotnet/samples/Demos/OnnxSimpleRAG/README.md index da6a3ad726ff..07882c57d4bc 100644 --- a/dotnet/samples/Demos/OnnxSimpleRAG/README.md +++ b/dotnet/samples/Demos/OnnxSimpleRAG/README.md @@ -13,7 +13,7 @@ In this example we setup two ONNX AI Services: - [Chat Completion Service](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/IChatCompletionService.cs) - Using the Chat Completion Service from [Onnx Connector](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs) to generate responses from the Local Model. - [Text Embeddings Generation Service]() - Using the Text Embeddings Generation Service from [Onnx Connector](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Onnx/BertOnnxTextEmbeddingGenerationService.cs) to generate -- [Memory Store](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryStore.cs) Using Memory Store Service with [VolatileMemoryStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Plugins/Plugins.Memory/VolatileMemoryStore.cs) to store and retrieve embeddings in memory for RAG. +- [Vector Store](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/IVectorStore.cs) Using Vector Store Service with [InMemoryVectorStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStore.cs) to store and retrieve embeddings in memory for RAG. - [Semantic Text Memory](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Core/Memory/SemanticTextMemory.cs) to manage the embeddings in memory for RAG. - [Text Memory Plugin](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs) to enable memory retrieval functions (Recall) to be used with Prompts for RAG. From 29525aaed6270aeb3e1ad68f87e1bd650255d171 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 17:49:09 +0000 Subject: [PATCH 14/23] .Net: Bump Npgsql in /dotnet (#9819) Bumps [Npgsql](https://github.com/npgsql/npgsql). Updates `Npgsql` from 8.0.5 to 8.0.6
Release notes

Sourced from Npgsql's releases.

v8.0.6

The full list of changes is available here.

Full Changelog: https://github.com/npgsql/npgsql/compare/v8.0.5...v8.0.6

Commits
  • c4c9d0d Sync CI macos setup to latest main
  • f5a7042 Stop testing on netcoreapp3.1 (doesn't work on modern mac)
  • 3ad6b5d Test tweaks
  • d3a9b18 Disallow dotnet SDK major version roll forward
  • c607726 Some dependency versions changes to get rid of transitive CVE errors
  • e1ae4b3 Bump dotnet SDK to 8.0.404
  • 7232c93 Bump macos version to 15 in CI
  • 82c6926 Make the default value for json with Json.NET a string (#5914)
  • 882eab9 Fix compilation after e6c166b
  • 56688dd Fix not throwing due to overflow while writing NodaTime's period (#5894)
  • Additional commits viewable in compare view

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:

- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com> --- dotnet/Directory.Packages.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index ed3091b0fe06..a17d7ee5cccc 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -19,7 +19,7 @@ - + From 6639ff7b449090380f61c5a78981ec807f341e7b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:54:15 +0000 Subject: [PATCH 15/23] .Net: Bump Roslynator.Formatting.Analyzers from 4.12.0 to 4.12.9 in /dotnet (#9814) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [Roslynator.Formatting.Analyzers](https://github.com/dotnet/roslynator) from 4.12.0 to 4.12.9.
Release notes

Sourced from Roslynator.Formatting.Analyzers's releases.

v4.12.9

Fixed

Changed

  • Update analyzer RCS1077 (PR)
    • Do not suggest to change list.FirstOrDefault(predicate) to list.Find(predicate). Performance gain is negligible and actually FirstOrDefault can be even faster on .NET 9 (see related issue for more details).
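    For context, a minimal C# sketch of the two equivalent calls this rule compares (the list and predicate below are invented for illustration, not taken from Roslynator):

    ```csharp
    using System;
    using System.Collections.Generic;
    using System.Linq;

    var numbers = new List<int> { 1, 2, 3, 4 };

    // LINQ extension method; works on any IEnumerable<T>.
    int firstEven = numbers.FirstOrDefault(n => n % 2 == 0);

    // List<T>.Find, which RCS1077 previously suggested as the replacement.
    int firstEvenViaFind = numbers.Find(n => n % 2 == 0);

    Console.WriteLine($"{firstEven} {firstEvenViaFind}"); // prints "2 2"
    ```

    After this update the analyzer leaves the `FirstOrDefault` form alone, since the two perform comparably.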

v4.12.8

Fixed

v4.12.7

Fixed

Changed

v4.12.6

Added

Fixed

... (truncated)

Changelog

Sourced from Roslynator.Formatting.Analyzers's changelog.

[4.12.9] - 2024-10-25

Fixed

Changed

  • Update analyzer RCS1077 (PR)
    • Do not suggest to change list.FirstOrDefault(predicate) to list.Find(predicate). Performance gain is negligible and actually FirstOrDefault can be even faster on .NET 9 (see related issue for more details).

[4.12.8] - 2024-10-11

Fixed

[4.12.7] - 2024-10-01

Fixed

Changed

[4.12.6] - 2024-09-23

Added

... (truncated)

Commits

Most Recent Ignore Conditions Applied to This Pull Request

| Dependency Name | Ignore Conditions |
| --- | --- |
| Roslynator.Formatting.Analyzers | [>= 4.5.a, < 4.6] |
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Roslynator.Formatting.Analyzers&package-manager=nuget&previous-version=4.12.0&new-version=4.12.9)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:

- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Dmytro Struk <13853051+dmytrostruk@users.noreply.github.com> --- dotnet/Directory.Packages.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index a17d7ee5cccc..18d29da0e49d 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -155,7 +155,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + all runtime; build; native; contentfiles; analyzers; buildtransitive From b9263bbcb099468a60d85203c198277a2d187606 Mon Sep 17 00:00:00 2001 From: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> Date: Wed, 27 Nov 2024 19:10:32 +0000 Subject: [PATCH 16/23] .Net: Version 1.31.0 (#9835) ### Motivation and Context Version bump for new release ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --- dotnet/nuget/nuget-package.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/nuget/nuget-package.props b/dotnet/nuget/nuget-package.props index b7e6fe3cb740..48b6247c2593 100644 --- a/dotnet/nuget/nuget-package.props +++ b/dotnet/nuget/nuget-package.props @@ -1,7 +1,7 @@ - 1.30.0 + 1.31.0 $(VersionPrefix)-$(VersionSuffix) $(VersionPrefix) From 0f61101f76c96bec266bdc6d463ce0fe628e53e6 Mon Sep 17 00:00:00 2001 From: SergeyMenshykh <68852919+SergeyMenshykh@users.noreply.github.com> Date: Thu, 28 Nov 2024 15:46:35 +0000 Subject: [PATCH 17/23] .Net: Add OpenAPI operations filtering samples (#9834) 1. Add samples demonstrating the ways OpenAPI operations can be filtered. 2. Use OpenAIPromptExecutionSettings instead of AzureOpenAIPromptExecutionSettings with OpenAI connector. --- .../Plugins/OpenApiPlugin_Filtering.cs | 192 ++++++++++++++++++ .../Plugins/OpenApiPlugin_PayloadHandling.cs | 14 +- dotnet/samples/Concepts/README.md | 4 + 3 files changed, 203 insertions(+), 7 deletions(-) create mode 100644 dotnet/samples/Concepts/Plugins/OpenApiPlugin_Filtering.cs diff --git a/dotnet/samples/Concepts/Plugins/OpenApiPlugin_Filtering.cs b/dotnet/samples/Concepts/Plugins/OpenApiPlugin_Filtering.cs new file mode 100644 index 000000000000..6c8a946400d8 --- /dev/null +++ b/dotnet/samples/Concepts/Plugins/OpenApiPlugin_Filtering.cs @@ -0,0 +1,192 @@ +// Copyright (c) Microsoft. All rights reserved. + +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.OpenAI; +using Microsoft.SemanticKernel.Plugins.OpenApi; + +namespace Plugins; + +/// +/// These samples show different ways OpenAPI operations can be filtered out from the OpenAPI document before creating a plugin out of it. 
+/// +public sealed class OpenApiPlugin_Filtering : BaseTest +{ + private readonly Kernel _kernel; + private readonly ITestOutputHelper _output; + + public OpenApiPlugin_Filtering(ITestOutputHelper output) : base(output) + { + IKernelBuilder builder = Kernel.CreateBuilder(); + builder.AddOpenAIChatCompletion( + modelId: TestConfiguration.OpenAI.ChatModelId, + apiKey: TestConfiguration.OpenAI.ApiKey); + + this._kernel = builder.Build(); + + this._output = output; + } + + /// + /// This sample demonstrates how to filter out specified operations from an OpenAPI plugin based on an exclusion list. + /// In this scenario, only the `listRepairs` operation from the RepairService OpenAPI plugin is allowed to be invoked, + /// while operations such as `createRepair`, `updateRepair`, and `deleteRepair` are excluded. + /// Note: The filtering occurs at the pre-parsing stage, which is more efficient from a resource utilization perspective. + /// + [Fact] + public async Task ExcludeOperationsBasedOnExclusionListAsync() + { + // The RepairService OpenAPI plugin being imported below includes the following operations: `listRepairs`, `createRepair`, `updateRepair`, and `deleteRepair`. + // However, to meet our business requirements, we need to restrict state-modifying operations such as creating, updating, and deleting repairs, allowing only non-state-modifying operations like listing repairs. + // To enforce this restriction, we will exclude the `createRepair`, `updateRepair`, and `deleteRepair` operations from the OpenAPI document prior to importing the plugin. + OpenApiFunctionExecutionParameters executionParameters = new() + { + OperationsToExclude = ["createRepair", "updateRepair", "deleteRepair"] + }; + + // Import the RepairService OpenAPI plugin and filter out all operations except `listRepairs` one. + await this._kernel.ImportPluginFromOpenApiAsync( + pluginName: "RepairService", + filePath: "Resources/Plugins/RepairServicePlugin/repair-service.json", + executionParameters: executionParameters); + + // Tell the AI model not to call any function and show the list of functions it can call instead. + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.None() }; + FunctionResult result = await this._kernel.InvokePromptAsync(promptTemplate: "Show me the list of the functions you can call", arguments: new KernelArguments(settings)); + + this._output.WriteLine(result); + + // The AI model output: + // I can call the following functions in the current context: + // 1. `functions.RepairService - listRepairs`: Returns a list of repairs with their details and images. It takes an optional parameter `assignedTo` to filter the repairs based on the assigned individual. + // I can also utilize the `multi_tool_use.parallel` function to execute multiple tools in parallel if required. + } + + /// + /// This sample demonstrates how to include specified operations from an OpenAPI plugin based on an inclusion list. + /// In this scenario, only the `createRepair` and `updateRepair` operations from the RepairService OpenAPI plugin are allowed to be invoked, + /// while operations such as `listRepairs` and `deleteRepair` are excluded. + /// Note: The filtering occurs at the pre-parsing stage, which is more efficient from a resource utilization perspective. 
+ /// + [Fact] + public async Task ImportOperationsBasedOnInclusionListAsync() + { + OpenApiDocumentParser parser = new(); + using StreamReader reader = System.IO.File.OpenText("Resources/Plugins/RepairServicePlugin/repair-service.json"); + + // The RepairService OpenAPI plugin, parsed and imported below, has the following operations: `listRepairs`, `createRepair`, `updateRepair`, and `deleteRepair`. + // However, for our business scenario, we only want to permit the AI model to invoke the `createRepair` and `updateRepair` operations, excluding all others. + // To accomplish this, we will define an inclusion list that specifies the allowed operations and filters out the rest. + List operationsToInclude = ["createRepair", "updateRepair"]; + + // The selection predicate is initialized to evaluate each operation in the OpenAPI document and include only those specified in the inclusion list. + OpenApiDocumentParserOptions parserOptions = new() + { + OperationSelectionPredicate = (OperationSelectionPredicateContext context) => operationsToInclude.Contains(context.Id!) + }; + + // Parse the OpenAPI document. + RestApiSpecification specification = await parser.ParseAsync(stream: reader.BaseStream, options: parserOptions); + + // Import the OpenAPI document specification. + this._kernel.ImportPluginFromOpenApi("RepairService", specification); + + // Tell the AI model not to call any function and show the list of functions it can call instead. + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.None() }; + FunctionResult result = await this._kernel.InvokePromptAsync(promptTemplate: "Show me the list of the functions you can call", arguments: new KernelArguments(settings)); + + this._output.WriteLine(result); + + // The AI model output: + // Here are the functions I can call for you: + // 1. **RepairService - createRepair **: + // -Adds a new repair to the list with details about the repair. + // 2. **RepairService - updateRepair **: + // -Updates an existing repair in the list with new details. + // If you need to perform any repair - related actions such as creating or updating repair records, feel free to ask! + } + + /// + /// This sample demonstrates how to selectively include certain operations from an OpenAPI plugin based on HTTP method used. + /// In this scenario, only `GET` operations from the RepairService OpenAPI plugin are allowed for invocation, + /// while `POST`, `PUT`, and `DELETE` operations are excluded. + /// Note: The filtering occurs at the pre-parsing stage, which is more efficient from a resource utilization perspective. + /// + [Fact] + public async Task ImportOperationsBasedOnMethodAsync() + { + OpenApiDocumentParser parser = new(); + using StreamReader reader = System.IO.File.OpenText("Resources/Plugins/RepairServicePlugin/repair-service.json"); + + // The parsed RepairService OpenAPI plugin includes operations such as `listRepairs`, `createRepair`, `updateRepair`, and `deleteRepair`. + // However, for our business requirements, we only permit non-state-modifying operations like listing repairs, excluding all others. + // To achieve this, we set up the selection predicate to evaluate each operation in the OpenAPI document, including only those with the `GET` method. + // Note: The selection predicate can assess operations based on operation ID, method, path, and description. 
+ OpenApiDocumentParserOptions parserOptions = new() + { + OperationSelectionPredicate = (OperationSelectionPredicateContext context) => context.Method == "Get" + }; + + // Parse the OpenAPI document. + RestApiSpecification specification = await parser.ParseAsync(stream: reader.BaseStream, options: parserOptions); + + // Import the OpenAPI document specification. + this._kernel.ImportPluginFromOpenApi("RepairService", specification); + + // Tell the AI model not to call any function and show the list of functions it can call instead. + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.None() }; + FunctionResult result = await this._kernel.InvokePromptAsync(promptTemplate: "Show me the list of the functions you can call", arguments: new KernelArguments(settings)); + + this._output.WriteLine(result); + + // The AI model output: + // I can call the following function: + // 1. `RepairService - listRepairs`: This function returns a list of repairs with their details and images. + // It can accept an optional parameter `assignedTo` to filter the repairs assigned to a specific person. + } + + /// + /// This example illustrates how to selectively exclude specific operations from an OpenAPI plugin based on the HTTP method used and the presence of a payload. + /// In this context, GET operations that are defined with a payload, which contradicts the HTTP semantic of being idempotent, are not imported. + /// Note: The filtering happens at the post-parsing stage, which is less efficient in terms of resource utilization. + /// + [Fact] + public async Task FilterOperationsAtPostParsingStageAsync() + { + OpenApiDocumentParser parser = new(); + using StreamReader reader = System.IO.File.OpenText("Resources/Plugins/RepairServicePlugin/repair-service.json"); + + // Parse the OpenAPI document. + RestApiSpecification specification = await parser.ParseAsync(stream: reader.BaseStream); + + // The parsed RepairService OpenAPI plugin includes operations like `listRepairs`, `createRepair`, `updateRepair`, and `deleteRepair`. + // However, based on our business requirements, we need to identify all GET operations that are defined as non-idempotent (i.e., have a payload), + // log a warning for each of them, and exclude these operations from the import. + // To do this, we will locate all GET operations that contain a payload. + // Note that the RepairService OpenAPI plugin does not have any GET operations with payloads, so no operations will be found in this case. + // However, the code below demonstrates how to identify and exclude such operations if they were present. + IEnumerable operationsToExclude = specification.Operations.Where(o => o.Method == HttpMethod.Get && o.Payload is not null); + + // Exclude operations that are declared as non-idempotent due to having a payload. + foreach (RestApiOperation operation in operationsToExclude) + { + this.Output.WriteLine($"Warning: The `{operation.Id}` operation with `{operation.Method}` has payload which contradicts to being idempotent. This operation will not be imported."); + specification.Operations.Remove(operation); + } + + // Import the OpenAPI document specification. + this._kernel.ImportPluginFromOpenApi("RepairService", specification); + + // Tell the AI model not to call any function and show the list of functions it can call instead. 
+ OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.None() }; + FunctionResult result = await this._kernel.InvokePromptAsync(promptTemplate: "Show me the list of the functions you can call", arguments: new KernelArguments(settings)); + + this._output.WriteLine(result); + + // The AI model output: + // I can call the following functions: + // 1. **RepairService - listRepairs **: Returns a list of repairs with their details and images. + // 2. **RepairService - createRepair **: Adds a new repair to the list with the given details and image URL. + // 3. **RepairService - updateRepair **: Updates an existing repair with new details and image URL. + // 4. **RepairService - deleteRepair **: Deletes an existing repair from the list using its ID. + } +} diff --git a/dotnet/samples/Concepts/Plugins/OpenApiPlugin_PayloadHandling.cs b/dotnet/samples/Concepts/Plugins/OpenApiPlugin_PayloadHandling.cs index 56a133a6ff53..f69d7919cc31 100644 --- a/dotnet/samples/Concepts/Plugins/OpenApiPlugin_PayloadHandling.cs +++ b/dotnet/samples/Concepts/Plugins/OpenApiPlugin_PayloadHandling.cs @@ -4,7 +4,7 @@ using System.Text; using System.Text.Json; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.Connectors.AzureOpenAI; +using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Plugins.OpenApi; namespace Plugins; @@ -140,7 +140,7 @@ public async Task InvokeOpenApiFunctionWithPayloadProvidedByCallerAsync() await this._kernel.InvokeAsync(createMeetingFunction, arguments); // Example of how to have the createEvent function invoked by the AI - AzureOpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; await this._kernel.InvokePromptAsync("Schedule one hour IT Meeting for October 1st, 2023, at 10:00 AM UTC.", new KernelArguments(settings)); } @@ -201,7 +201,7 @@ public async Task InvokeOpenApiFunctionWithArgumentsForPayloadLeafPropertiesAsyn await this._kernel.InvokeAsync(createMeetingFunction, arguments); // Example of how to have the createEvent function invoked by the AI - AzureOpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; await this._kernel.InvokePromptAsync("Schedule one hour IT Meeting for October 1st, 2023, at 10:00 AM UTC.", new KernelArguments(settings)); } @@ -282,7 +282,7 @@ public async Task InvokeOpenApiFunctionWithArgumentsForPayloadLeafPropertiesWith await this._kernel.InvokeAsync(createMeetingFunction, arguments); // Example of how to have the createEvent function invoked by the AI - AzureOpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; await this._kernel.InvokePromptAsync("Schedule one hour IT Meeting for October 1st, 2023, at 10:00 AM UTC.", new KernelArguments(settings)); } @@ -302,7 +302,7 @@ public async Task InvokeOpenApiFunctionWithArgumentsForPayloadOneOfAsync() }); // Example of how to have the updatePater function invoked by the AI - AzureOpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = 
FunctionChoiceBehavior.Auto() }; Console.WriteLine("\nExpected payload: Dog { breed=Husky, bark=false }"); await this._kernel.InvokePromptAsync("My new dog is a Husky, he is very quiet, please create my pet information.", new KernelArguments(settings)); Console.WriteLine("\nExpected payload: Dog { breed=Dingo, bark=true }"); @@ -331,7 +331,7 @@ public async Task InvokeOpenApiFunctionWithArgumentsForPayloadAllOfAsync() }); // Example of how to have the updatePater function invoked by the AI - AzureOpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; Console.WriteLine("\nExpected payload: { pet_type=dog, breed=Husky, bark=false }"); Console.WriteLine(await this._kernel.InvokePromptAsync("My new dog is a Husky, he is very quiet, please update my pet information.", new KernelArguments(settings))); Console.WriteLine("\nExpected payload: { pet_type=dog, breed=Dingo, bark=true }"); @@ -361,7 +361,7 @@ public async Task InvokeOpenApiFunctionWithArgumentsForPayloadAnyOfAsync() }); // Example of how to have the updatePater function invoked by the AI - AzureOpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; + OpenAIPromptExecutionSettings settings = new() { FunctionChoiceBehavior = FunctionChoiceBehavior.Auto() }; Console.WriteLine("\nExpected payload: { pet_type=Dog, nickname=Fido }"); Console.WriteLine(await this._kernel.InvokePromptAsync("My new dog is named Fido he is 2 years old, please create my pet information.", new KernelArguments(settings))); Console.WriteLine("\nExpected payload: { pet_type=Dog, nickname=Spot age=1 hunts=true }"); diff --git a/dotnet/samples/Concepts/README.md b/dotnet/samples/Concepts/README.md index 5bf0c404adbd..d53367f532d0 100644 --- a/dotnet/samples/Concepts/README.md +++ b/dotnet/samples/Concepts/README.md @@ -163,6 +163,10 @@ dotnet test -l "console;verbosity=detailed" --filter "FullyQualifiedName=ChatCom - [CreatePluginFromOpenApiSpec_Klarna](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/CreatePluginFromOpenApiSpec_Klarna.cs) - [CreatePluginFromOpenApiSpec_RepairService](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/CreatePluginFromOpenApiSpec_RepairService.cs) - [OpenApiPlugin_PayloadHandling](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/OpenApiPlugin_PayloadHandling.cs) +- [OpenApiPlugin_CustomHttpContentReader](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/OpenApiPlugin_CustomHttpContentReader.cs) +- [OpenApiPlugin_Customization](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/OpenApiPlugin_Customization.cs) +- [OpenApiPlugin_Filtering](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/OpenApiPlugin_Filtering.cs) +- [OpenApiPlugin_Telemetry](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/OpenApiPlugin_Telemetry.cs) - [CustomMutablePlugin](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/CustomMutablePlugin.cs) - [DescribeAllPluginsAndFunctions](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/DescribeAllPluginsAndFunctions.cs) - 
[GroundednessChecks](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/Concepts/Plugins/GroundednessChecks.cs) From 6d3497e1a2078f32f0c9ffb5cbf07bbd61a48ff2 Mon Sep 17 00:00:00 2001 From: Sophia Lagerkrans-Pandey <163188263+sophialagerkranspandey@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:56:05 -0800 Subject: [PATCH 18/23] Update COMMUNITY.md (#7568) ### Motivation and Context ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --------- Co-authored-by: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Co-authored-by: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> --- COMMUNITY.md | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/COMMUNITY.md b/COMMUNITY.md index be98d4253ad8..7312b46a275c 100644 --- a/COMMUNITY.md +++ b/COMMUNITY.md @@ -4,29 +4,27 @@ Below are some ways that you can get involved in the SK Community. ## Engage on Github -File issues, submit PRs, and provide feedback and ideas to what you'd like to see from the Semantic Kernel. +- [Discussions](https://github.com/microsoft/semantic-kernel/discussions): Ask questions, provide feedback and ideas to what you'd like to see from the Semantic Kernel. +- [Issues](https://github.com/microsoft/semantic-kernel/issues) - If you find a bug, unexpected behavior or have a feature request, please open an issue. +- [Pull Requests](https://github.com/microsoft/semantic-kernel/pulls) - We welcome contributions! Please see our [Contributing Guide](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) + We do our best to respond to each submission. ## Public Community Office Hours -We regularly have Community Office Hours that are open to the **public** to join. +We regularly have Community Office Hours that are open to the **public** to join. -Add Semantic Kernel events to your calendar - we're running two community calls to cater different timezones for Q&A Office Hours: -* Americas timezone: download the [calendar.ics](https://aka.ms/sk-community-calendar) file. -* Asia Pacific timezone: download the [calendar-APAC.ics](https://aka.ms/sk-community-calendar-apac) file. +Add Semantic Kernel events to your calendar - we're running two community calls to cater different timezones for Q&A Office Hours: -Add Semantic Kernel Development Office Hours for Python and Java to your calendar to help with development: -* Java Development Office Hours: [Java Development Office Hours](https://aka.ms/sk-java-dev-sync) -* Python Development Office Hours: [Python Development Office Hours](https://aka.ms/sk-python-dev-sync) +- Americas timezone: download the [calendar.ics](https://aka.ms/sk-community-calendar) file. +- Asia Pacific timezone: download the [calendar-APAC.ics](https://aka.ms/sk-community-calendar-apac) file. If you have any questions or if you would like to showcase your project(s), please email what you'd like us to cover here: skofficehours[at]microsoft.com. If you are unable to make it live, all meetings will be recorded and posted online. 
-## Join the conversation on Discord +## Engage on our Community Discord -We have a growing and active channel on Discord where you can get help, engage in lively discussion, -and share what you've built with Semantic Kernel! +This is a great place to ask questions, share your projects, and get help from the community. -Join our Discord: -[https://aka.ms/SKDiscord](https://aka.ms/SKDiscord) +Join using our discord link: [aka.ms/SKDiscord](https://aka.ms/SKDiscord) From dc7cb45ed66ccf72e41ab39d7c742181e87c2fef Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Fri, 29 Nov 2024 11:14:39 +0100 Subject: [PATCH 19/23] Python: remove mistaken on_activate func (#9839) ### Motivation and Context Removes an outdated function `on_activate` from the KernelProcessStep class and the one place it was overridden. Fixes #9829 ### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: Co-authored-by: Evan Mattson <35585003+moonbox3@users.noreply.github.com> --- .../step01/step01_processes.py | 39 +++++++++++++------ .../kernel_process/kernel_process_step.py | 4 -- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/python/samples/getting_started_with_processes/step01/step01_processes.py b/python/samples/getting_started_with_processes/step01/step01_processes.py index 738f41212b0d..3ad8dcc04685 100644 --- a/python/samples/getting_started_with_processes/step01/step01_processes.py +++ b/python/samples/getting_started_with_processes/step01/step01_processes.py @@ -37,7 +37,7 @@ class UserInputState(KernelBaseModel): current_input_index: int = 0 -class ScriptedUserInputStep(KernelProcessStep[UserInputState]): +class UserInputStep(KernelProcessStep[UserInputState]): GET_USER_INPUT: ClassVar[str] = "get_user_input" def create_default_state(self) -> "UserInputState": @@ -48,16 +48,11 @@ def populate_user_inputs(self): """Method to be overridden by the user to populate with custom user messages.""" pass - async def on_activate(self): - """This is called during the activation of the process step.""" - self.populate_user_inputs() - async def activate(self, state: KernelProcessStepState[UserInputState]): """Activates the step and sets the state.""" state.state = state.state or self.create_default_state() self.state = state.state self.populate_user_inputs() - pass @kernel_function(name=GET_USER_INPUT) async def get_user_input(self, context: KernelProcessStepContext): @@ -65,9 +60,9 @@ async def get_user_input(self, context: KernelProcessStepContext): if not self.state: raise ValueError("State has not been initialized") - user_message = self.state.user_inputs[self.state.current_input_index] + user_message = input("USER: ") - print(f"USER: {user_message}") + # print(f"USER: {user_message}") if "exit" in user_message: await context.emit_event(process_event=ChatBotEvents.Exit, data=None) @@ -79,7 +74,7 @@ async def get_user_input(self, context: KernelProcessStepContext): await context.emit_event(process_event=CommonEvents.UserInputReceived, data=user_message) -class ChatUserInputStep(ScriptedUserInputStep): +class ScriptedInputStep(UserInputStep): def populate_user_inputs(self): 
"""Override the method to populate user inputs for the chat step.""" if self.state is not None: @@ -89,6 +84,25 @@ def populate_user_inputs(self): self.state.user_inputs.append("How wide is the widest river?") self.state.user_inputs.append("exit") + @kernel_function + async def get_user_input(self, context: KernelProcessStepContext): + """Gets the user input.""" + if not self.state: + raise ValueError("State has not been initialized") + + user_message = self.state.user_inputs[self.state.current_input_index] + + print(f"USER: {user_message}") + + if "exit" in user_message: + await context.emit_event(process_event=ChatBotEvents.Exit, data=None) + return + + self.state.current_input_index += 1 + + # Emit the user input event + await context.emit_event(process_event=CommonEvents.UserInputReceived, data=user_message) + class IntroStep(KernelProcessStep): @kernel_function @@ -146,14 +160,14 @@ async def get_chat_response(self, context: "KernelProcessStepContext", user_mess kernel = Kernel() -async def step01_processes(): +async def step01_processes(scripted: bool = True): kernel.add_service(OpenAIChatCompletion(service_id="default")) process = ProcessBuilder(name="ChatBot") # Define the steps on the process builder based on their types, not concrete objects intro_step = process.add_step(IntroStep) - user_input_step = process.add_step(ChatUserInputStep) + user_input_step = process.add_step(ScriptedInputStep if scripted else UserInputStep) response_step = process.add_step(ChatBotResponseStep) # Define the input event that starts the process and where to send it @@ -186,4 +200,5 @@ async def step01_processes(): if __name__ == "__main__": - asyncio.run(step01_processes()) + # if you want to run this sample with your won input, set the below parameter to False + asyncio.run(step01_processes(scripted=False)) diff --git a/python/semantic_kernel/processes/kernel_process/kernel_process_step.py b/python/semantic_kernel/processes/kernel_process/kernel_process_step.py index 7fd09efbd32d..887dcfac47e1 100644 --- a/python/semantic_kernel/processes/kernel_process/kernel_process_step.py +++ b/python/semantic_kernel/processes/kernel_process/kernel_process_step.py @@ -21,7 +21,3 @@ class KernelProcessStep(ABC, KernelBaseModel, Generic[TState]): async def activate(self, state: "KernelProcessStepState[TState]"): """Activates the step and sets the state.""" pass # pragma: no cover - - async def on_activate(self): - """To be overridden by subclasses if needed.""" - pass # pragma: no cover From d8496da27702b958ad735a06c2b3e7adc50888f8 Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Fri, 29 Nov 2024 12:31:12 +0100 Subject: [PATCH 20/23] Python: improve integration test runs (#9845) ## Motivation and Context Split the integration tests into seperate ones for Memory and Completions Added code to allow the samples to run as well Moved containers to service containers. 
### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- .github/workflows/python-build.yml | 6 +- .../workflows/python-integration-tests.yml | 199 ++++++------ .github/workflows/python-lint.yml | 12 +- .github/workflows/python-test-coverage.yml | 24 +- .github/workflows/python-unit-tests.yml | 14 +- .../chat_completion/simple_chatbot.py | 11 +- .../simple_chatbot_kernel_function.py | 20 +- .../simple_chatbot_logit_bias.py | 12 +- .../simple_chatbot_streaming.py | 12 +- .../simple_chatbot_with_image.py | 25 +- .../setup/chat_completion_services.py | 137 +++++---- .../anthropic_prompt_execution_settings.py | 36 ++- ..._ai_inference_prompt_execution_settings.py | 38 ++- .../bedrock_prompt_execution_settings.py | 34 +- .../google_ai_prompt_execution_settings.py | 36 ++- .../vertex_ai_prompt_execution_settings.py | 39 ++- .../mistral_ai_prompt_execution_settings.py | 42 +-- .../ollama_prompt_execution_settings.py | 15 +- .../onnx_gen_ai_prompt_execution_settings.py | 22 +- .../services/onnx_gen_ai_chat_completion.py | 4 +- .../azure_chat_prompt_execution_settings.py | 4 +- .../open_ai_prompt_execution_settings.py | 74 +++-- .../ai/prompt_execution_settings.py | 6 +- .../contents/kernel_content.py | 4 +- python/tests/conftest.py | 10 + .../audio_to_text/audio_to_text_test_base.py | 2 +- .../completions/chat_completion_test_base.py | 2 +- .../tests/integration/completions/conftest.py | 6 + ...t_chat_completion_with_function_calling.py | 2 +- ...completion_with_image_input_text_output.py | 2 +- .../completions/test_chat_completions.py | 2 +- .../test_conversation_summary_plugin.py | 2 +- .../completions/test_text_completion.py | 2 +- .../embeddings/test_embedding_service_base.py | 2 +- .../test_astradb_memory_store.py | 2 +- .../test_pinecone_memory_store.py | 2 +- .../test_azure_cosmos_db_no_sql.py | 242 +++++++-------- .../text_to_audio/text_to_audio_test_base.py | 2 +- python/tests/samples/samples_utils.py | 31 -- python/tests/samples/test_concepts.py | 291 ++++++++++++++++-- python/tests/samples/test_learn_resources.py | 2 +- python/tests/{integration => }/utils.py | 30 +- 42 files changed, 878 insertions(+), 582 deletions(-) delete mode 100644 python/tests/samples/samples_utils.py rename python/tests/{integration => }/utils.py (63%) diff --git a/.github/workflows/python-build.yml b/.github/workflows/python-build.yml index 1c20a5280720..95fdf159670a 100644 --- a/.github/workflows/python-build.yml +++ b/.github/workflows/python-build.yml @@ -16,14 +16,14 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - name: Check version run: | - echo "Building and uploading Python package version: ${{ github.event.release.tag_name }}" + echo "Building and uploading Python package version: ${{ github.event.release.tag_name }}" - name: Build the package run: cd python && make build - name: Release diff --git a/.github/workflows/python-integration-tests.yml 
b/.github/workflows/python-integration-tests.yml index 7100ad334308..92d20f937193 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -20,7 +20,6 @@ permissions: env: # Configure a constant location for the uv cache UV_CACHE_DIR: /tmp/.uv-cache - HNSWLIB_NO_NATIVE: 1 Python_Integration_Tests: Python_Integration_Tests AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME }} # azure-text-embedding-ada-002 AZURE_OPENAI_CHAT_DEPLOYMENT_NAME: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT_NAME }} @@ -92,8 +91,8 @@ jobs: if: steps.filter.outputs.python != 'true' run: echo "NOT python file" - python-merge-gate: - name: Python Pre-Merge Integration Tests + python-merge-gate-ai-services: + name: Python Pre-Merge Integration Tests - AI Services (incl samples using those) needs: paths-filter if: github.event_name != 'pull_request' && github.event_name != 'schedule' && needs.paths-filter.outputs.pythonChanges == 'true' strategy: @@ -107,21 +106,18 @@ jobs: working-directory: python runs-on: ${{ matrix.os }} environment: "integration" + env: + UV_PYTHON: ${{ matrix.python-version }} + COMPLETIONS_CONCEPT_SAMPLE: "true" steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - - name: Install dependencies with hnswlib native disabled - if: matrix.os == 'macos-latest' && matrix.python-version == '3.11' - run: | - export HNSWLIB_NO_NATIVE=1 - uv sync --all-extras --dev - - name: Install dependencies with hnswlib native enabled - if: matrix.os != 'macos-latest' || matrix.python-version != '3.11' + - name: Install dependencies run: | uv sync --all-extras --dev - name: Install Ollama @@ -152,18 +148,6 @@ jobs: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ vars.AWS_REGION }} - - name: Setup Redis Stack Server - if: matrix.os == 'ubuntu-latest' - run: docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest - - name: Setup Weaviate docker deployment - if: matrix.os == 'ubuntu-latest' - run: docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:1.26.6 - - name: Start Azure Cosmos DB emulator - if: matrix.os == 'windows-latest' - run: | - Write-Host "Launching Cosmos DB Emulator" - Import-Module "$env:ProgramFiles\Azure Cosmos DB Emulator\PSModules\Microsoft.Azure.CosmosDB.Emulator" - Start-CosmosDbEmulator - name: Azure CLI Login if: github.event_name != 'pull_request' uses: azure/login@v2 @@ -171,55 +155,72 @@ jobs: client-id: ${{ secrets.AZURE_CLIENT_ID }} tenant-id: ${{ secrets.AZURE_TENANT_ID }} subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} - - name: Run Integration Tests - Completions - id: run_tests_completions - timeout-minutes: 15 + - name: Run Integration Tests + id: run_tests_ai_services + timeout-minutes: 25 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/completions -v --junitxml=pytest-completions.xml - - name: Run Integration Tests - Embeddings - id: run_tests_embeddings - timeout-minutes: 5 - shell: bash + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/completions ./tests/integration/embeddings ./tests/samples ./tests/integration/cross_language + + python-merge-gate-memory: + name: Python Pre-Merge 
Integration Tests - Memory (incl samples using those) + needs: paths-filter + if: github.event_name != 'pull_request' && github.event_name != 'schedule' && needs.paths-filter.outputs.pythonChanges == 'true' + strategy: + max-parallel: 1 + fail-fast: false + matrix: + python-version: ["3.11"] + os: [ubuntu-latest] + defaults: + run: + working-directory: python + runs-on: ${{ matrix.os }} + environment: "integration" + env: + UV_PYTHON: ${{ matrix.python-version }} + MEMORY_CONCEPT_SAMPLE: "true" + # Service containers to run with for the memory connectors, this only works on Ubuntu + services: + # Label used to access the service container + redis: + # Docker Hub image + image: redis/redis-stack-server:latest + ports: + # Opens tcp port 6379 on the host and service container + - 6379:6379 + weaviate: + image: cr.weaviate.io/semitechnologies/weaviate:1.26.6 + ports: + - 8080:8080 + - 50051:50051 + steps: + - uses: actions/checkout@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v4 + with: + version: "0.5.x" + enable-cache: true + cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} + - name: Install dependencies run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings -v --junitxml=pytest-embeddings.xml - - name: Run Integration Tests - Memory + uv sync --all-extras --dev + - name: Azure CLI Login + if: github.event_name != 'pull_request' + uses: azure/login@v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + - name: Run Integration Tests id: run_tests_memory timeout-minutes: 10 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/memory -v --junitxml=pytest-memory.xml - - name: Run Integration Tests - Cross Language - id: run_tests_cross_language - timeout-minutes: 5 - shell: bash - run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/cross_language -v --junitxml=pytest-cross.xml - - name: Run Integration Tests - Planning - id: run_tests_planning - timeout-minutes: 5 - shell: bash - run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/planning -v --junitxml=pytest-planning.xml - - name: Run Integration Tests - Samples - id: run_tests_samples - timeout-minutes: 5 - shell: bash - run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/samples -v --junitxml=pytest-samples.xml - - name: Surface failing tests - if: always() - uses: pmeier/pytest-results-action@main - with: - path: python/pytest-*.xml - summary: true - display-options: fEX - fail-on-empty: true - title: Test results - - name: Minimize uv cache - run: uv cache prune --ci + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/memory ./tests/samples python-integration-tests: + name: Python Integration Tests - Scheduled run needs: paths-filter if: (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && needs.paths-filter.outputs.pythonChanges == 'true' strategy: @@ -233,21 +234,33 @@ jobs: working-directory: python runs-on: ${{ matrix.os }} environment: "integration" + env: + UV_PYTHON: ${{ matrix.python-version }} + MEMORY_CONCEPT_SAMPLE: "true" + COMPLETIONS_CONCEPT_SAMPLE: "true" + # Service containers to run with for the memory connectors, this only works on Ubuntu + services: + # Label used to access the service container + redis: + # Docker Hub image + image: redis/redis-stack-server:latest + 
ports: + # Opens tcp port 6379 on the host and service container + - 6379:6379 + weaviate: + image: cr.weaviate.io/semitechnologies/weaviate:1.26.6 + ports: + - 8080:8080 + - 50051:50051 steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - - name: Install dependencies with hnswlib native disabled - if: matrix.os == 'macos-latest' && matrix.python-version == '3.11' - run: | - export HNSWLIB_NO_NATIVE=1 - uv sync --all-extras --dev - - name: Install dependencies with hnswlib native enabled - if: matrix.os != 'macos-latest' || matrix.python-version != '3.11' + - name: Install dependencies run: | uv sync --all-extras --dev - name: Install Ollama @@ -278,12 +291,6 @@ jobs: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: ${{ vars.AWS_REGION }} - - name: Setup Redis Stack Server - if: matrix.os == 'ubuntu-latest' - run: docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest - - name: Setup Weaviate docker deployment - if: matrix.os == 'ubuntu-latest' - run: docker run -d -p 8080:8080 -p 50051:50051 cr.weaviate.io/semitechnologies/weaviate:1.26.6 - name: Start Azure Cosmos DB emulator if: matrix.os == 'windows-latest' run: | @@ -302,48 +309,37 @@ jobs: timeout-minutes: 10 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/completions -v --junitxml=pytest-completions.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/completions - name: Run Integration Tests - Embeddings id: run_tests_embeddings timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings -v --junitxml=pytest-embeddings.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/embeddings - name: Run Integration Tests - Memory id: run_tests_memory timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/memory -v --junitxml=pytest-memory.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/memory - name: Run Integration Tests - Cross Language id: run_tests_cross_language timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/cross_language -v --junitxml=pytest-cross.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/cross_language - name: Run Integration Tests - Planning id: run_tests_planning timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/integration/planning -v --junitxml=pytest-planning.xml + uv run pytest -v -n logical --dist loadfile --dist worksteal ./tests/integration/planning - name: Run Integration Tests - Samples id: run_tests_samples timeout-minutes: 5 shell: bash run: | - uv run pytest -n logical --dist loadfile --dist worksteal ./tests/samples -v --junitxml=pytest-samples.xml - - name: Surface failing tests - if: always() - uses: pmeier/pytest-results-action@main - with: - path: python/pytest-*.xml - summary: true - display-options: fEX - fail-on-empty: true - title: Test results - - name: Minimize uv cache - run: uv cache prune --ci + uv run pytest -v -n logical --dist loadfile --dist worksteal 
./tests/samples # This final job is required to satisfy the merge queue. It must only run (or succeed) if no tests failed python-integration-tests-check: @@ -352,7 +348,12 @@ jobs: strategy: max-parallel: 1 fail-fast: false - needs: [python-merge-gate, python-integration-tests] + needs: + [ + python-merge-gate-ai-services, + python-merge-gate-memory, + python-integration-tests, + ] steps: - name: Get Date shell: bash @@ -399,7 +400,7 @@ jobs: dry_run: ${{ env.run_type != 'Daily' && env.run_type != 'Manual'}} job: ${{ toJson(job) }} steps: ${{ toJson(steps) }} - overwrite: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" + title: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" - name: Microsoft Teams Notification (Dry Run) uses: skitionek/notify-microsoft-teams@master @@ -409,4 +410,4 @@ jobs: dry_run: ${{ env.run_type != 'Daily' && env.run_type != 'Manual'}} job: ${{ toJson(job) }} steps: ${{ toJson(steps) }} - overwrite: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" + title: "{title: ` ${{ env.run_type }}: ${{ env.date }} `, text: ` ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`}" diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index f3c945ce263d..18444e097b6c 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -2,13 +2,13 @@ name: Python Code Quality Checks on: workflow_dispatch: pull_request: - branches: [ "main", "feature*" ] + branches: ["main", "feature*"] paths: - - 'python/**' + - "python/**" jobs: pre-commit: - if: '!cancelled()' + if: "!cancelled()" strategy: fail-fast: false matrix: @@ -25,9 +25,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - name: Install the project @@ -38,5 +38,3 @@ jobs: extra_args: --config python/.pre-commit-config.yaml --all-files - name: Run Mypy run: uv run mypy -p semantic_kernel --config-file mypy.ini - - name: Minimize uv cache - run: uv cache prune --ci diff --git a/.github/workflows/python-test-coverage.yml b/.github/workflows/python-test-coverage.yml index 4b5f6ea1778c..d2aebc3796c6 100644 --- a/.github/workflows/python-test-coverage.yml +++ b/.github/workflows/python-test-coverage.yml @@ -30,25 +30,19 @@ jobs: - uses: actions/checkout@v4 - name: Setup filename variables run: echo "FILE_ID=${{ github.event.number }}" >> $GITHUB_ENV - - name: Download coverage - uses: dawidd6/action-download-artifact@v6 + - name: Download Files + uses: actions/download-artifact@v4 with: - name: python-coverage-${{ env.FILE_ID }}.txt - github_token: ${{ secrets.GH_ACTIONS_PR_WRITE }} - workflow: python-unit-tests.yml - search_artifacts: true - if_no_artifact_found: warn - - name: Download pytest - uses: dawidd6/action-download-artifact@v6 - with: - name: pytest-${{ env.FILE_ID }}.xml - github_token: ${{ secrets.GH_ACTIONS_PR_WRITE }} - workflow: python-unit-tests.yml - search_artifacts: true - if_no_artifact_found: warn + github-token: ${{ secrets.GH_ACTIONS_PR_WRITE }} + run-id: ${{ github.event.workflow_run.id }} + path: python/ + merge-multiple: true + - name: 
Display structure of downloaded files + run: ls python/ - name: Pytest coverage comment id: coverageComment uses: MishaKav/pytest-coverage-comment@main + continue-on-error: true with: github-token: ${{ secrets.GH_ACTIONS_PR_WRITE }} pytest-coverage-path: python-coverage.txt diff --git a/.github/workflows/python-unit-tests.yml b/.github/workflows/python-unit-tests.yml index ef1f481ae769..aec1937984f5 100644 --- a/.github/workflows/python-unit-tests.yml +++ b/.github/workflows/python-unit-tests.yml @@ -42,9 +42,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ matrix.python-version }} - name: Install the project @@ -62,8 +62,6 @@ jobs: display-options: fEX fail-on-empty: true title: Test results - - name: Minimize uv cache - run: uv cache prune --ci python-test-coverage: name: Python Test Coverage runs-on: [ubuntu-latest] @@ -80,15 +78,15 @@ jobs: - name: Setup filename variables run: echo "FILE_ID=${{ github.event.number }}" >> $GITHUB_ENV - name: Set up uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.4.30" + version: "0.5.x" enable-cache: true cache-suffix: ${{ runner.os }}-${{ env.UV_PYTHON }} - name: Install the project run: uv sync --all-extras --dev - name: Test with pytest - run: uv run --frozen pytest -q --junitxml=pytest.xml --cov=semantic_kernel --cov-report=term-missing:skip-covered ./tests/unit | tee python-coverage.txt + run: uv run --frozen pytest -q --junitxml=pytest.xml --cov=semantic_kernel --cov-report=term-missing:skip-covered ./tests/unit | tee python-coverage.txt - name: Upload coverage if: always() uses: actions/upload-artifact@v4 @@ -105,5 +103,3 @@ jobs: path: python/pytest.xml overwrite: true retention-days: 1 - - name: Minimize uv cache - run: uv cache prune --ci diff --git a/python/samples/concepts/chat_completion/simple_chatbot.py b/python/samples/concepts/chat_completion/simple_chatbot.py index a52b52aaace1..630bd75061f2 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot.py +++ b/python/samples/concepts/chat_completion/simple_chatbot.py @@ -6,7 +6,7 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatHistory # This sample shows how to create a chatbot. This sample uses the following two main components: # - a ChatCompletionService: This component is responsible for generating responses to user messages. @@ -26,7 +26,7 @@ # - Services.ONNX # - Services.VERTEX_AI # Please make sure you have configured your environment correctly for the selected chat completion service. -chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) # This is the system message that gives the chatbot its personality. system_message = """ @@ -64,10 +64,11 @@ async def chat() -> bool: chat_history=chat_history, settings=request_settings, ) - print(f"Mosscap:> {response}") + if response: + print(f"Mosscap:> {response}") - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_assistant_message(str(response)) + # Add the chat message to the chat history to keep track of the conversation. 
+ chat_history.add_message(response) return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py index 361e4e706d5d..6ed249276c08 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_kernel_function.py @@ -6,9 +6,9 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.functions.kernel_arguments import KernelArguments -from semantic_kernel.kernel import Kernel +from semantic_kernel import Kernel +from semantic_kernel.contents import ChatHistory +from semantic_kernel.functions import KernelArguments # This sample shows how to create a chatbot using a kernel function. # This sample uses the following two main components: @@ -97,15 +97,15 @@ async def chat() -> bool: user_input=user_input, ) - answer = await kernel.invoke(chat_function, kernel_arguments) + answer = await kernel.invoke(plugin_name="ChatBot", function_name="Chat", arguments=kernel_arguments) # Alternatively, you can invoke the function directly with the kernel as an argument: # answer = await chat_function.invoke(kernel, kernel_arguments) - - print(f"Mosscap:> {answer}") - - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_user_message(user_input) - chat_history.add_assistant_message(str(answer)) + if answer: + print(f"Mosscap:> {answer}") + # Since the user_input is rendered by the template, it is not yet part of the chat history, so we add it here. + chat_history.add_user_message(user_input) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(answer.value[0]) return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py index 63fa49e1dc4c..f852cb1744c6 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_logit_bias.py @@ -6,7 +6,7 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatHistory # This sample shows how to create a chatbot that whose output can be biased using logit bias. # This sample uses the following three main components: @@ -33,7 +33,6 @@ # Create a chat history object with the system message. chat_history = ChatHistory(system_message=system_message) - # Create a list of tokens whose bias value will be reduced. # The token ids of these words can be obtained using the GPT Tokenizer: https://platform.openai.com/tokenizer # the targeted model series is GPT-4o & GPT-4o mini @@ -61,7 +60,7 @@ ] # Configure the logit bias settings to minimize the likelihood of the # tokens in the banned_tokens list appearing in the output. -request_settings.logit_bias = {k: -100 for k in banned_tokens} +request_settings.logit_bias = {k: -100 for k in banned_tokens} # type: ignore async def chat() -> bool: @@ -86,10 +85,11 @@ async def chat() -> bool: chat_history=chat_history, settings=request_settings, ) - print(f"Mosscap:> {response}") + if response: + print(f"Mosscap:> {response}") - # Add the chat message to the chat history to keep track of the conversation. 
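The chatbot samples above converge on one pattern: guard against an empty response and append the full message object to the history rather than only its string form, so the role, items, and metadata are preserved. A minimal sketch of that flow, assuming an OpenAI chat completion service configured through environment variables (the service_id value and prompts are illustrative, not taken from the samples):

import asyncio

from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    service = OpenAIChatCompletion(service_id="default")
    settings = OpenAIChatPromptExecutionSettings(service_id="default")
    history = ChatHistory(system_message="You are a terse assistant.")
    history.add_user_message("Say hello in five words.")
    # Returns a ChatMessageContent (or None if the service produced nothing).
    response = await service.get_chat_message_content(chat_history=history, settings=settings)
    if response:
        print(response)
        # Keep the whole message object, not str(response).
        history.add_message(response)


asyncio.run(main())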
- chat_history.add_assistant_message(str(response)) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(response) return True diff --git a/python/samples/concepts/chat_completion/simple_chatbot_streaming.py b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py index 21744ffd53c2..b513aeeb408d 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_streaming.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_streaming.py @@ -1,13 +1,12 @@ # Copyright (c) Microsoft. All rights reserved. import asyncio -from functools import reduce from samples.concepts.setup.chat_completion_services import ( Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.contents import ChatHistory, StreamingChatMessageContent # This sample shows how to create a chatbot that streams responses. # This sample uses the following two main components: @@ -70,15 +69,16 @@ async def chat() -> bool: ) # Capture the chunks of the response and print them as they come in. - chunks = [] + chunks: list[StreamingChatMessageContent] = [] print("Mosscap:> ", end="") async for chunk in response: - chunks.append(chunk) - print(chunk, end="") + if chunk: + chunks.append(chunk) + print(chunk, end="") print("") # Combine the chunks into a single message to add to the chat history. - full_message = reduce(lambda first, second: first + second, chunks) + full_message = sum(chunks[1:], chunks[0]) # Add the chat message to the chat history to keep track of the conversation. chat_history.add_message(full_message) diff --git a/python/samples/concepts/chat_completion/simple_chatbot_with_image.py b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py index f7fac3448816..5ee1244f5d6d 100644 --- a/python/samples/concepts/chat_completion/simple_chatbot_with_image.py +++ b/python/samples/concepts/chat_completion/simple_chatbot_with_image.py @@ -6,10 +6,7 @@ Services, get_chat_completion_service_and_request_settings, ) -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.image_content import ImageContent -from semantic_kernel.contents.text_content import TextContent +from semantic_kernel.contents import AuthorRole, ChatHistory, ChatMessageContent, ImageContent, TextContent # This sample shows how to create a chatbot that responds to user messages with image input. # This sample uses the following three main components: @@ -30,6 +27,11 @@ # - Services.ONNX # - Services.VERTEX_AI # Please make sure you have configured your environment correctly for the selected chat completion service. + +# [NOTE] +# Not all models support image input. Make sure to select a model that supports image input. +# Not all services support image input from an image URI. If your image is saved in a remote location, +# make sure to use a service that supports image input from a URI. chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) IMAGE_URI = "https://upload.wikimedia.org/wikipedia/commons/d/d5/Half-timbered_mansion%2C_Zirkel%2C_East_view.jpg" @@ -41,12 +43,6 @@ image_content_local = ImageContent.from_image_file(IMAGE_PATH) -# [NOTE] -# Not all models support image input. Make sure to select a model that supports image input. -# Not all services support image input from an image URI. 
If your image is saved in a remote location, -# make sure to use a service that supports image input from a URI. - - # This is the system message that gives the chatbot its personality. system_message = """ You are an image reviewing chat bot. Your name is Mosscap and you have one goal critiquing images that are supplied. @@ -56,7 +52,7 @@ chat_history = ChatHistory(system_message=system_message) chat_history.add_message( ChatMessageContent( - role="user", + role=AuthorRole.USER, items=[TextContent(text="What is in this image?"), image_content_local], ) ) @@ -90,10 +86,11 @@ async def chat(skip_user_input: bool = False) -> bool: chat_history=chat_history, settings=request_settings, ) - print(f"Mosscap:> {response}") + if response: + print(f"Mosscap:> {response}") - # Add the chat message to the chat history to keep track of the conversation. - chat_history.add_assistant_message(str(response)) + # Add the chat message to the chat history to keep track of the conversation. + chat_history.add_message(response) return True diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py index 903b59f42928..40dd127eda47 100644 --- a/python/samples/concepts/setup/chat_completion_services.py +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -1,29 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. from enum import Enum +from typing import TYPE_CHECKING -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings -from semantic_kernel.connectors.ai.azure_ai_inference import ( - AzureAIInferenceChatCompletion, - AzureAIInferenceChatPromptExecutionSettings, -) -from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings -from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings -from semantic_kernel.connectors.ai.google.vertex_ai import VertexAIChatCompletion, VertexAIChatPromptExecutionSettings -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings -from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings -from semantic_kernel.connectors.ai.onnx import OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings, ONNXTemplate -from semantic_kernel.connectors.ai.open_ai import ( - AzureChatCompletion, - AzureChatPromptExecutionSettings, - OpenAIChatCompletion, - OpenAIChatPromptExecutionSettings, -) -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings - - -class Services(Enum): +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase + from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings + + +class Services(str, Enum): """Enum for supported chat completion services. 
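Further up, the streaming sample replaces functools.reduce with sum when stitching chunks back into one message, relying on StreamingChatMessageContent supporting the + operator. A short sketch of that aggregation step, assuming chunks were collected from a streaming response and the list is non-empty:

from semantic_kernel.contents import StreamingChatMessageContent


def combine_chunks(chunks: list[StreamingChatMessageContent]) -> StreamingChatMessageContent:
    if not chunks:
        raise ValueError("expected at least one chunk")
    # sum() needs a start value of the right type, so seed it with the first chunk
    # and fold the rest in; each + concatenates the streamed content.
    return sum(chunks[1:], chunks[0])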
For service specific settings, refer to this documentation: @@ -42,9 +27,12 @@ class Services(Enum): VERTEX_AI = "vertex_ai" +service_id = "default" + + def get_chat_completion_service_and_request_settings( - service_name: str, -) -> tuple[ChatCompletionClientBase, PromptExecutionSettings]: + service_name: Services, +) -> tuple["ChatCompletionClientBase", "PromptExecutionSettings"]: """Return service and request settings.""" chat_services = { Services.OPENAI: get_openai_chat_completion_service_and_request_settings, @@ -58,12 +46,11 @@ def get_chat_completion_service_and_request_settings( Services.ONNX: get_onnx_chat_completion_service_and_request_settings, Services.VERTEX_AI: get_vertex_ai_chat_completion_service_and_request_settings, } - return chat_services[service_name]() def get_openai_chat_completion_service_and_request_settings() -> tuple[ - OpenAIChatCompletion, OpenAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return OpenAI chat completion service and request settings. @@ -78,14 +65,21 @@ def get_openai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python """ - chat_service = OpenAIChatCompletion() - request_settings = OpenAIChatPromptExecutionSettings(max_tokens=2000, temperature=0.7, top_p=0.8) + from semantic_kernel.connectors.ai.open_ai import ( + OpenAIChatCompletion, + OpenAIChatPromptExecutionSettings, + ) + + chat_service = OpenAIChatCompletion(service_id=service_id) + request_settings = OpenAIChatPromptExecutionSettings( + service_id=service_id, max_tokens=2000, temperature=0.7, top_p=0.8 + ) return chat_service, request_settings def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ - AzureChatCompletion, AzureChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Azure OpenAI chat completion service and request settings. @@ -100,14 +94,19 @@ def get_azure_openai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = AzureChatCompletion() - request_settings = AzureChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.open_ai import ( + AzureChatCompletion, + AzureChatPromptExecutionSettings, + ) + + chat_service = AzureChatCompletion(service_id=service_id) + request_settings = AzureChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tuple[ - AzureAIInferenceChatCompletion, AzureAIInferenceChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Azure AI Inference chat completion service and request settings. 
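The factories now stamp the service and its request settings with the same service_id. A small sketch of why that pairing matters, assuming the service is registered on a Kernel so prompt functions resolve it by id (values are illustrative):

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings

service_id = "default"
kernel = Kernel()
# Register the service under its id...
kernel.add_service(OpenAIChatCompletion(service_id=service_id))
# ...and carry the same id on the settings so invocations select that service.
request_settings = OpenAIChatPromptExecutionSettings(service_id=service_id, temperature=0.7)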
@@ -122,16 +121,22 @@ def get_azure_ai_inference_chat_completion_service_and_request_settings() -> tup Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ + from semantic_kernel.connectors.ai.azure_ai_inference import ( + AzureAIInferenceChatCompletion, + AzureAIInferenceChatPromptExecutionSettings, + ) + chat_service = AzureAIInferenceChatCompletion( + service_id=service_id, ai_model_id="id", # The model ID is simply an identifier as the model id cannot be obtained programmatically. ) - request_settings = AzureAIInferenceChatPromptExecutionSettings() + request_settings = AzureAIInferenceChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_anthropic_chat_completion_service_and_request_settings() -> tuple[ - AnthropicChatCompletion, AnthropicChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Anthropic chat completion service and request settings. @@ -146,14 +151,16 @@ def get_anthropic_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = AnthropicChatCompletion() - request_settings = AnthropicChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings + + chat_service = AnthropicChatCompletion(service_id=service_id) + request_settings = AnthropicChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ - BedrockChatCompletion, BedrockChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Anthropic chat completion service and request settings. @@ -168,11 +175,14 @@ def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = BedrockChatCompletion(model_id="cohere.command-r-v1:0") + from semantic_kernel.connectors.ai.bedrock import BedrockChatCompletion, BedrockChatPromptExecutionSettings + + chat_service = BedrockChatCompletion(service_id=service_id, model_id="cohere.command-r-v1:0") request_settings = BedrockChatPromptExecutionSettings( # For model specific settings, specify them in the extension_data dictionary. # For example, for Cohere Command specific settings, refer to: # https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html + service_id=service_id, extension_data={ "presence_penalty": 0.5, "seed": 5, @@ -183,7 +193,7 @@ def get_bedrock_chat_completion_service_and_request_settings() -> tuple[ def get_google_ai_chat_completion_service_and_request_settings() -> tuple[ - GoogleAIChatCompletion, GoogleAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Google AI chat completion service and request settings. 
@@ -198,14 +208,19 @@ def get_google_ai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = GoogleAIChatCompletion() - request_settings = GoogleAIChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.google.google_ai import ( + GoogleAIChatCompletion, + GoogleAIChatPromptExecutionSettings, + ) + + chat_service = GoogleAIChatCompletion(service_id=service_id) + request_settings = GoogleAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ - MistralAIChatCompletion, MistralAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Mistral AI chat completion service and request settings. @@ -220,14 +235,16 @@ def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = MistralAIChatCompletion() - request_settings = MistralAIChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings + + chat_service = MistralAIChatCompletion(service_id=service_id) + request_settings = MistralAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_ollama_chat_completion_service_and_request_settings() -> tuple[ - OllamaChatCompletion, OllamaChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Ollama chat completion service and request settings. @@ -242,21 +259,24 @@ def get_ollama_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = OllamaChatCompletion() + from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings + + chat_service = OllamaChatCompletion(service_id=service_id) request_settings = OllamaChatPromptExecutionSettings( # For model specific settings, specify them in the options dictionary. # For more information on the available options, refer to the Ollama API documentation: # https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values + service_id=service_id, options={ "temperature": 0.8, - } + }, ) return chat_service, request_settings def get_onnx_chat_completion_service_and_request_settings() -> tuple[ - OnnxGenAIChatCompletion, OnnxGenAIPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Onnx chat completion service and request settings. 
@@ -271,14 +291,20 @@ def get_onnx_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3) - request_settings = OnnxGenAIPromptExecutionSettings() + from semantic_kernel.connectors.ai.onnx import ( + OnnxGenAIChatCompletion, + OnnxGenAIPromptExecutionSettings, + ONNXTemplate, + ) + + chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id=service_id) + request_settings = OnnxGenAIPromptExecutionSettings(service_id=service_id) return chat_service, request_settings def get_vertex_ai_chat_completion_service_and_request_settings() -> tuple[ - VertexAIChatCompletion, VertexAIChatPromptExecutionSettings + "ChatCompletionClientBase", "PromptExecutionSettings" ]: """Return Vertex AI chat completion service and request settings. @@ -293,7 +319,12 @@ def get_vertex_ai_chat_completion_service_and_request_settings() -> tuple[ Please refer to the Semantic Kernel Python documentation for more information: https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel """ - chat_service = VertexAIChatCompletion() - request_settings = VertexAIChatPromptExecutionSettings() + from semantic_kernel.connectors.ai.google.vertex_ai import ( + VertexAIChatCompletion, + VertexAIChatPromptExecutionSettings, + ) + + chat_service = VertexAIChatCompletion(service_id=service_id) + request_settings = VertexAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings diff --git a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py index 51bd4f4f7dcb..5c80506e3297 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/anthropic/prompt_execution_settings/anthropic_prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from typing import Any +from typing import Annotated, Any from pydantic import Field, model_validator @@ -15,7 +15,7 @@ class AnthropicPromptExecutionSettings(PromptExecutionSettings): """Common request settings for Anthropic services.""" - ai_model_id: str | None = Field(None, serialization_alias="model") + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): @@ -24,20 +24,26 @@ class AnthropicChatPromptExecutionSettings(AnthropicPromptExecutionSettings): messages: list[dict[str, Any]] | None = None stream: bool | None = None system: str | None = None - max_tokens: int = Field(default=1024, gt=0) - temperature: float | None = Field(None, ge=0.0, le=2.0) + max_tokens: Annotated[int, Field(gt=0)] = 1024 + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None stop_sequences: list[str] | None = None - top_p: float | None = Field(None, ge=0.0, le=1.0) - top_k: int | None = Field(None, ge=0) - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description=("Do not set this manually. 
It is set by the service based on the function choice configuration."), - ) - tool_choice: dict[str, str] | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_k: Annotated[int | None, Field(ge=0)] = None + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description=( + "Do not set this manually. It is set by the service based on the function choice configuration." + ), + ), + ] = None + tool_choice: Annotated[ + dict[str, str] | None, + Field( + description="Do not set this manually. It is set by the service based on the function choice configuration." + ), + ] = None @model_validator(mode="after") def validate_tool_choice(self) -> "AnthropicChatPromptExecutionSettings": diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py index 9f0d8bba851d..a8be8303e6b3 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/azure_ai_inference_prompt_execution_settings.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any, Literal +from typing import Annotated, Any, Literal from pydantic import Field @@ -16,13 +16,13 @@ class AzureAIInferencePromptExecutionSettings(PromptExecutionSettings): `extra_parameters` is a dictionary to pass additional model-specific parameters to the model. """ - frequency_penalty: float | None = Field(None, ge=-2, le=2) - max_tokens: int | None = Field(None, gt=0) - presence_penalty: float | None = Field(None, ge=-2, le=2) + frequency_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None + max_tokens: Annotated[int | None, Field(gt=0)] = None + presence_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None seed: int | None = None stop: str | None = None - temperature: float | None = Field(None, ge=0.0, le=1.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + temperature: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None extra_parameters: dict[str, Any] | None = None @@ -30,15 +30,21 @@ class AzureAIInferencePromptExecutionSettings(PromptExecutionSettings): class AzureAIInferenceChatPromptExecutionSettings(AzureAIInferencePromptExecutionSettings): """Azure AI Inference Chat Prompt Execution Settings.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: str | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + str | None, + Field( + description="Do not set this manually. 
It is set by the service based " + "on the function choice configuration.", + ), + ] = None @experimental_class @@ -49,7 +55,7 @@ class AzureAIInferenceEmbeddingPromptExecutionSettings(PromptExecutionSettings): `extra_parameters` is a dictionary to pass additional model-specific parameters to the model. """ - dimensions: int | None = Field(None, gt=0) + dimensions: Annotated[int | None, Field(gt=0)] = None encoding_format: Literal["base64", "binary", "float", "int8", "ubinary", "uint8"] | None = None input_type: Literal["text", "query", "document"] | None = None extra_parameters: dict[str, str] | None = None diff --git a/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py index a291f221a9d3..ca33d3123490 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any +from typing import Annotated, Any from pydantic import Field @@ -11,25 +11,31 @@ class BedrockPromptExecutionSettings(PromptExecutionSettings): """Bedrock Prompt Execution Settings.""" - temperature: float | None = Field(None, ge=0.0, le=1.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) - top_k: int | None = Field(None, gt=0) - max_tokens: int | None = Field(None, gt=0) + temperature: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + top_k: Annotated[int | None, Field(gt=0)] = None + max_tokens: Annotated[int | None, Field(gt=0)] = None stop: list[str] = Field(default_factory=list) class BedrockChatPromptExecutionSettings(BedrockPromptExecutionSettings): """Bedrock Chat Prompt Execution Settings.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: dict[str, Any] | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + dict[str, Any] | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None class BedrockTextPromptExecutionSettings(BedrockPromptExecutionSettings): diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py index 91f81fb18580..99e7ad1f8d56 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/google_ai_prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. 
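The recurring change across these execution-settings classes is the pydantic field style: constraints move out of a Field(default, ...) call and into Annotated metadata, with the default becoming an ordinary assignment. A minimal before/after sketch (class names are illustrative):

from typing import Annotated

from pydantic import BaseModel, Field


class OldStyle(BaseModel):
    # Default and constraints bundled into Field(...).
    temperature: float | None = Field(None, ge=0.0, le=2.0)


class NewStyle(BaseModel):
    # Constraints live in Annotated metadata; the default is a plain assignment.
    temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None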
import sys -from typing import Any, Literal +from typing import Annotated, Any, Literal from pydantic import Field @@ -16,12 +16,12 @@ class GoogleAIPromptExecutionSettings(PromptExecutionSettings): """Google AI Prompt Execution Settings.""" - stop_sequences: list[str] | None = Field(None, max_length=5) + stop_sequences: Annotated[list[str] | None, Field(max_length=5)] = None response_mime_type: Literal["text/plain", "application/json"] | None = None response_schema: Any | None = None - candidate_count: int | None = Field(None, ge=1) - max_output_tokens: int | None = Field(None, ge=1) - temperature: float | None = Field(None, ge=0.0, le=2.0) + candidate_count: Annotated[int | None, Field(ge=1)] = None + max_output_tokens: Annotated[int | None, Field(ge=1)] = None + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None top_p: float | None = None top_k: int | None = None @@ -35,15 +35,21 @@ class GoogleAITextPromptExecutionSettings(GoogleAIPromptExecutionSettings): class GoogleAIChatPromptExecutionSettings(GoogleAIPromptExecutionSettings): """Google AI Chat Prompt Execution Settings.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_config: dict[str, Any] | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_config: Annotated[ + dict[str, Any] | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None @override def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: @@ -62,4 +68,4 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: class GoogleAIEmbeddingPromptExecutionSettings(PromptExecutionSettings): """Google AI Embedding Prompt Execution Settings.""" - output_dimensionality: int | None = Field(None, le=768) + output_dimensionality: Annotated[int | None, Field(le=768)] = None diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py index 28c8eb6f28be..29b9e13e1278 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/vertex_ai_prompt_execution_settings.py @@ -1,15 +1,14 @@ # Copyright (c) Microsoft. All rights reserved. 
import sys -from typing import Any, Literal - -from pydantic import Field -from vertexai.generative_models import Tool, ToolConfig +from typing import Annotated, Any, Literal if sys.version_info >= (3, 12): from typing import override # pragma: no cover else: from typing_extensions import override # pragma: no cover +from pydantic import Field +from vertexai.generative_models import Tool, ToolConfig from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -17,12 +16,12 @@ class VertexAIPromptExecutionSettings(PromptExecutionSettings): """Vertex AI Prompt Execution Settings.""" - stop_sequences: list[str] | None = Field(None, max_length=5) + stop_sequences: Annotated[list[str] | None, Field(max_length=5)] = None response_mime_type: Literal["text/plain", "application/json"] | None = None response_schema: Any | None = None - candidate_count: int | None = Field(None, ge=1) - max_output_tokens: int | None = Field(None, ge=1) - temperature: float | None = Field(None, ge=0.0, le=2.0) + candidate_count: Annotated[int | None, Field(ge=1)] = None + max_output_tokens: Annotated[int | None, Field(ge=1)] = None + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None top_p: float | None = None top_k: int | None = None @@ -36,15 +35,21 @@ class VertexAITextPromptExecutionSettings(VertexAIPromptExecutionSettings): class VertexAIChatPromptExecutionSettings(VertexAIPromptExecutionSettings): """Vertex AI Chat Prompt Execution Settings.""" - tools: list[Tool] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_config: ToolConfig | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[Tool] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_config: Annotated[ + ToolConfig | None, + Field( + description="Do not set this manually. 
It is set by the service based " + "on the function choice configuration.", + ), + ] = None @override def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py index d0409b1be659..ce61d75740f5 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/prompt_execution_settings/mistral_ai_prompt_execution_settings.py @@ -2,7 +2,7 @@ import logging import sys -from typing import Any, Literal +from typing import Annotated, Any, Literal from mistralai import utils @@ -21,7 +21,7 @@ class MistralAIPromptExecutionSettings(PromptExecutionSettings): """Common request settings for MistralAI services.""" - ai_model_id: str | None = Field(None, serialization_alias="model") + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None class MistralAIChatPromptExecutionSettings(MistralAIPromptExecutionSettings): @@ -29,28 +29,34 @@ class MistralAIChatPromptExecutionSettings(MistralAIPromptExecutionSettings): response_format: dict[Literal["type"], Literal["text", "json_object"]] | None = None messages: list[dict[str, Any]] | None = None - safe_mode: bool = Field(False, exclude=True) + safe_mode: Annotated[bool, Field(exclude=True)] = False safe_prompt: bool = False - max_tokens: int | None = Field(None, gt=0) + max_tokens: Annotated[int | None, Field(gt=0)] = None seed: int | None = None - temperature: float | None = Field(None, ge=0.0, le=2.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None random_seed: int | None = None - presence_penalty: float | None = Field(None, gt=0) - frequency_penalty: float | None = Field(None, gt=0) - n: int | None = Field(None, gt=1) + presence_penalty: Annotated[float | None, Field(gt=0)] = None + frequency_penalty: Annotated[float | None, Field(gt=0)] = None + n: Annotated[int | None, Field(gt=1)] = None retries: utils.RetryConfig | None = None server_url: str | None = None timeout_ms: int | None = None - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: str | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + str | None, + Field( + description="Do not set this manually. 
It is set by the service based " + "on the function choice configuration.", + ), + ] = None @field_validator("safe_mode") @classmethod diff --git a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py index e5c4c5b4510b..f315f971e91f 100644 --- a/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/ollama/ollama_prompt_execution_settings.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. -from typing import Any, Literal +from typing import Annotated, Any, Literal from pydantic import Field @@ -29,11 +29,14 @@ class OllamaTextPromptExecutionSettings(OllamaPromptExecutionSettings): class OllamaChatPromptExecutionSettings(OllamaPromptExecutionSettings): """Settings for Ollama chat prompt execution.""" - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None class OllamaEmbeddingPromptExecutionSettings(OllamaPromptExecutionSettings): diff --git a/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py index aebc2d231cb2..75c3f29699ff 100644 --- a/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/onnx/onnx_gen_ai_prompt_execution_settings.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft. All rights reserved. 
+from typing import Annotated + from pydantic import Field from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -9,17 +11,17 @@ class OnnxGenAIPromptExecutionSettings(PromptExecutionSettings): """OnnxGenAI prompt execution settings.""" - diversity_penalty: float | None = Field(None, ge=0.0, le=1.0) + diversity_penalty: Annotated[float | None, Field(ge=0.0, le=1.0)] = None do_sample: bool = False early_stopping: bool = True - length_penalty: float | None = Field(None, ge=0.0, le=1.0) - max_length: int = Field(3072, gt=0) - min_length: int | None = Field(None, gt=0) + length_penalty: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + max_length: Annotated[int, Field(gt=0)] = 3072 + min_length: Annotated[int | None, Field(gt=0)] = None no_repeat_ngram_size: int = 0 - num_beams: int | None = Field(None, gt=0) - num_return_sequences: int | None = Field(None, gt=0) + num_beams: Annotated[int | None, Field(gt=0)] = None + num_return_sequences: Annotated[int | None, Field(gt=0)] = None past_present_share_buffer: int = True - repetition_penalty: float | None = Field(None, ge=0.0, le=1.0) - temperature: float | None = Field(None, ge=0.0, le=2.0) - top_k: int | None = Field(None, gt=0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + repetition_penalty: Annotated[float | None, Field(ge=0.0, le=1.0)] = None + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None + top_k: Annotated[int | None, Field(gt=0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py index 3f627e12c665..bb247cb55e43 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py @@ -47,6 +47,7 @@ def __init__( ai_model_id: str | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, + **kwargs: Any, ) -> None: """Initializes a new instance of the OnnxGenAITextCompletion class. @@ -57,6 +58,7 @@ def __init__( env_file_path : Use the environment settings file as a fallback to environment variables. env_file_encoding : The encoding of the environment settings file. + kwargs : Additional arguments. 
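With **kwargs now forwarded to the base class, additional service arguments can be passed straight through the ONNX connector's constructor. A brief usage sketch, assuming the ONNX model folder is configured via environment settings (service_id is the argument the sample factories pass this way):

from semantic_kernel.connectors.ai.onnx import ONNXTemplate, OnnxGenAIChatCompletion

# service_id travels through **kwargs to the service base class.
chat_service = OnnxGenAIChatCompletion(ONNXTemplate.PHI3, service_id="default")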
""" try: settings = OnnxGenAISettings.create( @@ -76,7 +78,7 @@ def __init__( if ai_model_id is None: ai_model_id = settings.chat_model_folder - super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template) + super().__init__(ai_model_id=ai_model_id, ai_model_path=settings.chat_model_folder, template=template, **kwargs) @override async def _inner_get_chat_message_contents( diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py index 19ec573da19b..543b4e2c64a5 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/azure_chat_prompt_execution_settings.py @@ -157,8 +157,8 @@ class ExtraBody(KernelBaseModel): """Extra body for the Azure Chat Completion endpoint.""" data_sources: list[DataSource] | None = None - input_language: str | None = Field(None, serialization_alias="inputLanguage") - output_language: str | None = Field(None, serialization_alias="outputLanguage") + input_language: Annotated[str | None, Field(serialization_alias="inputLanguage")] = None + output_language: Annotated[str | None, Field(serialization_alias="outputLanguage")] = None def __getitem__(self, item): """Get an item from the ExtraBody.""" diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py index f87e3ccedd65..f85f03289d92 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_prompt_execution_settings.py @@ -2,7 +2,7 @@ import logging import sys -from typing import Any, Literal +from typing import Annotated, Any, Literal if sys.version_info >= (3, 11): from typing import Self # pragma: no cover @@ -21,29 +21,29 @@ class OpenAIPromptExecutionSettings(PromptExecutionSettings): """Common request settings for (Azure) OpenAI services.""" - ai_model_id: str | None = Field(None, serialization_alias="model") - frequency_penalty: float | None = Field(None, ge=-2.0, le=2.0) + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None + frequency_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None logit_bias: dict[str | int, float] | None = None - max_tokens: int | None = Field(None, gt=0) - number_of_responses: int | None = Field(None, ge=1, le=128, serialization_alias="n") - presence_penalty: float | None = Field(None, ge=-2.0, le=2.0) + max_tokens: Annotated[int | None, Field(gt=0)] = None + number_of_responses: Annotated[int | None, Field(ge=1, le=128, serialization_alias="n")] = None + presence_penalty: Annotated[float | None, Field(ge=-2.0, le=2.0)] = None seed: int | None = None stop: str | list[str] | None = None stream: bool = False - temperature: float | None = Field(None, ge=0.0, le=2.0) - top_p: float | None = Field(None, ge=0.0, le=1.0) + temperature: Annotated[float | None, Field(ge=0.0, le=2.0)] = None + top_p: Annotated[float | None, Field(ge=0.0, le=1.0)] = None user: str | None = None class OpenAITextPromptExecutionSettings(OpenAIPromptExecutionSettings): """Specific settings for the completions 
endpoint.""" - prompt: str | None = Field( - None, description="Do not set this manually. It is set by the service based on the text content." - ) - best_of: int | None = Field(None, ge=1) + prompt: Annotated[ + str | None, Field(description="Do not set this manually. It is set by the service based on the text content.") + ] = None + best_of: Annotated[int | None, Field(ge=1)] = None echo: bool = False - logprobs: int | None = Field(None, ge=0, le=5) + logprobs: Annotated[int | None, Field(ge=0, le=5)] = None suffix: str | None = None @model_validator(mode="after") @@ -68,25 +68,33 @@ class OpenAIChatPromptExecutionSettings(OpenAIPromptExecutionSettings): ) = None function_call: str | None = None functions: list[dict[str, Any]] | None = None - messages: list[dict[str, Any]] | None = Field( - None, description="Do not set this manually. It is set by the service based on the chat history." - ) - function_call_behavior: FunctionCallBehavior | None = Field(None, exclude=True) + messages: Annotated[ + list[dict[str, Any]] | None, Field(description="Do not set this manually. It is set by the service.") + ] = None + function_call_behavior: Annotated[FunctionCallBehavior | None, Field(exclude=True)] = None parallel_tool_calls: bool = True - tools: list[dict[str, Any]] | None = Field( - None, - max_length=64, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - tool_choice: str | None = Field( - None, - description="Do not set this manually. It is set by the service based on the function choice configuration.", - ) - structured_json_response: bool = Field(False, description="Do not set this manually. It is set by the service.") - stream_options: dict[str, Any] | None = Field( - None, - description="Additional options to pass when streaming is used. Do not set this manually.", - ) + tools: Annotated[ + list[dict[str, Any]] | None, + Field( + max_length=64, + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + tool_choice: Annotated[ + str | None, + Field( + description="Do not set this manually. It is set by the service based " + "on the function choice configuration.", + ), + ] = None + structured_json_response: Annotated[ + bool, Field(description="Do not set this manually. It is set by the service.") + ] = False + stream_options: Annotated[ + dict[str, Any] | None, + Field(description="Additional options to pass when streaming is used. 
Do not set this manually."), + ] = None @field_validator("functions", "function_call", mode="after") @classmethod @@ -160,11 +168,11 @@ class OpenAIEmbeddingPromptExecutionSettings(PromptExecutionSettings): """Specific settings for the text embedding endpoint.""" input: str | list[str] | list[int] | list[list[int]] | None = None - ai_model_id: str | None = Field(None, serialization_alias="model") + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None encoding_format: Literal["float", "base64"] | None = None user: str | None = None extra_headers: dict | None = None extra_query: dict | None = None extra_body: dict | None = None timeout: float | None = None - dimensions: int | None = Field(None, gt=0, le=3072) + dimensions: Annotated[int | None, Field(gt=0, le=3072)] = None diff --git a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py index 4c3abc8f5419..3865b63a62ac 100644 --- a/python/semantic_kernel/connectors/ai/prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/prompt_execution_settings.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. import logging -from typing import Any, TypeVar +from typing import Annotated, Any, TypeVar from pydantic import Field, model_validator @@ -32,9 +32,9 @@ class PromptExecutionSettings(KernelBaseModel): from_prompt_execution_settings: Create a prompt execution settings from another prompt execution settings. """ - service_id: str | None = Field(None, min_length=1) + service_id: Annotated[str | None, Field(min_length=1)] = None extension_data: dict[str, Any] = Field(default_factory=dict) - function_choice_behavior: FunctionChoiceBehavior | None = Field(None, exclude=True) + function_choice_behavior: Annotated[FunctionChoiceBehavior | None, Field(exclude=True)] = None @model_validator(mode="before") @classmethod diff --git a/python/semantic_kernel/contents/kernel_content.py b/python/semantic_kernel/contents/kernel_content.py index 98e64e8b810c..70ef59892bd6 100644 --- a/python/semantic_kernel/contents/kernel_content.py +++ b/python/semantic_kernel/contents/kernel_content.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft. All rights reserved. from abc import ABC, abstractmethod -from typing import Any, TypeVar +from typing import Annotated, Any, TypeVar from pydantic import Field @@ -15,7 +15,7 @@ class KernelContent(KernelBaseModel, ABC): # NOTE: if you wish to hold on to the inner content, you are responsible # for saving it before serializing the content/chat history as it won't be included. - inner_content: Any | None = Field(None, exclude=True) + inner_content: Annotated[Any | None, Field(exclude=True)] = None ai_model_id: str | None = None metadata: dict[str, Any] = Field(default_factory=dict) diff --git a/python/tests/conftest.py b/python/tests/conftest.py index d8d9f52e26f9..697cce70712e 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. 
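The tools and tool_choice fields keep their "do not set this manually" descriptions because the connectors derive them from the function choice configuration; user code only sets function_choice_behavior on the settings. A small sketch of the intended usage, assuming the OpenAI settings class (values are illustrative):

from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.ai.open_ai import OpenAIChatPromptExecutionSettings

settings = OpenAIChatPromptExecutionSettings(service_id="default")
# Describe how functions may be chosen; the connector fills in tools/tool_choice from this.
settings.function_choice_behavior = FunctionChoiceBehavior.Auto()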
+import logging from collections.abc import Callable from dataclasses import dataclass, field from typing import TYPE_CHECKING, Annotated @@ -30,6 +31,15 @@ from semantic_kernel.services.ai_service_client_base import AIServiceClientBase +def pytest_configure(config): + logging.basicConfig(level=logging.ERROR) + logging.getLogger("tests.utils").setLevel(logging.INFO) + logging.getLogger("openai").setLevel(logging.WARNING) + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpcore").setLevel(logging.WARNING) + logging.getLogger("semantic_kernel").setLevel(logging.INFO) + + @fixture(scope="function") def kernel() -> "Kernel": from semantic_kernel.kernel import Kernel diff --git a/python/tests/integration/audio_to_text/audio_to_text_test_base.py b/python/tests/integration/audio_to_text/audio_to_text_test_base.py index 8375b1b39a47..78b5ab78cf1c 100644 --- a/python/tests/integration/audio_to_text/audio_to_text_test_base.py +++ b/python/tests/integration/audio_to_text/audio_to_text_test_base.py @@ -6,7 +6,7 @@ from semantic_kernel.connectors.ai.audio_to_text_client_base import AudioToTextClientBase from semantic_kernel.connectors.ai.open_ai import AzureAudioToText, OpenAIAudioToText -from tests.integration.utils import is_service_setup_for_testing +from tests.utils import is_service_setup_for_testing # There is only the whisper model available on Azure OpenAI for audio to text. And that model is # only available in the North Switzerland region. Therefore, the endpoint is different than the one diff --git a/python/tests/integration/completions/chat_completion_test_base.py b/python/tests/integration/completions/chat_completion_test_base.py index d94c7e61442d..1fe87415e865 100644 --- a/python/tests/integration/completions/chat_completion_test_base.py +++ b/python/tests/integration/completions/chat_completion_test_base.py @@ -38,7 +38,7 @@ from semantic_kernel.kernel_pydantic import KernelBaseModel from semantic_kernel.utils.authentication.entra_id_authentication import get_entra_auth_token from tests.integration.completions.completion_test_base import CompletionTestBase, ServiceType -from tests.integration.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms +from tests.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/conftest.py b/python/tests/integration/completions/conftest.py index 17a1e3968661..34731e4da958 100644 --- a/python/tests/integration/completions/conftest.py +++ b/python/tests/integration/completions/conftest.py @@ -1,9 +1,15 @@ # Copyright (c) Microsoft. All rights reserved. 
+import logging import pytest from semantic_kernel.kernel import Kernel +from semantic_kernel.utils.logging import setup_logging + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +setup_logging() @pytest.fixture(scope="function") diff --git a/python/tests/integration/completions/test_chat_completion_with_function_calling.py b/python/tests/integration/completions/test_chat_completion_with_function_calling.py index f749f6db3768..76e759557e15 100644 --- a/python/tests/integration/completions/test_chat_completion_with_function_calling.py +++ b/python/tests/integration/completions/test_chat_completion_with_function_calling.py @@ -25,7 +25,7 @@ vertex_ai_setup, ) from tests.integration.completions.completion_test_base import ServiceType -from tests.integration.utils import retry +from tests.utils import retry if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py index 4b82a75f8fab..137445495fd9 100644 --- a/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py +++ b/python/tests/integration/completions/test_chat_completion_with_image_input_text_output.py @@ -20,7 +20,7 @@ vertex_ai_setup, ) from tests.integration.completions.completion_test_base import ServiceType -from tests.integration.utils import retry +from tests.utils import retry if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/test_chat_completions.py b/python/tests/integration/completions/test_chat_completions.py index c78537a4f4c3..17ab67e12c83 100644 --- a/python/tests/integration/completions/test_chat_completions.py +++ b/python/tests/integration/completions/test_chat_completions.py @@ -21,7 +21,7 @@ vertex_ai_setup, ) from tests.integration.completions.completion_test_base import ServiceType -from tests.integration.utils import retry +from tests.utils import retry if sys.version_info >= (3, 12): from typing import override # pragma: no cover diff --git a/python/tests/integration/completions/test_conversation_summary_plugin.py b/python/tests/integration/completions/test_conversation_summary_plugin.py index 4cc47b47f4c5..5bd80fbe445a 100644 --- a/python/tests/integration/completions/test_conversation_summary_plugin.py +++ b/python/tests/integration/completions/test_conversation_summary_plugin.py @@ -7,7 +7,7 @@ from semantic_kernel.core_plugins.conversation_summary_plugin import ConversationSummaryPlugin from semantic_kernel.functions.kernel_arguments import KernelArguments from semantic_kernel.prompt_template.prompt_template_config import PromptTemplateConfig -from tests.integration.utils import retry +from tests.utils import retry @pytest.mark.asyncio diff --git a/python/tests/integration/completions/test_text_completion.py b/python/tests/integration/completions/test_text_completion.py index 9a8e60d2d0b6..c4c8058a9ab0 100644 --- a/python/tests/integration/completions/test_text_completion.py +++ b/python/tests/integration/completions/test_text_completion.py @@ -32,7 +32,7 @@ from semantic_kernel import Kernel from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from tests.integration.completions.completion_test_base import CompletionTestBase, ServiceType -from tests.integration.utils import is_service_setup_for_testing, 
is_test_running_on_supported_platforms, retry +from tests.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms, retry ollama_setup: bool = is_service_setup_for_testing(["OLLAMA_TEXT_MODEL_ID"]) and is_test_running_on_supported_platforms([ "Linux" diff --git a/python/tests/integration/embeddings/test_embedding_service_base.py b/python/tests/integration/embeddings/test_embedding_service_base.py index d88706c17167..4bb68b8729fd 100644 --- a/python/tests/integration/embeddings/test_embedding_service_base.py +++ b/python/tests/integration/embeddings/test_embedding_service_base.py @@ -30,7 +30,7 @@ ) from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.utils.authentication.entra_id_authentication import get_entra_auth_token -from tests.integration.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms +from tests.utils import is_service_setup_for_testing, is_test_running_on_supported_platforms # Make sure all services are setup for before running the tests # The following exceptions apply: diff --git a/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py b/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py index 3bd91945e880..39ccf99fc09a 100644 --- a/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py +++ b/python/tests/integration/memory/memory_stores/test_astradb_memory_store.py @@ -8,7 +8,7 @@ from semantic_kernel.connectors.memory.astradb import AstraDBMemoryStore from semantic_kernel.connectors.memory.astradb.astradb_settings import AstraDBSettings -from tests.integration.utils import retry +from tests.utils import retry astradb_installed: bool try: diff --git a/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py b/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py index c4c4d9b6b8ea..58c58bd9724b 100644 --- a/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py +++ b/python/tests/integration/memory/memory_stores/test_pinecone_memory_store.py @@ -11,7 +11,7 @@ from semantic_kernel.connectors.memory.pinecone.pinecone_settings import PineconeSettings from semantic_kernel.exceptions.service_exceptions import ServiceResourceNotFoundError from semantic_kernel.memory.memory_record import MemoryRecord -from tests.integration.utils import retry +from tests.utils import retry pinecone_installed = importlib.util.find_spec("pinecone") is not None pytestmark = pytest.mark.skipif(not pinecone_installed, reason="pinecone is not installed") diff --git a/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py b/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py index c278304077aa..51675b06831b 100644 --- a/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py +++ b/python/tests/integration/memory/vector_stores/azure_cosmos_db/test_azure_cosmos_db_no_sql.py @@ -31,24 +31,23 @@ async def test_list_collection_names( data_model_type: type, ): """Test list collection names.""" - store = stores["azure_cosmos_db_no_sql"] - - assert await store.list_collection_names() == [] + async with stores["azure_cosmos_db_no_sql"] as store: + assert await store.list_collection_names() == [] - collection_name = "list_collection_names" - collection = store.get_collection(collection_name, data_model_type) - await collection.create_collection() + 
collection_name = "list_collection_names" + collection = store.get_collection(collection_name, data_model_type) + await collection.create_collection() - collection_names = await store.list_collection_names() - assert collection_name in collection_names + collection_names = await store.list_collection_names() + assert collection_name in collection_names - await collection.delete_collection() - assert await collection.does_collection_exist() is False - collection_names = await store.list_collection_names() - assert collection_name not in collection_names + await collection.delete_collection() + assert await collection.does_collection_exist() is False + collection_names = await store.list_collection_names() + assert collection_name not in collection_names - # Deleting the collection doesn't remove it from the vector_record_collections list in the store - assert collection_name in store.vector_record_collections + # Deleting the collection doesn't remove it from the vector_record_collections list in the store + assert collection_name in store.vector_record_collections @pytest.mark.asyncio async def test_collection_not_created( @@ -58,27 +57,27 @@ async def test_collection_not_created( data_record: dict[str, Any], ): """Test get without collection.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "collection_not_created" - collection = store.get_collection(collection_name, data_model_type) + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "collection_not_created" + collection = store.get_collection(collection_name, data_model_type) - assert await collection.does_collection_exist() is False + assert await collection.does_collection_exist() is False - with pytest.raises( - MemoryConnectorException, match="The collection does not exist yet. Create the collection first." - ): - await collection.upsert(data_model_type(**data_record)) + with pytest.raises( + MemoryConnectorException, match="The collection does not exist yet. Create the collection first." + ): + await collection.upsert(data_model_type(**data_record)) - with pytest.raises( - MemoryConnectorException, match="The collection does not exist yet. Create the collection first." - ): - await collection.get(data_record["id"]) + with pytest.raises( + MemoryConnectorException, match="The collection does not exist yet. Create the collection first." 
+ ): + await collection.get(data_record["id"]) - with pytest.raises(MemoryConnectorException): - await collection.delete(data_record["id"]) + with pytest.raises(MemoryConnectorException): + await collection.delete(data_record["id"]) - with pytest.raises(MemoryConnectorException, match="Container could not be deleted."): - await collection.delete_collection() + with pytest.raises(MemoryConnectorException, match="Container could not be deleted."): + await collection.delete_collection() @pytest.mark.asyncio async def test_custom_partition_key( @@ -88,33 +87,35 @@ async def test_custom_partition_key( data_record: dict[str, Any], ): """Test custom partition key.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "custom_partition_key" - collection = store.get_collection( - collection_name, - data_model_type, - partition_key=PartitionKey(path="/product_type"), - ) - - composite_key = AzureCosmosDBNoSQLCompositeKey(key=data_record["id"], partition_key=data_record["product_type"]) - - # Upsert - await collection.create_collection() - await collection.upsert(data_model_type(**data_record)) - - # Verify - record = await collection.get(composite_key) - assert record is not None - assert isinstance(record, data_model_type) - - # Remove - await collection.delete(composite_key) - record = await collection.get(composite_key) - assert record is None - - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "custom_partition_key" + collection = store.get_collection( + collection_name, + data_model_type, + partition_key=PartitionKey(path="/product_type"), + ) + + composite_key = AzureCosmosDBNoSQLCompositeKey( + key=data_record["id"], partition_key=data_record["product_type"] + ) + + # Upsert + await collection.create_collection() + await collection.upsert(data_model_type(**data_record)) + + # Verify + record = await collection.get(composite_key) + assert record is not None + assert isinstance(record, data_model_type) + + # Remove + await collection.delete(composite_key) + record = await collection.get(composite_key) + assert record is None + + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_get_include_vector( @@ -124,28 +125,28 @@ async def test_get_include_vector( data_record: dict[str, Any], ): """Test get with include_vector.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "get_include_vector" - collection = store.get_collection(collection_name, data_model_type) + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "get_include_vector" + collection = store.get_collection(collection_name, data_model_type) - # Upsert - await collection.create_collection() - await collection.upsert(data_model_type(**data_record)) + # Upsert + await collection.create_collection() + await collection.upsert(data_model_type(**data_record)) - # Verify - record = await collection.get(data_record["id"], include_vectors=True) - assert record is not None - assert isinstance(record, data_model_type) - assert record.vector == data_record["vector"] + # Verify + record = await collection.get(data_record["id"], include_vectors=True) + assert record is not None + assert isinstance(record, data_model_type) + assert record.vector == data_record["vector"] - # Remove - await collection.delete(data_record["id"]) - record = await 
collection.get(data_record["id"]) - assert record is None + # Remove + await collection.delete(data_record["id"]) + record = await collection.get(data_record["id"]) + assert record is None - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_get_not_include_vector( @@ -155,28 +156,28 @@ async def test_get_not_include_vector( data_record: dict[str, Any], ): """Test get with include_vector.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "get_not_include_vector" - collection = store.get_collection(collection_name, data_model_type) + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "get_not_include_vector" + collection = store.get_collection(collection_name, data_model_type) - # Upsert - await collection.create_collection() - await collection.upsert(data_model_type(**data_record)) + # Upsert + await collection.create_collection() + await collection.upsert(data_model_type(**data_record)) - # Verify - record = await collection.get(data_record["id"], include_vectors=False) - assert record is not None - assert isinstance(record, data_model_type) - assert record.vector is None + # Verify + record = await collection.get(data_record["id"], include_vectors=False) + assert record is not None + assert isinstance(record, data_model_type) + assert record.vector is None - # Remove - await collection.delete(data_record["id"]) - record = await collection.get(data_record["id"]) - assert record is None + # Remove + await collection.delete(data_record["id"]) + record = await collection.get(data_record["id"]) + assert record is None - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_collection_with_key_as_key_field( @@ -186,29 +187,29 @@ async def test_collection_with_key_as_key_field( data_record_with_key_as_key_field: dict[str, Any], ): """Test collection with key as key field.""" - store = stores["azure_cosmos_db_no_sql"] - collection_name = "collection_with_key_as_key_field" - collection = store.get_collection(collection_name, data_model_type_with_key_as_key_field) - - # Upsert - await collection.create_collection() - result = await collection.upsert(data_model_type_with_key_as_key_field(**data_record_with_key_as_key_field)) - assert data_record_with_key_as_key_field["key"] == result - - # Verify - record = await collection.get(data_record_with_key_as_key_field["key"]) - assert record is not None - assert isinstance(record, data_model_type_with_key_as_key_field) - assert record.key == data_record_with_key_as_key_field["key"] - - # Remove - await collection.delete(data_record_with_key_as_key_field["key"]) - record = await collection.get(data_record_with_key_as_key_field["key"]) - assert record is None - - # Remove collection - await collection.delete_collection() - assert await collection.does_collection_exist() is False + async with stores["azure_cosmos_db_no_sql"] as store: + collection_name = "collection_with_key_as_key_field" + collection = store.get_collection(collection_name, data_model_type_with_key_as_key_field) + + # Upsert + await collection.create_collection() + result = await 
collection.upsert(data_model_type_with_key_as_key_field(**data_record_with_key_as_key_field)) + assert data_record_with_key_as_key_field["key"] == result + + # Verify + record = await collection.get(data_record_with_key_as_key_field["key"]) + assert record is not None + assert isinstance(record, data_model_type_with_key_as_key_field) + assert record.key == data_record_with_key_as_key_field["key"] + + # Remove + await collection.delete(data_record_with_key_as_key_field["key"]) + record = await collection.get(data_record_with_key_as_key_field["key"]) + assert record is None + + # Remove collection + await collection.delete_collection() + assert await collection.does_collection_exist() is False @pytest.mark.asyncio async def test_custom_client( @@ -219,13 +220,14 @@ async def test_custom_client( url = os.environ.get("AZURE_COSMOS_DB_NO_SQL_URL") key = os.environ.get("AZURE_COSMOS_DB_NO_SQL_KEY") - async with CosmosClient(url, key) as custom_client: - store = AzureCosmosDBNoSQLStore( + async with ( + CosmosClient(url, key) as custom_client, + AzureCosmosDBNoSQLStore( database_name="test_database", cosmos_client=custom_client, create_database=True, - ) - + ) as store, + ): assert await store.list_collection_names() == [] collection_name = "list_collection_names" diff --git a/python/tests/integration/text_to_audio/text_to_audio_test_base.py b/python/tests/integration/text_to_audio/text_to_audio_test_base.py index 2ad5bd11df76..167fd9c332a0 100644 --- a/python/tests/integration/text_to_audio/text_to_audio_test_base.py +++ b/python/tests/integration/text_to_audio/text_to_audio_test_base.py @@ -6,7 +6,7 @@ from semantic_kernel.connectors.ai.open_ai import AzureTextToAudio, OpenAITextToAudio from semantic_kernel.connectors.ai.text_to_audio_client_base import TextToAudioClientBase -from tests.integration.utils import is_service_setup_for_testing +from tests.utils import is_service_setup_for_testing # TTS model on Azure model is not available in regions at which we have chat completion models. # Therefore, we need to use a different endpoint for testing. diff --git a/python/tests/samples/samples_utils.py b/python/tests/samples/samples_utils.py deleted file mode 100644 index de2b8257e7b7..000000000000 --- a/python/tests/samples/samples_utils.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import logging - -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger() - - -async def retry(func, reset=None, max_retries=3): - """Retry a function a number of times before raising an exception. - - args: - func: the async function to retry (required) - reset: a function to reset the state of any variables used in the function (optional) - max_retries: the number of times to retry the function before raising an exception (optional) - """ - attempt = 0 - while attempt < max_retries: - try: - if reset: - reset() - await func() - break - except Exception as e: - attempt += 1 - logger.error(f"Attempt {attempt} for {func.__name__} failed: {e}") - if attempt == max_retries: - logger.error(f"All {max_retries} attempts for {func.__name__} failed") - raise e - await asyncio.sleep(1) diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index abce5d4018f8..d3ae4646dbae 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -1,6 +1,9 @@ # Copyright (c) Microsoft. All rights reserved. 
import copy +import os +from collections.abc import Awaitable, Callable +from typing import Any import pytest from pytest import mark, param @@ -54,39 +57,149 @@ from samples.getting_started_with_agents.step2_plugins import main as step2_plugins from samples.getting_started_with_agents.step3_chat import main as step3_chat from samples.getting_started_with_agents.step7_assistant import main as step7_assistant -from tests.samples.samples_utils import retry +from tests.utils import retry + +# These environment variable names are used to control which samples are run during integration testing. +# This has to do with the setup of the tests and the services they depend on. +COMPLETIONS_CONCEPT_SAMPLE = "COMPLETIONS_CONCEPT_SAMPLE" +MEMORY_CONCEPT_SAMPLE = "MEMORY_CONCEPT_SAMPLE" concepts = [ - param(chat_gpt_api_function_calling, ["What is 3+3?", "exit"], id="chat_gpt_api_function_calling"), - param(simple_chatbot, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot"), - param(simple_chatbot_streaming, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot_streaming"), - param(simple_chatbot_with_image, ["exit"], id="simple_chatbot_with_image"), + param( + simple_chatbot, + ["Why is the sky blue in one sentence?", "exit"], + id="simple_chatbot", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + simple_chatbot_streaming, + ["Why is the sky blue in one sentence?", "exit"], + id="simple_chatbot_streaming", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + simple_chatbot_with_image, + ["exit"], + id="simple_chatbot_with_image", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), param( simple_chatbot_logit_bias, ["Who has the most career points in NBA history?", "exit"], id="simple_chatbot_logit_bias", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), ), param( simple_chatbot_kernel_function, ["Why is the sky blue in one sentence?", "exit"], id="simple_chatbot_kernel_function", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + chat_gpt_api_function_calling, + ["What is 3+3?", "exit"], + id="chat_gpt_api_function_calling", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + auto_function_invoke_filters, + ["What is 3+3?", "exit"], + id="auto_function_invoke_filters", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_invocation_filters, + ["What is 3+3?", "exit"], + id="function_invocation_filters", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_invocation_filters_stream, + ["What is 3+3?", "exit"], + id="function_invocation_filters_stream", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), + ), + param( + prompt_filters, + ["What is the fastest animal?", "exit"], + id="prompt_filters", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + kernel_arguments, + [], + id="kernel_arguments", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + grounded, + [], + id="grounded", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_openai_function_calling_stepwise_planner, + [], + id="azure_openai_function_calling_stepwise_planner", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + openai_function_calling_stepwise_planner, + [], + id="openai_function_calling_stepwise_planner", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + sequential_planner, + [], + id="sequential_planner", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + openai_function_calling_with_custom_plugin, + [], + id="openai_function_calling_with_custom_plugin", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), ), - param(auto_function_invoke_filters, ["What is 3+3?", "exit"], id="auto_function_invoke_filters"), - param(function_invocation_filters, ["What is 3+3?", "exit"], id="function_invocation_filters"), - param(function_invocation_filters_stream, ["What is 3+3?", "exit"], id="function_invocation_filters_stream"), - param(prompt_filters, ["What is the fastest animal?", "exit"], id="prompt_filters"), - param(kernel_arguments, [], id="kernel_arguments"), - param(grounded, [], id="grounded"), - param(azure_cognitive_search_memory, [], id="azure_cognitive_search_memory"), - param(memory, ["What are my investments?", "exit"], id="memory"), - param(azure_openai_function_calling_stepwise_planner, [], id="azure_openai_function_calling_stepwise_planner"), - param(openai_function_calling_stepwise_planner, [], id="openai_function_calling_stepwise_planner"), - param(sequential_planner, [], id="sequential_planner"), - param(openai_function_calling_with_custom_plugin, [], id="openai_function_calling_with_custom_plugin"), param( openai_plugin_azure_key_vault, ["Create a secret with the name 'Foo' and value 'Bar'", "exit"], id="openai_plugin_azure_key_vault", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), ), param( openai_plugin_klarna, @@ -96,12 +209,66 @@ reason="Temporarily: https://www.klarna.com/us/shopping/public/openai/v0/api-docs/ returns 404" ), ), - param(plugins_from_dir, [], id="plugins_from_dir"), - param(azure_chat_gpt_api_handlebars, ["What is 3+3?", "exit"], id="azure_chat_gpt_api_handlebars"), - param(azure_chat_gpt_api_jinja2, ["What is 3+3?", "exit"], id="azure_chat_gpt_api_jinja2"), - param(configuring_prompts, ["What is my name?", "exit"], id="configuring_prompts"), - param(load_yaml_prompt, [], id="load_yaml_prompt"), - param(template_language, [], id="template_language"), + param( + plugins_from_dir, + [], + id="plugins_from_dir", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_chat_gpt_api_handlebars, + ["What is 3+3?", "exit"], + id="azure_chat_gpt_api_handlebars", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_chat_gpt_api_jinja2, + ["What is 3+3?", "exit"], + id="azure_chat_gpt_api_jinja2", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + configuring_prompts, + ["What is my name?", "exit"], + id="configuring_prompts", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + load_yaml_prompt, + [], + id="load_yaml_prompt", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + template_language, + [], + id="template_language", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + azure_cognitive_search_memory, + [], + id="azure_cognitive_search_memory", + marks=pytest.mark.skipif(os.getenv(MEMORY_CONCEPT_SAMPLE, None) is None, reason="Not running memory samples."), + ), + param( + memory, + ["What are my investments?", "exit"], + id="memory", + marks=pytest.mark.skipif(os.getenv(MEMORY_CONCEPT_SAMPLE, None) is None, reason="Not running memory samples."), + ), param(rag_with_text_memory_plugin, [], id="rag_with_text_memory_plugin"), param( bing_search_plugin, @@ -109,13 +276,62 @@ id="bing_search_plugin", marks=pytest.mark.skip(reason="Flaky test due to Azure OpenAI content policy"), ), - param(custom_service_selector, [], id="custom_service_selector"), - param(function_defined_in_json_prompt, ["What is 3+3?", "exit"], id="function_defined_in_json_prompt"), - param(function_defined_in_yaml_prompt, ["What is 3+3?", "exit"], id="function_defined_in_yaml_prompt"), - param(step1_agent, [], id="step1_agent"), - param(step2_plugins, [], id="step2_agent_plugins"), - param(step3_chat, [], id="step3_chat"), - param(step7_assistant, [], id="step7_assistant"), + param( + custom_service_selector, + [], + id="custom_service_selector", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + function_defined_in_json_prompt, + ["What is 3+3?", "exit"], + id="function_defined_in_json_prompt", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." 
+ ), + ), + param( + function_defined_in_yaml_prompt, + ["What is 3+3?", "exit"], + id="function_defined_in_yaml_prompt", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step1_agent, + [], + id="step1_agent", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step2_plugins, + [], + id="step2_agent_plugins", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step3_chat, + [], + id="step3_chat", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), + param( + step7_assistant, + [], + id="step7_assistant", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), param( ollama_chat_completion, ["Why is the sky blue?", "exit"], @@ -134,13 +350,20 @@ id="lm_studio_text_embedding", marks=pytest.mark.skip(reason="Need to set up LM Studio locally. Check out the module for more details."), ), - param(image_generation, [], id="image_generation"), + param( + image_generation, + [], + id="image_generation", + marks=pytest.mark.skipif( + os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." + ), + ), ] @mark.asyncio -@mark.parametrize("func, responses", concepts) -async def test_concepts(func, responses, monkeypatch): +@mark.parametrize("sample, responses", concepts) +async def test_concepts(sample: Callable[..., Awaitable[Any]], responses: list[str], monkeypatch): saved_responses = copy.deepcopy(responses) def reset(): @@ -148,4 +371,4 @@ def reset(): responses.extend(saved_responses) monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) - await retry(lambda: func(), reset=reset) + await retry(sample, retries=3, reset=reset) diff --git a/python/tests/samples/test_learn_resources.py b/python/tests/samples/test_learn_resources.py index 428515d30f35..43d69d3907e8 100644 --- a/python/tests/samples/test_learn_resources.py +++ b/python/tests/samples/test_learn_resources.py @@ -14,7 +14,7 @@ from samples.learn_resources.templates import main as templates from samples.learn_resources.using_the_kernel import main as using_the_kernel from samples.learn_resources.your_first_prompt import main as your_first_prompt -from tests.samples.samples_utils import retry +from tests.utils import retry @mark.asyncio diff --git a/python/tests/integration/utils.py b/python/tests/utils.py similarity index 63% rename from python/tests/integration/utils.py rename to python/tests/utils.py index 1af9e94bf4cd..a8397f1398fc 100644 --- a/python/tests/integration/utils.py +++ b/python/tests/utils.py @@ -4,22 +4,40 @@ import logging import os import platform +from collections.abc import Awaitable, Callable +from typing import Any -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger() +logger = logging.getLogger(__name__) -async def retry(func, retries=20): - """Retry the function if it raises an exception.""" +async def retry( + func: Callable[..., Awaitable[Any]], + retries: int = 20, + reset: Callable[..., None] | None = None, +): + """Retry the function if it raises an exception. + + Args: + func (function): The function to retry. + retries (int): Number of retries. 
+ reset (function): Function to reset the state of any variables used in the function + + """ + logger.info(f"Running {retries} retries with func: {func.__module__}") for i in range(retries): + logger.info(f" Try {i + 1} for {func.__module__}") try: + if reset: + reset() return await func() except Exception as e: - logger.error(f"Retry {i + 1}: {e}") + logger.info(f" On try {i + 1} got this error: {e}") if i == retries - 1: # Last retry raise # Binary exponential backoff - await asyncio.sleep(2**i) + backoff = 2**i + logger.info(f" Sleeping for {backoff} seconds before retrying") + await asyncio.sleep(backoff) return None From 592b138f8cc91d363450d628015c484b6125e827 Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Fri, 29 Nov 2024 14:45:58 +0100 Subject: [PATCH 21/23] Python: small makefile update for python (#9847) ### Motivation and Context Small fix in the makefile for when uv is not installed. ### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --- python/Makefile | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/Makefile b/python/Makefile index 1e165a1539ba..8effc78c8d1e 100644 --- a/python/Makefile +++ b/python/Makefile @@ -34,17 +34,20 @@ install: make install-sk make install-pre-commit -UV_VERSION = $(shell uv --version 2> /dev/null) +UV_VERSION := $(shell command -v uv 2> /dev/null) install-uv: # Check if uv is installed -ifdef UV_VERSION - echo "uv found $(UV_VERSION)" - echo "running uv update" - uv self update -else +ifndef UV_VERSION echo "uv could not be found" echo "Installing uv" curl -LsSf https://astral.sh/uv/install.sh | sh + echo "uv installed" + echo "Please restart your shell." + exit 1 +else + echo "uv found $(UV_VERSION)" + echo "running uv update" + uv self update endif .ONESHELL: From d9fd8ff5ba12c8047ca582fcd17f85bbe1978357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nico=20M=C3=B6ller?= Date: Fri, 29 Nov 2024 18:01:52 +0100 Subject: [PATCH 22/23] Python: Fix Onnx Connector Memory Problem with Onnx (#9716) Onnx currently faces memory issues when functionality is divided among multiple methods. ### Motivation and Context I was experiencing weird, non-reproducible memory issues with the connector when using Phi-3 vision; after tracing the memory, it turned out that there are some issues when parameter preparation and generation are not in the same function. I am already in contact with the PG to address the issue in onnx as well. There seems to be a memory problem with pybind, because the parameters show non-deterministic behavior when they should be deterministic. To fix the current problem, I've decided to merge the parameter method and the generation method.
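For illustration only (this snippet is not part of the patch): a minimal Python sketch of the single-scope pattern described above, using the onnxruntime_genai calls that also appear in the diff below. The function name, the decoding loop after `is_done()`, and the search-option plumbing are assumptions based on typical usage of the library, not code taken from this PR.

```python
import onnxruntime_genai as og


def generate_text(model: og.Model, tokenizer: og.Tokenizer, prompt: str, **search_options) -> str:
    # GeneratorParams is created and consumed inside one function, so the
    # pybind-backed parameter object and the Generator that uses it share a
    # single, deterministic lifetime instead of crossing method boundaries.
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    params.input_ids = tokenizer.encode(prompt)

    generator = og.Generator(model, params)
    token_ids: list[int] = []
    while not generator.is_done():
        generator.compute_logits()
        generator.generate_next_token()
        token_ids.append(generator.get_next_tokens()[0])
    return tokenizer.decode(token_ids)
```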
### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: Co-authored-by: Tao Chen --- .../services/onnx_gen_ai_completion_base.py | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py index c7e2c47d12d4..40ce552ed88b 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_completion_base.py @@ -53,23 +53,6 @@ def __init__(self, ai_model_path: str, **kwargs) -> None: **kwargs, ) - def _prepare_input_params( - self, prompt: str, settings: OnnxGenAIPromptExecutionSettings, image: ImageContent | None = None - ) -> Any: - params = OnnxRuntimeGenAi.GeneratorParams(self.model) - params.set_search_options(**settings.prepare_settings_dict()) - if not self.enable_multi_modality: - input_tokens = self.tokenizer.encode(prompt) - params.input_ids = input_tokens - else: - if image is not None: - # With the use of Pybind there is currently no way to load images from bytes - # We can only open images from a file path currently - image = OnnxRuntimeGenAi.Images.open(str(image.uri)) - input_tokens = self.tokenizer(prompt, images=image) - params.set_inputs(input_tokens) - return params - async def _generate_next_token_async( self, prompt: str, @@ -77,7 +60,18 @@ async def _generate_next_token_async( image: ImageContent | None = None, ) -> AsyncGenerator[list[str], Any]: try: - params = self._prepare_input_params(prompt, settings, image) + params = OnnxRuntimeGenAi.GeneratorParams(self.model) + params.set_search_options(**settings.prepare_settings_dict()) + if not self.enable_multi_modality: + input_tokens = self.tokenizer.encode(prompt) + params.input_ids = input_tokens + else: + if image is not None: + # With the use of Pybind there is currently no way to load images from bytes + # We can only open images from a file path currently + image = OnnxRuntimeGenAi.Images.open(str(image.uri)) + input_tokens = self.tokenizer(prompt, images=image) + params.set_inputs(input_tokens) generator = OnnxRuntimeGenAi.Generator(self.model, params) while not generator.is_done(): From 467428172ea2d0de0cbbf39b0e449b913d03252c Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 2 Dec 2024 12:36:56 +0100 Subject: [PATCH 23/23] .Net: Bump ONNX to 0.5.2 (#9644) ### Motivation and Context - The latest 0.5.0 package also requires the caller to manage resources with an `OgaHandle` instance; when a service is instantiated, this resource also needs to be present and exposed together with the service. Otherwise a message is written to the console and the application crashes before finishing.
- Resolves #9628 --------- Co-authored-by: westey <164392973+westey-m@users.noreply.github.com> --- dotnet/Directory.Packages.props | 8 +-- .../ChatCompletion/Onnx_ChatCompletion.cs | 18 ++++- .../Onnx_ChatCompletionStreaming.cs | 67 +++++++++++-------- .../Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj | 2 +- dotnet/samples/Demos/OnnxSimpleRAG/Program.cs | 32 +++++++-- .../Connectors.Onnx/Connectors.Onnx.csproj | 2 +- .../OnnxRuntimeGenAIChatCompletionService.cs | 1 - ...OnnxRuntimeGenAIPromptExecutionSettings.cs | 1 + 8 files changed, 88 insertions(+), 43 deletions(-) diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 18d29da0e49d..51ad9b7923f1 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -43,7 +43,7 @@ - + @@ -161,8 +161,8 @@ runtime; build; native; contentfiles; analyzers; buildtransitive - - - + + + \ No newline at end of file diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs index 563ed3475b5e..823e711d1942 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs @@ -34,7 +34,7 @@ public async Task ServicePromptAsync() Console.WriteLine("======== Onnx - Chat Completion ========"); - var chatService = new OnnxRuntimeGenAIChatCompletionService( + using var chatService = new OnnxRuntimeGenAIChatCompletionService( modelId: TestConfiguration.Onnx.ModelId, modelPath: TestConfiguration.Onnx.ModelPath); @@ -105,5 +105,21 @@ public async Task ChatPromptAsync() reply = await kernel.InvokePromptAsync(chatPrompt.ToString()); Console.WriteLine(reply); + + DisposeServices(kernel); + } + + /// + /// To avoid any potential memory leak all disposable services created by the kernel are disposed. 
+ /// + /// Target kernel + private static void DisposeServices(Kernel kernel) + { + foreach (var target in kernel + .GetAllServices() + .OfType()) + { + target.Dispose(); + } } } diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs index d6ad1f05e7f2..2c1bd1369677 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs @@ -29,18 +29,36 @@ public class Onnx_ChatCompletionStreaming(ITestOutputHelper output) : BaseTest(o /// /// [Fact] - public Task StreamChatAsync() + public async Task StreamChatAsync() { Assert.NotNull(TestConfiguration.Onnx.ModelId); // dotnet user-secrets set "Onnx:ModelId" "" Assert.NotNull(TestConfiguration.Onnx.ModelPath); // dotnet user-secrets set "Onnx:ModelPath" "" Console.WriteLine("======== Onnx - Chat Completion Streaming ========"); - var chatService = new OnnxRuntimeGenAIChatCompletionService( + using var chatService = new OnnxRuntimeGenAIChatCompletionService( modelId: TestConfiguration.Onnx.ModelId, modelPath: TestConfiguration.Onnx.ModelPath); - return this.StartStreamingChatAsync(chatService); + Console.WriteLine("Chat content:"); + Console.WriteLine("------------------------"); + + var chatHistory = new ChatHistory("You are a librarian, expert about books"); + OutputLastMessage(chatHistory); + + // First user message + chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); + OutputLastMessage(chatHistory); + + // First assistant message + await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant); + + // Second user message + chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"); + OutputLastMessage(chatHistory); + + // Second assistant message + await StreamMessageOutputAsync(chatService, chatHistory, AuthorRole.Assistant); } /// @@ -86,6 +104,8 @@ public async Task StreamChatPromptAsync() reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString()); Console.WriteLine(reply); + + DisposeServices(kernel); } /// @@ -115,7 +135,7 @@ public async Task StreamTextFromChatAsync() Console.WriteLine("======== Stream Text from Chat Content ========"); // Create chat completion service - var chatService = new OnnxRuntimeGenAIChatCompletionService( + using var chatService = new OnnxRuntimeGenAIChatCompletionService( modelId: TestConfiguration.Onnx.ModelId, modelPath: TestConfiguration.Onnx.ModelPath); @@ -135,30 +155,7 @@ public async Task StreamTextFromChatAsync() } } - private async Task StartStreamingChatAsync(IChatCompletionService chatCompletionService) - { - Console.WriteLine("Chat content:"); - Console.WriteLine("------------------------"); - - var chatHistory = new ChatHistory("You are a librarian, expert about books"); - OutputLastMessage(chatHistory); - - // First user message - chatHistory.AddUserMessage("Hi, I'm looking for book suggestions"); - OutputLastMessage(chatHistory); - - // First assistant message - await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); - - // Second user message - chatHistory.AddUserMessage("I love history and philosophy, I'd like to learn something new about Greece, any suggestion?"); - OutputLastMessage(chatHistory); - - // Second assistant message - await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); - } - - private async Task 
StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) + private async Task StreamMessageOutputAsync(OnnxRuntimeGenAIChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) { bool roleWritten = false; string fullMessage = string.Empty; @@ -205,4 +202,18 @@ private async Task StreamMessageOutputFromKernelAsync(Kernel kernel, str Console.WriteLine("\n------------------------"); return fullMessage; } + + /// + /// To avoid any potential memory leak all disposable services created by the kernel are disposed. + /// + /// Target kernel + private static void DisposeServices(Kernel kernel) + { + foreach (var target in kernel + .GetAllServices() + .OfType()) + { + target.Dispose(); + } + } } diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj b/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj index 8b22d740edb9..bbb5f38ba81d 100644 --- a/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj +++ b/dotnet/samples/Demos/OnnxSimpleRAG/OnnxSimpleRAG.csproj @@ -3,7 +3,7 @@ Exe net8.0 - $(NoWarn);CA2007;CS0612;VSTHRD111 + $(NoWarn);CA2007;CS0612;VSTHRD111;SKEXP0070;SKEXP0050;SKEXP0001;SKEXP0020 5ee045b0-aea3-4f08-8d31-32d1a6f8fed0 diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs index 9727e600145f..4ab9cd0ef9b5 100644 --- a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs +++ b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs @@ -1,17 +1,15 @@ // Copyright (c) Microsoft. All rights reserved. -#pragma warning disable SKEXP0070 -#pragma warning disable SKEXP0050 -#pragma warning disable SKEXP0001 -#pragma warning disable SKEXP0020 - using System; using System.IO; +using System.Linq; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.VectorData; +using Microsoft.ML.OnnxRuntimeGenAI; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.InMemory; +using Microsoft.SemanticKernel.Connectors.Onnx; using Microsoft.SemanticKernel.Data; using Microsoft.SemanticKernel.Embeddings; using Microsoft.SemanticKernel.PromptTemplates.Handlebars; @@ -29,6 +27,10 @@ // Path to the vocab file your ONNX BGE-MICRO-V2 model var embeddingVocabPath = config["Onnx:EmbeddingVocabPath"]!; +// If using Onnx GenAI 0.5.0 or later, the OgaHandle class must be used to track +// resources used by the Onnx services, before using any of the Onnx services. 
+using var ogaHandle = new OgaHandle(); + // Load the services var builder = Kernel.CreateBuilder() .AddOnnxRuntimeGenAIChatCompletion(chatModelId, chatModelPath) @@ -38,7 +40,7 @@ var kernel = builder.Build(); // Get the instances of the services -var chatService = kernel.GetRequiredService(); +using var chatService = kernel.GetRequiredService() as OnnxRuntimeGenAIChatCompletionService; var embeddingService = kernel.GetRequiredService(); // Create a vector store and a collection to store information @@ -71,9 +73,12 @@ await collection.UpsertAsync(new() Console.Write("User > "); var question = Console.ReadLine()!; + // Clean resources and exit the demo if the user input is null or empty if (question is null || string.IsNullOrWhiteSpace(question)) { - // Exit the demo if the user input is null or empty + // To avoid any potential memory leak all disposable + // services created by the kernel are disposed + DisposeServices(kernel); return; } @@ -105,6 +110,19 @@ await collection.UpsertAsync(new() Console.WriteLine(); } +static void DisposeServices(Kernel kernel) +{ + foreach (var target in kernel + .GetAllServices() + .OfType()) + { + target.Dispose(); + } +} + +/// +/// Information item to represent the embedding data stored in the memory +/// internal sealed class InformationItem { [VectorStoreRecordKey] diff --git a/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj b/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj index df49c6da0bfb..b5ff2314face 100644 --- a/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj +++ b/dotnet/src/Connectors/Connectors.Onnx/Connectors.Onnx.csproj @@ -5,8 +5,8 @@ Microsoft.SemanticKernel.Connectors.Onnx $(AssemblyName) net8.0;netstandard2.0 - alpha true + alpha diff --git a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs index 8a6210253729..7419b07799d0 100644 --- a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs +++ b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs @@ -25,7 +25,6 @@ public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionServi private readonly JsonSerializerOptions? _jsonSerializerOptions; private Model? _model; private Tokenizer? _tokenizer; - private Dictionary AttributesInternal { get; } = new(); /// diff --git a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs index 8a7c0ccd3cae..e8c7f058fd24 100644 --- a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs +++ b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIPromptExecutionSettings.cs @@ -11,6 +11,7 @@ namespace Microsoft.SemanticKernel.Connectors.Onnx; /// /// OnnxRuntimeGenAI Execution Settings. /// +[JsonNumberHandling(JsonNumberHandling.AllowReadingFromString)] public sealed class OnnxRuntimeGenAIPromptExecutionSettings : PromptExecutionSettings { ///