Merge branch 'structured-outputs-in-prompt' of https://github.com/dmytrostruk/semantic-kernel into structured-outputs-in-prompt
microsoft · Dec 3, 2024 · d4e791b · d4e791b
2 parents 611cf08 + fe80d76
commit d4e791b
Show file tree

Hide file tree

Showing 32 changed files with 300 additions and 187 deletions.
diff --git a/docs/decisions/0031-feature-branch-strategy.md b/docs/decisions/0031-feature-branch-strategy.md
@@ -96,7 +96,7 @@ Cons:
 | Windows Support       | No                                                  | Yes                                                                                     |
 | Linux Support         | Yes                                                 | Yes                                                                                     |
 | MacOS Support         | Yes                                                 | Yes                                                                                     |
-| Number of Models      | [61](https://ollama.ai/library) +Any GGUF converted | [25](https://github.com/lmstudio-ai/model-catalog/tree/main/models) +Any GGUF Converted |
+| Number of Models      | [61](https://ollama.com/search) +Any GGUF converted | [25](https://github.com/lmstudio-ai/model-catalog/tree/main/models) +Any GGUF Converted |
 
 | Model Support   | Ollama | LM Studio |
 | --------------- | ------ | --------- |

diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props
@@ -17,7 +17,7 @@
     <PackageVersion Include="Microsoft.ML.Tokenizers.Data.Cl100kBase" Version="1.0.0" />
     <PackageVersion Include="Microsoft.IdentityModel.JsonWebTokens" Version="6.34.0" />
     <PackageVersion Include="Microsoft.VisualStudio.Threading" Version="17.12.19" />
-    <PackageVersion Include="MSTest.TestFramework" Version="3.6.1" />
+    <PackageVersion Include="MSTest.TestFramework" Version="3.6.3" />
     <PackageVersion Include="Newtonsoft.Json" Version="13.0.3" />
     <PackageVersion Include="Npgsql" Version="8.0.6" />
     <PackageVersion Include="OpenAI" Version="[2.1.0-beta.2]" />
@@ -82,9 +82,9 @@
     <PackageVersion Include="Microsoft.Extensions.Options.DataAnnotations" Version="8.0.0" />
     <PackageVersion Include="Microsoft.Extensions.TimeProvider.Testing" Version="8.10.0" />
     <PackageVersion Include="Microsoft.Extensions.FileProviders.Physical" Version="8.0.0" />
-    <PackageVersion Include="Microsoft.Extensions.FileProviders.Embedded" Version="8.0.0" />
+    <PackageVersion Include="Microsoft.Extensions.FileProviders.Embedded" Version="8.0.11" />
     <!-- Test -->
-    <PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.11.1" />
+    <PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
     <PackageVersion Include="Moq" Version="[4.18.4]" />
     <PackageVersion Include="System.Text.RegularExpressions" Version="4.3.1" />
     <PackageVersion Include="System.Threading.Channels" Version="8.0.0" />

diff --git a/dotnet/docs/MODELS.md b/dotnet/docs/MODELS.md
@@ -8,7 +8,7 @@ In the core Semantic Kernel repo, we plan on supporting up to four deployment ty
 
 - Dedicated API endpoints (e.g., OpenAI's APIs, Mistral.AI, and Google Gemini)
 - Azure AI deployments via the [model catalog](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/model-catalog)
-- Local deployments via [Ollama](https://ollama.ai/library)
+- Local deployments via [Ollama](https://ollama.ai/)
 - Hugging face deployment using the [Hugging Face inference API](https://huggingface.co/docs/api-inference/index)
 
 To support these different deployment types, we will follow a similar pattern to the Azure OpenAI and OpenAI connectors. Each connector uses the same underlying model and abstractions, but the connector constructors may take different parameters. For example, the Azure OpenAI connector expects an Azure endpoint and key, whereas the OpenAI connector expects an OpenAI organization ID and API key.
@@ -23,25 +23,25 @@ Please note that not all of the model interfaces are defined yet. As part of con
 
 ### OpenAI
 
-| Priority | Model                   | Status      | Interface                      | Deployment type | GitHub issue | Developer   | Reviewer |
-| -------- | ----------------------- | ----------- | ------------------------------ | --------------- | ------------ | ----------- | -------- |
-| P0       | GPT-3.5-turbo           | Complete    | `IChatCompletion`              | OpenAI API      | N/A          | N/A         | N/A      |
-| P0       | GPT-3.5-turbo           | Complete    | `IChatCompletion`              | Azure AI        | N/A          | N/A         | N/A      |
-| P0       | GPT-4                   | Complete    | `IChatCompletion`              | OpenAI API      | N/A          | N/A         | N/A      |
-| P0       | GPT-4                   | Complete    | `IChatCompletion`              | Azure AI        | N/A          | N/A         | N/A      |
-| P0       | GPT-4v                  | Complete    | `IChatCompletion`              | OpenAI API      | N/A          | N/A         | N/A      |
-| P0       | GPT-4v                  | Complete    | `IChatCompletion`              | Azure AI        | N/A          | N/A         | N/A      |
-| P0       | text-embedding-ada-002  | Preview     | `IEmbeddingGeneration`         | OpenAI API      | N/A          | N/A         | N/A      |
-| P0       | text-embedding-ada-002  | Preview     | `IEmbeddingGeneration`         | Azure AI        | N/A          | N/A         | N/A      |
-| P0       | DALL·E 3                | Preview     | `ITextToImage`                 | OpenAI API      | N/A          | N/A         | N/A      |
-| P0       | DALL·E 3                | Preview     | `ITextToImage`                 | Azure AI        | N/A          | N/A         | N/A      |
-| P0       | Text-to-speech          | Complete    | `ITextToSpeech`                | OpenAI API      | TBD          | dmytrostruk | TBD      |
-| P0       | Speech-to-text          | Complete    | `ISpeechRecognition`           | OpenAI API      | TBD          | dmytrostruk | TBD      |
-| P1       | openai-whisper-large-v3 | Not started | `ISpeechRecognition`           | Azure AI        | TBD          | TBD         | TBD      |
-| P1       | openai-whisper-large-v3 | Not started | `ISpeechRecognition`           | Hugging Face    | TBD          | TBD         | TBD      |
+| Priority | Model                   | Status      | Interface                      | Deployment type | GitHub issue | Developer    | Reviewer    |
+| -------- | ----------------------- | ----------- | ------------------------------ | --------------- | ------------ | ------------ | ----------- |
+| P0       | GPT-3.5-turbo           | Complete    | `IChatCompletion`              | OpenAI API      | N/A          | N/A          | N/A         |
+| P0       | GPT-3.5-turbo           | Complete    | `IChatCompletion`              | Azure AI        | N/A          | N/A          | N/A         |
+| P0       | GPT-4                   | Complete    | `IChatCompletion`              | OpenAI API      | N/A          | N/A          | N/A         |
+| P0       | GPT-4                   | Complete    | `IChatCompletion`              | Azure AI        | N/A          | N/A          | N/A         |
+| P0       | GPT-4v                  | Complete    | `IChatCompletion`              | OpenAI API      | N/A          | N/A          | N/A         |
+| P0       | GPT-4v                  | Complete    | `IChatCompletion`              | Azure AI        | N/A          | N/A          | N/A         |
+| P0       | text-embedding-ada-002  | Preview     | `IEmbeddingGeneration`         | OpenAI API      | N/A          | N/A          | N/A         |
+| P0       | text-embedding-ada-002  | Preview     | `IEmbeddingGeneration`         | Azure AI        | N/A          | N/A          | N/A         |
+| P0       | DALL·E 3                | Preview     | `ITextToImage`                 | OpenAI API      | N/A          | N/A          | N/A         |
+| P0       | DALL·E 3                | Preview     | `ITextToImage`                 | Azure AI        | N/A          | N/A          | N/A         |
+| P0       | Text-to-speech          | Complete    | `ITextToSpeech`                | OpenAI API      | TBD          | dmytrostruk  | TBD         |
+| P0       | Speech-to-text          | Complete    | `ISpeechRecognition`           | OpenAI API      | TBD          | dmytrostruk  | TBD         |
+| P1       | openai-whisper-large-v3 | Not started | `ISpeechRecognition`           | Azure AI        | TBD          | TBD          | TBD         |
+| P1       | openai-whisper-large-v3 | Not started | `ISpeechRecognition`           | Hugging Face    | TBD          | TBD          | TBD         |
 | P2       | Moderation              | In Progress | `ITextClassification`          | OpenAI API      | #5062        | Krzysztof318 | MarkWallace |
-| P2       | clip-vit-base-patch32   | Not started | `IZeroShotImageClassification` | Azure AI        | TBD          | TBD         | TBD      |
-| P2       | clip-vit-base-patch32   | Not started | `IZeroShotImageClassification` | Hugging Face    | TBD          | TBD         | TBD      |
+| P2       | clip-vit-base-patch32   | Not started | `IZeroShotImageClassification` | Azure AI        | TBD          | TBD          | TBD         |
+| P2       | clip-vit-base-patch32   | Not started | `IZeroShotImageClassification` | Hugging Face    | TBD          | TBD          | TBD         |
 
 ### Microsoft
 

diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -146,6 +146,7 @@ environments = [
 ]
 
 [tool.pytest.ini_options]
+testpaths = 'tests'
 addopts = "-ra -q -r fEX"
 asyncio_default_fixture_loop_scope = "function"
 filterwarnings = [

diff --git a/python/samples/concepts/filtering/auto_function_invoke_filters.py b/python/samples/concepts/filtering/auto_function_invoke_filters.py
@@ -4,19 +4,12 @@
 import os
 
 from semantic_kernel import Kernel
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.connectors.ai import FunctionChoiceBehavior
 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
-from semantic_kernel.contents import ChatHistory
-from semantic_kernel.contents.chat_message_content import ChatMessageContent
-from semantic_kernel.contents.function_call_content import FunctionCallContent
-from semantic_kernel.contents.function_result_content import FunctionResultContent
+from semantic_kernel.contents import ChatHistory, ChatMessageContent, FunctionCallContent, FunctionResultContent
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
-from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
-    AutoFunctionInvocationContext,
-)
-from semantic_kernel.filters.filter_types import FilterTypes
-from semantic_kernel.functions import KernelArguments
-from semantic_kernel.functions.function_result import FunctionResult
+from semantic_kernel.filters import AutoFunctionInvocationContext, FilterTypes
+from semantic_kernel.functions import FunctionResult, KernelArguments
 
 system_message = """
 You are a chat bot. Your name is Mosscap and

diff --git a/python/samples/concepts/filtering/function_invocation_filters.py b/python/samples/concepts/filtering/function_invocation_filters.py
@@ -6,12 +6,11 @@
 from collections.abc import Callable, Coroutine
 from typing import Any
 
-from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
-from semantic_kernel.contents.chat_history import ChatHistory
-from semantic_kernel.exceptions.kernel_exceptions import OperationCancelledException
-from semantic_kernel.filters.filter_types import FilterTypes
-from semantic_kernel.filters.functions.function_invocation_context import FunctionInvocationContext
-from semantic_kernel.kernel import Kernel
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
+from semantic_kernel.contents import ChatHistory
+from semantic_kernel.exceptions import OperationCancelledException
+from semantic_kernel.filters import FilterTypes, FunctionInvocationContext
 
 logger = logging.getLogger(__name__)
 

diff --git a/python/samples/concepts/filtering/function_invocation_filters_stream.py b/python/samples/concepts/filtering/function_invocation_filters_stream.py
@@ -3,15 +3,15 @@
 import asyncio
 import logging
 import os
+from collections.abc import Callable, Coroutine
 from functools import reduce
+from typing import Any
 
-from semantic_kernel.connectors.ai.open_ai.services.open_ai_chat_completion import OpenAIChatCompletion
-from semantic_kernel.contents import AuthorRole
-from semantic_kernel.contents.chat_history import ChatHistory
-from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
-from semantic_kernel.filters.filter_types import FilterTypes
-from semantic_kernel.functions.function_result import FunctionResult
-from semantic_kernel.kernel import Kernel
+from semantic_kernel import Kernel
+from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion
+from semantic_kernel.contents import AuthorRole, ChatHistory, StreamingChatMessageContent
+from semantic_kernel.filters import FilterTypes, FunctionInvocationContext
+from semantic_kernel.functions import FunctionResult
 
 logger = logging.getLogger(__name__)
 
@@ -32,15 +32,20 @@
 # in the specific case of a filter for streaming functions, you need to override the generator
 # that is present in the function_result.value as seen below.
 @kernel.filter(FilterTypes.FUNCTION_INVOCATION)
-async def streaming_exception_handling(context, next):
+async def streaming_exception_handling(
+    context: FunctionInvocationContext,
+    next: Callable[[FunctionInvocationContext], Coroutine[Any, Any, None]],
+):
     await next(context)
 
     async def override_stream(stream):
         try:
             async for partial in stream:
                 yield partial
         except Exception as e:
-            yield [StreamingChatMessageContent(role=AuthorRole.ASSISTANT, content=f"Exception caught: {e}")]
+            yield [
+                StreamingChatMessageContent(role=AuthorRole.ASSISTANT, content=f"Exception caught: {e}", choice_index=0)
+            ]
 
     stream = context.result.value
     context.result = FunctionResult(function=context.result.function, value=override_stream(stream))

diff --git a/python/semantic_kernel/agents/open_ai/assistant_content_generation.py b/python/semantic_kernel/agents/open_ai/assistant_content_generation.py
@@ -85,17 +85,19 @@ def get_message_contents(message: "ChatMessageContent") -> list[dict[str, Any]]:
     """
     contents: list[dict[str, Any]] = []
     for content in message.items:
-        if isinstance(content, TextContent):
-            contents.append({"type": "text", "text": content.text})
-        elif isinstance(content, ImageContent) and content.uri:
-            contents.append(content.to_dict())
-        elif isinstance(content, FileReferenceContent):
-            contents.append({
-                "type": "image_file",
-                "image_file": {"file_id": content.file_id},
-            })
-        elif isinstance(content, FunctionResultContent):
-            contents.append({"type": "text", "text": content.result})
+        match content:
+            case TextContent():
+                contents.append({"type": "text", "text": content.text})
+            case ImageContent():
+                if content.uri:
+                    contents.append(content.to_dict())
+            case FileReferenceContent():
+                contents.append({
+                    "type": "image_file",
+                    "image_file": {"file_id": content.file_id},
+                })
+            case FunctionResultContent():
+                contents.append({"type": "text", "text": content.result})
     return contents
 
 

diff --git a/python/semantic_kernel/agents/open_ai/azure_assistant_agent.py b/python/semantic_kernel/agents/open_ai/azure_assistant_agent.py
@@ -54,11 +54,11 @@ def __init__(
         enable_code_interpreter: bool | None = None,
         enable_file_search: bool | None = None,
         enable_json_response: bool | None = None,
-        file_ids: list[str] | None = [],
+        file_ids: list[str] | None = None,
         temperature: float | None = None,
         top_p: float | None = None,
         vector_store_id: str | None = None,
-        metadata: dict[str, Any] | None = {},
+        metadata: dict[str, Any] | None = None,
         max_completion_tokens: int | None = None,
         max_prompt_tokens: int | None = None,
         parallel_tool_calls_enabled: bool | None = True,
@@ -150,11 +150,11 @@ def __init__(
             "enable_code_interpreter": enable_code_interpreter,
             "enable_file_search": enable_file_search,
             "enable_json_response": enable_json_response,
-            "file_ids": file_ids,
+            "file_ids": file_ids or [],
             "temperature": temperature,
             "top_p": top_p,
             "vector_store_id": vector_store_id,
-            "metadata": metadata,
+            "metadata": metadata or {},
             "max_completion_tokens": max_completion_tokens,
             "max_prompt_tokens": max_prompt_tokens,
             "parallel_tool_calls_enabled": parallel_tool_calls_enabled,
@@ -199,7 +199,7 @@ async def create(
         temperature: float | None = None,
         top_p: float | None = None,
         vector_store_id: str | None = None,
-        metadata: dict[str, Any] | None = {},
+        metadata: dict[str, Any] | None = None,
         max_completion_tokens: int | None = None,
         max_prompt_tokens: int | None = None,
         parallel_tool_calls_enabled: bool | None = True,
@@ -268,7 +268,7 @@ async def create(
             temperature=temperature,
             top_p=top_p,
             vector_store_id=vector_store_id,
-            metadata=metadata,
+            metadata=metadata or {},
             max_completion_tokens=max_completion_tokens,
             max_prompt_tokens=max_prompt_tokens,
             parallel_tool_calls_enabled=parallel_tool_calls_enabled,

diff --git a/python/semantic_kernel/agents/open_ai/function_action_result.py b/python/semantic_kernel/agents/open_ai/function_action_result.py
@@ -0,0 +1,19 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+from dataclasses import dataclass
+
+from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.utils.experimental_decorator import experimental_class
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+@experimental_class
+@dataclass
+class FunctionActionResult:
+    """Function Action Result."""
+
+    function_call_content: ChatMessageContent | None
+    function_result_content: ChatMessageContent | None
+    tool_outputs: list[dict[str, str]] | None
diff --git a/python/semantic_kernel/agents/open_ai/open_ai_assistant_agent.py b/python/semantic_kernel/agents/open_ai/open_ai_assistant_agent.py
@@ -50,11 +50,11 @@ def __init__(
         enable_code_interpreter: bool | None = None,
         enable_file_search: bool | None = None,
         enable_json_response: bool | None = None,
-        code_interpreter_file_ids: list[str] | None = [],
+        code_interpreter_file_ids: list[str] | None = None,
         temperature: float | None = None,
         top_p: float | None = None,
         vector_store_id: str | None = None,
-        metadata: dict[str, Any] | None = {},
+        metadata: dict[str, Any] | None = None,
         max_completion_tokens: int | None = None,
         max_prompt_tokens: int | None = None,
         parallel_tool_calls_enabled: bool | None = True,
@@ -125,11 +125,11 @@ def __init__(
             "enable_code_interpreter": enable_code_interpreter,
             "enable_file_search": enable_file_search,
             "enable_json_response": enable_json_response,
-            "code_interpreter_file_ids": code_interpreter_file_ids,
+            "code_interpreter_file_ids": code_interpreter_file_ids or [],
             "temperature": temperature,
             "top_p": top_p,
             "vector_store_id": vector_store_id,
-            "metadata": metadata,
+            "metadata": metadata or {},
             "max_completion_tokens": max_completion_tokens,
             "max_prompt_tokens": max_prompt_tokens,
             "parallel_tool_calls_enabled": parallel_tool_calls_enabled,
@@ -173,7 +173,7 @@ async def create(
         temperature: float | None = None,
         top_p: float | None = None,
         vector_store_id: str | None = None,
-        metadata: dict[str, Any] | None = {},
+        metadata: dict[str, Any] | None = None,
         max_completion_tokens: int | None = None,
         max_prompt_tokens: int | None = None,
         parallel_tool_calls_enabled: bool | None = True,
@@ -236,7 +236,7 @@ async def create(
             temperature=temperature,
             top_p=top_p,
             vector_store_id=vector_store_id,
-            metadata=metadata,
+            metadata=metadata or {},
             max_completion_tokens=max_completion_tokens,
             max_prompt_tokens=max_prompt_tokens,
             parallel_tool_calls_enabled=parallel_tool_calls_enabled,