Python: Yield FunctionResultContent in streaming chat completion path. Update tests. (#9974)

### Motivation and Context

Currently, SK's Python streaming chat completion path only yields two content
types: `StreamingChatMessageContent` and `FunctionCallContent`. It does not
yield `FunctionResultContent`, which is valuable for some use cases.


### Description

This PR updates the streaming chat completion path to yield
`FunctionResultContent` when it exists. After the function call results are
merged into a `StreamingChatMessageContent`, we check whether that message has
items (of type `FunctionResultContent`) and, if so, yield them. The filter path
still works because once they're yielded, we break out of the function-calling
loop.
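
As a rough sketch (not code from this PR) of what a consumer can now do with the stream: the hypothetical `consume_stream` helper below assumes a kernel and chat function configured as in the updated samples, and that function results surface on a chunk's `items` collection:

```python
from semantic_kernel.contents.function_result_content import FunctionResultContent
from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole


async def consume_stream(kernel, chat_function, arguments):
    # kernel.invoke_stream yields lists of streaming content; with auto function
    # calling enabled, chunks carrying the merged function results now show up too.
    async for message in kernel.invoke_stream(chat_function, arguments=arguments):
        chunk = message[0]
        if not isinstance(chunk, StreamingChatMessageContent):
            continue
        for item in chunk.items:
            if isinstance(item, FunctionResultContent):
                # Hypothetical handling of a tool result surfaced mid-stream.
                print(f"\n[tool result] {item.function_name}: {item.result}")
        if chunk.role == AuthorRole.ASSISTANT and chunk.content:
            print(chunk.content, end="")
```

Calling this with the kernel, chat function, and arguments used in the updated samples should surface tool results inline as they complete, alongside the assistant text.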

We also need to include the `ai_model_id`, if it exists, from the current
`PromptExecutionSettings`: when a `reduce` operation adds two streaming chat
message chunks together, the `StreamingChatMessageContent` that carries the
function results will break if `ai_model_id` is not set (the error is raised in
the `__add__` override of `StreamingChatMessageContent`).
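
As a rough illustration (again, not code from this PR) of why the model id matters when reducing chunks, assuming the public `StreamingChatMessageContent` constructor fields shown here and a placeholder model id:

```python
from functools import reduce

from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole

# Two chunks from the same choice; both carry the same ai_model_id, so
# StreamingChatMessageContent.__add__ accepts the merge. If the ids differed
# (for example, one chunk left it unset), the addition would raise instead.
chunks = [
    StreamingChatMessageContent(
        role=AuthorRole.ASSISTANT, choice_index=0, content="Hello, ", ai_model_id="gpt-4o-mini"
    ),
    StreamingChatMessageContent(
        role=AuthorRole.ASSISTANT, choice_index=0, content="world!", ai_model_id="gpt-4o-mini"
    ),
]

merged = reduce(lambda first, second: first + second, chunks)
print(merged.content)  # "Hello, world!"
```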

Some unit tests that cover function calling were also updated: the test JSON
function args were breaking in the `json.loads` call because they contained
single quotes instead of double quotes. We now sanitize the args, just in case,
so parsing doesn't break there.
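
The actual sanitization lives in the updated tests; the `sanitize_json_args` helper below is only a hypothetical sketch of the idea (fall back to swapping single quotes for double quotes before parsing), not the exact code from this PR:

```python
import json


def sanitize_json_args(raw: str) -> dict:
    """Best-effort parse of function-call arguments that may use single quotes."""
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Fixtures such as "{'input': 3}" are not valid JSON; try again with
        # single quotes replaced by double quotes.
        return json.loads(raw.replace("'", '"'))


print(sanitize_json_args("{'input': 3, 'amount': 6175}"))  # {'input': 3, 'amount': 6175}
```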

This PR fixes:
- #9408
- #9968 


### Contribution Checklist


- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
moonbox3 authored Dec 18, 2024
1 parent 4650d27 commit 16690ed
Showing 21 changed files with 218 additions and 70 deletions.
@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -131,11 +132,13 @@ async def handle_streaming(
     streamed_chunks: list[StreamingChatMessageContent] = []
     result_content = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -130,13 +131,15 @@ async def handle_streaming(

     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
-    result_content = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins.math_plugin import MathPlugin
 from semantic_kernel.core_plugins.time_plugin import TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -140,11 +141,13 @@ async def handle_streaming(
     streamed_chunks: list[StreamingChatMessageContent] = []
     result_content = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             result_content.append(message[0])
             print(str(message[0]), end="")

@@ -6,12 +6,13 @@
 from typing import TYPE_CHECKING

 from semantic_kernel import Kernel
-from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
+from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.functions import KernelArguments
@@ -131,20 +132,32 @@ async def handle_streaming(

     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content = []
     async for message in response:
-        if isinstance(message[0], StreamingChatMessageContent):
+        if (
+            (
+                not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+                or execution_settings.function_choice_behavior.type_ == FunctionChoiceType.REQUIRED
+            )
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
+        ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")

     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
-        if hasattr(streaming_chat_message, "content"):
+        if hasattr(streaming_chat_message, "content") and streaming_chat_message.content:
             print(streaming_chat_message.content)
         print("Printing returned tool calls...")
         print_tool_calls(streaming_chat_message)

     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None


 async def chat() -> bool:
@@ -164,7 +177,7 @@ async def chat() -> bool:
     arguments["chat_history"] = history

     if stream:
-        await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_function, arguments=arguments)

@@ -177,6 +190,9 @@ async def chat() -> bool:
             return True

     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True


@@ -11,6 +11,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
@@ -144,7 +145,7 @@ async def handle_streaming(
     kernel: Kernel,
     chat_function: "KernelFunction",
     arguments: KernelArguments,
-) -> None:
+) -> str | None:
     response = kernel.invoke_stream(
         chat_function,
         return_function_results=False,
@@ -153,20 +154,29 @@

     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")

     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
         if hasattr(streaming_chat_message, "content"):
             print(streaming_chat_message.content)
         print("Auto tool calls is disabled, printing returned tool calls...")
         print_tool_calls(streaming_chat_message)

     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None


 async def chat() -> bool:
@@ -187,7 +197,7 @@ async def chat() -> bool:

     stream = False
     if stream:
-        await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments)

@@ -200,6 +210,9 @@ async def chat() -> bool:
             return True

     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True


@@ -12,6 +12,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.core_plugins import MathPlugin, TimePlugin
 from semantic_kernel.filters.auto_function_invocation.auto_function_invocation_context import (
     AutoFunctionInvocationContext,
@@ -141,7 +142,7 @@ async def handle_streaming(
     kernel: Kernel,
     chat_function: "KernelFunction",
     arguments: KernelArguments,
-) -> None:
+) -> str | None:
     response = kernel.invoke_stream(
         chat_function,
         return_function_results=False,
@@ -150,20 +151,29 @@

     print("Mosscap:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
+    result_content: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            result_content.append(message[0])
             print(str(message[0]), end="")

     if streamed_chunks:
         streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks)
         if hasattr(streaming_chat_message, "content"):
             print(streaming_chat_message.content)
         print("Auto tool calls is disabled, printing returned tool calls...")
         print_tool_calls(streaming_chat_message)

     print("\n")
+    if result_content:
+        return "".join([str(content) for content in result_content])
+    return None


 async def chat() -> bool:
@@ -184,8 +194,7 @@ async def chat() -> bool:

     stream = False
     if stream:
-        pass
-        # await handle_streaming(kernel, chat_function, arguments=arguments)
+        result = await handle_streaming(kernel, chat_function, arguments=arguments)
     else:
         result = await kernel.invoke(chat_plugin["ChatBot"], arguments=arguments)

@@ -198,6 +207,9 @@ async def chat() -> bool:
             return True

     print(f"Mosscap:> {result}")
+
+    history.add_user_message(user_input)
+    history.add_assistant_message(str(result))
     return True


@@ -71,7 +71,8 @@ async def chat(chat_history: ChatHistory) -> bool:
         function_name="chat", plugin_name="chat", user_input=user_input, chat_history=chat_history
     )
     async for message in responses:
-        streamed_chunks.append(message[0])
+        if isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
+            streamed_chunks.append(message[0])
         print(str(message[0]), end="")
     print("")
     chat_history.add_user_message(user_input)
@@ -17,6 +17,7 @@
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
 from semantic_kernel.contents.function_call_content import FunctionCallContent
 from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.functions import KernelArguments, KernelFunction, KernelPlugin

 # region Helper functions
@@ -209,11 +210,13 @@ async def handle_streaming(
     print("Security Agent:> ", end="")
     streamed_chunks: list[StreamingChatMessageContent] = []
     async for message in response:
-        if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and isinstance(
-            message[0], StreamingChatMessageContent
+        if (
+            not execution_settings.function_choice_behavior.auto_invoke_kernel_functions
+            and isinstance(message[0], StreamingChatMessageContent)
+            and message[0].role == AuthorRole.ASSISTANT
         ):
             streamed_chunks.append(message[0])
-        else:
+        elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT:
             print(str(message[0]), end="")

     if streamed_chunks:
6 changes: 4 additions & 2 deletions python/samples/learn_resources/templates.py
@@ -7,6 +7,7 @@
 from semantic_kernel import Kernel
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.contents.chat_message_content import ChatMessageContent
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.contents.utils.author_role import AuthorRole
 from semantic_kernel.prompt_template import InputVariable, PromptTemplateConfig

@@ -144,8 +145,9 @@ async def main():
         all_chunks = []
         print("Assistant:> ", end="")
         async for chunk in result:
-            all_chunks.append(chunk[0])
-            print(str(chunk[0]), end="")
+            if isinstance(chunk[0], StreamingChatMessageContent) and chunk[0].role == AuthorRole.ASSISTANT:
+                all_chunks.append(chunk[0])
+                print(str(chunk[0]), end="")
         print()

         history.add_user_message(request)
@@ -12,7 +12,10 @@

 from semantic_kernel.connectors.ai.function_call_behavior import FunctionCallBehavior
 from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
-from semantic_kernel.connectors.ai.function_calling_utils import merge_function_results
+from semantic_kernel.connectors.ai.function_calling_utils import (
+    merge_function_results,
+    merge_streaming_function_results,
+)
 from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior, FunctionChoiceType
 from semantic_kernel.const import AUTO_FUNCTION_INVOCATION_SPAN_NAME
 from semantic_kernel.contents.annotation_content import AnnotationContent
@@ -303,8 +306,18 @@ async def get_streaming_chat_message_contents(
                 ],
             )

+            # Merge and yield the function results, regardless of the termination status
+            # Include the ai_model_id so we can later add two streaming messages together
+            # Some settings may not have an ai_model_id, so we need to check for it
+            ai_model_id = self._get_ai_model_id(settings)
+            function_result_messages = merge_streaming_function_results(
+                messages=chat_history.messages[-len(results) :],
+                ai_model_id=ai_model_id,  # type: ignore
+            )
+            if self._yield_function_result_messages(function_result_messages):
+                yield function_result_messages
+
             if any(result.terminate for result in results if result is not None):
-                yield merge_function_results(chat_history.messages[-len(results) :])  # type: ignore
                 break

     async def get_streaming_chat_message_content(
@@ -415,4 +428,16 @@ def _start_auto_function_invocation_activity(self, kernel: "Kernel", settings: "

         return span

+    def _get_ai_model_id(self, settings: "PromptExecutionSettings") -> str:
+        """Retrieve the AI model ID from settings if available.
+
+        Attempt to get ai_model_id from the settings object. If it doesn't exist or
+        is blank, fallback to self.ai_model_id (from AIServiceClientBase).
+        """
+        return getattr(settings, "ai_model_id", self.ai_model_id) or self.ai_model_id
+
+    def _yield_function_result_messages(self, function_result_messages: list) -> bool:
+        """Determine if the function result messages should be yielded."""
+        return len(function_result_messages) > 0 and len(function_result_messages[0].items) > 0
+
     # endregion