From 69c7a6a61bb41d7bae29a0c6276717638d00a4bc Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Wed, 18 Dec 2024 19:39:16 +0900 Subject: [PATCH 1/7] Include a function_invoke_attempt index with Streaming CMC. Update tests and samples. --- .../anthropic_api_function_calling.py | 206 -------------- .../chat_completion_with_function_calling.py | 257 ++++++++++++++++++ .../chat_gpt_api_function_calling.py | 207 -------------- .../chat_mistral_ai_api_function_calling.py | 215 --------------- .../services/anthropic_chat_completion.py | 13 +- .../azure_ai_inference_chat_completion.py | 7 +- .../services/bedrock_chat_completion.py | 8 +- .../ai/chat_completion_client_base.py | 13 +- .../connectors/ai/function_calling_utils.py | 12 +- .../services/google_ai_chat_completion.py | 9 +- .../services/vertex_ai_chat_completion.py | 9 +- .../services/mistral_ai_chat_completion.py | 7 +- .../ollama/services/ollama_chat_completion.py | 15 +- .../services/onnx_gen_ai_chat_completion.py | 9 +- .../open_ai/services/azure_chat_completion.py | 3 +- .../services/open_ai_chat_completion_base.py | 6 +- .../streaming_chat_message_content.py | 36 ++- python/tests/samples/test_concepts.py | 8 +- 18 files changed, 377 insertions(+), 663 deletions(-) delete mode 100644 python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py delete mode 100644 python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py delete mode 100644 python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py diff --git a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py b/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py deleted file mode 100644 index 5769943157db..000000000000 --- a/python/samples/concepts/auto_function_calling/anthropic_api_function_calling.py +++ /dev/null @@ -1,206 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. 
-""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = False - -kernel = Kernel() - -# Note: the underlying model needs to support function calling. -# https://docs.anthropic.com/en/docs/build-with-claude/tool-use#choosing-a-model -kernel.add_service(AnthropicChatCompletion(service_id="chat", ai_model_id="claude-3-opus-20240229")) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"` or `"required"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -execution_settings = AnthropicChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. 
- items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." 
- ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py new file mode 100644 index 000000000000..445106e222cb --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py @@ -0,0 +1,257 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from functools import reduce +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + from semantic_kernel.functions import KernelFunction + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Toggle this flag to switch between streaming and non-streaming modes. +stream = True + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True. +# - If `auto_invoke=True`, the model will automatically choose and call functions as needed. +# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually. 
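# If you want to limit which plugins/functions the model may call, the behavior also accepts a
# `filters` dictionary with `included_functions`, `excluded_functions`, `included_plugins`, or
# `excluded_plugins`. An illustrative sketch (the function names assume the math and time plugins
# registered above):
#
#   request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(
#       filters={"included_functions": ["time-date", "time-time", "math-Add"]}
#   )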
+request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True) + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +def print_tool_calls(message: ChatMessageContent) -> None: + """ + A helper function to pretty print the tool calls found in a ChatMessageContent message. + This is useful when auto tool invocation is disabled and the model returns calls that you must handle. + """ + items = message.items + formatted_tool_calls = [] + for i, item in enumerate(items, start=1): + if isinstance(item, FunctionCallContent): + tool_call_id = item.id + function_name = item.name + function_arguments = item.arguments + formatted_str = ( + f"tool_call {i} id: {tool_call_id}\n" + f"tool_call {i} function name: {function_name}\n" + f"tool_call {i} arguments: {function_arguments}" + ) + formatted_tool_calls.append(formatted_str) + if len(formatted_tool_calls) > 0: + print("\n[Tool calls returned by the model]:\n" + "\n\n".join(formatted_tool_calls)) + else: + print("\n[No tool calls returned by the model]") + + +async def handle_streaming( + kernel: Kernel, + chat_function: "KernelFunction", + arguments: KernelArguments, +) -> str | None: + """ + Handle the streaming response from the model. + This function demonstrates two possible paths: + + 1. When auto function calling is ON (auto_invoke=True): + - The model may call tools automatically and produce a continuous + stream of assistant messages. We can simply print these as they come in. + + 2. When auto function calling is OFF (auto_invoke=False): + - The model may instead return tool call instructions embedded in the stream. + We can track these calls using `function_invoke_attempt` attributes and print + them for the user. The user can then manually invoke the tools and return the results + to the model for further completion. + """ + + response = kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ) + + # We will differentiate behavior based on whether auto invoking kernel functions is enabled. + auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions + + print("Mosscap:> ", end="", flush=True) + + # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions. + # We'll store them here. + streamed_tool_chunks: list[StreamingChatMessageContent] = [] + + # For content messages (the final assistant's response text), store them here. + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in response: + msg = message[0] + + # We only expect assistant messages here. + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + if auto_invoking: + # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + else: + # When auto invocation is OFF, the model may send chunks that represent tool calls. + # Chunks that contain function call instructions will have a function_invoke_attempt attribute. 
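                # (`function_invoke_attempt` starts at 0 and is incremented by the base chat
                # completion client each time it automatically re-invokes the model after running
                # tools, so chunks that belong to the same round share the same value.)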
+ if hasattr(msg, "function_invoke_attempt"): + # This chunk is part of a tool call instruction sequence + streamed_tool_chunks.append(msg) + else: + # This chunk is normal assistant response text + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + # If auto function calling was OFF, handle any tool call instructions we captured. + if not auto_invoking and streamed_tool_chunks: + # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately. + grouped_chunks = {} + for chunk in streamed_tool_chunks: + key = getattr(chunk, "function_invoke_attempt", None) + if key is not None: + grouped_chunks.setdefault(key, []).append(chunk) + + # Process each group of chunks + for attempt, chunks in grouped_chunks.items(): + try: + # Combine all chunks for a given attempt into one message. + combined_content = reduce(lambda first, second: first + second, chunks) + if hasattr(combined_content, "content"): + print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") + + print("[Auto function calling is OFF] Here are the returned tool calls:") + print_tool_calls(combined_content) + except Exception as e: + print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") + + # Return the final concatenated assistant response (if any). + if streamed_response_chunks: + return "".join([str(content) for content in streamed_response_chunks]) + return None + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. + """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + if stream: + # Handle streaming responses + result = await handle_streaming(kernel, chat_function, arguments=arguments) + else: + # Handle non-streaming responses + result = await kernel.invoke(chat_function, arguments=arguments) + + # If function calls are returned and auto invoking is off, we must show them. + if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value: + # Extract function calls from the returned content + function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] + if len(function_calls) > 0: + print_tool_calls(result.value[0]) + # At this point, you'd handle these calls manually if desired. + # For now, we just print them. + return True + + # If no function calls to handle, just print the assistant's response + if result: + print(f"Mosscap:> {result}") + + # Update the chat history with the user's input and the assistant's response + if result: + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." 
+ ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py deleted file mode 100644 index 2ced79d2f8be..000000000000 --- a/python/samples/concepts/auto_function_calling/chat_gpt_api_function_calling.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion, OpenAIChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = True - -kernel = Kernel() - -# Note: the underlying gpt-35/gpt-4 model version needs to be at least version 0613 to support tools. -kernel.add_service(OpenAIChatCompletion(service_id="chat")) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"`, `"required"`, or `"none"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. 
If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -# Note: the number of responses for auto invoking tool calls is limited to 1. -# If configured to be greater than one, this value will be overridden to 1. -execution_settings = OpenAIChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. - items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content: list[StreamingChatMessageContent] = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting 
chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." - ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py b/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py deleted file mode 100644 index 5ee05a835e2a..000000000000 --- a/python/samples/concepts/auto_function_calling/chat_mistral_ai_api_function_calling.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import asyncio -import os -from functools import reduce -from typing import TYPE_CHECKING - -from semantic_kernel import Kernel -from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior -from semantic_kernel.connectors.ai.mistral_ai import MistralAIChatCompletion, MistralAIChatPromptExecutionSettings -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole -from semantic_kernel.core_plugins.math_plugin import MathPlugin -from semantic_kernel.core_plugins.time_plugin import TimePlugin -from semantic_kernel.functions import KernelArguments - -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - - -system_message = """ -You are a chat bot. Your name is Mosscap and -you have one goal: figure out what people need. -Your full name, should you need to know it, is -Splendid Speckled Mosscap. You communicate -effectively, but you tend to answer with long -flowery prose. You are also a math wizard, -especially for adding and subtracting. -You also excel at joke telling, where your tone is often sarcastic. -Once you have the answer I am looking for, -you will return a full answer to me as soon as possible. -""" - -# This concept example shows how to handle both streaming and non-streaming responses -# To toggle the behavior, set the following flag accordingly: -stream = True - -kernel = Kernel() - -# Note: the underlying Model must be Mistral Small, Mistral Large, Mixtral 8x22B, Mistral Nemo. -# You can use MISTRALAI_API_KEY and MISTRALAI_CHAT_MODEL_ID environment variables to set the API key and model ID. 
-# Or just set it here in the Constructor for testing -kernel.add_service( - MistralAIChatCompletion( - service_id="chat", - # api_key=XXXXXXX, - # ai_model_id="mistral-large", - ) -) - -plugins_directory = os.path.join(__file__, "../../../../../prompt_template_samples/") -# adding plugins to the kernel -kernel.add_plugin(MathPlugin(), plugin_name="math") -kernel.add_plugin(TimePlugin(), plugin_name="time") - -chat_function = kernel.add_function( - prompt="{{$chat_history}}{{$user_input}}", - plugin_name="ChatBot", - function_name="Chat", -) - -# Enabling or disabling function calling is done by setting the `function_choice_behavior` attribute for the -# prompt execution settings. When the function_call parameter is set to "auto" the model will decide which -# function to use, if any. -# -# There are two ways to define the `function_choice_behavior` parameter: -# 1. Using the type string as `"auto"`, `"required"`, or `"none"`. For example: -# configure `function_choice_behavior="auto"` parameter directly in the execution settings. -# 2. Using the FunctionChoiceBehavior class. For example: -# `function_choice_behavior=FunctionChoiceBehavior.Auto()`. -# Both of these configure the `auto` tool_choice and all of the available plugins/functions -# registered on the kernel. If you want to limit the available plugins/functions, you must -# configure the `filters` dictionary attribute for each type of function choice behavior. -# For example: -# -# from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior - -# function_choice_behavior = FunctionChoiceBehavior.Auto( -# filters={"included_functions": ["time-date", "time-time", "math-Add"]} -# ) -# -# The filters attribute allows you to specify either: `included_functions`, `excluded_functions`, -# `included_plugins`, or `excluded_plugins`. - -# Note: the number of responses for auto invoking tool calls is limited to 1. -# If configured to be greater than one, this value will be overridden to 1. -execution_settings = MistralAIChatPromptExecutionSettings( - service_id="chat", - max_tokens=2000, - temperature=0.7, - top_p=0.8, - function_choice_behavior=FunctionChoiceBehavior.Auto(auto_invoke=True), -) - -history = ChatHistory() - -history.add_system_message(system_message) -history.add_user_message("Hi there, who are you?") -history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") - -arguments = KernelArguments(settings=execution_settings) - - -def print_tool_calls(message: ChatMessageContent) -> None: - # A helper method to pretty print the tool calls from the message. - # This is only triggered if auto invoke tool calls is disabled. 
- items = message.items - formatted_tool_calls = [] - for i, item in enumerate(items, start=1): - if isinstance(item, FunctionCallContent): - tool_call_id = item.id - function_name = item.name - function_arguments = item.arguments - formatted_str = ( - f"tool_call {i} id: {tool_call_id}\n" - f"tool_call {i} function name: {function_name}\n" - f"tool_call {i} arguments: {function_arguments}" - ) - formatted_tool_calls.append(formatted_str) - if len(formatted_tool_calls) > 0: - print("Tool calls:\n" + "\n\n".join(formatted_tool_calls)) - else: - print("The model used its own knowledge and didn't return any tool calls.") - - -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="") - streamed_chunks: list[StreamingChatMessageContent] = [] - result_content = [] - async for message in response: - if ( - not execution_settings.function_choice_behavior.auto_invoke_kernel_functions - and isinstance(message[0], StreamingChatMessageContent) - and message[0].role == AuthorRole.ASSISTANT - ): - streamed_chunks.append(message[0]) - elif isinstance(message[0], StreamingChatMessageContent) and message[0].role == AuthorRole.ASSISTANT: - result_content.append(message[0]) - print(str(message[0]), end="") - - if streamed_chunks: - streaming_chat_message = reduce(lambda first, second: first + second, streamed_chunks) - if hasattr(streaming_chat_message, "content"): - print(streaming_chat_message.content) - print("Auto tool calls is disabled, printing returned tool calls...") - print_tool_calls(streaming_chat_message) - - print("\n") - if result_content: - return "".join([str(content) for content in result_content]) - return None - - -async def chat() -> bool: - try: - user_input = input("User:> ") - except KeyboardInterrupt: - print("\n\nExiting chat...") - return False - except EOFError: - print("\n\nExiting chat...") - return False - - if user_input == "exit": - print("\n\nExiting chat...") - return False - arguments["user_input"] = user_input - arguments["chat_history"] = history - - if stream: - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - result = await kernel.invoke(chat_function, arguments=arguments) - - # If tools are used, and auto invoke tool calls is False, the response will be of type - # ChatMessageContent with information about the tool calls, which need to be sent - # back to the model to get the final response. - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if not execution_settings.function_choice_behavior.auto_invoke_kernel_functions and len(function_calls) > 0: - print_tool_calls(result.value[0]) - return True - - print(f"Mosscap:> {result}") - - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - return True - - -async def main() -> None: - chatting = True - print( - "Welcome to the chat bot!\ - \n Type 'exit' to exit.\ - \n Try a math question to see the function calling in action (i.e. what is 3+3?)." 
- ) - while chatting: - chatting = await chat() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py index ed2616ba71aa..f8490edba2cd 100644 --- a/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/anthropic/services/anthropic_chat_completion.py @@ -154,6 +154,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, AnthropicChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -164,7 +165,7 @@ async def _inner_get_streaming_chat_message_contents( if settings.system is None and parsed_system_message is not None: settings.system = parsed_system_message - response = self._send_chat_stream_request(settings) + response = self._send_chat_stream_request(settings, function_invoke_attempt) if not isinstance(response, AsyncGenerator): raise ServiceInvalidResponseError("Expected an AsyncGenerator response.") @@ -242,6 +243,7 @@ def _create_streaming_chat_message_content( self, stream_event: TextEvent | ContentBlockStopEvent | RawMessageDeltaEvent, metadata: dict[str, Any] = {}, + function_invoke_attempt: int = 0, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a content block.""" items: list[STREAMING_ITEM_TYPES] = [] @@ -275,6 +277,7 @@ def _create_streaming_chat_message_content( role=AuthorRole.ASSISTANT, finish_reason=finish_reason, items=items, + function_invoke_attempt=function_invoke_attempt, ) def update_settings_from_function_call_configuration_anthropic( @@ -338,7 +341,9 @@ async def _send_chat_request(self, settings: AnthropicChatPromptExecutionSetting return [self._create_chat_message_content(response, response_metadata)] async def _send_chat_stream_request( - self, settings: AnthropicChatPromptExecutionSettings + self, + settings: AnthropicChatPromptExecutionSettings, + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], None]: """Send the chat stream request. 
@@ -359,7 +364,9 @@ async def _send_chat_stream_request( isinstance(stream_event, ContentBlockStopEvent) and stream_event.content_block.type == "tool_use" ): - yield [self._create_streaming_chat_message_content(stream_event, metadata)] + yield [ + self._create_streaming_chat_message_content(stream_event, metadata, function_invoke_attempt) + ] except Exception as ex: raise ServiceResponseException( f"{type(self)} service failed to complete the request", diff --git a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py index e48268d223bb..8ac10561f142 100644 --- a/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/azure_ai_inference/services/azure_ai_inference_chat_completion.py @@ -138,6 +138,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, AzureAIInferenceChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -157,7 +158,8 @@ async def _inner_get_streaming_chat_message_contents( continue chunk_metadata = self._get_metadata_from_response(chunk) yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) for choice in chunk.choices + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) + for choice in chunk.choices ] @override @@ -255,6 +257,7 @@ def _create_streaming_chat_message_content( chunk: AsyncStreamingChatCompletions, choice: StreamingChatChoiceUpdate, metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. @@ -262,6 +265,7 @@ def _create_streaming_chat_message_content( chunk: The chunk from the response. choice: The choice from the response. metadata: The metadata from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. 
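# Taken together, these connector changes mean every streamed chunk reports which automatic
# function-invocation round produced it. A minimal, generic sketch of how a caller might bucket
# chunks by that index (the helper name and the argument objects are illustrative, not part of
# this change):

from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent


async def collect_chunks_by_attempt(service, chat_history, settings, kernel):
    """Group streamed chunks by the automatic function-invocation round that produced them."""
    by_attempt: dict[int, list[StreamingChatMessageContent]] = {}
    async for messages in service.get_streaming_chat_message_contents(
        chat_history=chat_history, settings=settings, kernel=kernel
    ):
        for msg in messages:
            by_attempt.setdefault(msg.function_invoke_attempt or 0, []).append(msg)
    return by_attempt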
@@ -295,6 +299,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=FinishReason(choice.finish_reason) if choice.finish_reason else None, metadata=metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py index 8827b310ac0d..c163b6ffda74 100644 --- a/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/bedrock/services/bedrock_chat_completion.py @@ -128,6 +128,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: # Not all models support streaming: check if the model supports streaming before proceeding model_info = await self.get_foundation_model_info(self.ai_model_id) @@ -146,7 +147,7 @@ async def _inner_get_streaming_chat_message_contents( elif "contentBlockStart" in event: yield [self._parse_content_block_start_event(event)] elif "contentBlockDelta" in event: - yield [self._parse_content_block_delta_event(event)] + yield [self._parse_content_block_delta_event(event, function_invoke_attempt)] elif "contentBlockStop" in event: continue elif "messageStop" in event: @@ -338,7 +339,9 @@ def _parse_content_block_start_event(self, event: dict[str, Any]) -> StreamingCh inner_content=event, ) - def _parse_content_block_delta_event(self, event: dict[str, Any]) -> StreamingChatMessageContent: + def _parse_content_block_delta_event( + self, event: dict[str, Any], function_invoke_attempt: int + ) -> StreamingChatMessageContent: """Parse the content block delta event. The content block delta event contains the completion. @@ -363,6 +366,7 @@ def _parse_content_block_delta_event(self, event: dict[str, Any]) -> StreamingCh items=items, choice_index=0, inner_content=event, + function_invoke_attempt=function_invoke_attempt, ) def _parse_message_stop_event(self, event: dict[str, Any]) -> StreamingChatMessageContent: diff --git a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py index 6a673dccd5eb..de9edf36c268 100644 --- a/python/semantic_kernel/connectors/ai/chat_completion_client_base.py +++ b/python/semantic_kernel/connectors/ai/chat_completion_client_base.py @@ -64,15 +64,17 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: """Send a streaming chat request to the AI service. Args: - chat_history (ChatHistory): The chat history to send. - settings (PromptExecutionSettings): The settings for the request. + chat_history: The chat history to send. + settings: The settings for the request. + function_invoke_attempt: The current attempt count for automatically invoking functions. Yields: - streaming_chat_message_contents (list[StreamingChatMessageContent]): The streaming chat message contents. + streaming_chat_message_contents: The streaming chat message contents. 
""" raise NotImplementedError("The _inner_get_streaming_chat_message_contents method is not implemented.") # Below is needed for mypy: https://mypy.readthedocs.io/en/stable/more_types.html#asynchronous-iterators @@ -268,7 +270,9 @@ async def get_streaming_chat_message_contents( # Hold the messages, if there are more than one response, it will not be used, so we flatten all_messages: list["StreamingChatMessageContent"] = [] function_call_returned = False - async for messages in self._inner_get_streaming_chat_message_contents(chat_history, settings): + async for messages in self._inner_get_streaming_chat_message_contents( + chat_history, settings, request_index + ): for msg in messages: if msg is not None: all_messages.append(msg) @@ -313,6 +317,7 @@ async def get_streaming_chat_message_contents( function_result_messages = merge_streaming_function_results( messages=chat_history.messages[-len(results) :], ai_model_id=ai_model_id, # type: ignore + function_invoke_attempt=request_index, ) if self._yield_function_result_messages(function_result_messages): yield function_result_messages diff --git a/python/semantic_kernel/connectors/ai/function_calling_utils.py b/python/semantic_kernel/connectors/ai/function_calling_utils.py index 365d43565ed9..c7ab3dba6b39 100644 --- a/python/semantic_kernel/connectors/ai/function_calling_utils.py +++ b/python/semantic_kernel/connectors/ai/function_calling_utils.py @@ -101,6 +101,7 @@ def merge_function_results( def merge_streaming_function_results( messages: list[ChatMessageContent | StreamingChatMessageContent], ai_model_id: str, + function_invoke_attempt: int, ) -> list[StreamingChatMessageContent]: """Combine multiple streaming function result content types to one streaming chat message content type. @@ -110,6 +111,7 @@ def merge_streaming_function_results( Args: messages: The list of streaming chat message content types. ai_model_id: The AI model ID. + function_invoke_attempt: The function invoke attempt. Returns: The combined streaming chat message content type. 
@@ -118,4 +120,12 @@ def merge_streaming_function_results( for message in messages: items.extend([item for item in message.items if isinstance(item, FunctionResultContent)]) - return [StreamingChatMessageContent(role=AuthorRole.TOOL, items=items, choice_index=0, ai_model_id=ai_model_id)] + return [ + StreamingChatMessageContent( + role=AuthorRole.TOOL, + items=items, + choice_index=0, + ai_model_id=ai_model_id, + function_invoke_attempt=function_invoke_attempt, + ) + ] diff --git a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py index ca4ca998a122..df8f64cf4c6c 100644 --- a/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/google_ai/services/google_ai_chat_completion.py @@ -147,6 +147,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, GoogleAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -167,7 +168,10 @@ async def _inner_get_streaming_chat_message_contents( ) async for chunk in response: - yield [self._create_streaming_chat_message_content(chunk, candidate) for candidate in chunk.candidates] + yield [ + self._create_streaming_chat_message_content(chunk, candidate, function_invoke_attempt) + for candidate in chunk.candidates + ] @override def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None: @@ -268,12 +272,14 @@ def _create_streaming_chat_message_content( self, chunk: GenerateContentResponse, candidate: Candidate, + function_invoke_attempt: int = 0, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. Args: chunk: The response from the service. candidate: The candidate from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. 
@@ -313,6 +319,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=finish_reason, metadata=response_metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py index 45d66396ff34..6372c71c5b1c 100644 --- a/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_chat_completion.py @@ -142,6 +142,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, VertexAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -162,7 +163,10 @@ async def _inner_get_streaming_chat_message_contents( ) async for chunk in response: - yield [self._create_streaming_chat_message_content(chunk, candidate) for candidate in chunk.candidates] + yield [ + self._create_streaming_chat_message_content(chunk, candidate, function_invoke_attempt) + for candidate in chunk.candidates + ] @override def _verify_function_choice_settings(self, settings: "PromptExecutionSettings") -> None: @@ -262,12 +266,14 @@ def _create_streaming_chat_message_content( self, chunk: GenerationResponse, candidate: Candidate, + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object. Args: chunk: The response from the service. candidate: The candidate from the response. + function_invoke_attempt: The function invoke attempt. Returns: A streaming chat message content object. 
@@ -308,6 +314,7 @@ def _create_streaming_chat_message_content( inner_content=chunk, finish_reason=finish_reason, metadata=response_metadata, + function_invoke_attempt=function_invoke_attempt, ) # endregion diff --git a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py index 46f0c9f64a2b..b374235225a4 100644 --- a/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py @@ -159,6 +159,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, MistralAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -182,7 +183,9 @@ async def _inner_get_streaming_chat_message_contents( continue chunk_metadata = self._get_metadata_from_response(chunk.data) yield [ - self._create_streaming_chat_message_content(chunk.data, choice, chunk_metadata) + self._create_streaming_chat_message_content( + chunk.data, choice, chunk_metadata, function_invoke_attempt + ) for choice in chunk.data.choices ] @@ -216,6 +219,7 @@ def _create_streaming_chat_message_content( chunk: CompletionChunk, choice: CompletionResponseStreamChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming chat message content object from a choice.""" metadata = self._get_metadata_from_chat_choice(choice) @@ -234,6 +238,7 @@ def _create_streaming_chat_message_content( role=AuthorRole(choice.delta.role) if choice.delta.role else AuthorRole.ASSISTANT, finish_reason=FinishReason(choice.finish_reason) if choice.finish_reason else None, items=items, + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_response(self, response: ChatCompletionResponse | CompletionChunk) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py index bfb452d9fc2d..baf2d04f2914 100644 --- a/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py @@ -180,6 +180,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, OllamaChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -202,10 +203,10 @@ async def _inner_get_streaming_chat_message_contents( async for part in response_object: if isinstance(part, ChatResponse): - yield [self._create_streaming_chat_message_content_from_chat_response(part)] + yield [self._create_streaming_chat_message_content_from_chat_response(part, function_invoke_attempt)] continue if isinstance(part, Mapping): - yield [self._create_streaming_chat_message_content(part)] + yield [self._create_streaming_chat_message_content(part, function_invoke_attempt)] continue raise ServiceInvalidResponseError( "Invalid response type from Ollama streaming chat completion. 
" @@ -215,7 +216,9 @@ async def _inner_get_streaming_chat_message_contents( # endregion def _create_streaming_chat_message_content_from_chat_response( - self, response: ChatResponse + self, + response: ChatResponse, + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a chat message content from the response.""" items: list[STREAMING_ITEM_TYPES] = [] @@ -235,6 +238,7 @@ def _create_streaming_chat_message_content_from_chat_response( inner_content=response, ai_model_id=self.ai_model_id, metadata=self._get_metadata_from_chat_response(response), + function_invoke_attempt=function_invoke_attempt, ) def _parse_tool_calls(self, tool_calls: Sequence[Message.ToolCall] | None, items: list[Any]): @@ -299,7 +303,9 @@ def _create_chat_message_content(self, response: Mapping[str, Any]) -> ChatMessa metadata=self._get_metadata_from_response(response), ) - def _create_streaming_chat_message_content(self, part: Mapping[str, Any]) -> StreamingChatMessageContent: + def _create_streaming_chat_message_content( + self, part: Mapping[str, Any], function_invoke_attempt: int + ) -> StreamingChatMessageContent: """Create a streaming chat message content from the response part.""" items: list[STREAMING_ITEM_TYPES] = [] if not (message := part.get("message", None)): @@ -331,6 +337,7 @@ def _create_streaming_chat_message_content(self, part: Mapping[str, Any]) -> Str inner_content=part, ai_model_id=self.ai_model_id, metadata=self._get_metadata_from_response(part), + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_response(self, response: Mapping[str, Any]) -> dict[str, Any]: diff --git a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py index bb247cb55e43..28521975e366 100644 --- a/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/onnx/services/onnx_gen_ai_chat_completion.py @@ -109,6 +109,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: """Create streaming chat message contents, in the number specified by the settings. @@ -116,6 +117,7 @@ async def _inner_get_streaming_chat_message_contents( chat_history : A list of chat chat_history, that can be rendered into a set of chat_history, from system, user, assistant and function. settings : Settings for the request. + function_invoke_attempt : The function invoke attempt. Yields: A stream representing the response(s) from the LLM. 
@@ -127,7 +129,7 @@ async def _inner_get_streaming_chat_message_contents( images = self._get_images_from_history(chat_history) async for chunk in self._generate_next_token_async(prompt, settings, images): yield [ - self._create_streaming_chat_message_content(choice_index, new_token) + self._create_streaming_chat_message_content(choice_index, new_token, function_invoke_attempt) for choice_index, new_token in enumerate(chunk) ] @@ -142,12 +144,15 @@ def _create_chat_message_content(self, choice: str) -> ChatMessageContent: ], ) - def _create_streaming_chat_message_content(self, choice_index: int, choice: str) -> StreamingChatMessageContent: + def _create_streaming_chat_message_content( + self, choice_index: int, choice: str, function_invoke_attempt: int + ) -> StreamingChatMessageContent: return StreamingChatMessageContent( role=AuthorRole.ASSISTANT, choice_index=choice_index, content=choice, ai_model_id=self.ai_model_id, + function_invoke_attempt=function_invoke_attempt, ) @override diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py index 73e1a8fe62b7..03289fd45d58 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_chat_completion.py @@ -148,9 +148,10 @@ def _create_streaming_chat_message_content( chunk: ChatCompletionChunk, choice: ChunkChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int = 0, ) -> "StreamingChatMessageContent": """Create an Azure streaming chat message content object from a choice.""" - content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata) + content = super()._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) assert isinstance(content, StreamingChatMessageContent) and isinstance(choice, ChunkChoice) # nosec return self._add_tool_message_to_chat_message_content(content, choice) diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py index ec918dee605d..0c1e843c5d47 100644 --- a/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py +++ b/python/semantic_kernel/connectors/ai/open_ai/services/open_ai_chat_completion_base.py @@ -96,6 +96,7 @@ async def _inner_get_streaming_chat_message_contents( self, chat_history: "ChatHistory", settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: if not isinstance(settings, OpenAIChatPromptExecutionSettings): settings = self.get_prompt_execution_settings_from_settings(settings) @@ -126,12 +127,13 @@ async def _inner_get_streaming_chat_message_contents( inner_content=chunk, ai_model_id=settings.ai_model_id, metadata=chunk_metadata, + function_invoke_attempt=function_invoke_attempt, ) for i in range(settings.number_of_responses or 1) ] else: yield [ - self._create_streaming_chat_message_content(chunk, choice, chunk_metadata) + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) for choice in chunk.choices ] @@ -190,6 +192,7 @@ def _create_streaming_chat_message_content( chunk: ChatCompletionChunk, choice: ChunkChoice, chunk_metadata: dict[str, Any], + function_invoke_attempt: int, ) -> StreamingChatMessageContent: """Create a streaming 
chat message content object from a choice.""" metadata = self._get_metadata_from_chat_choice(choice) @@ -207,6 +210,7 @@ def _create_streaming_chat_message_content( role=(AuthorRole(choice.delta.role) if choice.delta and choice.delta.role else AuthorRole.ASSISTANT), finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None), items=items, + function_invoke_attempt=function_invoke_attempt, ) def _get_metadata_from_chat_response(self, response: ChatCompletion) -> dict[str, Any]: diff --git a/python/semantic_kernel/contents/streaming_chat_message_content.py b/python/semantic_kernel/contents/streaming_chat_message_content.py index 51110b43ea5c..683b498d0c69 100644 --- a/python/semantic_kernel/contents/streaming_chat_message_content.py +++ b/python/semantic_kernel/contents/streaming_chat_message_content.py @@ -4,6 +4,8 @@ from typing import Any, Union, overload from xml.etree.ElementTree import Element # nosec +from pydantic import Field + from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent @@ -51,6 +53,12 @@ class StreamingChatMessageContent(ChatMessageContent, StreamingContentMixin): __add__: Combines two StreamingChatMessageContent instances. """ + function_invoke_attempt: int | None = Field( + default=0, + description="Tracks the current attempt count for automatically invoking functions. " + "This value increments with each subsequent automatic invocation attempt.", + ) + @overload def __init__( self, @@ -63,6 +71,7 @@ def __init__( finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ) -> None: ... @overload @@ -77,6 +86,7 @@ def __init__( finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ) -> None: ... def __init__( # type: ignore @@ -91,26 +101,30 @@ def __init__( # type: ignore finish_reason: FinishReason | None = None, ai_model_id: str | None = None, metadata: dict[str, Any] | None = None, + function_invoke_attempt: int | None = None, ): """Create a new instance of StreamingChatMessageContent. Args: - role: ChatRole - The role of the chat message. - choice_index: int - The index of the choice that generated this response. - items: list[TextContent, FunctionCallContent, FunctionResultContent, ImageContent] - The content. - content: str - The text of the response. - inner_content: Optional[Any] - The inner content of the response, + role: The role of the chat message. + choice_index: The index of the choice that generated this response. + items: The content. + content: The text of the response. + inner_content: The inner content of the response, this should hold all the information from the response so even when not creating a subclass a developer can leverage the full thing. - name: Optional[str] - The name of the response. - encoding: Optional[str] - The encoding of the text. - finish_reason: Optional[FinishReason] - The reason the response was finished. - metadata: Dict[str, Any] - Any metadata that should be attached to the response. - ai_model_id: Optional[str] - The id of the AI model that generated this response. + name: The name of the response. + encoding: The encoding of the text. + finish_reason: The reason the response was finished. 
+ metadata: Any metadata that should be attached to the response. + ai_model_id: The id of the AI model that generated this response. + function_invoke_attempt: Tracks the current attempt count for automatically + invoking functions. This value increments with each subsequent automatic invocation attempt. """ kwargs: dict[str, Any] = { "role": role, "choice_index": choice_index, + "function_invoke_attempt": function_invoke_attempt, } if encoding: kwargs["encoding"] = encoding @@ -180,6 +194,7 @@ def __add__(self, other: "StreamingChatMessageContent") -> "StreamingChatMessage metadata=self.metadata | other.metadata, encoding=self.encoding, finish_reason=self.finish_reason or other.finish_reason, + function_invoke_attempt=self.function_invoke_attempt, ) def to_element(self) -> "Element": @@ -214,5 +229,6 @@ def __hash__(self) -> int: self.encoding, self.finish_reason, self.choice_index, + self.function_invoke_attempt, *self.items, )) diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index bf3ff42ede2c..6e8d4ad1a9e0 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -8,7 +8,9 @@ import pytest from pytest import mark, param -from samples.concepts.auto_function_calling.chat_gpt_api_function_calling import main as chat_gpt_api_function_calling +from samples.concepts.auto_function_calling.chat_completion_with_function_calling import ( + main as chat_completion_with_function_calling, +) from samples.concepts.auto_function_calling.functions_defined_in_json_prompt import ( main as function_defined_in_json_prompt, ) @@ -106,9 +108,9 @@ ), ), param( - chat_gpt_api_function_calling, + chat_completion_with_function_calling, ["What is 3+3?", "exit"], - id="chat_gpt_api_function_calling", + id="chat_completion_with_function_calling", marks=pytest.mark.skipif( os.getenv(COMPLETIONS_CONCEPT_SAMPLE, None) is None, reason="Not running completion samples." ), From ac5a1057a4b9ddab29bb22a06961a9fa8bc1bdc4 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 19 Dec 2024 08:47:21 +0900 Subject: [PATCH 2/7] Break the auto function calling samples into two: streaming and non-streaming --- .../chat_completion_with_function_calling.py | 141 ++-------- ...pletion_with_function_calling_streaming.py | 244 ++++++++++++++++++ 2 files changed, 269 insertions(+), 116 deletions(-) create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py index 445106e222cb..92d92f17db49 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. 
import asyncio -from functools import reduce from typing import TYPE_CHECKING from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings @@ -10,14 +9,21 @@ from semantic_kernel.contents import ChatHistory from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent -from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.core_plugins.math_plugin import MathPlugin from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction + pass + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring dynamic function calling, # +# non-streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. If auto function calling is # +# disabled, then the tool calls will be printed to the console. # +##################################################################### # System message defining the behavior and persona of the chat bot. system_message = """ @@ -33,9 +39,6 @@ you will return a full answer to me as soon as possible. """ -# Toggle this flag to switch between streaming and non-streaming modes. -stream = True - # Create and configure the kernel. kernel = Kernel() @@ -62,7 +65,7 @@ # - Services.ONNX # - Services.VERTEX_AI # Please make sure you have configured your environment correctly for the selected chat completion service. -chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) # Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True. # - If `auto_invoke=True`, the model will automatically choose and call functions as needed. @@ -105,96 +108,6 @@ def print_tool_calls(message: ChatMessageContent) -> None: print("\n[No tool calls returned by the model]") -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - """ - Handle the streaming response from the model. - This function demonstrates two possible paths: - - 1. When auto function calling is ON (auto_invoke=True): - - The model may call tools automatically and produce a continuous - stream of assistant messages. We can simply print these as they come in. - - 2. When auto function calling is OFF (auto_invoke=False): - - The model may instead return tool call instructions embedded in the stream. - We can track these calls using `function_invoke_attempt` attributes and print - them for the user. The user can then manually invoke the tools and return the results - to the model for further completion. - """ - - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - # We will differentiate behavior based on whether auto invoking kernel functions is enabled. 
- auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions - - print("Mosscap:> ", end="", flush=True) - - # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions. - # We'll store them here. - streamed_tool_chunks: list[StreamingChatMessageContent] = [] - - # For content messages (the final assistant's response text), store them here. - streamed_response_chunks: list[StreamingChatMessageContent] = [] - - async for message in response: - msg = message[0] - - # We only expect assistant messages here. - if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: - continue - - if auto_invoking: - # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. - streamed_response_chunks.append(msg) - print(str(msg), end="", flush=True) - else: - # When auto invocation is OFF, the model may send chunks that represent tool calls. - # Chunks that contain function call instructions will have a function_invoke_attempt attribute. - if hasattr(msg, "function_invoke_attempt"): - # This chunk is part of a tool call instruction sequence - streamed_tool_chunks.append(msg) - else: - # This chunk is normal assistant response text - streamed_response_chunks.append(msg) - print(str(msg), end="", flush=True) - - print("\n", flush=True) - - # If auto function calling was OFF, handle any tool call instructions we captured. - if not auto_invoking and streamed_tool_chunks: - # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately. - grouped_chunks = {} - for chunk in streamed_tool_chunks: - key = getattr(chunk, "function_invoke_attempt", None) - if key is not None: - grouped_chunks.setdefault(key, []).append(chunk) - - # Process each group of chunks - for attempt, chunks in grouped_chunks.items(): - try: - # Combine all chunks for a given attempt into one message. - combined_content = reduce(lambda first, second: first + second, chunks) - if hasattr(combined_content, "content"): - print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") - - print("[Auto function calling is OFF] Here are the returned tool calls:") - print_tool_calls(combined_content) - except Exception as e: - print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") - - # Return the final concatenated assistant response (if any). - if streamed_response_chunks: - return "".join([str(content) for content in streamed_response_chunks]) - return None - - async def chat() -> bool: """ Continuously prompt the user for input and show the assistant's response. @@ -213,26 +126,22 @@ async def chat() -> bool: arguments["user_input"] = user_input arguments["chat_history"] = history - if stream: - # Handle streaming responses - result = await handle_streaming(kernel, chat_function, arguments=arguments) - else: - # Handle non-streaming responses - result = await kernel.invoke(chat_function, arguments=arguments) + # Handle non-streaming responses + result = await kernel.invoke(chat_function, arguments=arguments) - # If function calls are returned and auto invoking is off, we must show them. 
- if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value: - # Extract function calls from the returned content - function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] - if len(function_calls) > 0: - print_tool_calls(result.value[0]) - # At this point, you'd handle these calls manually if desired. - # For now, we just print them. - return True + # If function calls are returned and auto invoking is off, we must show them. + if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value: + # Extract function calls from the returned content + function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] + if len(function_calls) > 0: + print_tool_calls(result.value[0]) + # At this point, you'd handle these calls manually if desired. + # For now, we just print them. + return True - # If no function calls to handle, just print the assistant's response - if result: - print(f"Mosscap:> {result}") + # If no function calls to handle, just print the assistant's response + if result: + print(f"Mosscap:> {result}") # Update the chat history with the user's input and the assistant's response if result: diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py new file mode 100644 index 000000000000..e8c6412263cd --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py @@ -0,0 +1,244 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from functools import reduce +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.contents.function_call_content import FunctionCallContent +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + from semantic_kernel.functions import KernelFunction + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring dynamic function calling, # +# streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. If auto function calling is # +# disabled, then the tool calls will be printed to the console. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. 
You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True. +# - If `auto_invoke=True`, the model will automatically choose and call functions as needed. +# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True) + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +def print_tool_calls(message: ChatMessageContent) -> None: + """ + A helper function to pretty print the tool calls found in a ChatMessageContent message. + This is useful when auto tool invocation is disabled and the model returns calls that you must handle. + """ + items = message.items + formatted_tool_calls = [] + for i, item in enumerate(items, start=1): + if isinstance(item, FunctionCallContent): + tool_call_id = item.id + function_name = item.name + function_arguments = item.arguments + formatted_str = ( + f"tool_call {i} id: {tool_call_id}\n" + f"tool_call {i} function name: {function_name}\n" + f"tool_call {i} arguments: {function_arguments}" + ) + formatted_tool_calls.append(formatted_str) + if len(formatted_tool_calls) > 0: + print("\n[Tool calls returned by the model]:\n" + "\n\n".join(formatted_tool_calls)) + else: + print("\n[No tool calls returned by the model]") + + +async def handle_streaming( + kernel: Kernel, + chat_function: "KernelFunction", + arguments: KernelArguments, +) -> str | None: + """ + Handle the streaming response from the model. + This function demonstrates two possible paths: + + 1. When auto function calling is ON (auto_invoke=True): + - The model may call tools automatically and produce a continuous + stream of assistant messages. We can simply print these as they come in. + + 2. 
When auto function calling is OFF (auto_invoke=False): + - The model may instead return tool call instructions embedded in the stream. + We can track these calls using `function_invoke_attempt` attributes and print + them for the user. The user can then manually invoke the tools and return the results + to the model for further completion. + """ + + response = kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ) + + # We will differentiate behavior based on whether auto invoking kernel functions is enabled. + auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions + + print("Mosscap:> ", end="", flush=True) + + # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions. + # We'll store them here. + streamed_tool_chunks: list[StreamingChatMessageContent] = [] + + # For content messages (the final assistant's response text), store them here. + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in response: + msg = message[0] + + # We only expect assistant messages here. + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + if auto_invoking: + # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + else: + # When auto invocation is OFF, the model may send chunks that represent tool calls. + # Chunks that contain function call instructions will have a function_invoke_attempt attribute. + if hasattr(msg, "function_invoke_attempt"): + # This chunk is part of a tool call instruction sequence + streamed_tool_chunks.append(msg) + else: + # This chunk is normal assistant response text + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + # If auto function calling was OFF, handle any tool call instructions we captured. + if not auto_invoking and streamed_tool_chunks: + # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately. + grouped_chunks = {} + for chunk in streamed_tool_chunks: + key = getattr(chunk, "function_invoke_attempt", None) + if key is not None: + grouped_chunks.setdefault(key, []).append(chunk) + + # Process each group of chunks + for attempt, chunks in grouped_chunks.items(): + try: + # Combine all chunks for a given attempt into one message. + combined_content = reduce(lambda first, second: first + second, chunks) + if hasattr(combined_content, "content"): + print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") + + print("[Auto function calling is OFF] Here are the returned tool calls:") + print_tool_calls(combined_content) + except Exception as e: + print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") + + # Return the final concatenated assistant response (if any). + if streamed_response_chunks: + return "".join([str(content) for content in streamed_response_chunks]) + return None + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. 
+ """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + result = await handle_streaming(kernel, chat_function, arguments=arguments) + + # Update the chat history with the user's input and the assistant's response + if result: + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." + ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) From 8464649f4375d60e2a606bfb13b9d2f72578a484 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 19 Dec 2024 10:01:16 +0900 Subject: [PATCH 3/7] Break samples down further - streaming and non streaming auto invoke and manual invoke --- ...t_completion_with_auto_function_calling.py | 125 +++++++++++++ ...on_with_auto_function_calling_streaming.py | 169 ++++++++++++++++++ ...ompletion_with_manual_function_calling.py} | 14 +- ...with_manual_function_calling_streaming.py} | 32 ++-- 4 files changed, 313 insertions(+), 27 deletions(-) create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py create mode 100644 python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py rename python/samples/concepts/auto_function_calling/{chat_completion_with_function_calling.py => chat_completion_with_manual_function_calling.py} (93%) rename python/samples/concepts/auto_function_calling/{chat_completion_with_function_calling_streaming.py => chat_completion_with_manual_function_calling_streaming.py} (88%) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py new file mode 100644 index 000000000000..2dad6517e31c --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py @@ -0,0 +1,125 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + pass + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring auto function calling, # +# non-streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. 
Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=True by default. +# With `auto_invoke=True`, the model will automatically choose and call functions as needed. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. + """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + # Handle non-streaming responses + result = await kernel.invoke(chat_function, arguments=arguments) + + # Update the chat history with the user's input and the assistant's response + if result: + print(f"Mosscap:> {result}") + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." 
+ ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py new file mode 100644 index 000000000000..86435032ba4d --- /dev/null +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py @@ -0,0 +1,169 @@ +# Copyright (c) Microsoft. All rights reserved. + +import asyncio +from typing import TYPE_CHECKING + +from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings +from semantic_kernel import Kernel +from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent +from semantic_kernel.contents.utils.author_role import AuthorRole +from semantic_kernel.core_plugins.math_plugin import MathPlugin +from semantic_kernel.core_plugins.time_plugin import TimePlugin +from semantic_kernel.functions import KernelArguments + +if TYPE_CHECKING: + from semantic_kernel.functions import KernelFunction + +##################################################################### +# This sample demonstrates how to build a conversational chatbot # +# using Semantic Kernel, featuring auto function calling, # +# streaming responses, and support for math and time plugins. # +# The chatbot is designed to interact with the user, call functions # +# as needed, and return responses. # +##################################################################### + +# System message defining the behavior and persona of the chat bot. +system_message = """ +You are a chat bot. Your name is Mosscap and +you have one goal: figure out what people need. +Your full name, should you need to know it, is +Splendid Speckled Mosscap. You communicate +effectively, but you tend to answer with long +flowery prose. You are also a math wizard, +especially for adding and subtracting. +You also excel at joke telling, where your tone is often sarcastic. +Once you have the answer I am looking for, +you will return a full answer to me as soon as possible. +""" + +# Create and configure the kernel. +kernel = Kernel() + +# Load some sample plugins (for demonstration of function calling). +kernel.add_plugin(MathPlugin(), plugin_name="math") +kernel.add_plugin(TimePlugin(), plugin_name="time") + +# Define a chat function (a template for how to handle user input). +chat_function = kernel.add_function( + prompt="{{$chat_history}}{{$user_input}}", + plugin_name="ChatBot", + function_name="Chat", +) + +# You can select from the following chat completion services that support function calling: +# - Services.OPENAI +# - Services.AZURE_OPENAI +# - Services.AZURE_AI_INFERENCE +# - Services.ANTHROPIC +# - Services.BEDROCK +# - Services.GOOGLE_AI +# - Services.MISTRAL_AI +# - Services.OLLAMA +# - Services.ONNX +# - Services.VERTEX_AI +# Please make sure you have configured your environment correctly for the selected chat completion service. +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) + +# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=True by default. 
+# With `auto_invoke=True`, the model will automatically choose and call functions as needed. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() + +kernel.add_service(chat_completion_service) + +# Pass the request settings to the kernel arguments. +arguments = KernelArguments(settings=request_settings) + +# Create a chat history to store the system message, initial messages, and the conversation. +history = ChatHistory() +history.add_system_message(system_message) +history.add_user_message("Hi there, who are you?") +history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") + + +async def handle_streaming( + kernel: Kernel, + chat_function: "KernelFunction", + arguments: KernelArguments, +) -> str | None: + """ + Handle the streaming response from the model. + This function demonstrates two possible paths: + + When auto function calling is ON (auto_invoke=True): + - The model may call tools automatically and produce a continuous + stream of assistant messages. We can simply print these as they come in. + """ + + response = kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ) + + print("Mosscap:> ", end="", flush=True) + + # For content messages (the final assistant's response text), store them here. + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in response: + msg = message[0] + + # We only expect assistant messages here. + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + # Return the final concatenated assistant response (if any). + if streamed_response_chunks: + return "".join([str(content) for content in streamed_response_chunks]) + return None + + +async def chat() -> bool: + """ + Continuously prompt the user for input and show the assistant's response. + Type 'exit' to exit. + """ + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + return False + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + return False + + arguments["user_input"] = user_input + arguments["chat_history"] = history + + result = await handle_streaming(kernel, chat_function, arguments=arguments) + + # Update the chat history with the user's input and the assistant's response + if result: + history.add_user_message(user_input) + history.add_assistant_message(str(result)) + + return True + + +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." 
+ ) + chatting = True + while chatting: + chatting = await chat() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py similarity index 93% rename from python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py rename to python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py index 92d92f17db49..978fa3ea24ec 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py @@ -18,11 +18,11 @@ ##################################################################### # This sample demonstrates how to build a conversational chatbot # -# using Semantic Kernel, featuring dynamic function calling, # +# using Semantic Kernel, featuring manual function calling, # # non-streaming responses, and support for math and time plugins. # # The chatbot is designed to interact with the user, call functions # -# as needed, and return responses. If auto function calling is # -# disabled, then the tool calls will be printed to the console. # +# as needed, and return responses. With auto function calling # +# disabled, the tool calls will be printed to the console. # ##################################################################### # System message defining the behavior and persona of the chat bot. @@ -67,10 +67,10 @@ # Please make sure you have configured your environment correctly for the selected chat completion service. chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) -# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True. -# - If `auto_invoke=True`, the model will automatically choose and call functions as needed. -# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually. -request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True) +# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=False. +# With `FunctionChoiceBehavior(auto_invoke=False)`, the model may return tool call instructions +# that you must handle and call manually. We will only print the tool calls in this sample. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False) kernel.add_service(chat_completion_service) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py similarity index 88% rename from python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py rename to python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py index e8c6412263cd..b749ff2a87a0 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_function_calling_streaming.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py @@ -70,10 +70,10 @@ # Please make sure you have configured your environment correctly for the selected chat completion service. 
chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) -# Configure the function choice behavior. Here, we set it to Auto with auto_invoke=True. -# - If `auto_invoke=True`, the model will automatically choose and call functions as needed. -# - If `auto_invoke=False`, the model may return tool call instructions that you must handle and call manually. -request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True) +# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=False. +# With `FunctionChoiceBehavior(auto_invoke=False)`, the model may return tool call instructions +# that you must handle and call manually. We will only print the tool calls in this sample. +request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False) kernel.add_service(chat_completion_service) @@ -137,9 +137,6 @@ async def handle_streaming( arguments=arguments, ) - # We will differentiate behavior based on whether auto invoking kernel functions is enabled. - auto_invoking = request_settings.function_choice_behavior.auto_invoke_kernel_functions - print("Mosscap:> ", end="", flush=True) # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions. @@ -156,25 +153,20 @@ async def handle_streaming( if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: continue - if auto_invoking: - # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. + # When auto invocation is OFF, the model may send chunks that represent tool calls. + # Chunks that contain function call instructions will have a function_invoke_attempt attribute. + if hasattr(msg, "function_invoke_attempt"): + # This chunk is part of a tool call instruction sequence + streamed_tool_chunks.append(msg) + else: + # This chunk is normal assistant response text streamed_response_chunks.append(msg) print(str(msg), end="", flush=True) - else: - # When auto invocation is OFF, the model may send chunks that represent tool calls. - # Chunks that contain function call instructions will have a function_invoke_attempt attribute. - if hasattr(msg, "function_invoke_attempt"): - # This chunk is part of a tool call instruction sequence - streamed_tool_chunks.append(msg) - else: - # This chunk is normal assistant response text - streamed_response_chunks.append(msg) - print(str(msg), end="", flush=True) print("\n", flush=True) # If auto function calling was OFF, handle any tool call instructions we captured. - if not auto_invoking and streamed_tool_chunks: + if streamed_tool_chunks: # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately. grouped_chunks = {} for chunk in streamed_tool_chunks: From e1e9e74567c28e0f4bda67d5d05297e377370af0 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 19 Dec 2024 12:57:27 +0900 Subject: [PATCH 4/7] Model rebuild for openai plugin predicate context. 
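
The rebuild is needed, presumably, because OpenAIFunctionExecutionParameters forward-references
the OperationSelectionPredicateContext type, so pydantic leaves the model partially built until
that name can actually be imported and resolved. A minimal, self-contained sketch of the same
pydantic v2 pattern (the class and field names below are illustrative only, not taken from this
change):

    from pydantic import BaseModel

    class ExecutionParameters(BaseModel):
        # "PredicateContext" is only a string annotation at this point, so pydantic
        # defers resolution and marks the model as not fully defined.
        context: "PredicateContext | None" = None

    class PredicateContext(BaseModel):
        operation: str = ""

    # Once the referenced type exists in scope, model_rebuild() resolves the
    # annotation and the model becomes usable.
    ExecutionParameters.model_rebuild()
    print(ExecutionParameters(context=PredicateContext(operation="getSecret")))

The new test fixture follows the same order: import the context type first, then call
model_rebuild() on the parameters model.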
--- .../chat_completion_with_manual_function_calling.py | 2 +- python/tests/conftest.py | 10 ++++++++++ python/tests/samples/test_concepts.py | 2 +- .../unit/connectors/openapi_plugin/test_sk_openapi.py | 2 +- python/tests/unit/functions/test_kernel_plugins.py | 8 ++++---- python/tests/unit/kernel/test_kernel.py | 2 +- 6 files changed, 18 insertions(+), 8 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py index 978fa3ea24ec..0d91f410adff 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py @@ -65,7 +65,7 @@ # - Services.ONNX # - Services.VERTEX_AI # Please make sure you have configured your environment correctly for the selected chat completion service. -chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.OPENAI) +chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) # Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=False. # With `FunctionChoiceBehavior(auto_invoke=False)`, the model may return tool call instructions diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 697cce70712e..e6a01549f020 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -15,6 +15,9 @@ from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_prompt_execution_settings import ( OpenAIEmbeddingPromptExecutionSettings, ) +from semantic_kernel.connectors.openai_plugin.openai_function_execution_parameters import ( + OpenAIFunctionExecutionParameters, +) from semantic_kernel.data.record_definition.vector_store_model_decorator import vectorstoremodel from semantic_kernel.data.record_definition.vector_store_model_definition import VectorStoreRecordDefinition from semantic_kernel.data.record_definition.vector_store_record_fields import ( @@ -686,3 +689,10 @@ class DataModelClass(BaseModel): key: Annotated[str, VectorStoreRecordKeyField()] return DataModelClass + + +@fixture +def define_openai_predicate_context(): + from semantic_kernel.connectors.openapi_plugin import OperationSelectionPredicateContext # noqa: F401 + + OpenAIFunctionExecutionParameters.model_rebuild() diff --git a/python/tests/samples/test_concepts.py b/python/tests/samples/test_concepts.py index 6e8d4ad1a9e0..e108221d6217 100644 --- a/python/tests/samples/test_concepts.py +++ b/python/tests/samples/test_concepts.py @@ -8,7 +8,7 @@ import pytest from pytest import mark, param -from samples.concepts.auto_function_calling.chat_completion_with_function_calling import ( +from samples.concepts.auto_function_calling.chat_completion_with_auto_function_calling import ( main as chat_completion_with_function_calling, ) from samples.concepts.auto_function_calling.functions_defined_in_json_prompt import ( diff --git a/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py b/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py index 1d25486b5a86..0df4bc63b0f6 100644 --- a/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py +++ b/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py @@ -749,7 +749,7 @@ def predicate_callback(context): return runner, operations, exec_settings -def 
test_predicate_callback_applied(openapi_runner_with_predicate_callback): +def test_predicate_callback_applied(openapi_runner_with_predicate_callback, define_openai_predicate_context): _, operations, exec_settings = openapi_runner_with_predicate_callback skipped_operations = [] diff --git a/python/tests/unit/functions/test_kernel_plugins.py b/python/tests/unit/functions/test_kernel_plugins.py index fd9102f7a5c9..8e487e7022cd 100644 --- a/python/tests/unit/functions/test_kernel_plugins.py +++ b/python/tests/unit/functions/test_kernel_plugins.py @@ -498,7 +498,7 @@ def test_from_object_class(custom_plugin_class): @patch("semantic_kernel.connectors.openai_plugin.openai_utils.OpenAIUtils.parse_openai_manifest_for_openapi_spec_url") -async def test_from_openai_from_file(mock_parse_openai_manifest): +async def test_from_openai_from_file(mock_parse_openai_manifest, define_openai_predicate_context): openai_spec_file = os.path.join(os.path.dirname(__file__), "../../assets/test_plugins") with open(os.path.join(openai_spec_file, "TestOpenAIPlugin", "akv-openai.json")) as file: openai_spec = file.read() @@ -526,7 +526,7 @@ async def test_from_openai_from_file(mock_parse_openai_manifest): @patch("httpx.AsyncClient.get") @patch("semantic_kernel.connectors.openai_plugin.openai_utils.OpenAIUtils.parse_openai_manifest_for_openapi_spec_url") -async def test_from_openai_plugin_from_url(mock_parse_openai_manifest, mock_get): +async def test_from_openai_plugin_from_url(mock_parse_openai_manifest, mock_get, define_openai_predicate_context): openai_spec_file_path = os.path.join( os.path.dirname(__file__), "../../assets/test_plugins", "TestOpenAIPlugin", "akv-openai.json" ) @@ -561,12 +561,12 @@ async def test_from_openai_plugin_from_url(mock_parse_openai_manifest, mock_get) mock_get.assert_awaited_once_with(fake_plugin_url, headers={"User-Agent": HTTP_USER_AGENT}) -async def test_from_openai_fail(): +async def test_from_openai_fail(define_openai_predicate_context): with raises(PluginInitializationError): await KernelPlugin.from_openai(plugin_name="TestOpenAIPlugin") -async def test_from_openai_fail_json_parsing(): +async def test_from_openai_fail_json_parsing(define_openai_predicate_context): with raises(PluginInitializationError): await KernelPlugin.from_openai(plugin_name="TestOpenAIPlugin", plugin_str="test") diff --git a/python/tests/unit/kernel/test_kernel.py b/python/tests/unit/kernel/test_kernel.py index 808c69d4fc6e..ef935c030b57 100644 --- a/python/tests/unit/kernel/test_kernel.py +++ b/python/tests/unit/kernel/test_kernel.py @@ -589,7 +589,7 @@ def func2(arg1: str) -> str: @patch("semantic_kernel.connectors.openai_plugin.openai_utils.OpenAIUtils.parse_openai_manifest_for_openapi_spec_url") -async def test_add_plugin_from_openai(mock_parse_openai_manifest, kernel: Kernel): +async def test_add_plugin_from_openai(mock_parse_openai_manifest, kernel: Kernel, define_openai_predicate_context): base_folder = os.path.join(os.path.dirname(__file__), "../../assets/test_plugins") with open(os.path.join(base_folder, "TestOpenAIPlugin", "akv-openai.json")) as file: openai_spec = file.read() From b6f216ed25277dd197cd38791f219a64c791eb7f Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 19 Dec 2024 13:06:14 +0900 Subject: [PATCH 5/7] model rebuild for openapi tests --- .../unit/connectors/openapi_plugin/test_sk_openapi.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py b/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py index 
0df4bc63b0f6..094d57619c53 100644 --- a/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py +++ b/python/tests/unit/connectors/openapi_plugin/test_sk_openapi.py @@ -733,6 +733,10 @@ async def dummy_auth_callback(**kwargs): @pytest.fixture def openapi_runner_with_predicate_callback(): + from semantic_kernel.connectors.openapi_plugin import OperationSelectionPredicateContext # noqa: F401 + + OpenAPIFunctionExecutionParameters.model_rebuild() + # Define a dummy predicate callback def predicate_callback(context): # Skip operations with DELETE method or containing 'internal' in the path @@ -809,6 +813,10 @@ async def test_run_operation_with_error(mock_request, openapi_runner): def test_invalid_server_url_override(): + from semantic_kernel.connectors.openapi_plugin import OperationSelectionPredicateContext # noqa: F401 + + OpenAPIFunctionExecutionParameters.model_rebuild() + with pytest.raises(ValueError, match="Invalid server_url_override: invalid_url"): params = OpenAPIFunctionExecutionParameters(server_url_override="invalid_url") params.model_post_init(None) From 852d6d923e3483a59b0ba982e543d52bb129f7ac Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 19 Dec 2024 16:37:10 +0900 Subject: [PATCH 6/7] clean up conditional check --- .../chat_completion_with_manual_function_calling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py index 0d91f410adff..162c415c4a64 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling.py @@ -129,8 +129,8 @@ async def chat() -> bool: # Handle non-streaming responses result = await kernel.invoke(chat_function, arguments=arguments) - # If function calls are returned and auto invoking is off, we must show them. - if not request_settings.function_choice_behavior.auto_invoke_kernel_functions and result and result.value: + # If function calls are returned, we show them on the console. 
+ if result and result.value: # Extract function calls from the returned content function_calls = [item for item in result.value[-1].items if isinstance(item, FunctionCallContent)] if len(function_calls) > 0: From 087ff2678b185d9ef1697dce0d4e0ceda8653fa2 Mon Sep 17 00:00:00 2001 From: Evan Mattson Date: Thu, 19 Dec 2024 16:53:26 +0900 Subject: [PATCH 7/7] Clean up samples --- ...t_completion_with_auto_function_calling.py | 2 +- ...on_with_auto_function_calling_streaming.py | 112 +++------- ..._with_manual_function_calling_streaming.py | 205 +++++++----------- 3 files changed, 112 insertions(+), 207 deletions(-) diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py index 2dad6517e31c..c74ebc322489 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling.py @@ -105,7 +105,7 @@ async def chat() -> bool: if result: print(f"Mosscap:> {result}") history.add_user_message(user_input) - history.add_assistant_message(str(result)) + history.add_message(result.value[0]) # Capture the full context of the response return True diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py index 86435032ba4d..f7aa767ffa23 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_auto_function_calling_streaming.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. import asyncio -from typing import TYPE_CHECKING from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings from semantic_kernel import Kernel @@ -13,9 +12,6 @@ from semantic_kernel.core_plugins.time_plugin import TimePlugin from semantic_kernel.functions import KernelArguments -if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction - ##################################################################### # This sample demonstrates how to build a conversational chatbot # # using Semantic Kernel, featuring auto function calling, # @@ -66,8 +62,7 @@ # Please make sure you have configured your environment correctly for the selected chat completion service. chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) -# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=True by default. -# With `auto_invoke=True`, the model will automatically choose and call functions as needed. +# Configure the function choice behavior. Here, we set it to Auto. request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto() kernel.add_service(chat_completion_service) @@ -82,87 +77,52 @@ history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need.") -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - """ - Handle the streaming response from the model. 
- This function demonstrates two possible paths: - - When auto function calling is ON (auto_invoke=True): - - The model may call tools automatically and produce a continuous - stream of assistant messages. We can simply print these as they come in. - """ - - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, +async def main() -> None: + print( + "Welcome to the chat bot!\n" + " Type 'exit' to exit.\n" + " Try a math question to see function calling in action (e.g. 'what is 3+3?')." ) - print("Mosscap:> ", end="", flush=True) - - # For content messages (the final assistant's response text), store them here. - streamed_response_chunks: list[StreamingChatMessageContent] = [] - - async for message in response: - msg = message[0] - - # We only expect assistant messages here. - if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: - continue + while True: + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + break - # When auto invocation is ON, no special handling is needed. Just print out messages as they arrive. - streamed_response_chunks.append(msg) - print(str(msg), end="", flush=True) + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + break - print("\n", flush=True) + arguments["user_input"] = user_input + arguments["chat_history"] = history - # Return the final concatenated assistant response (if any). - if streamed_response_chunks: - return "".join([str(content) for content in streamed_response_chunks]) - return None + # Directly handle streaming of the assistant's response here + print("Mosscap:> ", end="", flush=True) + streamed_response_chunks: list[StreamingChatMessageContent] = [] -async def chat() -> bool: - """ - Continuously prompt the user for input and show the assistant's response. - Type 'exit' to exit. - """ - try: - user_input = input("User:> ") - except (KeyboardInterrupt, EOFError): - print("\n\nExiting chat...") - return False + async for message in kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ): + msg = message[0] - if user_input.lower().strip() == "exit": - print("\n\nExiting chat...") - return False + # We only expect assistant messages here. + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue - arguments["user_input"] = user_input - arguments["chat_history"] = history + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) - result = await handle_streaming(kernel, chat_function, arguments=arguments) + print("\n", flush=True) - # Update the chat history with the user's input and the assistant's response - if result: - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - - return True - - -async def main() -> None: - print( - "Welcome to the chat bot!\n" - " Type 'exit' to exit.\n" - " Try a math question to see function calling in action (e.g. 'what is 3+3?')." 
- ) - chatting = True - while chatting: - chatting = await chat() + if streamed_response_chunks: + result = "".join([str(content) for content in streamed_response_chunks]) + history.add_user_message(user_input) + history.add_assistant_message(result) if __name__ == "__main__": diff --git a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py index b749ff2a87a0..360c0d670f45 100644 --- a/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py +++ b/python/samples/concepts/auto_function_calling/chat_completion_with_manual_function_calling_streaming.py @@ -17,7 +17,7 @@ from semantic_kernel.functions import KernelArguments if TYPE_CHECKING: - from semantic_kernel.functions import KernelFunction + pass ##################################################################### # This sample demonstrates how to build a conversational chatbot # @@ -56,25 +56,12 @@ function_name="Chat", ) -# You can select from the following chat completion services that support function calling: -# - Services.OPENAI -# - Services.AZURE_OPENAI -# - Services.AZURE_AI_INFERENCE -# - Services.ANTHROPIC -# - Services.BEDROCK -# - Services.GOOGLE_AI -# - Services.MISTRAL_AI -# - Services.OLLAMA -# - Services.ONNX -# - Services.VERTEX_AI -# Please make sure you have configured your environment correctly for the selected chat completion service. +# Configure the chat completion service and request settings. chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI) -# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=False. -# With `FunctionChoiceBehavior(auto_invoke=False)`, the model may return tool call instructions -# that you must handle and call manually. We will only print the tool calls in this sample. +# Configure the function choice behavior to Auto with auto_invoke=False. +# This means the model may return tool calls that must be manually handled. request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=False) - kernel.add_service(chat_completion_service) # Pass the request settings to the kernel arguments. @@ -111,125 +98,83 @@ def print_tool_calls(message: ChatMessageContent) -> None: print("\n[No tool calls returned by the model]") -async def handle_streaming( - kernel: Kernel, - chat_function: "KernelFunction", - arguments: KernelArguments, -) -> str | None: - """ - Handle the streaming response from the model. - This function demonstrates two possible paths: - - 1. When auto function calling is ON (auto_invoke=True): - - The model may call tools automatically and produce a continuous - stream of assistant messages. We can simply print these as they come in. - - 2. When auto function calling is OFF (auto_invoke=False): - - The model may instead return tool call instructions embedded in the stream. - We can track these calls using `function_invoke_attempt` attributes and print - them for the user. The user can then manually invoke the tools and return the results - to the model for further completion. - """ - - response = kernel.invoke_stream( - chat_function, - return_function_results=False, - arguments=arguments, - ) - - print("Mosscap:> ", end="", flush=True) - - # If auto_invoking is False, the model may return separate streaming chunks containing tool instructions. - # We'll store them here. 
- streamed_tool_chunks: list[StreamingChatMessageContent] = [] - - # For content messages (the final assistant's response text), store them here. - streamed_response_chunks: list[StreamingChatMessageContent] = [] - - async for message in response: - msg = message[0] - - # We only expect assistant messages here. - if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: - continue - - # When auto invocation is OFF, the model may send chunks that represent tool calls. - # Chunks that contain function call instructions will have a function_invoke_attempt attribute. - if hasattr(msg, "function_invoke_attempt"): - # This chunk is part of a tool call instruction sequence - streamed_tool_chunks.append(msg) - else: - # This chunk is normal assistant response text - streamed_response_chunks.append(msg) - print(str(msg), end="", flush=True) - - print("\n", flush=True) - - # If auto function calling was OFF, handle any tool call instructions we captured. - if streamed_tool_chunks: - # Group streamed chunks by `function_invoke_attempt` to handle each invocation attempt separately. - grouped_chunks = {} - for chunk in streamed_tool_chunks: - key = getattr(chunk, "function_invoke_attempt", None) - if key is not None: - grouped_chunks.setdefault(key, []).append(chunk) - - # Process each group of chunks - for attempt, chunks in grouped_chunks.items(): - try: - # Combine all chunks for a given attempt into one message. - combined_content = reduce(lambda first, second: first + second, chunks) - if hasattr(combined_content, "content"): - print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") - - print("[Auto function calling is OFF] Here are the returned tool calls:") - print_tool_calls(combined_content) - except Exception as e: - print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") - - # Return the final concatenated assistant response (if any). - if streamed_response_chunks: - return "".join([str(content) for content in streamed_response_chunks]) - return None - - -async def chat() -> bool: - """ - Continuously prompt the user for input and show the assistant's response. - Type 'exit' to exit. - """ - try: - user_input = input("User:> ") - except (KeyboardInterrupt, EOFError): - print("\n\nExiting chat...") - return False - - if user_input.lower().strip() == "exit": - print("\n\nExiting chat...") - return False - - arguments["user_input"] = user_input - arguments["chat_history"] = history - - result = await handle_streaming(kernel, chat_function, arguments=arguments) - - # Update the chat history with the user's input and the assistant's response - if result: - history.add_user_message(user_input) - history.add_assistant_message(str(result)) - - return True - - async def main() -> None: print( "Welcome to the chat bot!\n" " Type 'exit' to exit.\n" " Try a math question to see function calling in action (e.g. 'what is 3+3?')." 
) - chatting = True - while chatting: - chatting = await chat() + + while True: + # Get user input + try: + user_input = input("User:> ") + except (KeyboardInterrupt, EOFError): + print("\n\nExiting chat...") + break + + if user_input.lower().strip() == "exit": + print("\n\nExiting chat...") + break + + # Prepare arguments for the model invocation + arguments["user_input"] = user_input + arguments["chat_history"] = history + + print("Mosscap:> ", end="", flush=True) + + # Lists to store streamed chunks + streamed_tool_chunks: list[StreamingChatMessageContent] = [] + streamed_response_chunks: list[StreamingChatMessageContent] = [] + + async for message in kernel.invoke_stream( + chat_function, + return_function_results=False, + arguments=arguments, + ): + msg = message[0] + + # Expecting assistant messages only + if not isinstance(msg, StreamingChatMessageContent) or msg.role != AuthorRole.ASSISTANT: + continue + + # If auto_invoking is False, the model may send tool calls in separate chunks. + if hasattr(msg, "function_invoke_attempt"): + # This chunk is part of a tool call instruction + streamed_tool_chunks.append(msg) + else: + # Normal assistant response text + streamed_response_chunks.append(msg) + print(str(msg), end="", flush=True) + + print("\n", flush=True) + + # If we have tool call instructions + if streamed_tool_chunks: + # Group streamed tool chunks by `function_invoke_attempt` + grouped_chunks = {} + for chunk in streamed_tool_chunks: + key = getattr(chunk, "function_invoke_attempt", None) + if key is not None: + grouped_chunks.setdefault(key, []).append(chunk) + + # Process each group of chunks + for attempt, chunks in grouped_chunks.items(): + try: + combined_content = reduce(lambda first, second: first + second, chunks) + if hasattr(combined_content, "content"): + print(f"[function_invoke_attempt {attempt} content]:\n{combined_content.content}") + + print("[Auto function calling is OFF] Here are the returned tool calls:") + print_tool_calls(combined_content) + except Exception as e: + print(f"Error processing chunks for function_invoke_attempt {attempt}: {e}") + + # Update the chat history with user input and assistant response, if any + if streamed_response_chunks: + result = "".join([str(content) for content in streamed_response_chunks]) + history.add_user_message(user_input) + history.add_assistant_message(str(result)) if __name__ == "__main__":
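
Editorial note (not part of the patch): the key pattern the updated manual function calling sample relies on is grouping streamed tool-call chunks by the new `function_invoke_attempt` index and merging each group into a single message. The following is a minimal standalone sketch of that pattern as it appears in the diff above; it assumes each chunk exposes a `function_invoke_attempt` attribute and supports `+` concatenation, as `StreamingChatMessageContent` does in this PR.

```python
# Minimal sketch of the chunk-grouping pattern used in the updated sample.
# Assumes chunks carry a `function_invoke_attempt` attribute and can be
# combined with `+`, as StreamingChatMessageContent does in this PR.
from functools import reduce

from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent


def combine_tool_chunks_by_attempt(
    chunks: list[StreamingChatMessageContent],
) -> dict[int, StreamingChatMessageContent]:
    """Group streamed tool-call chunks by function_invoke_attempt and merge each group."""
    grouped: dict[int, list[StreamingChatMessageContent]] = {}
    for chunk in chunks:
        key = getattr(chunk, "function_invoke_attempt", None)
        if key is not None:
            grouped.setdefault(key, []).append(chunk)

    # Combine all chunks for a given attempt into one message via `+`.
    return {attempt: reduce(lambda first, second: first + second, group) for attempt, group in grouped.items()}
```

The combined message per attempt can then be passed to a helper such as the sample's `print_tool_calls` to inspect the tool call instructions before invoking them manually.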