Python: Bedrock connector (#9100)
### Motivation and Context

Add Amazon Bedrock AI to Semantic Kernel.

Amazon Bedrock is a platform on AWS that offers access to a wide range of LLMs, including Amazon's proprietary Titan models.

We are integrating it into Semantic Kernel as one of our AI connectors to provide developers with more options.

### Description

1. All interfaces implemented: chat completion (streaming + non-streaming), text completion (streaming + non-streaming), and text embedding (see the usage sketch after this list).
2. Support for function calling.
3. Support for image content.
4. Support for all models available on Bedrock.
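
For illustration, here is a minimal sketch of calling the new chat completion service directly, outside of a kernel. The model ID is only an example, and `get_chat_message_contents` is the standard Semantic Kernel chat interface this connector implements; treat the exact wiring as an assumption.

```python
# Minimal sketch; assumes AWS credentials are already configured for boto3
# and that the account has access to the chosen Bedrock model.
import asyncio

from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings
from semantic_kernel.connectors.ai.bedrock.services.bedrock_chat_completion import BedrockChatCompletion
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    service = BedrockChatCompletion(model_id="cohere.command-r-v1:0")  # example model ID
    history = ChatHistory()
    history.add_user_message("Hi there, who are you?")
    settings = BedrockChatPromptExecutionSettings(max_tokens=256)
    responses = await service.get_chat_message_contents(chat_history=history, settings=settings)
    print(responses[0])


asyncio.run(main())
```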

### Contribution Checklist


- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
TaoChenOSU authored Oct 10, 2024
1 parent 578fdcf commit 481d16a
Showing 44 changed files with 3,547 additions and 69 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/python-integration-tests.yml
@@ -108,6 +108,12 @@ jobs:
           credentials_json: ${{ secrets.VERTEX_AI_SERVICE_ACCOUNT_KEY }}
       - name: Set up gcloud
         uses: google-github-actions/setup-gcloud@v2
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
       - name: Setup Redis Stack Server
         if: matrix.os == 'ubuntu-latest'
         run: docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest
@@ -236,6 +242,12 @@ jobs:
           credentials_json: ${{ secrets.VERTEX_AI_SERVICE_ACCOUNT_KEY }}
       - name: Set up gcloud
         uses: google-github-actions/setup-gcloud@v2
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
       - name: Setup Redis Stack Server
         if: matrix.os == 'ubuntu-latest'
         run: docker run -d --name redis-stack-server -p 6379:6379 redis/redis-stack-server:latest
1 change: 1 addition & 0 deletions python/.cspell.json
@@ -27,6 +27,7 @@
     "aiplatform",
     "azuredocindex",
     "azuredocs",
+    "boto",
     "contentvector",
     "contoso",
     "datamodel",
3 changes: 3 additions & 0 deletions python/pyproject.toml
@@ -118,6 +118,9 @@ weaviate = [
 pandas = [
     "pandas ~= 2.2"
 ]
+aws = [
+    "boto3>=1.28.57",
+]
 
 [tool.uv]
 prerelease = "if-necessary-or-explicit"
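
With this extra in place, the connector's dependency can be installed alongside the package, e.g. `pip install semantic-kernel[aws]` (assuming the published distribution exposes the new `aws` extra).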
21 changes: 12 additions & 9 deletions python/samples/concepts/chat_completion/azure_chat_gpt_api.py
@@ -76,22 +76,25 @@ async def chat() -> bool:
 
         stream = True
         if stream:
-            answer = kernel.invoke_stream(
+            chunks = kernel.invoke_stream(
                 chat_function,
                 user_input=user_input,
                 chat_history=history,
             )
             print("Mosscap:> ", end="")
-            async for message in answer:
+            answer = ""
+            async for message in chunks:
                 print(str(message[0]), end="")
+                answer += str(message[0])
             print("\n")
-            return True
-        answer = await kernel.invoke(
-            chat_function,
-            user_input=user_input,
-            chat_history=history,
-        )
-        print(f"Mosscap:> {answer}")
+        else:
+            answer = await kernel.invoke(
+                chat_function,
+                user_input=user_input,
+                chat_history=history,
+            )
+            print(f"Mosscap:> {answer}")
 
         history.add_user_message(user_input)
         history.add_assistant_message(str(answer))
         return True
21 changes: 12 additions & 9 deletions python/samples/concepts/chat_completion/chat_anthropic_api.py
@@ -55,22 +55,25 @@ async def chat() -> bool:
 
         stream = True
         if stream:
-            answer = kernel.invoke_stream(
+            chunks = kernel.invoke_stream(
                 chat_function,
                 user_input=user_input,
                 chat_history=chat_history,
             )
             print("Mosscap:> ", end="")
-            async for message in answer:
+            answer = ""
+            async for message in chunks:
                 print(str(message[0]), end="", flush=True)
+                answer += str(message[0])
             print("\n")
-            return True
-        answer = await kernel.invoke(
-            chat_function,
-            user_input=user_input,
-            chat_history=chat_history,
-        )
-        print(f"Mosscap:> {answer}")
+        else:
+            answer = await kernel.invoke(
+                chat_function,
+                user_input=user_input,
+                chat_history=chat_history,
+            )
+            print(f"Mosscap:> {answer}")
 
         chat_history.add_user_message(user_input)
         chat_history.add_assistant_message(str(answer))
         return True
95 changes: 95 additions & 0 deletions python/samples/concepts/chat_completion/chat_bedrock_api.py
@@ -0,0 +1,95 @@
# Copyright (c) Microsoft. All rights reserved.

import asyncio

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings
from semantic_kernel.connectors.ai.bedrock.services.bedrock_chat_completion import BedrockChatCompletion
from semantic_kernel.contents import ChatHistory

system_message = """
You are a chat bot. Your name is Mosscap and
you have one goal: figure out what people need.
Your full name, should you need to know it, is
Splendid Speckled Mosscap. You communicate
effectively, but you tend to answer with long
flowery prose.
"""

kernel = Kernel()

service_id = "bedrock-chat"
kernel.add_service(BedrockChatCompletion(service_id=service_id, model_id="cohere.command-r-v1:0"))

settings = BedrockChatPromptExecutionSettings(
    max_tokens=2000,
    temperature=0.7,
    top_p=0.8,
    # Cohere Command specific settings: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html
    extension_data={
        "presence_penalty": 0.5,
        "seed": 5,
    },
)

chat_function = kernel.add_function(
    plugin_name="ChatBot",
    function_name="Chat",
    prompt="{{$chat_history}}{{$user_input}}",
    template_format="semantic-kernel",
    prompt_execution_settings=settings,
)

chat_history = ChatHistory()
chat_history.add_user_message("Hi there, who are you?")
chat_history.add_assistant_message("I am Mosscap, a chat bot. I'm trying to figure out what people need")


async def chat() -> bool:
    try:
        user_input = input("User:> ")
    except KeyboardInterrupt:
        print("\n\nExiting chat...")
        return False
    except EOFError:
        print("\n\nExiting chat...")
        return False

    if user_input == "exit":
        print("\n\nExiting chat...")
        return False

    stream = True
    if stream:
        chunks = kernel.invoke_stream(
            chat_function,
            user_input=user_input,
            chat_history=chat_history,
        )
        print("Mosscap:> ", end="")
        answer = ""
        async for message in chunks:
            print(str(message[0]), end="", flush=True)
            answer += str(message[0])
        print("\n")
    else:
        answer = await kernel.invoke(
            chat_function,
            user_input=user_input,
            chat_history=chat_history,
        )
        print(f"Mosscap:> {answer}")

    chat_history.add_user_message(user_input)
    chat_history.add_assistant_message(str(answer))
    return True


async def main() -> None:
    chatting = True
    while chatting:
        chatting = await chat()


if __name__ == "__main__":
    asyncio.run(main())
21 changes: 12 additions & 9 deletions python/samples/concepts/chat_completion/chat_mistral_api.py
@@ -55,22 +55,25 @@ async def chat() -> bool:
 
         stream = True
         if stream:
-            answer = kernel.invoke_stream(
+            chunks = kernel.invoke_stream(
                 chat_function,
                 user_input=user_input,
                 chat_history=chat_history,
             )
             print("Mosscap:> ", end="")
-            async for message in answer:
+            answer = ""
+            async for message in chunks:
                 print(str(message[0]), end="")
+                answer += str(message[0])
             print("\n")
-            return True
-        answer = await kernel.invoke(
-            chat_function,
-            user_input=user_input,
-            chat_history=chat_history,
-        )
-        print(f"Mosscap:> {answer}")
+        else:
+            answer = await kernel.invoke(
+                chat_function,
+                user_input=user_input,
+                chat_history=chat_history,
+            )
+            print(f"Mosscap:> {answer}")
 
         chat_history.add_user_message(user_input)
         chat_history.add_assistant_message(str(answer))
         return True
50 changes: 50 additions & 0 deletions python/semantic_kernel/connectors/ai/bedrock/README.md
@@ -0,0 +1,50 @@
# Amazon - Bedrock

[Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html) is a service provided by Amazon Web Services (AWS) that allows you to access large language models with a serverless experience. Semantic Kernel provides a connector to access these models from AWS.

## Prerequisites

- An AWS account and [access to the foundation models](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access-permissions.html)
- [AWS CLI installed](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) and [configured](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration)

### Configuration

Follow this [guide](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration) to configure your environment to use the Bedrock API.

## Supported features

### Region

To find out which models are supported in which AWS regions, refer to this [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html).

### Input & Output Modalities

Foundation models in Bedrock support multiple modalities, including text, image, and embedding. However, not all models support the same modalities. Refer to the [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html) for more information.

The Bedrock connector supports all modalities except **image embedding and text-to-image**.

### Text completion vs chat completion

Some models in Bedrock support only text completion, some only chat completion (a.k.a. the Converse API), and some both. Refer to the [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-features.html) for more information.

### Tool Use

Not all models in Bedrock support tools. Refer to the [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-features.html) for more information.

### Streaming vs Non-Streaming

Not all models in Bedrock support streaming. You can use the boto3 client to check if a model supports streaming. Refer to the [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html) and the [Boto3 documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock/client/get_foundation_model.html) for more information.

You can also directly call the `get_foundation_model_info("model_id")` method from the Bedrock connector to check if a model supports streaming.

> Note: The Bedrock connector will check whether a model supports streaming before making a streaming request to the model.

## Model-specific parameters

Foundation models can have specific parameters that are unique to the model or the model provider. You can refer to this [AWS documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html) for more information.

You can pass these parameters via the `extension_data` field in the `PromptExecutionSettings` object.
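
For example, a sketch passing Cohere Command R specific parameters (the parameter names come from the AWS model-parameters page linked above):

```python
from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings

# Model-specific parameters go in extension_data and are forwarded to the model provider.
settings = BedrockChatPromptExecutionSettings(
    max_tokens=1000,
    temperature=0.7,
    extension_data={
        "presence_penalty": 0.5,
        "seed": 5,
    },
)
```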

## Unsupported features

- [Guardrail](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html)
Empty file.
44 changes: 44 additions & 0 deletions python/semantic_kernel/connectors/ai/bedrock/bedrock_prompt_execution_settings.py
@@ -0,0 +1,44 @@
# Copyright (c) Microsoft. All rights reserved.


from typing import Any

from pydantic import Field

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings


class BedrockPromptExecutionSettings(PromptExecutionSettings):
    """Bedrock Prompt Execution Settings."""

    temperature: float | None = Field(None, ge=0.0, le=1.0)
    top_p: float | None = Field(None, ge=0.0, le=1.0)
    top_k: int | None = Field(None, gt=0)
    max_tokens: int | None = Field(None, gt=0)
    stop: list[str] = Field(default_factory=list)


class BedrockChatPromptExecutionSettings(BedrockPromptExecutionSettings):
    """Bedrock Chat Prompt Execution Settings."""

    tools: list[dict[str, Any]] | None = Field(
        None,
        max_length=64,
        description="Do not set this manually. It is set by the service based on the function choice configuration.",
    )
    tool_choice: dict[str, Any] | None = Field(
        None,
        description="Do not set this manually. It is set by the service based on the function choice configuration.",
    )


class BedrockTextPromptExecutionSettings(BedrockPromptExecutionSettings):
    """Bedrock Text Prompt Execution Settings."""

    ...


class BedrockEmbeddingPromptExecutionSettings(PromptExecutionSettings):
    """Bedrock Embedding Prompt Execution Settings."""

    ...
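
Since `tools` and `tool_choice` are populated by the service, callers opt into function calling through the function choice behavior on the settings instead; a minimal sketch, assuming the standard Semantic Kernel `FunctionChoiceBehavior` wiring applies to this connector:

```python
from semantic_kernel.connectors.ai.bedrock.bedrock_prompt_execution_settings import BedrockChatPromptExecutionSettings
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior

# The service translates this behavior into the Bedrock tools/tool_choice fields.
settings = BedrockChatPromptExecutionSettings(max_tokens=1000)
settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
```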
34 changes: 34 additions & 0 deletions python/semantic_kernel/connectors/ai/bedrock/bedrock_settings.py
@@ -0,0 +1,34 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import ClassVar

from semantic_kernel.kernel_pydantic import KernelBaseSettings
from semantic_kernel.utils.experimental_decorator import experimental_class


@experimental_class
class BedrockSettings(KernelBaseSettings):
    """Amazon Bedrock service settings.

    The settings are first loaded from environment variables with
    the prefix 'BEDROCK_'. If the environment variables are not found,
    the settings can be loaded from a .env file with the encoding 'utf-8'.
    If the settings are not found in the .env file, the settings are
    ignored; however, validation will fail, alerting that the settings
    are missing.

    Optional settings with the prefix 'BEDROCK_' are:
    - chat_model_id: str | None - The Amazon Bedrock chat model ID to use.
        (Env var BEDROCK_CHAT_MODEL_ID)
    - text_model_id: str | None - The Amazon Bedrock text model ID to use.
        (Env var BEDROCK_TEXT_MODEL_ID)
    - embedding_model_id: str | None - The Amazon Bedrock embedding model ID to use.
        (Env var BEDROCK_EMBEDDING_MODEL_ID)
    """

    env_prefix: ClassVar[str] = "BEDROCK_"

    chat_model_id: str | None = None
    text_model_id: str | None = None
    embedding_model_id: str | None = None
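
A short sketch of how these settings resolve from the environment (the `create` factory is inherited from `KernelBaseSettings`; treat the exact call shape as an assumption):

```python
import os

from semantic_kernel.connectors.ai.bedrock.bedrock_settings import BedrockSettings

# With BEDROCK_CHAT_MODEL_ID set in the environment (or in a .env file),
# the corresponding field is populated automatically.
os.environ["BEDROCK_CHAT_MODEL_ID"] = "cohere.command-r-v1:0"
settings = BedrockSettings.create()
print(settings.chat_model_id)  # -> cohere.command-r-v1:0
```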
Empty file.