### Motivation and Context

We are adding a new AI connector for talking to the Gemini API on Google Vertex AI: https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/overview

Note that this is for Gemini hosted on Vertex AI. Google also offers Gemini access on its Google AI platform: https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/migrate-google-ai. We will have another connector alongside this one for accessing Gemini on Google AI.

### Description

The new connector contains 3 AI services:

- Chat completion
- Text completion
- Text embedding

TODO: Function calling.

> Function calling is not included in this PR to reduce the size of the PR.

### Contribution Checklist

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

---------

Co-authored-by: Evan Mattson <[email protected]>
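As a rough, hedged sketch of how the three services listed above might be registered on a kernel (not taken from this commit; `VertexAITextCompletion` and `VertexAITextEmbedding` are class names assumed by analogy with `VertexAIChatCompletion`, the import path is assumed from the file layout, and the embedding model ID is left as a placeholder):

```Python
# Hypothetical sketch only: the text-completion and embedding class names, the
# import path, and the embedding model ID are assumptions, not taken from the diff.
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.google.vertex_ai import (
    VertexAIChatCompletion,
    VertexAITextCompletion,
    VertexAITextEmbedding,
)

kernel = Kernel()
# Explicit service_id values keep the three registrations distinct.
kernel.add_service(
    VertexAIChatCompletion(service_id="vertex_chat", project_id="...", gemini_model_id="gemini-1.5-flash")
)
kernel.add_service(
    VertexAITextCompletion(service_id="vertex_text", project_id="...", gemini_model_id="gemini-1.5-flash")
)
kernel.add_service(
    VertexAITextEmbedding(service_id="vertex_embedding", project_id="...", embedding_model_id="...")
)
```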
Commit 7997e79 (parent 79c029f): 31 changed files with 1,978 additions and 74 deletions.
```diff
@@ -45,6 +45,8 @@
     "generativeai",
     "genai",
     "protos",
-    "endregion"
+    "endregion",
+    "vertexai",
+    "aiplatform"
   ]
 }
```
# Google - Gemini

Gemini models are Google's large language models. Semantic Kernel provides two connectors to access these models from Google Cloud.

## Google AI

You can access the Gemini API from Google AI Studio. This mode of access is intended for quick prototyping, as it relies on API keys.

Follow [these instructions](https://cloud.google.com/docs/authentication/api-keys) to create an API key.

Once you have an API key, you can start using Gemini models in SK via the `google_ai` connector. Example:

```Python
kernel = Kernel()
kernel.add_service(
    GoogleAIChatCompletion(
        gemini_model_id="gemini-1.5-flash",
        api_key="...",
    )
)
...
```

> Alternatively, you can use a `.env` file to store the model ID and API key.

## Vertex AI

Google also offers access to Gemini through its Vertex AI platform. Vertex AI provides a more complete, end-to-end solution for building enterprise AI applications. You can read more about it [here](https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/migrate-google-ai).

This mode of access requires a Google Cloud service account. Follow these [instructions](https://cloud.google.com/vertex-ai/generative-ai/docs/migrate/migrate-google-ai) to create a Google Cloud project if you don't have one already. Note the project ID, as it is required to access the models.

Follow the steps below to set up your environment to use the Vertex AI API:

- [Install the gcloud CLI](https://cloud.google.com/sdk/docs/install)
- [Initialize the gcloud CLI](https://cloud.google.com/sdk/docs/initializing)

Once your project and environment are set up, you can start using Gemini models in SK via the `vertex_ai` connector. Example:

```Python
kernel = Kernel()
kernel.add_service(
    VertexAIChatCompletion(
        project_id="...",
        gemini_model_id="gemini-1.5-flash",
    )
)
...
```

> Alternatively, you can use a `.env` file to store the model ID and project ID.
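As a rough illustration of the `.env`-based configuration mentioned above (a sketch, not part of this commit): the environment-variable names and the import path below are assumptions and may differ from what the connector actually reads.

```Python
# Hypothetical sketch: configuring the Google AI connector from environment
# variables (or a .env file) instead of passing values explicitly.
# GOOGLE_AI_GEMINI_MODEL_ID and GOOGLE_AI_API_KEY are assumed variable names.
import os

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion

os.environ["GOOGLE_AI_GEMINI_MODEL_ID"] = "gemini-1.5-flash"
os.environ["GOOGLE_AI_API_KEY"] = "..."

kernel = Kernel()
# With the settings available in the environment, the connector can be
# constructed without explicit arguments.
kernel.add_service(GoogleAIChatCompletion())
```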
python/semantic_kernel/connectors/ai/google/shared_utils.py (23 additions, 0 deletions)
```Python
# Copyright (c) Microsoft. All rights reserved.

from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError


def filter_system_message(chat_history: ChatHistory) -> str | None:
    """Filter the first system message from the chat history.

    If there are multiple system messages, raise an error.
    If there are no system messages, return None.
    """
    if len([message for message in chat_history if message.role == AuthorRole.SYSTEM]) > 1:
        raise ServiceInvalidRequestError(
            "Multiple system messages in chat history. Only one system message is expected."
        )

    for message in chat_history:
        if message.role == AuthorRole.SYSTEM:
            return message.content

    return None
```
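For illustration, a small usage sketch of `filter_system_message` (not part of the commit); it assumes the standard `ChatHistory` helper methods shown below:

```Python
# Illustrative usage of filter_system_message; a sketch, not part of this commit.
from semantic_kernel.connectors.ai.google.shared_utils import filter_system_message
from semantic_kernel.contents.chat_history import ChatHistory

history = ChatHistory()
history.add_system_message("You are a helpful assistant.")
history.add_user_message("Hello!")

# Returns the text of the single system message, or None if there is none;
# a second system message would raise ServiceInvalidRequestError.
print(filter_system_message(history))  # "You are a helpful assistant."
```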
Two empty files added.
python/semantic_kernel/connectors/ai/google/vertex_ai/services/utils.py (74 additions, 0 deletions)
```Python
# Copyright (c) Microsoft. All rights reserved.

from google.cloud.aiplatform_v1beta1.types.content import Blob, Candidate, Part

from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.image_content import ImageContent
from semantic_kernel.contents.text_content import TextContent
from semantic_kernel.contents.utils.finish_reason import FinishReason as SemanticKernelFinishReason
from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError


def finish_reason_from_vertex_ai_to_semantic_kernel(
    finish_reason: Candidate.FinishReason,
) -> SemanticKernelFinishReason | None:
    """Convert a Vertex AI FinishReason to a Semantic Kernel FinishReason.

    This is best effort and may not cover all cases as the enums are not identical.
    """
    if finish_reason == Candidate.FinishReason.STOP:
        return SemanticKernelFinishReason.STOP

    if finish_reason == Candidate.FinishReason.MAX_TOKENS:
        return SemanticKernelFinishReason.LENGTH

    if finish_reason == Candidate.FinishReason.SAFETY:
        return SemanticKernelFinishReason.CONTENT_FILTER

    return None


def format_user_message(message: ChatMessageContent) -> list[Part]:
    """Format a user message to the expected object for the client.

    Args:
        message: The user message.

    Returns:
        The formatted user message as a list of parts.
    """
    if not any(isinstance(item, ImageContent) for item in message.items):
        return [Part(text=message.content)]

    parts: list[Part] = []
    for item in message.items:
        if isinstance(item, TextContent):
            # Use the text of the individual item rather than the whole message content.
            parts.append(Part(text=item.text))
        elif isinstance(item, ImageContent):
            if item.data_uri:
                parts.append(Part(inline_data=Blob(mime_type=item.mime_type, data=item.data)))
            else:
                # The Google AI API doesn't support images from arbitrary URIs:
                # https://github.com/google-gemini/generative-ai-python/issues/357
                raise ServiceInvalidRequestError(
                    "ImageContent without data_uri in User message while formatting chat history for Google AI"
                )
        else:
            raise ServiceInvalidRequestError(
                "Unsupported item type in User message while formatting chat history for Google AI"
                f" Inference: {type(item)}"
            )

    return parts


def format_assistant_message(message: ChatMessageContent) -> list[Part]:
    """Format an assistant message to the expected object for the client.

    Args:
        message: The assistant message.

    Returns:
        The formatted assistant message as a list of parts.
    """
    return [Part(text=message.content)]
```
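A brief usage sketch of the helpers above (illustrative only, not part of the commit); it assumes `ChatMessageContent` can be built from a role and a plain string:

```Python
# Illustrative usage of the conversion helpers; a sketch, not part of this commit.
from google.cloud.aiplatform_v1beta1.types.content import Candidate

from semantic_kernel.connectors.ai.google.vertex_ai.services.utils import (
    finish_reason_from_vertex_ai_to_semantic_kernel,
    format_user_message,
)
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole

# A text-only user message becomes a single Part carrying the message text.
user_message = ChatMessageContent(role=AuthorRole.USER, content="Describe this image.")
parts = format_user_message(user_message)
print(parts[0].text)  # "Describe this image."

# MAX_TOKENS on the Vertex AI side maps to LENGTH in Semantic Kernel.
print(finish_reason_from_vertex_ai_to_semantic_kernel(Candidate.FinishReason.MAX_TOKENS))
```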
python/semantic_kernel/connectors/ai/google/vertex_ai/services/vertex_ai_base.py (12 additions, 0 deletions)
```Python
# Copyright (c) Microsoft. All rights reserved.

from abc import ABC

from semantic_kernel.connectors.ai.google.vertex_ai.vertex_ai_settings import VertexAISettings
from semantic_kernel.kernel_pydantic import KernelBaseModel


class VertexAIBase(KernelBaseModel, ABC):
    """Vertex AI Service."""

    service_settings: VertexAISettings
```