From 69b9fc4ab48814bc8d50dc5e0fcc38e2966d5e22 Mon Sep 17 00:00:00 2001
From: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:57:07 +0100
Subject: [PATCH] .Net: Update Qdrant Memory Connector to new Text Search
Design (#9076)
### Motivation and Context
Closes #6733
### Description
### Contribution Checklist
- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone :smile:
---------
Co-authored-by: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com>
---
.../Memory/Qdrant/QdrantTextSearchTests.cs | 93 +++++++++++++++++++
.../Memory/Qdrant/QdrantVectorStoreFixture.cs | 74 +++++++++++----
.../QdrantVectorStoreRecordCollectionTests.cs | 23 +++--
3 files changed, 163 insertions(+), 27 deletions(-)
create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs
new file mode 100644
index 000000000000..32769f2abb18
--- /dev/null
+++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs
@@ -0,0 +1,93 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Threading.Tasks;
+using Microsoft.SemanticKernel.Connectors.Qdrant;
+using Microsoft.SemanticKernel.Data;
+using SemanticKernel.IntegrationTests.Data;
+using Xunit;
+using static SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant.QdrantVectorStoreFixture;
+
+namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant;
+
+/// <summary>
+/// Integration tests for using <see cref="QdrantVectorStore"/> with <c>VectorStoreTextSearch</c>.
+/// </summary>
+[Collection("QdrantVectorStoreCollection")]
+public class QdrantTextSearchTests(QdrantVectorStoreFixture fixture) : BaseVectorStoreTextSearchTests
+{
+ /// <inheritdoc/>
+ public override Task CreateTextSearchAsync()
+ {
+ if (this.VectorStore is null) // one-time setup: a null VectorStore means the fixture-backed members are not wired up yet
+ {
+ this.EmbeddingGenerator = fixture.EmbeddingGenerator;
+ this.VectorStore = new QdrantVectorStore(fixture.QdrantClient);
+ }
+
+ var options = new QdrantVectorStoreRecordCollectionOptions
+ {
+ HasNamedVectors = true, // the fixture seeds "namedVectorsHotels" with named vectors
+ VectorStoreRecordDefinition = fixture.HotelVectorStoreRecordDefinition,
+ };
+ var vectorSearch = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "namedVectorsHotels", options);
+ var stringMapper = new HotelInfoTextSearchStringMapper();
+ var resultMapper = new HotelInfoTextSearchResultMapper();
+
+ var result = new VectorStoreTextSearch(vectorSearch, this.EmbeddingGenerator!, stringMapper, resultMapper);
+ return Task.FromResult(result); // nothing above is awaited, so hand back an already-completed task
+ }
+
+ /// <inheritdoc/>
+ public override string GetQuery() => "Find a great hotel";
+
+ /// <inheritdoc/>
+ public override TextSearchFilter GetTextSearchFilter() => new TextSearchFilter().Equality("HotelName", "My Hotel 11");
+
+ /// <inheritdoc/>
+ public override bool VerifySearchResults(object[] results, string query, TextSearchFilter? filter = null)
+ {
+ Assert.NotNull(results);
+ Assert.NotEmpty(results);
+ Assert.Equal(filter is null ? 4 : 1, results.Length); // presumably 4 = every point seeded in "namedVectorsHotels", 1 = the single "My Hotel 11" match - confirm against the fixture
+ foreach (var result in results)
+ {
+ Assert.NotNull(result);
+ Assert.IsType(result);
+ }
+
+ return true; // only reached when every assertion above passed
+ }
+
+ /// <summary>
+ /// String mapper which converts a <see cref="HotelInfo"/> to a string made of its hotel name and description.
+ /// </summary>
+ protected sealed class HotelInfoTextSearchStringMapper : ITextSearchStringMapper
+ {
+ /// <inheritdoc />
+ public string MapFromResultToString(object result)
+ {
+ if (result is HotelInfo hotel)
+ {
+ return $"{hotel.HotelName} {hotel.Description}";
+ }
+ throw new ArgumentException("Invalid result type."); // any result that is not a HotelInfo is rejected
+ }
+ }
+
+ /// <summary>
+ /// Result mapper which converts a <see cref="HotelInfo"/> to a <see cref="TextSearchResult"/> (name = hotel name, value = description, link = <c>id://{HotelId}</c>).
+ /// </summary>
+ protected sealed class HotelInfoTextSearchResultMapper : ITextSearchResultMapper
+ {
+ /// <inheritdoc />
+ public TextSearchResult MapFromResultToTextSearchResult(object result)
+ {
+ if (result is HotelInfo hotel)
+ {
+ return new TextSearchResult(name: hotel.HotelName, value: hotel.Description, link: $"id://{hotel.HotelId}");
+ }
+ throw new ArgumentException("Invalid result type."); // any result that is not a HotelInfo is rejected
+ }
+ }
+}
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs
index d1a314829547..ae10dff5a19d 100644
--- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs
+++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs
@@ -3,12 +3,17 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
+using Azure.Identity;
using Docker.DotNet;
using Docker.DotNet.Models;
using Grpc.Core;
+using Microsoft.Extensions.Configuration;
+using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Data;
+using Microsoft.SemanticKernel.Embeddings;
using Qdrant.Client;
using Qdrant.Client.Grpc;
+using SemanticKernel.IntegrationTests.TestSettings;
using Xunit;
namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant;
@@ -21,6 +26,19 @@ public class QdrantVectorStoreFixture : IAsyncLifetime
/// The id of the qdrant container that we are testing with.
private string? _containerId = null;
+ /// <summary>The number of dimensions of the vectors used by the test collections and records.</summary>
+ private const int VectorDimensions = 1536;
+
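+ /// <summary>
+ /// Test configuration, built from testsettings.json, testsettings.development.json, environment variables and user secrets.
+ /// </summary>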
+ private static readonly IConfigurationRoot s_configuration = new ConfigurationBuilder()
+ .AddJsonFile(path: "testsettings.json", optional: true, reloadOnChange: true)
+ .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true)
+ .AddEnvironmentVariables()
+ .AddUserSecrets()
+ .Build();
+
#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
///
@@ -41,7 +59,7 @@ public QdrantVectorStoreFixture()
new VectorStoreRecordDataProperty("HotelRating", typeof(float)) { IsFilterable = true },
new VectorStoreRecordDataProperty("Tags", typeof(List)),
new VectorStoreRecordDataProperty("Description", typeof(string)),
- new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance }
+ new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance }
}
};
this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition
@@ -51,9 +69,17 @@ public QdrantVectorStoreFixture()
new VectorStoreRecordKeyProperty("HotelId", typeof(Guid)),
new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true },
new VectorStoreRecordDataProperty("Description", typeof(string)),
- new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance }
+ new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance }
}
};
+ AzureOpenAIConfiguration? embeddingsConfig = s_configuration.GetSection("AzureOpenAIEmbeddings").Get();
+ Assert.NotNull(embeddingsConfig);
+ Assert.NotEmpty(embeddingsConfig.DeploymentName);
+ Assert.NotEmpty(embeddingsConfig.Endpoint);
+ this.EmbeddingGenerator = new AzureOpenAITextEmbeddingGenerationService(
+ deploymentName: embeddingsConfig.DeploymentName,
+ endpoint: embeddingsConfig.Endpoint,
+ credential: new AzureCliCredential());
}
#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.
@@ -61,6 +87,11 @@ public QdrantVectorStoreFixture()
/// Gets the qdrant client connection to use for tests.
public QdrantClient QdrantClient { get; private set; }
+ /// <summary>
+ /// Gets the embedding generator to use for generating embeddings for text.
+ /// </summary>
+ public ITextEmbeddingGenerationService EmbeddingGenerator { get; private set; }
+
/// Gets the manually created vector store record definition for our test model.
public VectorStoreRecordDefinition HotelVectorStoreRecordDefinition { get; private set; }
@@ -80,7 +111,7 @@ public async Task InitializeAsync()
// Create schemas for the vector store.
var vectorParamsMap = new VectorParamsMap();
- vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = 4, Distance = Distance.Cosine });
+ vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine });
// Wait for the qdrant container to be ready.
var retryCount = 0;
@@ -107,11 +138,11 @@ await this.QdrantClient.CreateCollectionAsync(
await this.QdrantClient.CreateCollectionAsync(
"singleVectorHotels",
- new VectorParams { Size = 4, Distance = Distance.Cosine });
+ new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine });
await this.QdrantClient.CreateCollectionAsync(
"singleVectorGuidIdHotels",
- new VectorParams { Size = 4, Distance = Distance.Cosine });
+ new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine });
// Create test data common to both named and unnamed vectors.
var tags = new ListValue();
@@ -121,15 +152,18 @@ await this.QdrantClient.CreateCollectionAsync(
tagsValue.ListValue = tags;
// Create some test data using named vectors.
- var embedding = new[] { 30f, 31f, 32f, 33f };
+ var embedding = await this.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel.");
+ var embeddingArray = embedding.ToArray();
var namedVectors1 = new NamedVectors();
var namedVectors2 = new NamedVectors();
var namedVectors3 = new NamedVectors();
+ var namedVectors4 = new NamedVectors();
- namedVectors1.Vectors.Add("DescriptionEmbedding", embedding);
- namedVectors2.Vectors.Add("DescriptionEmbedding", embedding);
- namedVectors3.Vectors.Add("DescriptionEmbedding", embedding);
+ namedVectors1.Vectors.Add("DescriptionEmbedding", embeddingArray);
+ namedVectors2.Vectors.Add("DescriptionEmbedding", embeddingArray);
+ namedVectors3.Vectors.Add("DescriptionEmbedding", embeddingArray);
+ namedVectors4.Vectors.Add("DescriptionEmbedding", embeddingArray);
List namedVectorPoints =
[
@@ -151,6 +185,12 @@ await this.QdrantClient.CreateCollectionAsync(
Vectors = new Vectors { Vectors_ = namedVectors3 },
Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." }
},
+ new PointStruct
+ {
+ Id = 14,
+ Vectors = new Vectors { Vectors_ = namedVectors4 },
+ Payload = { ["HotelName"] = "My Hotel 14", ["HotelCode"] = 14, ["parking_is_included"] = false, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." }
+ },
];
await this.QdrantClient.UpsertAsync("namedVectorsHotels", namedVectorPoints);
@@ -161,19 +201,19 @@ await this.QdrantClient.CreateCollectionAsync(
new PointStruct
{
Id = 11,
- Vectors = embedding,
+ Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["parking_is_included"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = 12,
- Vectors = embedding,
+ Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = 13,
- Vectors = embedding,
+ Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." }
},
];
@@ -186,19 +226,19 @@ await this.QdrantClient.CreateCollectionAsync(
new PointStruct
{
Id = Guid.Parse("11111111-1111-1111-1111-111111111111"),
- Vectors = embedding,
+ Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 11", ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = Guid.Parse("22222222-2222-2222-2222-222222222222"),
- Vectors = embedding,
+ Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 12", ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = Guid.Parse("33333333-3333-3333-3333-333333333333"),
- Vectors = embedding,
+ Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 13", ["Description"] = "This is a great hotel." }
},
];
@@ -295,7 +335,7 @@ public record HotelInfo()
public string Description { get; set; }
/// A vector field.
- [VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
+ [VectorStoreRecordVector(VectorDimensions, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
public ReadOnlyMemory? DescriptionEmbedding { get; set; }
}
@@ -318,7 +358,7 @@ public record HotelInfoWithGuidId()
public string Description { get; set; }
/// A vector field.
- [VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
+ [VectorStoreRecordVector(VectorDimensions, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
public ReadOnlyMemory? DescriptionEmbedding { get; set; }
}
}
diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs
index 72c173f07145..1757e155689c 100644
--- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs
+++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs
@@ -7,6 +7,7 @@
using System.Threading.Tasks;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using Microsoft.SemanticKernel.Data;
+using Microsoft.SemanticKernel.Embeddings;
using Qdrant.Client.Grpc;
using Xunit;
using Xunit.Abstractions;
@@ -56,14 +57,15 @@ public async Task ItCanCreateACollectionUpsertGetAndSearchAsync(bool hasNamedVec
};
var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, testCollectionName, options);
- var record = this.CreateTestHotel(30);
+ var record = await this.CreateTestHotelAsync(30, fixture.EmbeddingGenerator);
// Act
await sut.CreateCollectionAsync();
var upsertResult = await sut.UpsertAsync(record);
var getResult = await sut.GetAsync(30, new GetRecordOptions { IncludeVectors = true });
+ var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel");
var actual = await sut.VectorizedSearchAsync(
- new ReadOnlyMemory(new[] { 30f, 31f, 32f, 33f }),
+ vector,
new VectorSearchOptions { Filter = new VectorSearchFilter().EqualTo("HotelCode", 30) });
// Assert
@@ -130,7 +132,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition
};
var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options);
- var record = this.CreateTestHotel(20);
+ var record = await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator);
// Act.
var upsertResult = await sut.UpsertAsync(record);
@@ -166,7 +168,7 @@ public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync()
HotelId = Guid.Parse("55555555-5555-5555-5555-555555555555"),
HotelName = "My Hotel 5",
Description = "This is a great hotel.",
- DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f },
+ DescriptionEmbedding = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."),
};
// Act.
@@ -308,7 +310,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti
};
var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options);
- await sut.UpsertAsync(this.CreateTestHotel(20));
+ await sut.UpsertAsync(await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator));
// Act.
await sut.DeleteAsync(20);
@@ -334,7 +336,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef
};
var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options);
- await sut.UpsertAsync(this.CreateTestHotel(20));
+ await sut.UpsertAsync(await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator));
// Act.
// Also delete a non-existing key to test that the operation does not fail for these.
@@ -386,9 +388,10 @@ public async Task ItCanSearchWithFilterAsync(bool useRecordDefinition, string co
var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options);
// Act.
+ var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel");
var filter = filterType == "equality" ? new VectorSearchFilter().EqualTo("HotelName", "My Hotel 11") : new VectorSearchFilter().AnyTagEqualTo("Tags", "t1");
var actual = await sut.VectorizedSearchAsync(
- new ReadOnlyMemory([30f, 31f, 32f, 33f]),
+ vector,
new()
{
Filter = filter
@@ -433,7 +436,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync()
},
Vectors =
{
- { "DescriptionEmbedding", new ReadOnlyMemory(new[] { 30f, 31f, 32f, 33f }) }
+ { "DescriptionEmbedding", await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a generic mapper hotel") }
}
});
var localGetResult = await sut.GetAsync(40, new GetRecordOptions { IncludeVectors = true });
@@ -464,7 +467,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync()
Assert.IsType>(localGetResult.Vectors["DescriptionEmbedding"]);
}
- private HotelInfo CreateTestHotel(uint hotelId)
+ private async Task CreateTestHotelAsync(uint hotelId, ITextEmbeddingGenerationService embeddingGenerator)
{
return new HotelInfo
{
@@ -475,7 +478,7 @@ private HotelInfo CreateTestHotel(uint hotelId)
ParkingIncluded = true,
Tags = { "t1", "t2" },
Description = "This is a great hotel.",
- DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f },
+ DescriptionEmbedding = await embeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."),
};
}