From 69b9fc4ab48814bc8d50dc5e0fcc38e2966d5e22 Mon Sep 17 00:00:00 2001 From: Mark Wallace <127216156+markwallace-microsoft@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:57:07 +0100 Subject: [PATCH] .Net: Update Qdrant Memory Connector to new Text Search Design (#9076) ### Motivation and Context Closes #6733 ### Description ### Contribution Checklist - [ ] The code builds clean without any errors or warnings - [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [ ] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone :smile: --------- Co-authored-by: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> --- .../Memory/Qdrant/QdrantTextSearchTests.cs | 93 +++++++++++++++++++ .../Memory/Qdrant/QdrantVectorStoreFixture.cs | 74 +++++++++++---- .../QdrantVectorStoreRecordCollectionTests.cs | 23 +++-- 3 files changed, 163 insertions(+), 27 deletions(-) create mode 100644 dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs new file mode 100644 index 000000000000..32769f2abb18 --- /dev/null +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantTextSearchTests.cs @@ -0,0 +1,93 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Threading.Tasks; +using Microsoft.SemanticKernel.Connectors.Qdrant; +using Microsoft.SemanticKernel.Data; +using SemanticKernel.IntegrationTests.Data; +using Xunit; +using static SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant.QdrantVectorStoreFixture; + +namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; + +/// +/// Integration tests for using with . +/// +[Collection("QdrantVectorStoreCollection")] +public class QdrantTextSearchTests(QdrantVectorStoreFixture fixture) : BaseVectorStoreTextSearchTests +{ + /// + public override Task CreateTextSearchAsync() + { + if (this.VectorStore is null) + { + this.EmbeddingGenerator = fixture.EmbeddingGenerator; + this.VectorStore = new QdrantVectorStore(fixture.QdrantClient); + } + + var options = new QdrantVectorStoreRecordCollectionOptions + { + HasNamedVectors = true, + VectorStoreRecordDefinition = fixture.HotelVectorStoreRecordDefinition, + }; + var vectorSearch = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, "namedVectorsHotels", options); + var stringMapper = new HotelInfoTextSearchStringMapper(); + var resultMapper = new HotelInfoTextSearchResultMapper(); + + var result = new VectorStoreTextSearch(vectorSearch, this.EmbeddingGenerator!, stringMapper, resultMapper); + return Task.FromResult(result); + } + + /// + public override string GetQuery() => "Find a great hotel"; + + /// + public override TextSearchFilter GetTextSearchFilter() => new TextSearchFilter().Equality("HotelName", "My Hotel 11"); + + /// + public override bool VerifySearchResults(object[] results, string query, TextSearchFilter? filter = null) + { + Assert.NotNull(results); + Assert.NotEmpty(results); + Assert.Equal(filter is null ? 4 : 1, results.Length); + foreach (var result in results) + { + Assert.NotNull(result); + Assert.IsType(result); + } + + return true; + } + + /// + /// String mapper which converts a Hotel to a string. + /// + protected sealed class HotelInfoTextSearchStringMapper : ITextSearchStringMapper + { + /// + public string MapFromResultToString(object result) + { + if (result is HotelInfo hotel) + { + return $"{hotel.HotelName} {hotel.Description}"; + } + throw new ArgumentException("Invalid result type."); + } + } + + /// + /// Result mapper which converts a Hotel to a TextSearchResult. + /// + protected sealed class HotelInfoTextSearchResultMapper : ITextSearchResultMapper + { + /// + public TextSearchResult MapFromResultToTextSearchResult(object result) + { + if (result is HotelInfo hotel) + { + return new TextSearchResult(name: hotel.HotelName, value: hotel.Description, link: $"id://{hotel.HotelId}"); + } + throw new ArgumentException("Invalid result type."); + } + } +} diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs index d1a314829547..ae10dff5a19d 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreFixture.cs @@ -3,12 +3,17 @@ using System; using System.Collections.Generic; using System.Threading.Tasks; +using Azure.Identity; using Docker.DotNet; using Docker.DotNet.Models; using Grpc.Core; +using Microsoft.Extensions.Configuration; +using Microsoft.SemanticKernel.Connectors.AzureOpenAI; using Microsoft.SemanticKernel.Data; +using Microsoft.SemanticKernel.Embeddings; using Qdrant.Client; using Qdrant.Client.Grpc; +using SemanticKernel.IntegrationTests.TestSettings; using Xunit; namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant; @@ -21,6 +26,19 @@ public class QdrantVectorStoreFixture : IAsyncLifetime /// The id of the qdrant container that we are testing with. private string? _containerId = null; + /// The vector dimension. + private const int VectorDimensions = 1536; + + /// + /// Test Configuration setup. + /// + private static readonly IConfigurationRoot s_configuration = new ConfigurationBuilder() + .AddJsonFile(path: "testsettings.json", optional: true, reloadOnChange: true) + .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true) + .AddEnvironmentVariables() + .AddUserSecrets() + .Build(); + #pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. /// @@ -41,7 +59,7 @@ public QdrantVectorStoreFixture() new VectorStoreRecordDataProperty("HotelRating", typeof(float)) { IsFilterable = true }, new VectorStoreRecordDataProperty("Tags", typeof(List)), new VectorStoreRecordDataProperty("Description", typeof(string)), - new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance } + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance } } }; this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition @@ -51,9 +69,17 @@ public QdrantVectorStoreFixture() new VectorStoreRecordKeyProperty("HotelId", typeof(Guid)), new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true }, new VectorStoreRecordDataProperty("Description", typeof(string)), - new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance } + new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance } } }; + AzureOpenAIConfiguration? embeddingsConfig = s_configuration.GetSection("AzureOpenAIEmbeddings").Get(); + Assert.NotNull(embeddingsConfig); + Assert.NotEmpty(embeddingsConfig.DeploymentName); + Assert.NotEmpty(embeddingsConfig.Endpoint); + this.EmbeddingGenerator = new AzureOpenAITextEmbeddingGenerationService( + deploymentName: embeddingsConfig.DeploymentName, + endpoint: embeddingsConfig.Endpoint, + credential: new AzureCliCredential()); } #pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable. @@ -61,6 +87,11 @@ public QdrantVectorStoreFixture() /// Gets the qdrant client connection to use for tests. public QdrantClient QdrantClient { get; private set; } + /// + /// Gets the embedding generator to use for generating embeddings for text. + /// + public ITextEmbeddingGenerationService EmbeddingGenerator { get; private set; } + /// Gets the manually created vector store record definition for our test model. public VectorStoreRecordDefinition HotelVectorStoreRecordDefinition { get; private set; } @@ -80,7 +111,7 @@ public async Task InitializeAsync() // Create schemas for the vector store. var vectorParamsMap = new VectorParamsMap(); - vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = 4, Distance = Distance.Cosine }); + vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine }); // Wait for the qdrant container to be ready. var retryCount = 0; @@ -107,11 +138,11 @@ await this.QdrantClient.CreateCollectionAsync( await this.QdrantClient.CreateCollectionAsync( "singleVectorHotels", - new VectorParams { Size = 4, Distance = Distance.Cosine }); + new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine }); await this.QdrantClient.CreateCollectionAsync( "singleVectorGuidIdHotels", - new VectorParams { Size = 4, Distance = Distance.Cosine }); + new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine }); // Create test data common to both named and unnamed vectors. var tags = new ListValue(); @@ -121,15 +152,18 @@ await this.QdrantClient.CreateCollectionAsync( tagsValue.ListValue = tags; // Create some test data using named vectors. - var embedding = new[] { 30f, 31f, 32f, 33f }; + var embedding = await this.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."); + var embeddingArray = embedding.ToArray(); var namedVectors1 = new NamedVectors(); var namedVectors2 = new NamedVectors(); var namedVectors3 = new NamedVectors(); + var namedVectors4 = new NamedVectors(); - namedVectors1.Vectors.Add("DescriptionEmbedding", embedding); - namedVectors2.Vectors.Add("DescriptionEmbedding", embedding); - namedVectors3.Vectors.Add("DescriptionEmbedding", embedding); + namedVectors1.Vectors.Add("DescriptionEmbedding", embeddingArray); + namedVectors2.Vectors.Add("DescriptionEmbedding", embeddingArray); + namedVectors3.Vectors.Add("DescriptionEmbedding", embeddingArray); + namedVectors4.Vectors.Add("DescriptionEmbedding", embeddingArray); List namedVectorPoints = [ @@ -151,6 +185,12 @@ await this.QdrantClient.CreateCollectionAsync( Vectors = new Vectors { Vectors_ = namedVectors3 }, Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } }, + new PointStruct + { + Id = 14, + Vectors = new Vectors { Vectors_ = namedVectors4 }, + Payload = { ["HotelName"] = "My Hotel 14", ["HotelCode"] = 14, ["parking_is_included"] = false, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } + }, ]; await this.QdrantClient.UpsertAsync("namedVectorsHotels", namedVectorPoints); @@ -161,19 +201,19 @@ await this.QdrantClient.CreateCollectionAsync( new PointStruct { Id = 11, - Vectors = embedding, + Vectors = embeddingArray, Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["parking_is_included"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." } }, new PointStruct { Id = 12, - Vectors = embedding, + Vectors = embeddingArray, Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } }, new PointStruct { Id = 13, - Vectors = embedding, + Vectors = embeddingArray, Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." } }, ]; @@ -186,19 +226,19 @@ await this.QdrantClient.CreateCollectionAsync( new PointStruct { Id = Guid.Parse("11111111-1111-1111-1111-111111111111"), - Vectors = embedding, + Vectors = embeddingArray, Payload = { ["HotelName"] = "My Hotel 11", ["Description"] = "This is a great hotel." } }, new PointStruct { Id = Guid.Parse("22222222-2222-2222-2222-222222222222"), - Vectors = embedding, + Vectors = embeddingArray, Payload = { ["HotelName"] = "My Hotel 12", ["Description"] = "This is a great hotel." } }, new PointStruct { Id = Guid.Parse("33333333-3333-3333-3333-333333333333"), - Vectors = embedding, + Vectors = embeddingArray, Payload = { ["HotelName"] = "My Hotel 13", ["Description"] = "This is a great hotel." } }, ]; @@ -295,7 +335,7 @@ public record HotelInfo() public string Description { get; set; } /// A vector field. - [VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)] + [VectorStoreRecordVector(VectorDimensions, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)] public ReadOnlyMemory? DescriptionEmbedding { get; set; } } @@ -318,7 +358,7 @@ public record HotelInfoWithGuidId() public string Description { get; set; } /// A vector field. - [VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)] + [VectorStoreRecordVector(VectorDimensions, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)] public ReadOnlyMemory? DescriptionEmbedding { get; set; } } } diff --git a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs index 72c173f07145..1757e155689c 100644 --- a/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Memory/Qdrant/QdrantVectorStoreRecordCollectionTests.cs @@ -7,6 +7,7 @@ using System.Threading.Tasks; using Microsoft.SemanticKernel.Connectors.Qdrant; using Microsoft.SemanticKernel.Data; +using Microsoft.SemanticKernel.Embeddings; using Qdrant.Client.Grpc; using Xunit; using Xunit.Abstractions; @@ -56,14 +57,15 @@ public async Task ItCanCreateACollectionUpsertGetAndSearchAsync(bool hasNamedVec }; var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, testCollectionName, options); - var record = this.CreateTestHotel(30); + var record = await this.CreateTestHotelAsync(30, fixture.EmbeddingGenerator); // Act await sut.CreateCollectionAsync(); var upsertResult = await sut.UpsertAsync(record); var getResult = await sut.GetAsync(30, new GetRecordOptions { IncludeVectors = true }); + var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel"); var actual = await sut.VectorizedSearchAsync( - new ReadOnlyMemory(new[] { 30f, 31f, 32f, 33f }), + vector, new VectorSearchOptions { Filter = new VectorSearchFilter().EqualTo("HotelCode", 30) }); // Assert @@ -130,7 +132,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition }; var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); - var record = this.CreateTestHotel(20); + var record = await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator); // Act. var upsertResult = await sut.UpsertAsync(record); @@ -166,7 +168,7 @@ public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync() HotelId = Guid.Parse("55555555-5555-5555-5555-555555555555"), HotelName = "My Hotel 5", Description = "This is a great hotel.", - DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + DescriptionEmbedding = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."), }; // Act. @@ -308,7 +310,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti }; var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); - await sut.UpsertAsync(this.CreateTestHotel(20)); + await sut.UpsertAsync(await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator)); // Act. await sut.DeleteAsync(20); @@ -334,7 +336,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef }; var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); - await sut.UpsertAsync(this.CreateTestHotel(20)); + await sut.UpsertAsync(await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator)); // Act. // Also delete a non-existing key to test that the operation does not fail for these. @@ -386,9 +388,10 @@ public async Task ItCanSearchWithFilterAsync(bool useRecordDefinition, string co var sut = new QdrantVectorStoreRecordCollection(fixture.QdrantClient, collectionName, options); // Act. + var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel"); var filter = filterType == "equality" ? new VectorSearchFilter().EqualTo("HotelName", "My Hotel 11") : new VectorSearchFilter().AnyTagEqualTo("Tags", "t1"); var actual = await sut.VectorizedSearchAsync( - new ReadOnlyMemory([30f, 31f, 32f, 33f]), + vector, new() { Filter = filter @@ -433,7 +436,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync() }, Vectors = { - { "DescriptionEmbedding", new ReadOnlyMemory(new[] { 30f, 31f, 32f, 33f }) } + { "DescriptionEmbedding", await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a generic mapper hotel") } } }); var localGetResult = await sut.GetAsync(40, new GetRecordOptions { IncludeVectors = true }); @@ -464,7 +467,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync() Assert.IsType>(localGetResult.Vectors["DescriptionEmbedding"]); } - private HotelInfo CreateTestHotel(uint hotelId) + private async Task CreateTestHotelAsync(uint hotelId, ITextEmbeddingGenerationService embeddingGenerator) { return new HotelInfo { @@ -475,7 +478,7 @@ private HotelInfo CreateTestHotel(uint hotelId) ParkingIncluded = true, Tags = { "t1", "t2" }, Description = "This is a great hotel.", - DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f }, + DescriptionEmbedding = await embeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."), }; }