From d738c72c5aebda422d7ae267daee0e2d70b3a080 Mon Sep 17 00:00:00 2001 From: markwallace-microsoft <127216156+markwallace-microsoft@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:23:54 +0100 Subject: [PATCH] Add more unit tests for VectorStoreTextSearch --- .github/_typos.toml | 1 + .../Memory/VolatileVectorStore_LoadData.cs | 7 +- .../Data/VectorStoreTextSearchTests.cs | 149 +++++++++++++++++- 3 files changed, 155 insertions(+), 2 deletions(-) diff --git a/.github/_typos.toml b/.github/_typos.toml index bbd0f3bcd790f..ba35bd886e35b 100644 --- a/.github/_typos.toml +++ b/.github/_typos.toml @@ -17,6 +17,7 @@ extend-exclude = [ "*response.json", "test_content.txt", "google_what_is_the_semantic_kernel.json", + "what-is-semantic-kernel.json", "serializedChatHistoryV1_15_1.json", "MultipleFunctionsVsParameters.cs" ] diff --git a/dotnet/samples/Concepts/Memory/VolatileVectorStore_LoadData.cs b/dotnet/samples/Concepts/Memory/VolatileVectorStore_LoadData.cs index acbcb6e062d06..6a9e8d37e6f33 100644 --- a/dotnet/samples/Concepts/Memory/VolatileVectorStore_LoadData.cs +++ b/dotnet/samples/Concepts/Memory/VolatileVectorStore_LoadData.cs @@ -17,10 +17,15 @@ public class VolatileVectorStore_LoadData(ITestOutputHelper output) : BaseTest(o [Fact] public async Task LoadStringListAndSearchAsync() { + // Create a logging handler to output HTTP requests and responses + var handler = new LoggingHandler(new HttpClientHandler(), this.Output); + var httpClient = new HttpClient(handler); + // Create an embedding generation service. var embeddingGenerationService = new OpenAITextEmbeddingGenerationService( modelId: TestConfiguration.OpenAI.EmbeddingModelId, - apiKey: TestConfiguration.OpenAI.ApiKey); + apiKey: TestConfiguration.OpenAI.ApiKey, + httpClient: httpClient); // Construct a volatile vector store. 
var vectorStore = new VolatileVectorStore(); diff --git a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs index 0021eba634f11..fc21e27203cad 100644 --- a/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs +++ b/dotnet/src/SemanticKernel.UnitTests/Data/VectorStoreTextSearchTests.cs @@ -46,6 +46,151 @@ public void CanCreateVectorStoreTextSearchWithIVectorizableTextSearch() Assert.NotNull(sut); } + [Fact] + public async Task CanSearchWithVectorizedSearchAsync() + { + // Arrange. + var sut = await CreateVectorStoreTextSearchFromVectorizedSearchAsync(); + + // Act. + KernelSearchResults searchResults = await sut.SearchAsync("What is the Semantic Kernel?", new() { Count = 2, Offset = 0 }); + var results = await ToListAsync(searchResults.Results); + + Assert.Equal(2, results.Count); + } + + [Fact] + public async Task CanGetTextSearchResultsWithVectorizedSearchAsync() + { + // Arrange. + var sut = await CreateVectorStoreTextSearchFromVectorizedSearchAsync(); + + // Act. + KernelSearchResults searchResults = await sut.GetTextSearchResultsAsync("What is the Semantic Kernel?", new() { Count = 2, Offset = 0 }); + var results = await ToListAsync(searchResults.Results); + + Assert.Equal(2, results.Count); + } + + [Fact] + public async Task CanGetSearchResultsWithVectorizedSearchAsync() + { + // Arrange. + var sut = await CreateVectorStoreTextSearchFromVectorizedSearchAsync(); + + // Act. + KernelSearchResults searchResults = await sut.GetSearchResultsAsync("What is the Semantic Kernel?", new() { Count = 2, Offset = 0 }); + var results = await ToListAsync(searchResults.Results); + + Assert.Equal(2, results.Count); + } + + [Fact] + public async Task CanSearchWithVectorizableTextSearchAsync() + { + // Arrange. + var sut = await CreateVectorStoreTextSearchFromVectorizableTextSearchAsync(); + + // Act. 
+ KernelSearchResults searchResults = await sut.SearchAsync("What is the Semantic Kernel?", new() { Count = 2, Offset = 0 }); + var results = await ToListAsync(searchResults.Results); + + Assert.Equal(2, results.Count); + } + + [Fact] + public async Task CanGetTextSearchResultsWithVectorizableTextSearchAsync() + { + // Arrange. + var sut = await CreateVectorStoreTextSearchFromVectorizableTextSearchAsync(); + + // Act. + KernelSearchResults searchResults = await sut.GetTextSearchResultsAsync("What is the Semantic Kernel?", new() { Count = 2, Offset = 0 }); + var results = await ToListAsync(searchResults.Results); + + Assert.Equal(2, results.Count); + } + + [Fact] + public async Task CanGetSearchResultsWithVectorizableTextSearchAsync() + { + // Arrange. + var sut = await CreateVectorStoreTextSearchFromVectorizableTextSearchAsync(); + + // Act. + KernelSearchResults searchResults = await sut.GetSearchResultsAsync("What is the Semantic Kernel?", new() { Count = 2, Offset = 0 }); + var results = await ToListAsync(searchResults.Results); + + Assert.Equal(2, results.Count); + } + + /// + /// Create a VectorStoreTextSearch from a vectorized search. + /// + private static async Task> CreateVectorStoreTextSearchFromVectorizedSearchAsync() + { + var vectorStore = new VolatileVectorStore(); + var vectorSearch = vectorStore.GetCollection("records"); + var stringMapper = new DataModelTextSearchStringMapper(); + var resultMapper = new DataModelTextSearchResultMapper(); + var embeddingService = new MockTextEmbeddingGenerationService(); + await AddRecordsAsync(vectorSearch, embeddingService); + var sut = new VectorStoreTextSearch(vectorSearch, embeddingService, stringMapper, resultMapper); + return sut; + } + + /// + /// Create a VectorStoreTextSearch from a vectorizable text search. 
+ /// + private static async Task> CreateVectorStoreTextSearchFromVectorizableTextSearchAsync() + { + var vectorStore = new VolatileVectorStore(); + var vectorSearch = vectorStore.GetCollection("records"); + var stringMapper = new DataModelTextSearchStringMapper(); + var resultMapper = new DataModelTextSearchResultMapper(); + var embeddingService = new MockTextEmbeddingGenerationService(); + await AddRecordsAsync(vectorSearch, embeddingService); + var vectorizableTextSearch = new VectorizedSearchWrapper(vectorSearch, new MockTextEmbeddingGenerationService()); + var sut = new VectorStoreTextSearch(vectorizableTextSearch, stringMapper, resultMapper); + return sut; + } + + /// + /// Convert an IAsyncEnumerable to a List. + /// + private static async ValueTask> ToListAsync(IAsyncEnumerable source, CancellationToken cancellationToken = default) + { + var result = new List(); + + await foreach (var item in source.WithCancellation(cancellationToken).ConfigureAwait(false)) + { + result.Add(item); + } + + return result; + } + + /// + /// Add sample records to the vector store record collection. + /// + private static async Task AddRecordsAsync( + IVectorStoreRecordCollection recordCollection, + ITextEmbeddingGenerationService embeddingService, + int? count = 10) + { + await recordCollection.CreateCollectionIfNotExistsAsync(); + for (var i = 0; i < count; i++) + { + DataModel dataModel = new() + { + Key = Guid.NewGuid(), + Text = $"Record {i}", + Embedding = await embeddingService.GenerateEmbeddingAsync($"Record {i}") + }; + await recordCollection.UpsertAsync(dataModel); + } + } + /// /// String mapper which converts a DataModel to a string. /// @@ -119,13 +264,15 @@ public async IAsyncEnumerable> VectorizableTextSearc /// Note that each property is decorated with an attribute that specifies how the property should be treated by the vector store. /// This allows us to create a collection in the vector store and upsert and retrieve instances of this class without any further configuration. 
/// +#pragma warning disable CA1812 // Avoid uninstantiated internal classes private sealed class DataModel +#pragma warning restore CA1812 // Avoid uninstantiated internal classes { [VectorStoreRecordKey] public Guid Key { get; init; } [VectorStoreRecordData] - public string Text { get; init; } + public required string Text { get; init; } [VectorStoreRecordVector(1536)] public ReadOnlyMemory Embedding { get; init; }