Skip to content

Commit

Permalink
.Net: Update Qdrant Memory Connector to new Text Search Design (#9076)
Browse files Browse the repository at this point in the history
### Motivation and Context

Closes #6733 

### Description

<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄

---------

Co-authored-by: Roger Barreto <[email protected]>
  • Loading branch information
markwallace-microsoft and RogerBarreto authored Oct 7, 2024
1 parent fba0aba commit 69b9fc4
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using System.Threading.Tasks;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using Microsoft.SemanticKernel.Data;
using SemanticKernel.IntegrationTests.Data;
using Xunit;
using static SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant.QdrantVectorStoreFixture;

namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant;

/// <summary>
/// Integration tests for using <see cref="QdrantVectorStore"/> with <see cref="ITextSearch"/>.
/// </summary>
/// <remarks>
/// The text search under test is backed by the "namedVectorsHotels" collection and uses the
/// Qdrant client and embedding generator supplied by <see cref="QdrantVectorStoreFixture"/>.
/// </remarks>
[Collection("QdrantVectorStoreCollection")]
public class QdrantTextSearchTests(QdrantVectorStoreFixture fixture) : BaseVectorStoreTextSearchTests
{
    /// <inheritdoc/>
    public override Task<ITextSearch> CreateTextSearchAsync()
    {
        // Lazily wire up the shared state the first time a text search is requested.
        if (this.VectorStore is null)
        {
            this.EmbeddingGenerator = fixture.EmbeddingGenerator;
            this.VectorStore = new QdrantVectorStore(fixture.QdrantClient);
        }

        var options = new QdrantVectorStoreRecordCollectionOptions<HotelInfo>
        {
            HasNamedVectors = true,
            VectorStoreRecordDefinition = fixture.HotelVectorStoreRecordDefinition,
        };
        var vectorSearch = new QdrantVectorStoreRecordCollection<HotelInfo>(fixture.QdrantClient, "namedVectorsHotels", options);
        var stringMapper = new HotelInfoTextSearchStringMapper();
        var resultMapper = new HotelInfoTextSearchResultMapper();

        var result = new VectorStoreTextSearch<HotelInfo>(vectorSearch, this.EmbeddingGenerator!, stringMapper, resultMapper);
        return Task.FromResult<ITextSearch>(result);
    }

    /// <inheritdoc/>
    public override string GetQuery() => "Find a great hotel";

    /// <inheritdoc/>
    public override TextSearchFilter GetTextSearchFilter() => new TextSearchFilter().Equality("HotelName", "My Hotel 11");

    /// <inheritdoc/>
    public override bool VerifySearchResults(object[] results, string query, TextSearchFilter? filter = null)
    {
        Assert.NotNull(results);
        Assert.NotEmpty(results);

        // Unfiltered searches are expected to return four hotels; a filtered search is expected to return exactly one.
        Assert.Equal(filter is null ? 4 : 1, results.Length);
        foreach (var result in results)
        {
            Assert.NotNull(result);
            Assert.IsType<HotelInfo>(result);
        }

        return true;
    }

    /// <summary>
    /// String mapper which converts a Hotel to a string.
    /// </summary>
    protected sealed class HotelInfoTextSearchStringMapper : ITextSearchStringMapper
    {
        /// <summary>
        /// Formats a <see cref="HotelInfo"/> as its hotel name followed by its description.
        /// </summary>
        /// <param name="result">The search result to convert; must be a <see cref="HotelInfo"/>.</param>
        /// <returns>The hotel name and description separated by a single space.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="result"/> is not a <see cref="HotelInfo"/>.</exception>
        public string MapFromResultToString(object result)
        {
            if (result is HotelInfo hotel)
            {
                return $"{hotel.HotelName} {hotel.Description}";
            }

            // Identify the offending argument so the failure is attributable to the caller's input.
            throw new ArgumentException("Invalid result type.", nameof(result));
        }
    }

    /// <summary>
    /// Result mapper which converts a Hotel to a TextSearchResult.
    /// </summary>
    protected sealed class HotelInfoTextSearchResultMapper : ITextSearchResultMapper
    {
        /// <summary>
        /// Builds a <see cref="TextSearchResult"/> from a <see cref="HotelInfo"/>, using the hotel name
        /// as the name, the description as the value, and an "id://" link containing the hotel id.
        /// </summary>
        /// <param name="result">The search result to convert; must be a <see cref="HotelInfo"/>.</param>
        /// <returns>The mapped <see cref="TextSearchResult"/>.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="result"/> is not a <see cref="HotelInfo"/>.</exception>
        public TextSearchResult MapFromResultToTextSearchResult(object result)
        {
            if (result is HotelInfo hotel)
            {
                return new TextSearchResult(name: hotel.HotelName, value: hotel.Description, link: $"id://{hotel.HotelId}");
            }

            // Identify the offending argument so the failure is attributable to the caller's input.
            throw new ArgumentException("Invalid result type.", nameof(result));
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Azure.Identity;
using Docker.DotNet;
using Docker.DotNet.Models;
using Grpc.Core;
using Microsoft.Extensions.Configuration;
using Microsoft.SemanticKernel.Connectors.AzureOpenAI;
using Microsoft.SemanticKernel.Data;
using Microsoft.SemanticKernel.Embeddings;
using Qdrant.Client;
using Qdrant.Client.Grpc;
using SemanticKernel.IntegrationTests.TestSettings;
using Xunit;

namespace SemanticKernel.IntegrationTests.Connectors.Memory.Qdrant;
Expand All @@ -21,6 +26,19 @@ public class QdrantVectorStoreFixture : IAsyncLifetime
/// <summary>The id of the qdrant container that we are testing with.</summary>
private string? _containerId = null;

/// <summary>The vector dimension.</summary>
private const int VectorDimensions = 1536;

/// <summary>
/// Test configuration built from the optional "testsettings.json" and "testsettings.development.json"
/// files, environment variables, and the user secrets associated with <see cref="QdrantVectorStoreFixture"/>.
/// </summary>
private static readonly IConfigurationRoot s_configuration = new ConfigurationBuilder()
.AddJsonFile(path: "testsettings.json", optional: true, reloadOnChange: true)
.AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true)
.AddEnvironmentVariables()
.AddUserSecrets<QdrantVectorStoreFixture>()
.Build();

#pragma warning disable CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.

/// <summary>
Expand All @@ -41,7 +59,7 @@ public QdrantVectorStoreFixture()
new VectorStoreRecordDataProperty("HotelRating", typeof(float)) { IsFilterable = true },
new VectorStoreRecordDataProperty("Tags", typeof(List<string>)),
new VectorStoreRecordDataProperty("Description", typeof(string)),
new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory<float>?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance }
new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory<float>?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance }
}
};
this.HotelWithGuidIdVectorStoreRecordDefinition = new VectorStoreRecordDefinition
Expand All @@ -51,16 +69,29 @@ public QdrantVectorStoreFixture()
new VectorStoreRecordKeyProperty("HotelId", typeof(Guid)),
new VectorStoreRecordDataProperty("HotelName", typeof(string)) { IsFilterable = true, IsFullTextSearchable = true },
new VectorStoreRecordDataProperty("Description", typeof(string)),
new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory<float>?)) { Dimensions = 4, DistanceFunction = DistanceFunction.ManhattanDistance }
new VectorStoreRecordVectorProperty("DescriptionEmbedding", typeof(ReadOnlyMemory<float>?)) { Dimensions = VectorDimensions, DistanceFunction = DistanceFunction.ManhattanDistance }
}
};
AzureOpenAIConfiguration? embeddingsConfig = s_configuration.GetSection("AzureOpenAIEmbeddings").Get<AzureOpenAIConfiguration>();
Assert.NotNull(embeddingsConfig);
Assert.NotEmpty(embeddingsConfig.DeploymentName);
Assert.NotEmpty(embeddingsConfig.Endpoint);
this.EmbeddingGenerator = new AzureOpenAITextEmbeddingGenerationService(
deploymentName: embeddingsConfig.DeploymentName,
endpoint: embeddingsConfig.Endpoint,
credential: new AzureCliCredential());
}

#pragma warning restore CS8618 // Non-nullable field must contain a non-null value when exiting constructor. Consider declaring as nullable.

/// <summary>Gets the qdrant client connection to use for tests.</summary>
public QdrantClient QdrantClient { get; private set; }

/// <summary>
/// Gets the embedding generator to use for generating embeddings for text.
/// </summary>
public ITextEmbeddingGenerationService EmbeddingGenerator { get; private set; }

/// <summary>Gets the manually created vector store record definition for our test model.</summary>
public VectorStoreRecordDefinition HotelVectorStoreRecordDefinition { get; private set; }

Expand All @@ -80,7 +111,7 @@ public async Task InitializeAsync()

// Create schemas for the vector store.
var vectorParamsMap = new VectorParamsMap();
vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = 4, Distance = Distance.Cosine });
vectorParamsMap.Map.Add("DescriptionEmbedding", new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine });

// Wait for the qdrant container to be ready.
var retryCount = 0;
Expand All @@ -107,11 +138,11 @@ await this.QdrantClient.CreateCollectionAsync(

await this.QdrantClient.CreateCollectionAsync(
"singleVectorHotels",
new VectorParams { Size = 4, Distance = Distance.Cosine });
new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine });

await this.QdrantClient.CreateCollectionAsync(
"singleVectorGuidIdHotels",
new VectorParams { Size = 4, Distance = Distance.Cosine });
new VectorParams { Size = VectorDimensions, Distance = Distance.Cosine });

// Create test data common to both named and unnamed vectors.
var tags = new ListValue();
Expand All @@ -121,15 +152,18 @@ await this.QdrantClient.CreateCollectionAsync(
tagsValue.ListValue = tags;

// Create some test data using named vectors.
var embedding = new[] { 30f, 31f, 32f, 33f };
var embedding = await this.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel.");
var embeddingArray = embedding.ToArray();

var namedVectors1 = new NamedVectors();
var namedVectors2 = new NamedVectors();
var namedVectors3 = new NamedVectors();
var namedVectors4 = new NamedVectors();

namedVectors1.Vectors.Add("DescriptionEmbedding", embedding);
namedVectors2.Vectors.Add("DescriptionEmbedding", embedding);
namedVectors3.Vectors.Add("DescriptionEmbedding", embedding);
namedVectors1.Vectors.Add("DescriptionEmbedding", embeddingArray);
namedVectors2.Vectors.Add("DescriptionEmbedding", embeddingArray);
namedVectors3.Vectors.Add("DescriptionEmbedding", embeddingArray);
namedVectors4.Vectors.Add("DescriptionEmbedding", embeddingArray);

List<PointStruct> namedVectorPoints =
[
Expand All @@ -151,6 +185,12 @@ await this.QdrantClient.CreateCollectionAsync(
Vectors = new Vectors { Vectors_ = namedVectors3 },
Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = 14,
Vectors = new Vectors { Vectors_ = namedVectors4 },
Payload = { ["HotelName"] = "My Hotel 14", ["HotelCode"] = 14, ["parking_is_included"] = false, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." }
},
];

await this.QdrantClient.UpsertAsync("namedVectorsHotels", namedVectorPoints);
Expand All @@ -161,19 +201,19 @@ await this.QdrantClient.CreateCollectionAsync(
new PointStruct
{
Id = 11,
Vectors = embedding,
Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 11", ["HotelCode"] = 11, ["parking_is_included"] = true, ["Tags"] = tagsValue, ["HotelRating"] = 4.5f, ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = 12,
Vectors = embedding,
Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 12", ["HotelCode"] = 12, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = 13,
Vectors = embedding,
Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 13", ["HotelCode"] = 13, ["parking_is_included"] = false, ["Description"] = "This is a great hotel." }
},
];
Expand All @@ -186,19 +226,19 @@ await this.QdrantClient.CreateCollectionAsync(
new PointStruct
{
Id = Guid.Parse("11111111-1111-1111-1111-111111111111"),
Vectors = embedding,
Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 11", ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = Guid.Parse("22222222-2222-2222-2222-222222222222"),
Vectors = embedding,
Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 12", ["Description"] = "This is a great hotel." }
},
new PointStruct
{
Id = Guid.Parse("33333333-3333-3333-3333-333333333333"),
Vectors = embedding,
Vectors = embeddingArray,
Payload = { ["HotelName"] = "My Hotel 13", ["Description"] = "This is a great hotel." }
},
];
Expand Down Expand Up @@ -295,7 +335,7 @@ public record HotelInfo()
public string Description { get; set; }

/// <summary>A vector field.</summary>
[VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
[VectorStoreRecordVector(VectorDimensions, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
public ReadOnlyMemory<float>? DescriptionEmbedding { get; set; }
}

Expand All @@ -318,7 +358,7 @@ public record HotelInfoWithGuidId()
public string Description { get; set; }

/// <summary>A vector field.</summary>
[VectorStoreRecordVector(4, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
[VectorStoreRecordVector(VectorDimensions, IndexKind.Hnsw, DistanceFunction.ManhattanDistance)]
public ReadOnlyMemory<float>? DescriptionEmbedding { get; set; }
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Threading.Tasks;
using Microsoft.SemanticKernel.Connectors.Qdrant;
using Microsoft.SemanticKernel.Data;
using Microsoft.SemanticKernel.Embeddings;
using Qdrant.Client.Grpc;
using Xunit;
using Xunit.Abstractions;
Expand Down Expand Up @@ -56,14 +57,15 @@ public async Task ItCanCreateACollectionUpsertGetAndSearchAsync(bool hasNamedVec
};
var sut = new QdrantVectorStoreRecordCollection<HotelInfo>(fixture.QdrantClient, testCollectionName, options);

var record = this.CreateTestHotel(30);
var record = await this.CreateTestHotelAsync(30, fixture.EmbeddingGenerator);

// Act
await sut.CreateCollectionAsync();
var upsertResult = await sut.UpsertAsync(record);
var getResult = await sut.GetAsync(30, new GetRecordOptions { IncludeVectors = true });
var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel");
var actual = await sut.VectorizedSearchAsync(
new ReadOnlyMemory<float>(new[] { 30f, 31f, 32f, 33f }),
vector,
new VectorSearchOptions { Filter = new VectorSearchFilter().EqualTo("HotelCode", 30) });

// Assert
Expand Down Expand Up @@ -130,7 +132,7 @@ public async Task ItCanUpsertDocumentToVectorStoreAsync(bool useRecordDefinition
};
var sut = new QdrantVectorStoreRecordCollection<HotelInfo>(fixture.QdrantClient, collectionName, options);

var record = this.CreateTestHotel(20);
var record = await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator);

// Act.
var upsertResult = await sut.UpsertAsync(record);
Expand Down Expand Up @@ -166,7 +168,7 @@ public async Task ItCanUpsertAndRemoveDocumentWithGuidIdToVectorStoreAsync()
HotelId = Guid.Parse("55555555-5555-5555-5555-555555555555"),
HotelName = "My Hotel 5",
Description = "This is a great hotel.",
DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f },
DescriptionEmbedding = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."),
};

// Act.
Expand Down Expand Up @@ -308,7 +310,7 @@ public async Task ItCanRemoveDocumentFromVectorStoreAsync(bool useRecordDefiniti
};
var sut = new QdrantVectorStoreRecordCollection<HotelInfo>(fixture.QdrantClient, collectionName, options);

await sut.UpsertAsync(this.CreateTestHotel(20));
await sut.UpsertAsync(await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator));

// Act.
await sut.DeleteAsync(20);
Expand All @@ -334,7 +336,7 @@ public async Task ItCanRemoveManyDocumentsFromVectorStoreAsync(bool useRecordDef
};
var sut = new QdrantVectorStoreRecordCollection<HotelInfo>(fixture.QdrantClient, collectionName, options);

await sut.UpsertAsync(this.CreateTestHotel(20));
await sut.UpsertAsync(await this.CreateTestHotelAsync(20, fixture.EmbeddingGenerator));

// Act.
// Also delete a non-existing key to test that the operation does not fail for these.
Expand Down Expand Up @@ -386,9 +388,10 @@ public async Task ItCanSearchWithFilterAsync(bool useRecordDefinition, string co
var sut = new QdrantVectorStoreRecordCollection<HotelInfo>(fixture.QdrantClient, collectionName, options);

// Act.
var vector = await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("A great hotel");
var filter = filterType == "equality" ? new VectorSearchFilter().EqualTo("HotelName", "My Hotel 11") : new VectorSearchFilter().AnyTagEqualTo("Tags", "t1");
var actual = await sut.VectorizedSearchAsync(
new ReadOnlyMemory<float>([30f, 31f, 32f, 33f]),
vector,
new()
{
Filter = filter
Expand Down Expand Up @@ -433,7 +436,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync()
},
Vectors =
{
{ "DescriptionEmbedding", new ReadOnlyMemory<float>(new[] { 30f, 31f, 32f, 33f }) }
{ "DescriptionEmbedding", await fixture.EmbeddingGenerator.GenerateEmbeddingAsync("This is a generic mapper hotel") }
}
});
var localGetResult = await sut.GetAsync(40, new GetRecordOptions { IncludeVectors = true });
Expand Down Expand Up @@ -464,7 +467,7 @@ public async Task ItCanUpsertAndRetrieveUsingTheGenericMapperAsync()
Assert.IsType<ReadOnlyMemory<float>>(localGetResult.Vectors["DescriptionEmbedding"]);
}

private HotelInfo CreateTestHotel(uint hotelId)
private async Task<HotelInfo> CreateTestHotelAsync(uint hotelId, ITextEmbeddingGenerationService embeddingGenerator)
{
return new HotelInfo
{
Expand All @@ -475,7 +478,7 @@ private HotelInfo CreateTestHotel(uint hotelId)
ParkingIncluded = true,
Tags = { "t1", "t2" },
Description = "This is a great hotel.",
DescriptionEmbedding = new[] { 30f, 31f, 32f, 33f },
DescriptionEmbedding = await embeddingGenerator.GenerateEmbeddingAsync("This is a great hotel."),
};
}

Expand Down

0 comments on commit 69b9fc4

Please sign in to comment.