-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> DuckDB is becoming a popular option for in-memory OLAP storage. This PR contributes a MemoryStorage implementation on DuckDB that supports both file-based and in-memory setups. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> DuckDB memory storage implementation
- Loading branch information
Showing
8 changed files
with
1,239 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
dotnet/src/Connectors/Connectors.Memory.DuckDB/Connectors.Memory.DuckDB.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Keep this property group first: the shared props imported below are loaded after it. -->
  <PropertyGroup>
    <AssemblyName>Microsoft.SemanticKernel.Connectors.Memory.DuckDB</AssemblyName>
    <RootNamespace>$(AssemblyName)</RootNamespace>
    <TargetFramework>netstandard2.0</TargetFramework>
  </PropertyGroup>

  <!-- Shared NuGet packaging properties used across the repository. -->
  <Import Project="$(RepoRoot)/dotnet/nuget/nuget-package.props" />

  <!-- Package metadata specific to this connector. -->
  <PropertyGroup>
    <Title>Semantic Kernel - DuckDB Connector</Title>
    <Description>DuckDB connector for Semantic Kernel skills and semantic memory</Description>
  </PropertyGroup>

  <!-- No Version attributes here; presumably versions are managed centrally (TODO confirm). -->
  <ItemGroup>
    <PackageReference Include="DuckDB.NET.Data.Full" />
    <PackageReference Include="DuckDB.NET.Data" />
  </ItemGroup>

  <ItemGroup>
    <ProjectReference Include="..\..\SemanticKernel\SemanticKernel.csproj" />
  </ItemGroup>

</Project>
189 changes: 189 additions & 0 deletions
189
dotnet/src/Connectors/Connectors.Memory.DuckDB/Database.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,189 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Runtime.CompilerServices; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using DuckDB.NET.Data; | ||
|
||
namespace Microsoft.SemanticKernel.Connectors.Memory.DuckDB; | ||
|
||
/// <summary>
/// One row of the memory table as read back from the database, with every column kept as raw text.
/// </summary>
internal struct DatabaseEntry
{
    /// <summary>Value of the <c>key</c> column.</summary>
    public string Key { get; set; }

    /// <summary>Value of the <c>metadata</c> column.</summary>
    public string MetadataString { get; set; }

    /// <summary>Value of the <c>embedding</c> column.</summary>
    public string EmbeddingString { get; set; }

    /// <summary>Value of the <c>timestamp</c> column; may be <c>null</c>.</summary>
    public string? Timestamp { get; set; }
}
|
||
/// <summary>
/// Data-access helper that runs SQL statements against the single <c>SKMemoryTable</c> table
/// using a caller-supplied <see cref="DuckDBConnection"/>. The class holds no state of its own.
/// </summary>
internal sealed class Database
{
    private const string TableName = "SKMemoryTable";

    public Database() { }

    /// <summary>
    /// Creates the memory table if it does not exist yet. Rows are keyed by (collection, key).
    /// </summary>
    /// <param name="conn">Connection used to execute the statement.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task CreateTableAsync(DuckDBConnection conn, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            CREATE TABLE IF NOT EXISTS {TableName}(
                collection TEXT,
                key TEXT,
                metadata TEXT,
                embedding TEXT,
                timestamp TEXT,
                PRIMARY KEY(collection, key))";

        // Await here so the command is not disposed before the operation has completed.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Registers a collection by inserting a placeholder row whose key, metadata, embedding and
    /// timestamp are all empty strings. Does nothing when the collection is already present.
    /// </summary>
    /// <param name="conn">Connection used to execute the statements.</param>
    /// <param name="collectionName">Name of the collection to create.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task CreateCollectionAsync(DuckDBConnection conn, string collectionName, CancellationToken cancellationToken = default)
    {
        if (await this.DoesCollectionExistsAsync(conn, collectionName, cancellationToken).ConfigureAwait(false))
        {
            // Collection already exists
            return;
        }

        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            INSERT INTO {TableName} VALUES (?1,?2,?3,?4,?5 ); ";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));
        cmd.Parameters.Add(new DuckDBParameter(string.Empty)); // key
        cmd.Parameters.Add(new DuckDBParameter(string.Empty)); // metadata
        cmd.Parameters.Add(new DuckDBParameter(string.Empty)); // embedding
        cmd.Parameters.Add(new DuckDBParameter(string.Empty)); // timestamp

        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Inserts a row, or overwrites metadata, embedding and timestamp when a row with the same
    /// (collection, key) already exists. Null values are stored as empty strings.
    /// </summary>
    /// <param name="conn">Connection used to execute the statement.</param>
    /// <param name="collection">Collection the row belongs to.</param>
    /// <param name="key">Key of the row within the collection.</param>
    /// <param name="metadata">Metadata text, or <c>null</c>.</param>
    /// <param name="embedding">Embedding text, or <c>null</c>.</param>
    /// <param name="timestamp">Timestamp text, or <c>null</c>.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task UpdateOrInsertAsync(DuckDBConnection conn,
        string collection, string key, string? metadata, string? embedding, string? timestamp, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            INSERT INTO {TableName} VALUES(?1, ?2, ?3, ?4, ?5)
            ON CONFLICT (collection, key) DO UPDATE SET metadata=?3, embedding=?4, timestamp=?5; ";
        cmd.Parameters.Add(new DuckDBParameter(collection));
        cmd.Parameters.Add(new DuckDBParameter(key));
        cmd.Parameters.Add(new DuckDBParameter(metadata ?? string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(embedding ?? string.Empty));
        cmd.Parameters.Add(new DuckDBParameter(timestamp ?? string.Empty));
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Checks whether any row in the table belongs to the given collection.
    /// </summary>
    /// <param name="conn">Connection used to execute the query.</param>
    /// <param name="collectionName">Collection name to look for.</param>
    /// <param name="cancellationToken">Token forwarded to the query.</param>
    /// <returns><c>true</c> when the collection name is among the distinct collection values.</returns>
    public async Task<bool> DoesCollectionExistsAsync(DuckDBConnection conn,
        string collectionName,
        CancellationToken cancellationToken = default)
    {
        var collections = await this.GetCollectionsAsync(conn, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false);
        return collections.Contains(collectionName);
    }

    /// <summary>
    /// Streams the distinct collection names stored in the table.
    /// </summary>
    /// <param name="conn">Connection used to execute the query.</param>
    /// <param name="cancellationToken">Token forwarded to the query and to each read.</param>
    public async IAsyncEnumerable<string> GetCollectionsAsync(DuckDBConnection conn,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            SELECT DISTINCT collection
            FROM {TableName};";

        using var dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            yield return dataReader.GetString("collection");
        }
    }

    /// <summary>
    /// Streams every row of a collection, skipping rows whose key is null, empty or whitespace
    /// (such as the placeholder row written by <see cref="CreateCollectionAsync"/>).
    /// </summary>
    /// <param name="conn">Connection used to execute the query.</param>
    /// <param name="collectionName">Collection whose rows are returned.</param>
    /// <param name="cancellationToken">Token forwarded to the query and to each read.</param>
    public async IAsyncEnumerable<DatabaseEntry> ReadAllAsync(DuckDBConnection conn,
        string collectionName,
        [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            SELECT * FROM {TableName}
            WHERE collection=?1;";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));

        using var dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        while (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            string key = dataReader.GetString("key");
            if (string.IsNullOrWhiteSpace(key))
            {
                continue;
            }

            string metadata = dataReader.GetString("metadata");
            string embedding = dataReader.GetString("embedding");
            string timestamp = dataReader.GetString("timestamp");
            yield return new DatabaseEntry() { Key = key, MetadataString = metadata, EmbeddingString = embedding, Timestamp = timestamp };
        }
    }

    /// <summary>
    /// Reads the row identified by (collection, key).
    /// </summary>
    /// <param name="conn">Connection used to execute the query.</param>
    /// <param name="collectionName">Collection the row belongs to.</param>
    /// <param name="key">Key of the row within the collection.</param>
    /// <param name="cancellationToken">Token forwarded to the query and to the read.</param>
    /// <returns>The matching entry, or <c>null</c> when the query returns no row.</returns>
    public async Task<DatabaseEntry?> ReadAsync(DuckDBConnection conn,
        string collectionName,
        string key,
        CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            SELECT * FROM {TableName}
            WHERE collection=?1
            AND key=?2; ";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));
        cmd.Parameters.Add(new DuckDBParameter(key));

        using var dataReader = await cmd.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
        if (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false))
        {
            // Same by-name accessor that ReadAllAsync uses.
            string metadata = dataReader.GetString("metadata");
            string embedding = dataReader.GetString("embedding");
            string timestamp = dataReader.GetString("timestamp");
            return new DatabaseEntry()
            {
                Key = key,
                MetadataString = metadata,
                EmbeddingString = embedding,
                Timestamp = timestamp
            };
        }

        return null;
    }

    /// <summary>
    /// Deletes every row that belongs to the given collection.
    /// </summary>
    /// <param name="conn">Connection used to execute the statement.</param>
    /// <param name="collectionName">Collection to remove.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task DeleteCollectionAsync(DuckDBConnection conn, string collectionName, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            DELETE FROM {TableName}
            WHERE collection=?;";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));

        // Await here so the command is not disposed before the operation has completed.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Deletes the row identified by (collection, key).
    /// </summary>
    /// <param name="conn">Connection used to execute the statement.</param>
    /// <param name="collectionName">Collection the row belongs to.</param>
    /// <param name="key">Key of the row to delete.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task DeleteAsync(DuckDBConnection conn, string collectionName, string key, CancellationToken cancellationToken = default)
    {
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            DELETE FROM {TableName}
            WHERE collection=?1
            AND key=?2; ";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));
        cmd.Parameters.Add(new DuckDBParameter(key));

        // Await here so the command is not disposed before the operation has completed.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Deletes the rows of a collection whose key is NULL.
    /// </summary>
    /// <param name="conn">Connection used to execute the statement.</param>
    /// <param name="collectionName">Collection to clean up.</param>
    /// <param name="cancellationToken">Token forwarded to the command execution.</param>
    public async Task DeleteEmptyAsync(DuckDBConnection conn, string collectionName, CancellationToken cancellationToken = default)
    {
        // NOTE(review): CreateCollectionAsync writes its placeholder row with an empty-string key,
        // not NULL, so this predicate does not match that row. Confirm whether that is intended.
        using var cmd = conn.CreateCommand();
        cmd.CommandText = $@"
            DELETE FROM {TableName}
            WHERE collection=?1
            AND key IS NULL";
        cmd.Parameters.Add(new DuckDBParameter(collectionName));

        // Await here so the command is not disposed before the operation has completed.
        await cmd.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
    }
}
14 changes: 14 additions & 0 deletions
14
dotnet/src/Connectors/Connectors.Memory.DuckDB/DuckDBExtensions.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using System.Data.Common; | ||
|
||
namespace Microsoft.SemanticKernel.Connectors.Memory.DuckDB; | ||
|
||
/// <summary>
/// Convenience extensions for reading columns from a <see cref="DbDataReader"/> by name.
/// </summary>
internal static class DuckDBExtensions
{
    /// <summary>
    /// Reads the current row's value of the named column as a string.
    /// </summary>
    /// <param name="reader">Reader positioned on a row.</param>
    /// <param name="fieldName">Name of the column to read.</param>
    /// <returns>The column value as returned by <see cref="DbDataReader.GetString(int)"/>.</returns>
    public static string GetString(this DbDataReader reader, string fieldName)
        => reader.GetString(reader.GetOrdinal(fieldName));
}
Oops, something went wrong.