diff --git a/src/Sir.Cmd/AnalyzeDocumentCommand.cs b/src/Sir.Cmd/AnalyzeDocumentCommand.cs index 324581ca..0180ffbf 100644 --- a/src/Sir.Cmd/AnalyzeDocumentCommand.cs +++ b/src/Sir.Cmd/AnalyzeDocumentCommand.cs @@ -3,6 +3,7 @@ using Microsoft.Extensions.Logging; using Sir.Documents; using Sir.IO; +using Sir.KeyValue; using Sir.Strings; namespace Sir.Cmd @@ -19,9 +20,9 @@ public void Run(IDictionary args, ILogger logger) var model = new BagOfCharsModel(); var embedding = new SortedList(); - using (var sessionFactory = new SessionFactory(logger)) - using (var documents = new DocumentStreamSession(dataDirectory, sessionFactory)) - using (var documentReader = new DocumentReader(dataDirectory, collectionId, sessionFactory)) + using (var kvwriter = new KeyValueWriter(dataDirectory, collectionId)) + using (var documents = new DocumentStreamSession(dataDirectory, kvwriter)) + using (var documentReader = new DocumentReader(dataDirectory, collectionId)) { var doc = documents.ReadDocument((collectionId, documentId), select, documentReader); diff --git a/src/Sir.Cmd/Program.cs b/src/Sir.Cmd/Program.cs index 02f64dde..39ca179e 100644 --- a/src/Sir.Cmd/Program.cs +++ b/src/Sir.Cmd/Program.cs @@ -70,10 +70,6 @@ static void Main(string[] args) { TruncateIndex(flags["directory"], flags["collection"], logger); } - else if (command == "optimize") - { - Optimize(flags, logger); - } else if (command == "rename") { Rename(flags["directory"], flags["collection"], flags["newCollection"], logger); @@ -122,32 +118,6 @@ private static IDictionary ParseArgs(string[] args) return dic; } - private static void Optimize(IDictionary args, ILogger logger) - { - var dataDirectory = args["directory"]; - var collection = args["collection"]; - var skip = int.Parse(args["skip"]); - var take = int.Parse(args["take"]); - var reportFrequency = int.Parse(args["reportFrequency"]); - var pageSize = int.Parse(args["pageSize"]); - var fields = new HashSet(args["fields"].Split(',')); - var model = new BagOfCharsModel(); - - using (var sessionFactory = new SessionFactory(logger)) - { - sessionFactory.Optimize( - dataDirectory, - collection, - fields, - model, - new LogStructuredIndexingStrategy(model), - skip, - take, - reportFrequency, - pageSize); - } - } - private static void Slice(IDictionary args) { var file = args["sourceFileName"]; diff --git a/src/Sir.Cmd/Sir.Cmd.csproj b/src/Sir.Cmd/Sir.Cmd.csproj index 03ae7c2c..62d3cae5 100644 --- a/src/Sir.Cmd/Sir.Cmd.csproj +++ b/src/Sir.Cmd/Sir.Cmd.csproj @@ -13,6 +13,7 @@ + diff --git a/src/Sir.Cmd/ValidateCommand.cs b/src/Sir.Cmd/ValidateCommand.cs index 0200d37c..3a2f83ae 100644 --- a/src/Sir.Cmd/ValidateCommand.cs +++ b/src/Sir.Cmd/ValidateCommand.cs @@ -24,13 +24,14 @@ public void Run(IDictionary args, ILogger logger) var count = 0; var embedding = new SortedList(); + using(var kvwriter = new KeyValue.KeyValueWriter(dir, collectionId)) using (var sessionFactory = new SessionFactory(logger)) using (var validateSession = new ValidateSession( collectionId, - new SearchSession(dir, sessionFactory, model, new LogStructuredIndexingStrategy(model), logger), - new QueryParser(dir, sessionFactory, model, embedding: embedding, logger: logger))) + new SearchSession(dir, model, new LogStructuredIndexingStrategy(model), kvwriter, logger), + new QueryParser(dir, kvwriter, model, embedding: embedding, logger: logger))) { - using (var documents = new DocumentStreamSession(dir, sessionFactory)) + using (var documents = new DocumentStreamSession(dir, kvwriter)) { foreach (var doc in documents.ReadDocuments(collectionId, selectFields, skip, take)) { diff --git a/src/Sir.CommonCrawl/CCHelper.cs b/src/Sir.CommonCrawl/CCHelper.cs index a0aeca46..96211de3 100644 --- a/src/Sir.CommonCrawl/CCHelper.cs +++ b/src/Sir.CommonCrawl/CCHelper.cs @@ -35,8 +35,8 @@ public static void WriteWatSegment( }; using (var sessionFactory = new SessionFactory(logger)) - using (var writeSession = new WriteSession(new DocumentWriter(sessionFactory, dataDirectory, collectionId))) - using (var indexSession = new IndexSession(model, new LogStructuredIndexingStrategy(model), sessionFactory, dataDirectory, collectionId, logger)) + using (var writeSession = new WriteSession(new DocumentWriter(dataDirectory, collectionId))) + using (var indexSession = new IndexSession(model, new LogStructuredIndexingStrategy(model), dataDirectory, collectionId, logger)) { using (var queue = new ProducerConsumerQueue(document => { diff --git a/src/Sir.CommonCrawl/Sir.CommonCrawl.csproj b/src/Sir.CommonCrawl/Sir.CommonCrawl.csproj index 36a53fb3..b9731258 100644 --- a/src/Sir.CommonCrawl/Sir.CommonCrawl.csproj +++ b/src/Sir.CommonCrawl/Sir.CommonCrawl.csproj @@ -13,7 +13,7 @@ - + diff --git a/src/Sir.Core/IStreamDispatcher.cs b/src/Sir.Core/IStreamDispatcher.cs deleted file mode 100644 index 131353e0..00000000 --- a/src/Sir.Core/IStreamDispatcher.cs +++ /dev/null @@ -1,15 +0,0 @@ -using Sir.IO; -using System.IO; - -namespace Sir -{ - public interface IStreamDispatcher - { - Stream CreateAppendStream(string directory, ulong collectionId, string fileExtension); - Stream CreateAppendStream(string directory, ulong collectionId, long keyId, string fileExtension); - Stream CreateReadStream(string fileName); - void RegisterKeyMapping(string directory, ulong collectionId, ulong keyHash, long keyId); - bool TryGetKeyId(string directory, ulong collectionId, ulong keyHash, out long keyId); - long GetKeyId(string directory, ulong collectionId, ulong keyHash); - } -} \ No newline at end of file diff --git a/src/Sir.Document/DocumentReader.cs b/src/Sir.Document/DocumentReader.cs index d5e10fb3..17c87483 100644 --- a/src/Sir.Document/DocumentReader.cs +++ b/src/Sir.Document/DocumentReader.cs @@ -19,14 +19,14 @@ public class DocumentReader : IDisposable public ulong CollectionId { get; } - public DocumentReader(string directory, ulong collectionId, IStreamDispatcher database) + public DocumentReader(string directory, ulong collectionId) { - var valueStream = database.CreateReadStream(Path.Combine(directory, string.Format("{0}.val", collectionId))); - var keyStream = database.CreateReadStream(Path.Combine(directory, string.Format("{0}.key", collectionId))); - var docStream = database.CreateReadStream(Path.Combine(directory, string.Format("{0}.docs", collectionId))); - var valueIndexStream = database.CreateReadStream(Path.Combine(directory, string.Format("{0}.vix", collectionId))); - var keyIndexStream = database.CreateReadStream(Path.Combine(directory, string.Format("{0}.kix", collectionId))); - var docIndexStream = database.CreateReadStream(Path.Combine(directory, string.Format("{0}.dix", collectionId))); + var valueStream = CreateReadStream(Path.Combine(directory, string.Format("{0}.val", collectionId))); + var keyStream = CreateReadStream(Path.Combine(directory, string.Format("{0}.key", collectionId))); + var docStream = CreateReadStream(Path.Combine(directory, string.Format("{0}.docs", collectionId))); + var valueIndexStream = CreateReadStream(Path.Combine(directory, string.Format("{0}.vix", collectionId))); + var keyIndexStream = CreateReadStream(Path.Combine(directory, string.Format("{0}.kix", collectionId))); + var docIndexStream = CreateReadStream(Path.Combine(directory, string.Format("{0}.dix", collectionId))); _vals = new ValueReader(valueStream); _keys = new ValueReader(keyStream); @@ -38,6 +38,11 @@ public DocumentReader(string directory, ulong collectionId, IStreamDispatcher da CollectionId = collectionId; } + public static Stream CreateReadStream(string fileName) + { + return new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 4096, FileOptions.SequentialScan); + } + public (long offset, int length) GetDocumentAddress(long docId) { return _docIx.Get(docId); diff --git a/src/Sir.Document/DocumentWriter.cs b/src/Sir.Document/DocumentWriter.cs index 3da6e7b2..82ba9aa1 100644 --- a/src/Sir.Document/DocumentWriter.cs +++ b/src/Sir.Document/DocumentWriter.cs @@ -12,10 +12,10 @@ public class DocumentWriter : KeyValueWriter, IDisposable private readonly DocMapWriter _docs; private readonly DocIndexWriter _docIx; - public DocumentWriter(IStreamDispatcher streamDispatcher, string directory, ulong collectionId) : base(directory, collectionId, streamDispatcher) + public DocumentWriter(string directory, ulong collectionId) : base(directory, collectionId) { - var docStream = streamDispatcher.CreateAppendStream(directory, collectionId, "docs"); - var docIndexStream = streamDispatcher.CreateAppendStream(directory, collectionId, "dix"); + var docStream = CreateAppendStream(directory, collectionId, "docs"); + var docIndexStream = CreateAppendStream(directory, collectionId, "dix"); _docs = new DocMapWriter(docStream); _docIx = new DocIndexWriter(docIndexStream); diff --git a/src/Sir.HttpServer/HttpReader.cs b/src/Sir.HttpServer/HttpReader.cs index 874eac98..5aa23a86 100644 --- a/src/Sir.HttpServer/HttpReader.cs +++ b/src/Sir.HttpServer/HttpReader.cs @@ -4,6 +4,7 @@ using Microsoft.AspNetCore.Http; using Microsoft.Extensions.Logging; using Newtonsoft.Json; +using Sir.KeyValue; namespace Sir.HttpServer { @@ -58,7 +59,7 @@ public async Task Read(HttpRequest request, IModel model) _logger.LogDebug($"parsed query: {queryLog}"); #endif - using (var readSession = new SearchSession(_config.Get("data_dir"), _sessionFactory, model, new LogStructuredIndexingStrategy(model), _logger)) + using (var readSession = new SearchSession(_config.Get("data_dir"), model, new LogStructuredIndexingStrategy(model), new KeyValueWriter(_config.Get("data_dir"), _config.Get("default_collection").ToHash()), _logger)) { return readSession.Search(query, skip, take); } diff --git a/src/Sir.HttpServer/ServiceConfiguration.cs b/src/Sir.HttpServer/ServiceConfiguration.cs index 7c9893db..99b7e65f 100644 --- a/src/Sir.HttpServer/ServiceConfiguration.cs +++ b/src/Sir.HttpServer/ServiceConfiguration.cs @@ -20,11 +20,11 @@ public static IServiceProvider Configure(IServiceCollection services) var model = new BagOfCharsModel(); var sessionFactory = new SessionFactory(logger); var directory = config.Get("data_dir"); - var qp = new QueryParser(directory, sessionFactory, model, logger: logger); + var defaultCollection = config.Get("default_collection"); + var qp = new QueryParser(directory, new KeyValue.KeyValueWriter(directory, defaultCollection.ToHash()), model, logger: logger); var httpParser = new HttpQueryParser(qp); services.AddSingleton(typeof(IModel), model); - services.AddSingleton(typeof(IStreamDispatcher), sessionFactory); services.AddSingleton(typeof(SessionFactory), sessionFactory); services.AddSingleton(typeof(QueryParser), qp); services.AddSingleton(typeof(HttpQueryParser), httpParser); diff --git a/src/Sir.HttpServer/Sir.HttpServer.csproj b/src/Sir.HttpServer/Sir.HttpServer.csproj index bcab7564..716e20da 100644 --- a/src/Sir.HttpServer/Sir.HttpServer.csproj +++ b/src/Sir.HttpServer/Sir.HttpServer.csproj @@ -46,7 +46,8 @@ - + + diff --git a/src/Sir.HttpServer/StringQueryFormatter.cs b/src/Sir.HttpServer/StringQueryFormatter.cs deleted file mode 100644 index 258cdbca..00000000 --- a/src/Sir.HttpServer/StringQueryFormatter.cs +++ /dev/null @@ -1,33 +0,0 @@ -using Microsoft.AspNetCore.Http; -using Microsoft.Extensions.Logging; -using Newtonsoft.Json; -using System.Collections.Generic; -using System.Threading.Tasks; - -namespace Sir.HttpServer -{ - public class StringQueryFormatter : IQueryFormatter - { - private readonly SessionFactory _sessionFactory; - private readonly ILogger _log; - private readonly string _directory; - - public StringQueryFormatter(string directory, SessionFactory sessionFactory, ILogger log) - { - _sessionFactory = sessionFactory; - _log = log; - _directory = directory; - } - - public async Task Format(HttpRequest request, IModel tokenizer) - { - var parser = new HttpQueryParser(new QueryParser(_directory, _sessionFactory, tokenizer, logger: _log)); - var query = await parser.ParseRequest(request); - var dictionary = new Dictionary(); - - parser.ParseQuery(query, dictionary); - - return JsonConvert.SerializeObject(dictionary, Formatting.Indented); - } - } -} \ No newline at end of file diff --git a/src/Sir.HttpServer/sir.ini b/src/Sir.HttpServer/sir.ini index 9fe31d0b..2eae3db2 100644 --- a/src/Sir.HttpServer/sir.ini +++ b/src/Sir.HttpServer/sir.ini @@ -1,2 +1,3 @@ data_dir=appdata\database +default_collection=wikipedia admin_password=SuperWiseInformationRetrieval123! \ No newline at end of file diff --git a/src/Sir.ImageTests/Sir.ImageTests.csproj b/src/Sir.ImageTests/Sir.ImageTests.csproj index d6c92f56..d9cdbbc4 100644 --- a/src/Sir.ImageTests/Sir.ImageTests.csproj +++ b/src/Sir.ImageTests/Sir.ImageTests.csproj @@ -16,8 +16,9 @@ - + + diff --git a/src/Sir.Images/LinearClassifierImageModel.cs b/src/Sir.Images/LinearClassifierImageModel.cs index c9d90a07..c1b60205 100644 --- a/src/Sir.Images/LinearClassifierImageModel.cs +++ b/src/Sir.Images/LinearClassifierImageModel.cs @@ -5,7 +5,7 @@ namespace Sir.Images { - public class LinearClassifierImageModel : DistanceCalculator, IModel + public class LinearClassifierImageModel : Sir.DistanceCalculator, IModel { public double IdenticalAngle => 0.95d; public double FoldAngle => 0.75d; diff --git a/src/Sir.Images/Sir.Images.csproj b/src/Sir.Images/Sir.Images.csproj index 5db42529..55371ed7 100644 --- a/src/Sir.Images/Sir.Images.csproj +++ b/src/Sir.Images/Sir.Images.csproj @@ -5,7 +5,7 @@ - + diff --git a/src/Sir.Core/Debug/GraphInfo.cs b/src/Sir.InformationRetreival/Debug/GraphInfo.cs similarity index 100% rename from src/Sir.Core/Debug/GraphInfo.cs rename to src/Sir.InformationRetreival/Debug/GraphInfo.cs diff --git a/src/Sir.Core/Debug/IndexInfo.cs b/src/Sir.InformationRetreival/Debug/IndexInfo.cs similarity index 100% rename from src/Sir.Core/Debug/IndexInfo.cs rename to src/Sir.InformationRetreival/Debug/IndexInfo.cs diff --git a/src/Sir.Core/DistanceCalculator.cs b/src/Sir.InformationRetreival/DistanceCalculator.cs similarity index 100% rename from src/Sir.Core/DistanceCalculator.cs rename to src/Sir.InformationRetreival/DistanceCalculator.cs diff --git a/src/Sir.Core/Document.cs b/src/Sir.InformationRetreival/Document.cs similarity index 100% rename from src/Sir.Core/Document.cs rename to src/Sir.InformationRetreival/Document.cs diff --git a/src/Sir.Core/DoubleExtensions.cs b/src/Sir.InformationRetreival/DoubleExtensions.cs similarity index 100% rename from src/Sir.Core/DoubleExtensions.cs rename to src/Sir.InformationRetreival/DoubleExtensions.cs diff --git a/src/Sir.Core/Field.cs b/src/Sir.InformationRetreival/Field.cs similarity index 100% rename from src/Sir.Core/Field.cs rename to src/Sir.InformationRetreival/Field.cs diff --git a/src/Sir.Core/Hit.cs b/src/Sir.InformationRetreival/Hit.cs similarity index 100% rename from src/Sir.Core/Hit.cs rename to src/Sir.InformationRetreival/Hit.cs diff --git a/src/Sir.Core/ICommand.cs b/src/Sir.InformationRetreival/ICommand.cs similarity index 100% rename from src/Sir.Core/ICommand.cs rename to src/Sir.InformationRetreival/ICommand.cs diff --git a/src/Sir.Core/IIndexReadWriteStrategy.cs b/src/Sir.InformationRetreival/IIndexReadWriteStrategy.cs similarity index 80% rename from src/Sir.Core/IIndexReadWriteStrategy.cs rename to src/Sir.InformationRetreival/IIndexReadWriteStrategy.cs index 033b7b3e..d3ae6a30 100644 --- a/src/Sir.Core/IIndexReadWriteStrategy.cs +++ b/src/Sir.InformationRetreival/IIndexReadWriteStrategy.cs @@ -7,6 +7,6 @@ public interface IIndexReadWriteStrategy { void Put(VectorNode column, VectorNode node); Hit GetMatchOrNull(ISerializableVector vector, IModel model, ColumnReader reader); - void Commit(string directory, ulong collectionId, long keyId, VectorNode tree, IStreamDispatcher streamDispatcher, ILogger logger = null); + void Commit(string directory, ulong collectionId, long keyId, VectorNode tree, ILogger logger = null); } } diff --git a/src/Sir.Core/IModel.cs b/src/Sir.InformationRetreival/IModel.cs similarity index 100% rename from src/Sir.Core/IModel.cs rename to src/Sir.InformationRetreival/IModel.cs diff --git a/src/Sir.Core/IO/ColumnReader.cs b/src/Sir.InformationRetreival/IO/ColumnReader.cs similarity index 100% rename from src/Sir.Core/IO/ColumnReader.cs rename to src/Sir.InformationRetreival/IO/ColumnReader.cs diff --git a/src/Sir.Core/IO/ColumnWriter.cs b/src/Sir.InformationRetreival/IO/ColumnWriter.cs similarity index 100% rename from src/Sir.Core/IO/ColumnWriter.cs rename to src/Sir.InformationRetreival/IO/ColumnWriter.cs diff --git a/src/Sir.Core/IO/GraphBuilder.cs b/src/Sir.InformationRetreival/IO/GraphBuilder.cs similarity index 100% rename from src/Sir.Core/IO/GraphBuilder.cs rename to src/Sir.InformationRetreival/IO/GraphBuilder.cs diff --git a/src/Sir.Core/IO/PageIndexReader.cs b/src/Sir.InformationRetreival/IO/PageIndexReader.cs similarity index 100% rename from src/Sir.Core/IO/PageIndexReader.cs rename to src/Sir.InformationRetreival/IO/PageIndexReader.cs diff --git a/src/Sir.Core/IO/PageIndexWriter.cs b/src/Sir.InformationRetreival/IO/PageIndexWriter.cs similarity index 100% rename from src/Sir.Core/IO/PageIndexWriter.cs rename to src/Sir.InformationRetreival/IO/PageIndexWriter.cs diff --git a/src/Sir.Core/IO/PathFinder.cs b/src/Sir.InformationRetreival/IO/PathFinder.cs similarity index 100% rename from src/Sir.Core/IO/PathFinder.cs rename to src/Sir.InformationRetreival/IO/PathFinder.cs diff --git a/src/Sir.Core/IO/PostingsReader.cs b/src/Sir.InformationRetreival/IO/PostingsReader.cs similarity index 92% rename from src/Sir.Core/IO/PostingsReader.cs rename to src/Sir.InformationRetreival/IO/PostingsReader.cs index 29a1291e..5ab4a567 100644 --- a/src/Sir.Core/IO/PostingsReader.cs +++ b/src/Sir.InformationRetreival/IO/PostingsReader.cs @@ -1,4 +1,5 @@ using Microsoft.Extensions.Logging; +using Sir.Documents; using System; using System.Buffers; using System.Collections.Generic; @@ -17,9 +18,9 @@ public class PostingsReader : IDisposable private readonly ILogger _logger; private readonly ulong _collectionId; - public PostingsReader(string directory, ulong collectionId, long keyId, IStreamDispatcher streamDispatcher, ILogger logger = null) + public PostingsReader(string directory, ulong collectionId, long keyId, ILogger logger = null) { - _stream = streamDispatcher.CreateReadStream(Path.Combine(directory, $"{collectionId}.{keyId}.pos")); + _stream = DocumentReader.CreateReadStream(Path.Combine(directory, $"{collectionId}.{keyId}.pos")); _logger = logger; _collectionId = collectionId; } diff --git a/src/Sir.Core/IO/PostingsResolver.cs b/src/Sir.InformationRetreival/IO/PostingsResolver.cs similarity index 89% rename from src/Sir.Core/IO/PostingsResolver.cs rename to src/Sir.InformationRetreival/IO/PostingsResolver.cs index 158cdb32..0eade620 100644 --- a/src/Sir.Core/IO/PostingsResolver.cs +++ b/src/Sir.InformationRetreival/IO/PostingsResolver.cs @@ -11,7 +11,7 @@ public class PostingsResolver : IDisposable /// /// Read posting list document IDs into memory. /// - public void Resolve(IQuery query, IStreamDispatcher sessionFactory, ILogger logger = null) + public void Resolve(IQuery query, ILogger logger = null) { foreach (var term in query.AllTerms()) { @@ -23,7 +23,7 @@ public void Resolve(IQuery query, IStreamDispatcher sessionFactory, ILogger logg if (!_readers.TryGetValue(key, out reader)) { - reader = new PostingsReader(term.Directory, term.CollectionId, term.KeyId, sessionFactory, logger); + reader = new PostingsReader(term.Directory, term.CollectionId, term.KeyId, logger); if (reader != null) { diff --git a/src/Sir.Core/IQuery.cs b/src/Sir.InformationRetreival/IQuery.cs similarity index 100% rename from src/Sir.Core/IQuery.cs rename to src/Sir.InformationRetreival/IQuery.cs diff --git a/src/Sir.Core/IReducer.cs b/src/Sir.InformationRetreival/IReducer.cs similarity index 100% rename from src/Sir.Core/IReducer.cs rename to src/Sir.InformationRetreival/IReducer.cs diff --git a/src/Sir.Core/IndexingStrategies/LogStructuredIndexingStrategy.cs b/src/Sir.InformationRetreival/IndexingStrategies/LogStructuredIndexingStrategy.cs similarity index 64% rename from src/Sir.Core/IndexingStrategies/LogStructuredIndexingStrategy.cs rename to src/Sir.InformationRetreival/IndexingStrategies/LogStructuredIndexingStrategy.cs index 8f458b52..8bc679c6 100644 --- a/src/Sir.Core/IndexingStrategies/LogStructuredIndexingStrategy.cs +++ b/src/Sir.InformationRetreival/IndexingStrategies/LogStructuredIndexingStrategy.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Logging; using Sir.IO; +using Sir.KeyValue; using System.Diagnostics; namespace Sir @@ -23,14 +24,14 @@ public void Put(VectorNode column, VectorNode node) column.AddOrAppend(node, _model); } - public void Commit(string directory, ulong collectionId, long keyId, VectorNode tree, IStreamDispatcher streamDispatcher, ILogger logger = null) + public void Commit(string directory, ulong collectionId, long keyId, VectorNode tree, ILogger logger = null) { var time = Stopwatch.StartNew(); - using (var vectorStream = streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "vec")) - using (var postingsStream = streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "pos")) - using (var columnWriter = new ColumnWriter(streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "ix"))) - using (var pageIndexWriter = new PageIndexWriter(streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "ixtp"))) + using (var vectorStream = KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "vec")) + using (var postingsStream = KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "pos")) + using (var columnWriter = new ColumnWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "ix"))) + using (var pageIndexWriter = new PageIndexWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "ixtp"))) { var size = columnWriter.CreatePage(tree, vectorStream, postingsStream, pageIndexWriter); diff --git a/src/Sir.Core/IndexingStrategies/SupervisedLogStructuredIndexingStrategy.cs b/src/Sir.InformationRetreival/IndexingStrategies/SupervisedLogStructuredIndexingStrategy.cs similarity index 65% rename from src/Sir.Core/IndexingStrategies/SupervisedLogStructuredIndexingStrategy.cs rename to src/Sir.InformationRetreival/IndexingStrategies/SupervisedLogStructuredIndexingStrategy.cs index 7f8294ed..e571bc4c 100644 --- a/src/Sir.Core/IndexingStrategies/SupervisedLogStructuredIndexingStrategy.cs +++ b/src/Sir.InformationRetreival/IndexingStrategies/SupervisedLogStructuredIndexingStrategy.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Logging; using Sir.IO; +using Sir.KeyValue; using System.Diagnostics; namespace Sir @@ -23,14 +24,14 @@ public void Put(VectorNode column, VectorNode node) column.AddOrAppendSupervised(node, _model); } - public void Commit(string directory, ulong collectionId, long keyId, VectorNode tree, IStreamDispatcher streamDispatcher, ILogger logger = null) + public void Commit(string directory, ulong collectionId, long keyId, VectorNode tree, ILogger logger = null) { var time = Stopwatch.StartNew(); - using (var vectorStream = streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "vec")) - using (var postingsStream = streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "pos")) - using (var columnWriter = new ColumnWriter(streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "ix"))) - using (var pageIndexWriter = new PageIndexWriter(streamDispatcher.CreateAppendStream(directory, collectionId, keyId, "ixtp"))) + using (var vectorStream = KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "vec")) + using (var postingsStream = KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "pos")) + using (var columnWriter = new ColumnWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "ix"))) + using (var pageIndexWriter = new PageIndexWriter(KeyValueWriter.CreateAppendStream(directory, collectionId, keyId, "ixtp"))) { var size = columnWriter.CreatePage(tree, vectorStream, postingsStream, pageIndexWriter); diff --git a/src/Sir.Core/ListHelper.cs b/src/Sir.InformationRetreival/ListHelper.cs similarity index 100% rename from src/Sir.Core/ListHelper.cs rename to src/Sir.InformationRetreival/ListHelper.cs diff --git a/src/Sir.Core/Parsers/IQueryFormatter.cs b/src/Sir.InformationRetreival/Parsers/IQueryFormatter.cs similarity index 100% rename from src/Sir.Core/Parsers/IQueryFormatter.cs rename to src/Sir.InformationRetreival/Parsers/IQueryFormatter.cs diff --git a/src/Sir.Core/Parsers/QueryParser.cs b/src/Sir.InformationRetreival/Parsers/QueryParser.cs similarity index 95% rename from src/Sir.Core/Parsers/QueryParser.cs rename to src/Sir.InformationRetreival/Parsers/QueryParser.cs index 6e297a51..e95017e2 100644 --- a/src/Sir.Core/Parsers/QueryParser.cs +++ b/src/Sir.InformationRetreival/Parsers/QueryParser.cs @@ -1,20 +1,21 @@ using Microsoft.Extensions.Logging; using Newtonsoft.Json; +using Sir.KeyValue; using System.Collections.Generic; namespace Sir { public class QueryParser { - private readonly IStreamDispatcher _sessionFactory; + private readonly KeyValueWriter _kvwriter; private readonly IModel _model; private readonly ILogger _logger; private readonly string _directory; private readonly SortedList _embedding; - public QueryParser(string directory, IStreamDispatcher sessionFactory, IModel model, SortedList embedding = null, ILogger logger = null) + public QueryParser(string directory, KeyValueWriter kvwriter, IModel model, SortedList embedding = null, ILogger logger = null) { - _sessionFactory = sessionFactory; + _kvwriter = kvwriter; _model = model; _logger = logger; _directory = directory; @@ -242,7 +243,7 @@ private IList CreateTerms(ulong collectionId, string key, T value, bool an long keyId; var terms = new List(); - if (_sessionFactory.TryGetKeyId(_directory, collectionId, key.ToHash(), out keyId)) + if (_kvwriter.TryGetKeyId(_directory, collectionId, key.ToHash(), out keyId)) { var tokens = _model.CreateEmbedding(value, label, _embedding); diff --git a/src/Sir.Core/PluginReader.cs b/src/Sir.InformationRetreival/PluginReader.cs similarity index 100% rename from src/Sir.Core/PluginReader.cs rename to src/Sir.InformationRetreival/PluginReader.cs diff --git a/src/Sir.Core/ProducerConsumerQueue.cs b/src/Sir.InformationRetreival/ProducerConsumerQueue.cs similarity index 100% rename from src/Sir.Core/ProducerConsumerQueue.cs rename to src/Sir.InformationRetreival/ProducerConsumerQueue.cs diff --git a/src/Sir.Core/Query.cs b/src/Sir.InformationRetreival/Query.cs similarity index 100% rename from src/Sir.Core/Query.cs rename to src/Sir.InformationRetreival/Query.cs diff --git a/src/Sir.Core/README.md b/src/Sir.InformationRetreival/README.md similarity index 100% rename from src/Sir.Core/README.md rename to src/Sir.InformationRetreival/README.md diff --git a/src/Sir.Core/ScoredResult.cs b/src/Sir.InformationRetreival/ScoredResult.cs similarity index 100% rename from src/Sir.Core/ScoredResult.cs rename to src/Sir.InformationRetreival/ScoredResult.cs diff --git a/src/Sir.Core/Scorer.cs b/src/Sir.InformationRetreival/Scorer.cs similarity index 100% rename from src/Sir.Core/Scorer.cs rename to src/Sir.InformationRetreival/Scorer.cs diff --git a/src/Sir.Core/SearchResult.cs b/src/Sir.InformationRetreival/SearchResult.cs similarity index 100% rename from src/Sir.Core/SearchResult.cs rename to src/Sir.InformationRetreival/SearchResult.cs diff --git a/src/Sir.Core/SerializableVector.cs b/src/Sir.InformationRetreival/SerializableVector.cs similarity index 100% rename from src/Sir.Core/SerializableVector.cs rename to src/Sir.InformationRetreival/SerializableVector.cs diff --git a/src/Sir.Search/DocumentStreamSession.cs b/src/Sir.InformationRetreival/Session/DocumentStreamSession.cs similarity index 96% rename from src/Sir.Search/DocumentStreamSession.cs rename to src/Sir.InformationRetreival/Session/DocumentStreamSession.cs index 4b878856..b70bc3bd 100644 --- a/src/Sir.Search/DocumentStreamSession.cs +++ b/src/Sir.InformationRetreival/Session/DocumentStreamSession.cs @@ -1,5 +1,6 @@ using Sir.Documents; using Sir.IO; +using Sir.KeyValue; using System; using System.Collections.Generic; using System.IO; @@ -9,17 +10,16 @@ namespace Sir public class DocumentStreamSession : IDisposable { private readonly string _directory; - private readonly IStreamDispatcher _database; + private readonly KeyValueWriter _kvwriter; private readonly IDictionary _documentReaders; - public DocumentStreamSession(string directory, IStreamDispatcher database) + public DocumentStreamSession(string directory, KeyValueWriter kvwriter) { _directory = directory; - _database = database; + _kvwriter = kvwriter; _documentReaders = new Dictionary(); } - public int Count(ulong collectionId) { var reader = GetOrCreateDocumentReader(collectionId); @@ -105,7 +105,7 @@ public IEnumerable ReadDocumentValues( var took = 0; long docId = skip; - var keyId = _database.GetKeyId(_directory, collectionId, field.ToHash()); + var keyId = _kvwriter.GetKeyId(_directory, collectionId, field.ToHash()); while (docId < docCount && took++ < take) { @@ -250,7 +250,7 @@ private DocumentReader GetOrCreateDocumentReader(ulong collectionId) if (!_documentReaders.TryGetValue(collectionId, out reader)) { - reader = new DocumentReader(_directory, collectionId, _database); + reader = new DocumentReader(_directory, collectionId); _documentReaders.Add(collectionId, reader); } @@ -263,6 +263,8 @@ public virtual void Dispose() { reader.Dispose(); } + + _kvwriter.Dispose(); } } } \ No newline at end of file diff --git a/src/Sir.Search/IIndexSession.cs b/src/Sir.InformationRetreival/Session/IIndexSession.cs similarity index 100% rename from src/Sir.Search/IIndexSession.cs rename to src/Sir.InformationRetreival/Session/IIndexSession.cs diff --git a/src/Sir.Search/ISearchSession.cs b/src/Sir.InformationRetreival/Session/ISearchSession.cs similarity index 100% rename from src/Sir.Search/ISearchSession.cs rename to src/Sir.InformationRetreival/Session/ISearchSession.cs diff --git a/src/Sir.Search/IndexDebugger.cs b/src/Sir.InformationRetreival/Session/IndexDebugger.cs similarity index 100% rename from src/Sir.Search/IndexDebugger.cs rename to src/Sir.InformationRetreival/Session/IndexDebugger.cs diff --git a/src/Sir.Search/IndexSession.cs b/src/Sir.InformationRetreival/Session/IndexSession.cs similarity index 94% rename from src/Sir.Search/IndexSession.cs rename to src/Sir.InformationRetreival/Session/IndexSession.cs index 467aa968..df8869b3 100644 --- a/src/Sir.Search/IndexSession.cs +++ b/src/Sir.InformationRetreival/Session/IndexSession.cs @@ -11,7 +11,6 @@ public class IndexSession : IIndexSession, IDisposable private readonly IModel _model; private readonly IIndexReadWriteStrategy _indexingStrategy; private readonly IDictionary _index; - private readonly IStreamDispatcher _sessionFactory; private readonly string _directory; private readonly ulong _collectionId; private readonly ILogger _logger; @@ -20,7 +19,6 @@ public class IndexSession : IIndexSession, IDisposable public IndexSession( IModel model, IIndexReadWriteStrategy indexingStrategy, - IStreamDispatcher sessionFactory, string directory, ulong collectionId, ILogger logger = null) @@ -28,7 +26,6 @@ public IndexSession( _model = model; _indexingStrategy = indexingStrategy; _index = new Dictionary(); - _sessionFactory = sessionFactory; _directory = directory; _collectionId = collectionId; _logger = logger; @@ -91,7 +88,7 @@ public void Commit(long keyId) var column = _index[keyId]; - _indexingStrategy.Commit(_directory, _collectionId, keyId, column, _sessionFactory, _logger); + _indexingStrategy.Commit(_directory, _collectionId, keyId, column, _logger); if (_logger != null) _logger.LogInformation($"committing index to disk for key {keyId} took {time.Elapsed}"); diff --git a/src/Sir.Search/IndexWriter.cs b/src/Sir.InformationRetreival/Session/IndexWriter.cs similarity index 76% rename from src/Sir.Search/IndexWriter.cs rename to src/Sir.InformationRetreival/Session/IndexWriter.cs index cc16bedc..9bb03a58 100644 --- a/src/Sir.Search/IndexWriter.cs +++ b/src/Sir.InformationRetreival/Session/IndexWriter.cs @@ -1,9 +1,6 @@ using Microsoft.Extensions.Logging; -using Sir.IO; using System; using System.Collections.Generic; -using System.Diagnostics; -using System.IO; namespace Sir { @@ -11,18 +8,15 @@ public class IndexWriter : IDisposable { private readonly string _directory; private readonly ulong _collectionId; - private readonly IStreamDispatcher _sessionFactory; private readonly ILogger _logger; public IndexWriter( string directory, ulong collectionId, - IStreamDispatcher sessionFactory, ILogger logger = null) { _directory = directory; _collectionId = collectionId; - _sessionFactory = sessionFactory; _logger = logger; } @@ -34,7 +28,7 @@ public void Commit(IDictionary index, IIndexReadWriteStrategy { foreach (var column in index) { - indexingStrategy.Commit(_directory, _collectionId, column.Key, column.Value, _sessionFactory); + indexingStrategy.Commit(_directory, _collectionId, column.Key, column.Value); } } } diff --git a/src/Sir.Search/SearchSession.cs b/src/Sir.InformationRetreival/Session/SearchSession.cs similarity index 93% rename from src/Sir.Search/SearchSession.cs rename to src/Sir.InformationRetreival/Session/SearchSession.cs index a4f16bad..003ed964 100644 --- a/src/Sir.Search/SearchSession.cs +++ b/src/Sir.InformationRetreival/Session/SearchSession.cs @@ -1,5 +1,7 @@ using Microsoft.Extensions.Logging; +using Sir.Documents; using Sir.IO; +using Sir.KeyValue; using System; using System.Collections.Generic; using System.Diagnostics; @@ -12,7 +14,6 @@ namespace Sir /// public class SearchSession : DocumentStreamSession, IDisposable, ISearchSession { - private readonly IStreamDispatcher _sessionFactory; private readonly IModel _model; private readonly IIndexReadWriteStrategy _indexStrategy; private readonly PostingsResolver _postingsResolver; @@ -22,14 +23,13 @@ public class SearchSession : DocumentStreamSession, IDisposable, ISearchSession public SearchSession( string directory, - IStreamDispatcher sessionFactory, IModel model, IIndexReadWriteStrategy indexStrategy, + KeyValueWriter kvwriter, ILogger logger = null, PostingsResolver postingsResolver = null, - Scorer scorer = null) : base(directory, sessionFactory) + Scorer scorer = null) : base(directory, kvwriter) { - _sessionFactory = sessionFactory; _model = model; _indexStrategy = indexStrategy; _postingsResolver = postingsResolver ?? new PostingsResolver(); @@ -96,7 +96,7 @@ private ScoredResult Execute(IQuery query, int skip, int take, bool identicalMat timer.Restart(); // Read postings lists - _postingsResolver.Resolve(query, _sessionFactory, _logger); + _postingsResolver.Resolve(query, _logger); LogDebug($"reading postings took {timer.Elapsed}"); timer.Restart(); @@ -158,12 +158,12 @@ private ColumnReader CreateColumnReader(string directory, ulong collectionId, lo var vectorFileName = Path.Combine(directory, $"{collectionId}.{keyId}.vec"); var pageIndexFileName = Path.Combine(directory, $"{collectionId}.{keyId}.ixtp"); - using (var pageIndexReader = new PageIndexReader(_sessionFactory.CreateReadStream(pageIndexFileName))) + using (var pageIndexReader = new PageIndexReader(DocumentReader.CreateReadStream(pageIndexFileName))) { return new ColumnReader( pageIndexReader.ReadAll(), - _sessionFactory.CreateReadStream(ixFileName), - _sessionFactory.CreateReadStream(vectorFileName)); + DocumentReader.CreateReadStream(ixFileName), + DocumentReader.CreateReadStream(vectorFileName)); } } diff --git a/src/Sir.InformationRetreival/Session/SessionFactory.cs b/src/Sir.InformationRetreival/Session/SessionFactory.cs new file mode 100644 index 00000000..969251e6 --- /dev/null +++ b/src/Sir.InformationRetreival/Session/SessionFactory.cs @@ -0,0 +1,212 @@ +using Microsoft.Extensions.Logging; +using Sir.Documents; +using Sir.KeyValue; +using System; +using System.Collections.Generic; +using System.IO; + +namespace Sir +{ + public class SessionFactory : IDisposable + { + private ILogger _logger; + + public SessionFactory(ILogger logger = null) + { + _logger = logger; + } + + private void LogInformation(string message) + { + if (_logger != null) + _logger.LogInformation(message); + } + + private void LogTrace(string message) + { + if (_logger != null) + _logger.LogTrace(message); + } + + private void LogDebug(string message) + { + if (_logger != null) + _logger.LogDebug(message); + } + + private void LogError(Exception ex, string message) + { + if (_logger != null) + _logger.LogError(ex, message); + } + + public long GetDocCount(string directory, string collection) + { + var fileName = Path.Combine(directory, $"{collection.ToHash()}.dix"); + + if (!File.Exists(fileName)) + return 0; + + return new FileInfo(fileName).Length / DocIndexWriter.BlockSize; + } + + public void Truncate(string directory, ulong collectionId) + { + var count = 0; + + if (Directory.Exists(directory)) + { + foreach (var file in Directory.GetFiles(directory, $"{collectionId}*")) + { + File.Delete(file); + count++; + } + } + + LogInformation($"truncated collection {collectionId} ({count} files affected)"); + } + + public void TruncateIndex(string directory, ulong collectionId) + { + var count = 0; + + foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.ix")) + { + File.Delete(file); + count++; + } + foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.ixp")) + { + File.Delete(file); + count++; + } + foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.ixtp")) + { + File.Delete(file); + count++; + } + foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.vec")) + { + File.Delete(file); + count++; + } + foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.pos")) + { + File.Delete(file); + count++; + } + + LogInformation($"truncated index {collectionId} ({count} files affected)"); + } + + public void Rename(string directory, ulong currentCollectionId, ulong newCollectionId) + { + var count = 0; + + var from = currentCollectionId.ToString(); + var to = newCollectionId.ToString(); + + foreach (var file in Directory.GetFiles(directory, $"{currentCollectionId}*")) + { + File.Move(file, file.Replace(from, to)); + count++; + } + + LogInformation($"renamed collection {currentCollectionId} to {newCollectionId} ({count} files affected)"); + } + + public void StoreDataAndBuildInMemoryIndex(IEnumerable job, WriteSession writeSession, IndexSession indexSession, int reportSize = 1000, bool label = true) + { + var debugger = new IndexDebugger(_logger, reportSize); + + foreach (var document in job) + { + writeSession.Put(document); + + foreach (var field in document.Fields) + { + if (field.Value != null) + { + indexSession.Put(document.Id, field.KeyId, (T)field.Value, label); + } + } + + debugger.Step(indexSession); + } + } + + public void StoreDataAndBuildInMemoryIndex( + Document document, + WriteSession writeSession, + IndexSession indexSession, + bool label = true) + { + writeSession.Put(document); + + foreach (var field in document.Fields) + { + if (field.Value != null && field.Value is T typedValue) + { + indexSession.Put(document.Id, field.KeyId, typedValue, label); + } + } + } + + public void StoreDataAndPersistIndex(string directory, ulong collectionId, IEnumerable job, IModel model, IIndexReadWriteStrategy indexStrategy, int reportSize = 1000) + { + using (var writeSession = new WriteSession(new DocumentWriter(directory, collectionId))) + using (var indexSession = new IndexSession(model, indexStrategy, directory, collectionId)) + { + StoreDataAndBuildInMemoryIndex(job, writeSession, indexSession, reportSize); + + indexSession.Commit(); + } + } + + public void Store(string directory, ulong collectionId, IEnumerable job) + { + using (var writeSession = new WriteSession(new DocumentWriter(directory, collectionId))) + { + foreach (var document in job) + writeSession.Put(document); + } + } + + public bool DocumentExists(string directory, string collection, string key, T value, IModel model, bool label = true) + { + using (var kvwriter = new KeyValueWriter(directory, collection.ToHash())) + { + var query = new QueryParser(directory, kvwriter, model, logger: _logger) + .Parse(collection, value, key, key, and: true, or: false, label); + + if (query != null) + { + + using (var searchSession = new SearchSession(directory, model, new LogStructuredIndexingStrategy(model), kvwriter, _logger)) + { + var document = searchSession.SearchScalar(query); + + if (document != null) + { + if (document.Score >= model.IdenticalAngle) + return true; + } + } + } + } + + return false; + } + + public FileStream CreateLockFile(string directory, ulong collectionId) + { + return new FileStream(Path.Combine(directory, collectionId + ".lock"), + FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None, + 4096, FileOptions.RandomAccess | FileOptions.DeleteOnClose); + } + + public void Dispose() + { + } + } +} \ No newline at end of file diff --git a/src/Sir.Search/ValidateSession.cs b/src/Sir.InformationRetreival/Session/ValidateSession.cs similarity index 100% rename from src/Sir.Search/ValidateSession.cs rename to src/Sir.InformationRetreival/Session/ValidateSession.cs diff --git a/src/Sir.Search/WriteSession.cs b/src/Sir.InformationRetreival/Session/WriteSession.cs similarity index 100% rename from src/Sir.Search/WriteSession.cs rename to src/Sir.InformationRetreival/Session/WriteSession.cs diff --git a/src/Sir.Core/Sir.Core.csproj b/src/Sir.InformationRetreival/Sir.InformationRetreival.csproj similarity index 86% rename from src/Sir.Core/Sir.Core.csproj rename to src/Sir.InformationRetreival/Sir.InformationRetreival.csproj index 95b7f7a1..d5da688a 100644 --- a/src/Sir.Core/Sir.Core.csproj +++ b/src/Sir.InformationRetreival/Sir.InformationRetreival.csproj @@ -23,4 +23,8 @@ + + + + diff --git a/src/Sir.Core/SystemFields.cs b/src/Sir.InformationRetreival/SystemFields.cs similarity index 100% rename from src/Sir.Core/SystemFields.cs rename to src/Sir.InformationRetreival/SystemFields.cs diff --git a/src/Sir.Core/Term.cs b/src/Sir.InformationRetreival/Term.cs similarity index 100% rename from src/Sir.Core/Term.cs rename to src/Sir.InformationRetreival/Term.cs diff --git a/src/Sir.Core/VectorNode.cs b/src/Sir.InformationRetreival/VectorNode.cs similarity index 100% rename from src/Sir.Core/VectorNode.cs rename to src/Sir.InformationRetreival/VectorNode.cs diff --git a/src/Sir.Core/HashOperations.cs b/src/Sir.KeyValue/HashOperations.cs similarity index 100% rename from src/Sir.Core/HashOperations.cs rename to src/Sir.KeyValue/HashOperations.cs diff --git a/src/Sir.Core/ISerializableVector.cs b/src/Sir.KeyValue/ISerializableVector.cs similarity index 100% rename from src/Sir.Core/ISerializableVector.cs rename to src/Sir.KeyValue/ISerializableVector.cs diff --git a/src/Sir.Core/IStreamable.cs b/src/Sir.KeyValue/IStreamable.cs similarity index 100% rename from src/Sir.Core/IStreamable.cs rename to src/Sir.KeyValue/IStreamable.cs diff --git a/src/Sir.KeyValue/KeyValueWriter.cs b/src/Sir.KeyValue/KeyValueWriter.cs index 494b3be4..de7a1531 100644 --- a/src/Sir.KeyValue/KeyValueWriter.cs +++ b/src/Sir.KeyValue/KeyValueWriter.cs @@ -1,4 +1,6 @@ using System; +using System.Collections.Concurrent; +using System.IO; namespace Sir.KeyValue { @@ -12,21 +14,21 @@ public class KeyValueWriter : IDisposable private readonly ValueIndexWriter _valIx; private readonly ValueIndexWriter _keyIx; private readonly ulong _collectionId; - private readonly IStreamDispatcher _streamDispatcher; private readonly string _directory; private readonly object _keyLock = new object(); - - public KeyValueWriter(string directory, ulong collectionId, IStreamDispatcher streamDispatcher) + private ConcurrentDictionary> _keyCache; + + public KeyValueWriter(string directory, ulong collectionId) : this( - new ValueWriter(streamDispatcher.CreateAppendStream(directory, collectionId, "val")), - new ValueWriter(streamDispatcher.CreateAppendStream(directory, collectionId, "key")), - new ValueIndexWriter(streamDispatcher.CreateAppendStream(directory, collectionId, "vix")), - new ValueIndexWriter(streamDispatcher.CreateAppendStream(directory, collectionId, "kix")) + new ValueWriter(CreateAppendStream(directory, collectionId, "val")), + new ValueWriter(CreateAppendStream(directory, collectionId, "key")), + new ValueIndexWriter(CreateAppendStream(directory, collectionId, "vix")), + new ValueIndexWriter(CreateAppendStream(directory, collectionId, "kix")) ) { _collectionId = collectionId; - _streamDispatcher = streamDispatcher; _directory = directory; + _keyCache = new ConcurrentDictionary>(); } public KeyValueWriter(ValueWriter values, ValueWriter keys, ValueIndexWriter valIx, ValueIndexWriter keyIx) @@ -37,16 +39,50 @@ public KeyValueWriter(ValueWriter values, ValueWriter keys, ValueIndexWriter val _keyIx = keyIx; } + public static Stream CreateAppendStream(string directory, ulong collectionId, string fileExtension) + { + if (!Directory.Exists(directory)) + { + Directory.CreateDirectory(directory); + } + + var fileName = Path.Combine(directory, $"{collectionId}.{fileExtension}"); + + if (!File.Exists(fileName)) + { + using (var fs = new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)) { } + } + + return new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); + } + + public static Stream CreateAppendStream(string directory, ulong collectionId, long keyId, string fileExtension) + { + if (!Directory.Exists(directory)) + { + Directory.CreateDirectory(directory); + } + + var fileName = Path.Combine(directory, $"{collectionId}.{keyId}.{fileExtension}"); + + if (!File.Exists(fileName)) + { + using (var fs = new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)) { } + } + + return new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); + } + public long EnsureKeyExistsSafely(string keyStr) { var keyHash = keyStr.ToHash(); long keyId; - if (!_streamDispatcher.TryGetKeyId(_directory, _collectionId, keyHash, out keyId)) + if (!TryGetKeyId(_directory, _collectionId, keyHash, out keyId)) { lock (_keyLock) { - if (!_streamDispatcher.TryGetKeyId(_directory, _collectionId, keyHash, out keyId)) + if (!TryGetKeyId(_directory, _collectionId, keyHash, out keyId)) { // We have a new key! @@ -56,7 +92,7 @@ public long EnsureKeyExistsSafely(string keyStr) keyId = PutKeyInfo(keyInfo.offset, keyInfo.len, keyInfo.dataType); // store key mapping - _streamDispatcher.RegisterKeyMapping(_directory, _collectionId, keyHash, keyId); + RegisterKeyMapping(_directory, _collectionId, keyHash, keyId); } } } @@ -69,7 +105,7 @@ public long EnsureKeyExists(string keyStr) var keyHash = keyStr.ToHash(); long keyId; - if (!_streamDispatcher.TryGetKeyId(_directory, _collectionId, keyHash, out keyId)) + if (!TryGetKeyId(_directory, _collectionId, keyHash, out keyId)) { // We have a new key! @@ -79,7 +115,7 @@ public long EnsureKeyExists(string keyStr) keyId = PutKeyInfo(keyInfo.offset, keyInfo.len, keyInfo.dataType); // store key mapping - _streamDispatcher.RegisterKeyMapping(_directory, _collectionId, keyHash, keyId); + RegisterKeyMapping(_directory, _collectionId, keyHash, keyId); } return keyId; @@ -126,6 +162,92 @@ public void OverwriteFixedLengthValue(long offset, object value, Type type) _vals.Put(value); } + public void RegisterKeyMapping(string directory, ulong collectionId, ulong keyHash, long keyId) + { + var key = Path.Combine(directory, collectionId.ToString()).ToHash(); + var keys = _keyCache.GetOrAdd(key, (key) => { return new ConcurrentDictionary(); }); + var keyMapping = keys.GetOrAdd(keyHash, (key) => + { + using (var stream = CreateAppendStream(directory, collectionId, "kmap")) + { + stream.Write(BitConverter.GetBytes(keyHash), 0, sizeof(ulong)); + } + return keyId; + }); + } + + public long GetKeyId(string directory, ulong collectionId, ulong keyHash) + { + var key = Path.Combine(directory, collectionId.ToString()).ToHash(); + + ConcurrentDictionary keys; + + if (!_keyCache.TryGetValue(key, out keys)) + { + ReadKeysIntoCache(directory); + } + + if (keys != null || _keyCache.TryGetValue(key, out keys)) + { + return keys[keyHash]; + } + + throw new Exception($"unable to find key {keyHash} for collection {collectionId} in directory {directory}."); + } + + public bool TryGetKeyId(string directory, ulong collectionId, ulong keyHash, out long keyId) + { + var key = Path.Combine(directory, collectionId.ToString()).ToHash(); + + ConcurrentDictionary keys; + + if (!_keyCache.TryGetValue(key, out keys)) + { + ReadKeysIntoCache(directory); + } + + if (keys != null || _keyCache.TryGetValue(key, out keys)) + { + if (keys.TryGetValue(keyHash, out keyId)) + { + return true; + } + } + + keyId = -1; + return false; + } + + private void ReadKeysIntoCache(string directory) + { + foreach (var keyFile in Directory.GetFiles(directory, "*.kmap")) + { + var collectionId = ulong.Parse(Path.GetFileNameWithoutExtension(keyFile)); + var key = Path.Combine(directory, collectionId.ToString()).ToHash(); + + var keys = _keyCache.GetOrAdd(key, (k) => + { + var ks = new ConcurrentDictionary(); + + using (var stream = new FileStream(keyFile, FileMode.OpenOrCreate, FileAccess.Read, FileShare.ReadWrite)) + { + long i = 0; + var buf = new byte[sizeof(ulong)]; + var read = stream.Read(buf, 0, buf.Length); + + while (read > 0) + { + ks.TryAdd(BitConverter.ToUInt64(buf, 0), i++); + + read = stream.Read(buf, 0, buf.Length); + } + } + + return ks; + }); + } + } + public virtual void Dispose() { _vals.Dispose(); diff --git a/src/Sir.KeyValue/Sir.KeyValue.csproj b/src/Sir.KeyValue/Sir.KeyValue.csproj index 390a7d90..1f078aaa 100644 --- a/src/Sir.KeyValue/Sir.KeyValue.csproj +++ b/src/Sir.KeyValue/Sir.KeyValue.csproj @@ -4,10 +4,6 @@ net5.0 - - - - false @@ -16,4 +12,8 @@ + + + + diff --git a/src/Sir.Mnist/IndexMnistCommand.cs b/src/Sir.Mnist/IndexMnistCommand.cs index 81ac1b5e..186c685f 100644 --- a/src/Sir.Mnist/IndexMnistCommand.cs +++ b/src/Sir.Mnist/IndexMnistCommand.cs @@ -32,8 +32,8 @@ public void Run(IDictionary args, ILogger logger) { sessionFactory.Truncate(dataDirectory, collectionId); - using (var writeSession = new WriteSession(new DocumentWriter(sessionFactory, dataDirectory, collectionId))) - using (var indexSession = new IndexSession(model, new SupervisedLogStructuredIndexingStrategy(model), sessionFactory, dataDirectory, collectionId)) + using (var writeSession = new WriteSession(new DocumentWriter(dataDirectory, collectionId))) + using (var indexSession = new IndexSession(model, new SupervisedLogStructuredIndexingStrategy(model), dataDirectory, collectionId)) { var imageIndexId = writeSession.EnsureKeyExists("image"); diff --git a/src/Sir.Mnist/Sir.Mnist.csproj b/src/Sir.Mnist/Sir.Mnist.csproj index f5607585..e7d189b9 100644 --- a/src/Sir.Mnist/Sir.Mnist.csproj +++ b/src/Sir.Mnist/Sir.Mnist.csproj @@ -14,7 +14,7 @@ - + diff --git a/src/Sir.Mnist/ValidateMnistCommand.cs b/src/Sir.Mnist/ValidateMnistCommand.cs index 161598f3..3a7fcf24 100644 --- a/src/Sir.Mnist/ValidateMnistCommand.cs +++ b/src/Sir.Mnist/ValidateMnistCommand.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Logging; using Sir.Images; +using Sir.KeyValue; using System.Collections.Generic; using System.Diagnostics; using System.Linq; @@ -24,10 +25,10 @@ public void Run(IDictionary args, ILogger logger) var errors = 0; var model = new LinearClassifierImageModel(); - using (var sessionFactory = new SessionFactory(logger: logger)) - using (var querySession = new SearchSession(dataDirectory, sessionFactory, model, new SupervisedLogStructuredIndexingStrategy(model), logger)) + using (var kvwriter = new KeyValueWriter(dataDirectory, collection.ToHash())) + using (var querySession = new SearchSession(dataDirectory, model, new SupervisedLogStructuredIndexingStrategy(model), kvwriter, logger)) { - var queryParser = new QueryParser(dataDirectory, sessionFactory, model, logger: logger); + var queryParser = new QueryParser(dataDirectory, kvwriter, model, logger: logger); foreach (var image in images) { diff --git a/src/Sir.Search/SessionFactory.cs b/src/Sir.Search/SessionFactory.cs deleted file mode 100644 index 547a0d5f..00000000 --- a/src/Sir.Search/SessionFactory.cs +++ /dev/null @@ -1,448 +0,0 @@ -using Microsoft.Extensions.Logging; -using Sir.Core; -using Sir.Documents; -using Sir.IO; -using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.Diagnostics; -using System.IO; - -namespace Sir -{ - /// - /// Stream dispatcher with helper methods for writing, indexing, optimizing, updating and truncating document collections. - /// - public class SessionFactory : IDisposable, IStreamDispatcher - { - private ConcurrentDictionary> _keys; - private ILogger _logger; - - public SessionFactory(ILogger logger = null) - { - _logger = logger; - _keys = new ConcurrentDictionary>(); - - LogTrace($"database initiated"); - } - - - - public IEnumerable Select(string directory, ulong collectionId, HashSet select, int skip = 0, int take = 0) - { - using (var reader = new DocumentStreamSession(directory, this)) - { - foreach (var document in reader.ReadDocuments(collectionId, select, skip, take)) - { - yield return document; - } - } - } - - private void LogInformation(string message) - { - if (_logger != null) - _logger.LogInformation(message); - } - - private void LogTrace(string message) - { - if (_logger != null) - _logger.LogTrace(message); - } - - private void LogDebug(string message) - { - if (_logger != null) - _logger.LogDebug(message); - } - - private void LogError(Exception ex, string message) - { - if (_logger != null) - _logger.LogError(ex, message); - } - - public long GetDocCount(string directory, string collection) - { - var fileName = Path.Combine(directory, $"{collection.ToHash()}.dix"); - - if (!File.Exists(fileName)) - return 0; - - return new FileInfo(fileName).Length / DocIndexWriter.BlockSize; - } - - public void Truncate(string directory, ulong collectionId) - { - var count = 0; - - if (Directory.Exists(directory)) - { - foreach (var file in Directory.GetFiles(directory, $"{collectionId}*")) - { - File.Delete(file); - count++; - } - - var keyStr = Path.Combine(directory, collectionId.ToString()); - var key = keyStr.ToHash(); - _keys.Remove(key, out _); - } - - LogInformation($"truncated collection {collectionId} ({count} files affected)"); - } - - public void TruncateIndex(string directory, ulong collectionId) - { - var count = 0; - - foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.ix")) - { - File.Delete(file); - count++; - } - foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.ixp")) - { - File.Delete(file); - count++; - } - foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.ixtp")) - { - File.Delete(file); - count++; - } - foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.vec")) - { - File.Delete(file); - count++; - } - foreach (var file in Directory.GetFiles(directory, $"{collectionId}*.pos")) - { - File.Delete(file); - count++; - } - - LogInformation($"truncated index {collectionId} ({count} files affected)"); - } - - public void Rename(string directory, ulong currentCollectionId, ulong newCollectionId) - { - var count = 0; - - var from = currentCollectionId.ToString(); - var to = newCollectionId.ToString(); - - foreach (var file in Directory.GetFiles(directory, $"{currentCollectionId}*")) - { - File.Move(file, file.Replace(from, to)); - count++; - } - - var key = Path.Combine(directory, currentCollectionId.ToString()).ToHash(); - - _keys.Remove(key, out _); - - LogInformation($"renamed collection {currentCollectionId} to {newCollectionId} ({count} files affected)"); - } - - public void Optimize( - string directory, - string collection, - HashSet selectFields, - IModel model, - IIndexReadWriteStrategy indexStrategy, - int skipDocuments = 0, - int takeDocuments = 0, - int reportFrequency = 1000, - int pageSize = 100000) - { - var collectionId = collection.ToHash(); - - LogDebug($"optimizing indices for {string.Join(',', selectFields)} in collection {collectionId}"); - - using (var debugger = new IndexDebugger(_logger, reportFrequency)) - using (var documents = new DocumentStreamSession(directory, this)) - { - using (var writeQueue = new ProducerConsumerQueue>(indexSession => - { - indexSession.Commit(); - })) - { - var took = 0; - var skip = skipDocuments; - - while (took < takeDocuments) - { - var payload = documents.GetDocumentsAsVectors( - collectionId, - selectFields, - model, - false, - skip, - pageSize); - - var count = 0; - - using (var indexSession = new IndexSession(model, indexStrategy, this, directory, collectionId)) - { - foreach (var document in payload) - { - foreach (var node in document.Nodes) - { - indexSession.Put(node); - } - - count++; - - debugger.Step(indexSession); - } - - writeQueue.Enqueue(indexSession); - } - - if (count == 0) - break; - - took += count; - skip += pageSize; - } - } - } - - LogDebug($"optimized collection {collection}"); - } - - public void StoreDataAndBuildInMemoryIndex(IEnumerable job, WriteSession writeSession, IndexSession indexSession, int reportSize = 1000, bool label = true) - { - var debugger = new IndexDebugger(_logger, reportSize); - - foreach (var document in job) - { - writeSession.Put(document); - - foreach (var field in document.Fields) - { - if (field.Value != null) - { - indexSession.Put(document.Id, field.KeyId, (T)field.Value, label); - } - } - - debugger.Step(indexSession); - } - } - - public void StoreDataAndBuildInMemoryIndex( - Document document, - WriteSession writeSession, - IndexSession indexSession, - bool label = true) - { - writeSession.Put(document); - - foreach (var field in document.Fields) - { - if (field.Value != null && field.Value is T typedValue) - { - indexSession.Put(document.Id, field.KeyId, typedValue, label); - } - } - } - - public void StoreDataAndPersistIndex(string directory, ulong collectionId, IEnumerable job, IModel model, IIndexReadWriteStrategy indexStrategy, int reportSize = 1000) - { - using (var writeSession = new WriteSession(new DocumentWriter(this, directory, collectionId))) - using (var indexSession = new IndexSession(model, indexStrategy, this, directory, collectionId)) - { - StoreDataAndBuildInMemoryIndex(job, writeSession, indexSession, reportSize); - - indexSession.Commit(); - } - } - - public void Store(string directory, ulong collectionId, IEnumerable job) - { - using (var writeSession = new WriteSession(new DocumentWriter(this, directory, collectionId))) - { - foreach (var document in job) - writeSession.Put(document); - } - } - - public bool DocumentExists(string directory, string collection, string key, T value, IModel model, bool label = true) - { - var query = new QueryParser(directory, this, model, logger: _logger) - .Parse(collection, value, key, key, and: true, or: false, label); - - if (query != null) - { - using (var searchSession = new SearchSession(directory, this, model, new LogStructuredIndexingStrategy(model), _logger)) - { - var document = searchSession.SearchScalar(query); - - if (document != null) - { - if (document.Score >= model.IdenticalAngle) - return true; - } - } - } - - return false; - } - - public FileStream CreateLockFile(string directory, ulong collectionId) - { - return new FileStream(Path.Combine(directory, collectionId + ".lock"), - FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None, - 4096, FileOptions.RandomAccess | FileOptions.DeleteOnClose); - } - - private void ReadKeysIntoCache(string directory) - { - foreach (var keyFile in Directory.GetFiles(directory, "*.kmap")) - { - var collectionId = ulong.Parse(Path.GetFileNameWithoutExtension(keyFile)); - var key = Path.Combine(directory, collectionId.ToString()).ToHash(); - - var keys = _keys.GetOrAdd(key, (k) => - { - var ks = new ConcurrentDictionary(); - - using (var stream = new FileStream(keyFile, FileMode.OpenOrCreate, FileAccess.Read, FileShare.ReadWrite)) - { - long i = 0; - var buf = new byte[sizeof(ulong)]; - var read = stream.Read(buf, 0, buf.Length); - - while (read > 0) - { - ks.TryAdd(BitConverter.ToUInt64(buf, 0), i++); - - read = stream.Read(buf, 0, buf.Length); - } - } - - return ks; - }); - } - } - - public void RegisterKeyMapping(string directory, ulong collectionId, ulong keyHash, long keyId) - { - var key = Path.Combine(directory, collectionId.ToString()).ToHash(); - var keys = _keys.GetOrAdd(key, (key) => { return new ConcurrentDictionary(); }); - var keyMapping = keys.GetOrAdd(keyHash, (key) => - { - using (var stream = CreateAppendStream(directory, collectionId, "kmap")) - { - stream.Write(BitConverter.GetBytes(keyHash), 0, sizeof(ulong)); - } - return keyId; - }); - } - - public long GetKeyId(string directory, ulong collectionId, ulong keyHash) - { - var key = Path.Combine(directory, collectionId.ToString()).ToHash(); - - ConcurrentDictionary keys; - - if (!_keys.TryGetValue(key, out keys)) - { - ReadKeysIntoCache(directory); - } - - if (keys != null || _keys.TryGetValue(key, out keys)) - { - return keys[keyHash]; - } - - throw new Exception($"unable to find key {keyHash} for collection {collectionId} in directory {directory}."); - } - - public bool TryGetKeyId(string directory, ulong collectionId, ulong keyHash, out long keyId) - { - var key = Path.Combine(directory, collectionId.ToString()).ToHash(); - - ConcurrentDictionary keys; - - if (!_keys.TryGetValue(key, out keys)) - { - ReadKeysIntoCache(directory); - } - - if (keys != null || _keys.TryGetValue(key, out keys)) - { - if (keys.TryGetValue(keyHash, out keyId)) - { - return true; - } - } - - keyId = -1; - return false; - } - - public Stream CreateAsyncReadStream(string fileName) - { - return new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 4096, FileOptions.Asynchronous); - } - - public Stream CreateReadStream(string fileName) - { - LogTrace($"opening {fileName}"); - - return new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite, 4096, FileOptions.SequentialScan); - } - - public Stream CreateAsyncAppendStream(string fileName) - { - return new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite, 4096, FileOptions.Asynchronous); - } - - public Stream CreateAppendStream(string directory, ulong collectionId, string fileExtension) - { - if (!Directory.Exists(directory)) - { - Directory.CreateDirectory(directory); - } - - var fileName = Path.Combine(directory, $"{collectionId}.{fileExtension}"); - - if (!File.Exists(fileName)) - { - using (var fs = new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)) {} - } - - return new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); - } - - public Stream CreateAppendStream(string directory, ulong collectionId, long keyId, string fileExtension) - { - if (!Directory.Exists(directory)) - { - Directory.CreateDirectory(directory); - } - - var fileName = Path.Combine(directory, $"{collectionId}.{keyId}.{fileExtension}"); - - if (!File.Exists(fileName)) - { - LogTrace($"creating {fileName}"); - - using (var fs = new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite)) {} - } - - LogTrace($"opening {fileName}"); - - return new FileStream(fileName, FileMode.Append, FileAccess.Write, FileShare.ReadWrite); - } - - public void Dispose() - { - LogTrace($"database disposed"); - } - } -} \ No newline at end of file diff --git a/src/Sir.Search/Sir.Session.csproj b/src/Sir.Search/Sir.Session.csproj index 9fef293f..82111fd7 100644 --- a/src/Sir.Search/Sir.Session.csproj +++ b/src/Sir.Search/Sir.Session.csproj @@ -1,11 +1,14 @@ - + net5.0 - + + + + diff --git a/src/Sir.StringCompare/Sir.StringCompare.csproj b/src/Sir.StringCompare/Sir.StringCompare.csproj index f76b20e1..0d42a25b 100644 --- a/src/Sir.StringCompare/Sir.StringCompare.csproj +++ b/src/Sir.StringCompare/Sir.StringCompare.csproj @@ -15,7 +15,7 @@ - + diff --git a/src/Sir.StringTests/Sir.StringTests.csproj b/src/Sir.StringTests/Sir.StringTests.csproj index 44d87dae..517fb75d 100644 --- a/src/Sir.StringTests/Sir.StringTests.csproj +++ b/src/Sir.StringTests/Sir.StringTests.csproj @@ -16,7 +16,8 @@ - + + diff --git a/src/Sir.Strings/Sir.Strings.csproj b/src/Sir.Strings/Sir.Strings.csproj index c69c9781..b2cd5eff 100644 --- a/src/Sir.Strings/Sir.Strings.csproj +++ b/src/Sir.Strings/Sir.Strings.csproj @@ -22,12 +22,11 @@ - - + - + diff --git a/src/Sir.Wikipedia/IndexWikipediaCommand.cs b/src/Sir.Wikipedia/IndexWikipediaCommand.cs index 4a3f89b5..6282f38e 100644 --- a/src/Sir.Wikipedia/IndexWikipediaCommand.cs +++ b/src/Sir.Wikipedia/IndexWikipediaCommand.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Logging; using Sir.IO; +using Sir.KeyValue; using Sir.Strings; using System.Collections.Generic; using System.IO; @@ -49,11 +50,12 @@ public void Run(IDictionary args, ILogger logger) //} using (var debugger = new IndexDebugger(logger, sampleSize)) - using (var documents = new DocumentStreamSession(dataDirectory, streamDispatcher)) + using(var kvWriter = new KeyValueWriter(dataDirectory, collectionId)) + using (var documents = new DocumentStreamSession(dataDirectory, kvWriter)) { foreach (var batch in documents.ReadDocuments(collectionId, fieldsOfInterest, skip, take).Batch(pageSize)) { - using (var indexSession = new IndexSession(model, indexStrategy, streamDispatcher, dataDirectory, collectionId, logger)) + using (var indexSession = new IndexSession(model, indexStrategy, dataDirectory, collectionId, logger)) { foreach (var document in batch) { diff --git a/src/Sir.Wikipedia/Sir.Wikipedia.csproj b/src/Sir.Wikipedia/Sir.Wikipedia.csproj index 12ac8176..02b82466 100644 --- a/src/Sir.Wikipedia/Sir.Wikipedia.csproj +++ b/src/Sir.Wikipedia/Sir.Wikipedia.csproj @@ -17,7 +17,7 @@ - + diff --git a/src/Sir.Wikipedia/WriteWikipediaCommand.cs b/src/Sir.Wikipedia/WriteWikipediaCommand.cs index 12c09c6b..58889dac 100644 --- a/src/Sir.Wikipedia/WriteWikipediaCommand.cs +++ b/src/Sir.Wikipedia/WriteWikipediaCommand.cs @@ -40,7 +40,7 @@ public void Run(IDictionary args, ILogger logger) { var debugger = new BatchDebugger("write session", logger, sampleSize); - using (var writeSession = new WriteSession(new DocumentWriter(sessionFactory, dataDirectory, collectionId))) + using (var writeSession = new WriteSession(new DocumentWriter(dataDirectory, collectionId))) { foreach (var document in payload) { diff --git a/src/Sir.sln b/src/Sir.sln index f92cd5c0..2f28d33c 100644 --- a/src/Sir.sln +++ b/src/Sir.sln @@ -1,9 +1,9 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29326.143 +# Visual Studio Version 17 +VisualStudioVersion = 17.6.33927.249 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Core", "Sir.Core\Sir.Core.csproj", "{2002DD08-0083-4184-BB1A-2469B608DE95}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.InformationRetreival", "Sir.InformationRetreival\Sir.InformationRetreival.csproj", "{2002DD08-0083-4184-BB1A-2469B608DE95}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "resources", "resources", "{CA96B574-72B3-4954-8B51-4C961A6F6622}" ProjectSection(SolutionItems) = preProject @@ -40,8 +40,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Mnist", "Sir.Mnist\Sir. EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Wikipedia", "Sir.Wikipedia\Sir.Wikipedia.csproj", "{6F3C960C-7652-430C-A253-081E1506BA81}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.WebFront", "Sir.WebFront\Sir.WebFront.csproj", "{1731E700-FC10-45BB-9F94-E4835A8D64D9}" -EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Strings", "Sir.Strings\Sir.Strings.csproj", "{AB275A5B-E72E-475A-8E1A-FFA7A5F9C932}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.StringCompare", "Sir.StringCompare\Sir.StringCompare.csproj", "{C6050E65-9411-41E3-A6EE-0A45E6FFB4FC}" @@ -50,8 +48,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Cmd", "Sir.Cmd\Sir.Cmd. EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Images", "Sir.Images\Sir.Images.csproj", "{1DC66643-0C0A-48AC-9019-5C64C002BA32}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.Session", "Sir.Search\Sir.Session.csproj", "{8D6F6922-620E-4D70-A8BF-D0F08B713C83}" -EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "core", "core", "{644A2FAF-6617-41F6-88B7-92F21493B048}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "models", "models", "{0C62844F-36C0-4029-BB36-3C27C0F29272}" @@ -96,10 +92,6 @@ Global {6F3C960C-7652-430C-A253-081E1506BA81}.Debug|Any CPU.Build.0 = Debug|Any CPU {6F3C960C-7652-430C-A253-081E1506BA81}.Release|Any CPU.ActiveCfg = Release|Any CPU {6F3C960C-7652-430C-A253-081E1506BA81}.Release|Any CPU.Build.0 = Release|Any CPU - {1731E700-FC10-45BB-9F94-E4835A8D64D9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {1731E700-FC10-45BB-9F94-E4835A8D64D9}.Debug|Any CPU.Build.0 = Debug|Any CPU - {1731E700-FC10-45BB-9F94-E4835A8D64D9}.Release|Any CPU.ActiveCfg = Release|Any CPU - {1731E700-FC10-45BB-9F94-E4835A8D64D9}.Release|Any CPU.Build.0 = Release|Any CPU {AB275A5B-E72E-475A-8E1A-FFA7A5F9C932}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {AB275A5B-E72E-475A-8E1A-FFA7A5F9C932}.Debug|Any CPU.Build.0 = Debug|Any CPU {AB275A5B-E72E-475A-8E1A-FFA7A5F9C932}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -116,10 +108,6 @@ Global {1DC66643-0C0A-48AC-9019-5C64C002BA32}.Debug|Any CPU.Build.0 = Debug|Any CPU {1DC66643-0C0A-48AC-9019-5C64C002BA32}.Release|Any CPU.ActiveCfg = Release|Any CPU {1DC66643-0C0A-48AC-9019-5C64C002BA32}.Release|Any CPU.Build.0 = Release|Any CPU - {8D6F6922-620E-4D70-A8BF-D0F08B713C83}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {8D6F6922-620E-4D70-A8BF-D0F08B713C83}.Debug|Any CPU.Build.0 = Debug|Any CPU - {8D6F6922-620E-4D70-A8BF-D0F08B713C83}.Release|Any CPU.ActiveCfg = Release|Any CPU - {8D6F6922-620E-4D70-A8BF-D0F08B713C83}.Release|Any CPU.Build.0 = Release|Any CPU {20F14A4E-99AE-42FB-B447-6B78F1398406}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {20F14A4E-99AE-42FB-B447-6B78F1398406}.Debug|Any CPU.Build.0 = Debug|Any CPU {20F14A4E-99AE-42FB-B447-6B78F1398406}.Release|Any CPU.ActiveCfg = Release|Any CPU @@ -140,12 +128,10 @@ Global {304CAC1B-825A-4D89-AE4C-9C0FC5206607} = {B97268C5-0BFA-4022-BA3F-C07C1F239C8D} {436647C9-EDFF-44AD-AF2F-ABC4EBA70ED7} = {B97268C5-0BFA-4022-BA3F-C07C1F239C8D} {6F3C960C-7652-430C-A253-081E1506BA81} = {B97268C5-0BFA-4022-BA3F-C07C1F239C8D} - {1731E700-FC10-45BB-9F94-E4835A8D64D9} = {23D1F5F4-6D57-4995-98F6-38EED88C2260} {AB275A5B-E72E-475A-8E1A-FFA7A5F9C932} = {0C62844F-36C0-4029-BB36-3C27C0F29272} {C6050E65-9411-41E3-A6EE-0A45E6FFB4FC} = {B97268C5-0BFA-4022-BA3F-C07C1F239C8D} {CEDD3CA9-D38D-43BF-9013-212AE6332CE0} = {23D1F5F4-6D57-4995-98F6-38EED88C2260} {1DC66643-0C0A-48AC-9019-5C64C002BA32} = {0C62844F-36C0-4029-BB36-3C27C0F29272} - {8D6F6922-620E-4D70-A8BF-D0F08B713C83} = {644A2FAF-6617-41F6-88B7-92F21493B048} {23D1F5F4-6D57-4995-98F6-38EED88C2260} = {B97268C5-0BFA-4022-BA3F-C07C1F239C8D} {20F14A4E-99AE-42FB-B447-6B78F1398406} = {35E2693A-1A42-4690-81A8-D424C3D24AD1} {BD85D84A-0F4E-4880-A0CB-128BA9F34EDF} = {35E2693A-1A42-4690-81A8-D424C3D24AD1}