Skip to content

Commit

Permalink
add command helpers
Browse files: browse the repository at this point in the history
  • Loading branch information
kreeben committed Oct 19, 2022
1 parent ec25fe6 commit e3f1085
Show file tree
Hide file tree
Showing 15 changed files with 45 additions and 26 deletions.
6 changes: 5 additions & 1 deletion build.bat
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
dotnet build src/sir.httpserver --configuration Debug
dotnet build src/sir.webfront --configuration Debug
dotnet build src/sir.cmd --configuration Debug
dotnet build src/sir.httpserver --configuration Release
dotnet build src/sir --configuration Release
dotnet build src/sir.webfront --configuration Release
dotnet build src/sir.cmd --configuration Release
2 changes: 1 addition & 1 deletion index.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sir.bat indexwikipedia --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --file d:\enwiki-20211122-cirrussearch-content.json.gz --collection wikipedia --skip 0 --take 500 --pageSize 100 --sampleSize 100
sir.bat indexwikipedia --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --file d:\enwiki-20211122-cirrussearch-content.json.gz --collection wikipedia --skip 0 --take 10000 --pageSize 1000 --sampleSize 100
2 changes: 1 addition & 1 deletion src/Sir.Core/Field.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public void Analyze<T>(IModel<T> model, bool label, IStreamDispatcher streamDisp
{
foreach (var token in tokens)
{
model.ExecutePut<string>(Tree, new VectorNode(token, keyId: KeyId), reader);
model.Put<string>(Tree, new VectorNode(token, keyId: KeyId), reader);
}

_tokens = GetTokens();
Expand Down
4 changes: 2 additions & 2 deletions src/Sir.Core/IIndexReadWriteStrategy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{
public interface IIndexReadWriteStrategy
{
void ExecutePut<T>(VectorNode column, VectorNode node, IColumnReader reader);
Hit ExecuteGetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader);
void Put<T>(VectorNode column, VectorNode node, IColumnReader reader);
Hit GetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader);
}
}
2 changes: 1 addition & 1 deletion src/Sir.Core/IO/GraphBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public static VectorNode CreateTree<T>(this IModel<T> model, IIndexReadWriteStra
{
foreach (var vector in model.CreateEmbedding(item, true))
{
indexingStrategy.ExecutePut<T>(root, new VectorNode(vector), reader);
indexingStrategy.Put<T>(root, new VectorNode(vector), reader);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ public OptimizedPageIndexingStrategy(IModel model)
_model = model;
}

public Hit ExecuteGetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
public Hit GetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
{
return reader.ClosestMatchOrNullStoppingAtBestPage(vector, model);
}

public void ExecutePut<T>(VectorNode column, VectorNode node, IColumnReader reader)
public void Put<T>(VectorNode column, VectorNode node, IColumnReader reader)
{
var existing = reader.ClosestMatchOrNullScanningAllPages(node.Vector, _model);
var existing = reader.ClosestMatchOrNullStoppingAtBestPage(node.Vector, _model);

if (existing != null && existing.Score >= _model.IdenticalAngle)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Sir.Images/LinearClassifierImageModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public class LinearClassifierImageModel : DistanceCalculator, IModel<IImage>
public double FoldAngle => 0.75d;
public override int NumOfDimensions => 784;

public void ExecutePut<T>(VectorNode column, VectorNode node, IColumnReader reader)
public void Put<T>(VectorNode column, VectorNode node, IColumnReader reader)
{
column.AddOrAppendSupervised(node, this);
}
Expand All @@ -23,7 +23,7 @@ public IEnumerable<ISerializableVector> CreateEmbedding(IImage data, bool label)
yield return new SerializableVector(pixels, data.Label);
}

public Hit ExecuteGetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
public Hit GetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
{
return reader.ClosestMatchOrNullScanningAllPages(vector, model);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Search/InMemoryIndexSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public void Put(VectorNode documentTree)

foreach (var node in PathFinder.All(documentTree))
{
_indexingStrategy.ExecutePut<T>(
_indexingStrategy.Put<T>(
column,
new VectorNode(node.Vector, docIds: node.DocIds),
GetReader(documentTree.KeyId.Value));
Expand Down
2 changes: 1 addition & 1 deletion src/Sir.Search/SearchSession.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ private void Scan(IQuery query)

if (reader != null)
{
var hit =_model.ExecuteGetClosestMatchOrNull(term.Vector, _model, reader);
var hit =_model.GetClosestMatchOrNull(term.Vector, _model, reader);

if (hit != null)
{
Expand Down
24 changes: 16 additions & 8 deletions src/Sir.Search/SessionFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,25 @@ public long GetDocCount(string directory, string collection)
public void Truncate(string directory, ulong collectionId)
{
var count = 0;
var key = Path.Combine(directory, collectionId.ToString()).ToHash();

foreach (var file in Directory.GetFiles(directory, $"{collectionId}*"))
if (Directory.Exists(directory))
{
File.Delete(file);
count++;
}
var path = Path.Combine(directory, collectionId.ToString());
var key = path.ToHash();

lock (_syncKeys)
{
_keys.Remove(key, out _);
if (Directory.Exists(path))
{
foreach (var file in Directory.GetFiles(path, $"{collectionId}*"))
{
File.Delete(file);
count++;
}

lock (_syncKeys)
{
_keys.Remove(key, out _);
}
}
}

LogInformation($"truncated collection {collectionId} ({count} files affected)");
Expand Down
4 changes: 2 additions & 2 deletions src/Sir.Strings/BagOfCharsModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public class BagOfCharsModel : DistanceCalculator, IModel<string>
public double FoldAngle => 0.55d;
public override int NumOfDimensions => System.Text.Unicode.UnicodeRanges.All.Length;

public void ExecutePut<T>(VectorNode column, VectorNode node, IColumnReader reader)
public void Put<T>(VectorNode column, VectorNode node, IColumnReader reader)
{
column.AddOrAppend(node, this);
}
Expand Down Expand Up @@ -65,7 +65,7 @@ public IEnumerable<ISerializableVector> CreateEmbedding(string data, bool label)
}
}

public Hit ExecuteGetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
public Hit GetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
{
return reader.ClosestMatchOrNullScanningAllPages(vector, model);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Sir.Strings/NGramModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public NGramModel(BagOfCharsModel wordTokenizer)
NumOfDimensions = wordTokenizer.NumOfDimensions * 2;
}

public void ExecutePut<T>(VectorNode column, VectorNode node, IColumnReader reader)
public void Put<T>(VectorNode column, VectorNode node, IColumnReader reader)
{
column.AddOrAppend(node, this);
}
Expand Down Expand Up @@ -51,7 +51,7 @@ public IEnumerable<ISerializableVector> CreateEmbedding(string data, bool label)
}
}

public Hit ExecuteGetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
public Hit GetClosestMatchOrNull(ISerializableVector vector, IModel model, IColumnReader reader)
{
return reader.ClosestMatchOrNullScanningAllPages(vector, model);
}
Expand Down
4 changes: 4 additions & 0 deletions src/Sir.sln
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,15 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "resources", "resources", "{
AssemblyInfo.cs = AssemblyInfo.cs
..\build.bat = ..\build.bat
..\compare.bat = ..\compare.bat
..\index.bat = ..\index.bat
..\INSTALL.md = ..\INSTALL.md
..\push-to-nuget.bat = ..\push-to-nuget.bat
..\README.md = ..\README.md
..\sir.bat = ..\sir.bat
..\test.bat = ..\test.bat
..\truncate.bat = ..\truncate.bat
..\USER-GUIDE.md = ..\USER-GUIDE.md
..\validate.bat = ..\validate.bat
EndProjectSection
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sir.HttpServer", "Sir.HttpServer\Sir.HttpServer.csproj", "{C94C2F5D-AE55-4157-A74A-26D49EE73E96}"
Expand Down
3 changes: 3 additions & 0 deletions test.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
call sir.bat truncate --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --collection wikipedia
call index.bat
call validate.bat
2 changes: 1 addition & 1 deletion validate.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sir.bat validate --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --collection wikipedia --skip 0 --take 1000
sir.bat validate --directory C:\projects\resin\src\Sir.HttpServer\AppData\database --collection wikipedia --skip 0 --take 10000

0 comments on commit e3f1085

Please sign in to comment.