From 671df1849d7dcd035f6f2d8660a9b2f7fee5446e Mon Sep 17 00:00:00 2001 From: CascadingRadium Date: Tue, 21 Nov 2023 21:14:22 +0530 Subject: [PATCH] use maxKNNScore --- knn_test.go | 9 +++++---- search/scorer/scorer_knn.go | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/knn_test.go b/knn_test.go index 4accb8032..32e2ff62d 100644 --- a/knn_test.go +++ b/knn_test.go @@ -20,6 +20,7 @@ package bleve import ( "archive/zip" "encoding/json" + "math" "math/rand" "testing" @@ -355,8 +356,8 @@ func runKNNTest(t *testing.T, randomizeDocuments bool) { numIndexPartitions: 1, expectedResults: map[string]testResult{ "doc7": { - score: 2357.022603955158, - scoreBreakdown: []float64{0, 0, 7071.067811865475}, + score: math.MaxFloat64, + scoreBreakdown: []float64{0, 0, math.MaxFloat64 / 3.0}, }, "doc29": { score: 0.6774608026082964, @@ -402,8 +403,8 @@ func runKNNTest(t *testing.T, randomizeDocuments bool) { numIndexPartitions: 4, expectedResults: map[string]testResult{ "doc7": { - score: 2357.022603955158, - scoreBreakdown: []float64{0, 0, 7071.067811865475}, + score: math.MaxFloat64, + scoreBreakdown: []float64{0, 0, math.MaxFloat64 / 3.0}, }, "doc29": { score: 0.567426591648309, diff --git a/search/scorer/scorer_knn.go b/search/scorer/scorer_knn.go index 4a93633f9..7623fec46 100644 --- a/search/scorer/scorer_knn.go +++ b/search/scorer/scorer_knn.go @@ -18,6 +18,7 @@ package scorer import ( + "math" "reflect" "github.com/blevesearch/bleve/v2/search" @@ -60,8 +61,9 @@ func NewKNNQueryScorer(queryVector []float32, queryField string, queryBoost floa } } -// TODO: Better value needed here? -const maxEuclideanDistance = 10000.0 +// Score used when knnMatch.Score == 0, i.e. when +// the query and indexed vector are exactly the same. 
+const maxKNNScore = math.MaxFloat64 func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext, knnMatch *index.VectorDoc) *search.DocumentMatch { @@ -70,20 +72,18 @@ func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext, if sqs.includeScore || sqs.options.Explain { var scoreExplanation *search.Explanation score := knnMatch.Score - if sqs.similarityMetric == index.EuclideanDistance { - // in case of euclidean distance being the distance metric, - // an exact vector (perfect match), would return distance = 0 - if score == 0 { - score = maxEuclideanDistance - } else { - // euclidean distances need to be inverted to work with - // tf-idf scoring - score = 1.0 / score - } + // in case of euclidean distance being the distance metric, + // an exact vector (perfect match), would return distance = 0 + if score == 0 { + score = maxKNNScore + } else { + // euclidean distances need to be inverted to work with + // tf-idf scoring + score = 1.0 / score } // if the query weight isn't 1, multiply - if sqs.queryWeight != 1.0 { + if sqs.queryWeight != 1.0 && score != maxKNNScore { score = score * sqs.queryWeight }