From bd57cb67d657f25b8a713227798ce7fb5cfa7b5b Mon Sep 17 00:00:00 2001
From: Aditi Ahuja <48997495+metonymic-smokey@users.noreply.github.com>
Date: Mon, 13 Jan 2025 10:22:17 +0530
Subject: [PATCH] MB-64636 - Modified Weight for KNN Scorer (#2127)

1. Changed weight of a kNN query to 1 to allow the boost value to kick
in when computing query score. Since the weight of only the kNN scorer
is changed, this will not impact how boosting is calculated for other
types of queries. To reduce the kNN score relative to the FTS query
score, set boost to <1.
2. Added a unit test which demonstrates boost increasing scores even for
pure kNN queries (kNN + match none query).
---
 search/scorer/scorer_knn.go |   4 +++-
 search_knn_test.go          | 119 ++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/search/scorer/scorer_knn.go b/search/scorer/scorer_knn.go
index 2f832efab..8d9043427 100644
--- a/search/scorer/scorer_knn.go
+++ b/search/scorer/scorer_knn.go
@@ -128,7 +128,9 @@ func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
 }
 
+// Weight is fixed at 1.0 (instead of the squared query boost) so that the
+// boost value takes effect when the query score is computed.
 func (sqs *KNNQueryScorer) Weight() float64 {
-	return sqs.queryBoost * sqs.queryBoost
+	return 1.0
 }
 
 func (sqs *KNNQueryScorer) SetQueryNorm(qnorm float64) {
diff --git a/search_knn_test.go b/search_knn_test.go
index 7001814e3..00c7797ab 100644
--- a/search_knn_test.go
+++ b/search_knn_test.go
@@ -1208,6 +1208,125 @@ func TestSimilaritySearchMultipleSegments(t *testing.T) {
 	}
 }
 
+// TestKNNScoreBoosting determines the impact of boost on kNN queries: a pure
+// kNN search (kNN + match none query) is run with boost 1 and with boost 10,
+// and every hit must score 10 times higher in the boosted run.
+func TestKNNScoreBoosting(t *testing.T) {
+	tmpIndexPath := createTmpIndexPath(t)
+	defer cleanupTmpIndexPath(t, tmpIndexPath)
+
+	const (
+		dims    = 5
+		numDocs = 100
+		k       = 3
+		boost   = 10.0
+	)
+	getRandomVector := func() []float32 {
+		vec := make([]float32, dims)
+		for i := range vec {
+			vec[i] = rand.Float32()
+		}
+		return vec
+	}
+
+	// Reserve capacity only; a non-zero length would leave nil documents in
+	// front of the appended ones.
+	dataset := make([]map[string]interface{}, 0, numDocs)
+	for i := 0; i < numDocs; i++ {
+		dataset = append(dataset, map[string]interface{}{
+			"type":    "vectorStuff",
+			"content": strconv.Itoa(i),
+			"vector":  getRandomVector(),
+		})
+	}
+
+	indexMapping := NewIndexMapping()
+	indexMapping.TypeField = "type"
+	indexMapping.DefaultAnalyzer = "en"
+	documentMapping := NewDocumentMapping()
+	indexMapping.AddDocumentMapping("vectorStuff", documentMapping)
+
+	contentFieldMapping := NewTextFieldMapping()
+	contentFieldMapping.Index = true
+	contentFieldMapping.Store = true
+	documentMapping.AddFieldMappingsAt("content", contentFieldMapping)
+
+	vecFieldMapping := mapping.NewVectorFieldMapping()
+	vecFieldMapping.Index = true
+	vecFieldMapping.Dims = dims
+	vecFieldMapping.Similarity = "dot_product"
+	documentMapping.AddFieldMappingsAt("vector", vecFieldMapping)
+
+	index, err := New(tmpIndexPath, indexMapping)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer func() {
+		err := index.Close()
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	batch := index.NewBatch()
+	for i, doc := range dataset {
+		if err := batch.Index(strconv.Itoa(i), doc); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	err = index.Batch(batch)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	queryVec := getRandomVector()
+
+	// knnScores runs a pure kNN search with the given boost and returns the
+	// score of each hit keyed by document ID.
+	knnScores := func(knnBoost float64) map[string]float64 {
+		searchRequest := NewSearchRequest(NewMatchNoneQuery())
+		searchRequest.AddKNN("vector", queryVec, k, knnBoost)
+		searchRequest.Fields = []string{"content", "vector"}
+
+		res, err := index.Search(searchRequest)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if len(res.Hits) == 0 {
+			t.Fatalf("no kNN hits returned for boost %v", knnBoost)
+		}
+		scores := make(map[string]float64, len(res.Hits))
+		for _, hit := range res.Hits {
+			scores[hit.ID] = hit.Score
+		}
+		return scores
+	}
+
+	baseScores := knnScores(1.0)
+	boostedScores := knnScores(boost)
+
+	for id, got := range boostedScores {
+		base, ok := baseScores[id]
+		if !ok {
+			t.Errorf("doc %s missing from unboosted results", id)
+			continue
+		}
+		// Allow for floating-point rounding instead of requiring exact equality.
+		want := base * boost
+		diff, tol := got-want, 1e-9*want
+		if diff < 0 {
+			diff = -diff
+		}
+		if tol < 0 {
+			tol = -tol
+		}
+		if diff > tol {
+			t.Errorf("boosting not working: %v %v \n", base, got)
+		}
+	}
+}
+
 // Test to see if KNN Operators get added right to the query.
 func TestKNNOperator(t *testing.T) {
 	tmpIndexPath := createTmpIndexPath(t)