Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding KNN scorer explanation #1914

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ require (
github.com/blevesearch/zapx/v13 v13.3.10
github.com/blevesearch/zapx/v14 v14.3.10
github.com/blevesearch/zapx/v15 v15.3.13
github.com/blevesearch/zapx/v16 v16.0.0-20231110151736-c56571088d10
github.com/blevesearch/zapx/v16 v16.0.0-20231121170046-dd26ea1bbf91
github.com/couchbase/moss v0.2.0
github.com/golang/protobuf v1.3.2
github.com/spf13/cobra v1.7.0
Expand All @@ -32,7 +32,7 @@ require (
)

require (
github.com/blevesearch/go-faiss v1.0.3-0.20231110151003-0ea762e5c06d // indirect
github.com/blevesearch/go-faiss v1.0.3 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ github.com/blevesearch/bleve_index_api v1.1.3 h1:aNyMEiWFviY/1zYm7JCr2lZRIiYX0TM
github.com/blevesearch/bleve_index_api v1.1.3/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.18 h1:Np8jycHTZ5scFe7VEPLrDoHnnb9C4j636ue/CGrhtDw=
github.com/blevesearch/geo v0.1.18/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM=
github.com/blevesearch/go-faiss v1.0.3-0.20231110151003-0ea762e5c06d h1:qIVY0mozIvyrOJso6EnuWnXMDjOy9DqCC+TOEdthdNg=
github.com/blevesearch/go-faiss v1.0.3-0.20231110151003-0ea762e5c06d/go.mod h1:jrxHrbl42X/RnDPI+wBoZU8joxxuRwedrxqswQ3xfU8=
github.com/blevesearch/go-faiss v1.0.3 h1:NZfqZif0+OfcPVM1IDI9gjc3P3jsETR+EN54L+OlfWQ=
github.com/blevesearch/go-faiss v1.0.3/go.mod h1:jrxHrbl42X/RnDPI+wBoZU8joxxuRwedrxqswQ3xfU8=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
Expand Down Expand Up @@ -43,8 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ=
github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.0.0-20231110151736-c56571088d10 h1:+QxeHgc8Tt5AZUZmivIJ7zUHtZ882IaIwiHZ2ycc4ss=
github.com/blevesearch/zapx/v16 v16.0.0-20231110151736-c56571088d10/go.mod h1:ZlLFTYYHQrXB8KsgVOoQ1arnT8GKHMlOVDzMi5i57iU=
github.com/blevesearch/zapx/v16 v16.0.0-20231121170046-dd26ea1bbf91 h1:FPbRalI5eNPVYlV6tv3Nle2KsVPrJAYSn00+nONz0i0=
github.com/blevesearch/zapx/v16 v16.0.0-20231121170046-dd26ea1bbf91/go.mod h1:1wZRO5mWgYFa9LDNomxqbcyJ1JU827X/V7fCIdssSIg=
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
Expand Down
10 changes: 9 additions & 1 deletion index_alias_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
if len(i.indexes) == 1 {
return i.indexes[0].SearchInContext(ctx, req)
}

return MultiSearch(ctx, req, i.indexes...)
}

Expand Down Expand Up @@ -453,6 +452,10 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
originalSize := req.Size
if len(indexes) > 1 {
req.Size = adjustRequestSizeForKNN(req, len(indexes))
}

// run search on each index in separate go routine
var waitGroup sync.WaitGroup
Expand Down Expand Up @@ -491,6 +494,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
indexErrors[asr.Name] = asr.Err
}
}
req.Size = originalSize

// merge just concatenated all the hits
// now let's clean it up
Expand All @@ -504,6 +508,10 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
}
}

if len(indexes) > 1 {
mergeKNNResults(req, sr)
}

sortFunc := req.SortFunc()
// sort all hits with the requested order
if len(req.Sort) > 0 {
Expand Down
15 changes: 10 additions & 5 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
ctx = context.WithValue(ctx, search.GeoBufferPoolCallbackKey,
search.GeoBufferPoolCallbackFunc(getBufferPool))

// Using a disjunction query to get union of results from KNN query
// and the original query
searchQuery := disjunctQueryWithKNN(req)
// Build a single query that combines the KNN queries with
// the original query, based on the KNN operator.
searchQuery, err := queryWithKNN(req)
if err != nil {
return nil, err
}

searcher, err := searchQuery.Searcher(ctx, indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
Expand Down Expand Up @@ -642,7 +645,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
req.SearchAfter = nil
}

return &SearchResult{
rv := &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Expand All @@ -653,7 +656,9 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
MaxScore: coll.MaxScore(),
Took: searchDuration,
Facets: coll.FacetResults(),
}, nil
}
mergeKNNResults(req, rv)
return rv, nil
}

func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
Expand Down
18 changes: 16 additions & 2 deletions search/scorer/scorer_conjunction.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,28 @@ func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQuery
}
}

func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, originalPositions []int) *search.DocumentMatch {
var sum float64
var childrenExplanations []*search.Explanation
if s.options.Explain {
childrenExplanations = make([]*search.Explanation, len(constituents))
}

scoreBreakdown := make([]float64, len(constituents))
for i, docMatch := range constituents {
sum += docMatch.Score
if originalPositions != nil {
// Used by the conjunction searcher: originalPositions holds the
// positions of the searchers before sorting, since the conjunction
// searcher sorts its searchers in order of their Count().
scoreBreakdown[originalPositions[i]] = docMatch.Score
} else {
// The indexes of the searchers are already the original searcher
// positions, e.g. the boolean searcher also uses the conjunction scorer,
// with index 0 being the must (conjunction) searcher
// and index 1 being the should (disjunction) searcher.
scoreBreakdown[i] = docMatch.Score
}
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
Expand All @@ -65,6 +78,7 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
rv := constituents[0]
rv.Score = newScore
rv.Expl = newExpl
rv.ScoreBreakdown = scoreBreakdown
rv.FieldTermLocations = search.MergeFieldTermLocations(
rv.FieldTermLocations, constituents[1:])

Expand Down
12 changes: 10 additions & 2 deletions search/scorer/scorer_disjunction.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,22 @@ func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQuery
}
}

func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int,
matchingIdxs []int, originalPositions []int) *search.DocumentMatch {

var sum float64
var childrenExplanations []*search.Explanation
if s.options.Explain {
childrenExplanations = make([]*search.Explanation, len(constituents))
}

scoreBreakdown := make([]float64, countTotal)
for i, docMatch := range constituents {
sum += docMatch.Score
if originalPositions != nil {
scoreBreakdown[originalPositions[matchingIdxs[i]]] = docMatch.Score
} else {
scoreBreakdown[matchingIdxs[i]] = docMatch.Score
}
if s.options.Explain {
childrenExplanations[i] = docMatch.Expl
}
Expand All @@ -75,6 +82,7 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [
// reuse constituents[0] as the return value
rv := constituents[0]
rv.Score = newScore
rv.ScoreBreakdown = scoreBreakdown
rv.Expl = newExpl
rv.FieldTermLocations = search.MergeFieldTermLocations(
rv.FieldTermLocations, constituents[1:])
Expand Down
85 changes: 70 additions & 15 deletions search/scorer/scorer_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
package scorer

import (
"fmt"
"math"
"reflect"

"github.com/blevesearch/bleve/v2/search"
Expand All @@ -32,34 +34,35 @@ func init() {
}

type KNNQueryScorer struct {
queryVector []float32
queryField string
queryWeight float64
queryBoost float64
queryNorm float64
docTerm uint64
docTotal uint64
options search.SearcherOptions
includeScore bool
similarityMetric string
queryVector []float32
queryField string
queryWeight float64
queryBoost float64
queryNorm float64
options search.SearcherOptions
includeScore bool
similarityMetric string
queryWeightExplanation *search.Explanation
}

func NewKNNQueryScorer(queryVector []float32, queryField string, queryBoost float64,
docTerm uint64, docTotal uint64, options search.SearcherOptions,
options search.SearcherOptions,
similarityMetric string) *KNNQueryScorer {
return &KNNQueryScorer{
queryVector: queryVector,
queryField: queryField,
queryBoost: queryBoost,
queryWeight: 1.0,
docTerm: docTerm,
docTotal: docTotal,
options: options,
includeScore: options.Score != "none",
similarityMetric: similarityMetric,
}
}

// maxKNNScore is the score used when knnMatch.Score == 0, i.e. when
// the query vector and the indexed vector are exactly the same.
const maxKNNScore = math.MaxFloat64

func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
knnMatch *index.VectorDoc) *search.DocumentMatch {
rv := ctx.DocumentMatchPool.Get()
Expand All @@ -68,14 +71,48 @@ func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
var scoreExplanation *search.Explanation
score := knnMatch.Score
if sqs.similarityMetric == index.EuclideanDistance {
// eucliden distances need to be inverted to work
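// When euclidean distance is the similarity metric, an exact
// vector match (perfect match) returns distance = 0.
// NOTE(review): the substituted maxKNNScore still goes through the
// 1.0 / score inversion below, so a perfect match ends up with a
// near-zero score — confirm whether the inversion should be skipped here.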
if score == 0 {
score = maxKNNScore
}
// euclidean distances need to be inverted to work with
// tf-idf scoring
score = 1.0 / score
}

if sqs.options.Explain {
childrenExplanations := make([]*search.Explanation, 1)
childrenExplanations[0] = &search.Explanation{
Value: score,
Message: fmt.Sprintf("vector(field(%s:%s) with similarity_metric(%s)=%f",
sqs.queryField, knnMatch.ID, sqs.similarityMetric, score),
}
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s in doc %s), score of:",
sqs.queryField, knnMatch.ID),
Children: childrenExplanations,
}
}

// if the query weight isn't 1, multiply
if sqs.queryWeight != 1.0 {
if sqs.queryWeight != 1.0 && score != maxKNNScore {
score = score * sqs.queryWeight
if sqs.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = sqs.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
// Product of score * weight
// Avoid adding the query vector to the explanation since vectors
// can get quite large.
Message: fmt.Sprintf("weight(%s:query Vector^%f in %s), product of:",
sqs.queryField, sqs.queryBoost, knnMatch.ID),
Children: childExplanations,
}
}
}

if sqs.includeScore {
Expand All @@ -100,4 +137,22 @@ func (sqs *KNNQueryScorer) SetQueryNorm(qnorm float64) {

// update the query weight
sqs.queryWeight = sqs.queryBoost * sqs.queryNorm

if sqs.options.Explain {
childrenExplanations := make([]*search.Explanation, 2)
childrenExplanations[0] = &search.Explanation{
Value: sqs.queryBoost,
Message: "boost",
}
childrenExplanations[1] = &search.Explanation{
Value: sqs.queryNorm,
Message: "queryNorm",
}
sqs.queryWeightExplanation = &search.Explanation{
Value: sqs.queryWeight,
Message: fmt.Sprintf("queryWeight(%s:query Vector^%f), product of:",
sqs.queryField, sqs.queryBoost),
Children: childrenExplanations,
}
}
}
Loading
Loading