Skip to content

Commit

Permalink
Optionally skip multisearch after presearch (#1961)
Browse files Browse the repository at this point in the history
If a search request has KNN requests in it, and both of the following hold:
- the main query is a match_none query
- the facets request is nil

then the final search result is constructed from just the presearch results,
and the multi search phase is skipped.

---------

Co-authored-by: Abhinav Dangeti <[email protected]>
  • Loading branch information
CascadingRadium and abhinavdangeti authored Jan 12, 2024
1 parent 473beac commit c227083
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 26 deletions.
67 changes: 53 additions & 14 deletions index_alias_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
// - the request does not already have preSearchData
// - the request requires presearch
var preSearchDuration time.Duration
if req.PreSearchData == nil && preSearchRequired(ctx, req) {
var sr *SearchResult
if req.PreSearchData == nil && preSearchRequired(req) {
searchStart := time.Now()
preSearchResult, err := preSearch(ctx, req, i.indexes...)
if err != nil {
Expand All @@ -217,19 +218,28 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest
// so that the errors are not lost
if preSearchResult.Status.Failed > 0 {
return preSearchResult, nil
}

// if there are no errors, then merge the data in the presearch result
preSearchResult = mergePreSearchResult(req, preSearchResult, i.indexes)
if requestSatisfiedByPreSearch(req) {
sr = finalizeSearchResult(req, preSearchResult)
// no need to run the 2nd phase MultiSearch(..)
} else {
// if there are no errors, then use the presearch data
// to execute the query
preSearchData, err = mergePreSearchData(req, preSearchResult, i.indexes)
preSearchData, err = constructPreSearchData(req, preSearchResult, i.indexes)
if err != nil {
return nil, err
}
}
preSearchDuration = time.Since(searchStart)
}
sr, err := MultiSearch(ctx, req, preSearchData, i.indexes...)
if err != nil {
return nil, err

// check if search result was generated as part of presearch itself
if sr == nil {
sr, err = MultiSearch(ctx, req, preSearchData, i.indexes...)
if err != nil {
return nil, err
}
}
sr.Took += preSearchDuration
return sr, nil
Expand Down Expand Up @@ -505,7 +515,7 @@ type asyncSearchResult struct {
Err error
}

func preSearchRequired(ctx context.Context, req *SearchRequest) bool {
func preSearchRequired(req *SearchRequest) bool {
return requestHasKNN(req)
}

Expand All @@ -528,21 +538,50 @@ func tagHitsWithIndexName(sr *SearchResult, indexName string) {
}
}

func mergePreSearchData(req *SearchRequest, res *SearchResult,
indexes []Index) (map[string]map[string]interface{}, error) {
// finalizeSearchResult converts a merged presearch result into the final
// search result, for requests that do not need the multi search phase.
//
// It sets Total to the number of merged hits, raises MaxScore to the highest
// hit score seen, and numbers the hits by their position. For KNN requests
// the result is then handed to constructKNNSearchResult.
// The passed-in preSearchResult is modified in place.
func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *SearchResult {
	// these values are computed over every merged hit, before any
	// pagination settings are applied
	preSearchResult.Total = uint64(preSearchResult.Hits.Len())
	for pos := range preSearchResult.Hits {
		match := preSearchResult.Hits[pos]
		if match.Score > preSearchResult.MaxScore {
			preSearchResult.MaxScore = match.Score
		}
		match.HitNumber = uint64(pos)
	}
	if !requestHasKNN(req) {
		return preSearchResult
	}
	return constructKNNSearchResult(req, preSearchResult)
}

// mergePreSearchResult merges the hits gathered during the presearch phase.
// For requests carrying KNN, res.Hits is replaced with the output of
// mergeKNNDocumentMatches; any other request is returned untouched.
// The indexes argument is not used by this function.
func mergePreSearchResult(req *SearchRequest, res *SearchResult,
	indexes []Index) *SearchResult {
	if !requestHasKNN(req) {
		return res
	}
	res.Hits = mergeKNNDocumentMatches(req, res.Hits)
	return res
}

// requestSatisfiedByPreSearch reports whether the presearch result alone is
// enough to answer the request, i.e. the request has KNN and
// isKNNrequestSatisfiedByPreSearch agrees that no second phase is needed.
func requestSatisfiedByPreSearch(req *SearchRequest) bool {
	return requestHasKNN(req) && isKNNrequestSatisfiedByPreSearch(req)
}

func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) {
mergedOut := make(map[string]map[string]interface{}, len(indexes))
for _, index := range indexes {
mergedOut[index.Name()] = make(map[string]interface{})
}
var err error
if requestHasKNN(req) {
distributedHits, err := mergeKNNDocumentMatches(req, res.Hits, indexes)
mergedOut, err = constructKnnPresearchData(mergedOut, preSearchResult, indexes)
if err != nil {
return nil, err
}
for _, index := range indexes {
mergedOut[index.Name()][search.KnnPreSearchDataKey] = distributedHits[index.Name()]
}
}
return mergedOut, nil
}
Expand Down
2 changes: 1 addition & 1 deletion index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
if len(req.Fields) > 0 || highlighter != nil {
doc, err := r.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
if len(req.Fields) > 0 && hit.Fields == nil {
totalStoredFieldsBytes = doc.StoredFieldsBytes()
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
Expand Down
32 changes: 25 additions & 7 deletions search/collector/topn.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
break
}

err = hc.prepareDocumentMatch(searchContext, reader, next)
err = hc.prepareDocumentMatch(searchContext, reader, next, false)
if err != nil {
break
}
Expand All @@ -247,7 +247,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
// we may have some knn hits left that did not match any of the top N tf-idf hits
// we need to add them to the collector store to consider them as well.
for _, knnDoc := range hc.knnHits {
err = hc.prepareDocumentMatch(searchContext, reader, knnDoc)
err = hc.prepareDocumentMatch(searchContext, reader, knnDoc, true)
if err != nil {
return err
}
Expand Down Expand Up @@ -304,11 +304,24 @@ func (hc *TopNCollector) adjustDocumentMatch(ctx *search.SearchContext,
}

func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
reader index.IndexReader, d *search.DocumentMatch) (err error) {
reader index.IndexReader, d *search.DocumentMatch, isKnnDoc bool) (err error) {

// visit field terms for features that require it (sort, facets)
if len(hc.neededFields) > 0 {
err = hc.visitFieldTerms(reader, d)
if !isKnnDoc && len(hc.neededFields) > 0 {
err = hc.visitFieldTerms(reader, d, hc.updateFieldVisitor)
if err != nil {
return err
}
} else if isKnnDoc && hc.facetsBuilder != nil {
// we need to visit the field terms for the knn document
// only for those fields that are required for faceting
// and not for sorting. This is because the knn document's
// sort value is already computed in the knn collector.
err = hc.visitFieldTerms(reader, d, func(field string, term []byte) {
if hc.facetsBuilder != nil {
hc.facetsBuilder.UpdateVisitor(field, term)
}
})
if err != nil {
return err
}
Expand All @@ -322,6 +335,11 @@ func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
if d.Score > hc.maxScore {
hc.maxScore = d.Score
}
// early exit as the document match had its sort value calculated in the knn
// collector itself
if isKnnDoc {
return nil
}

// see if we need to load ID (at this early stage, for example to sort on it)
if hc.needDocIds && d.ID == "" {
Expand Down Expand Up @@ -399,7 +417,7 @@ func MakeTopNDocumentMatchHandler(

// visitFieldTerms is responsible for visiting the field terms of the
// search hit, and passing visited terms to the sort and facet builder
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch, v index.DocValueVisitor) error {
if hc.facetsBuilder != nil {
hc.facetsBuilder.StartDoc()
}
Expand All @@ -413,7 +431,7 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc
}
}

err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor)
err := hc.dvReader.VisitDocValues(d.IndexInternalID, v)
if hc.facetsBuilder != nil {
hc.facetsBuilder.EndDoc()
}
Expand Down
134 changes: 132 additions & 2 deletions search_knn.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"encoding/json"
"fmt"
"sort"
"strconv"

"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/collector"
Expand Down Expand Up @@ -246,6 +247,35 @@ func validateKNN(req *SearchRequest) error {
return nil
}

// addSortAndFieldsToKNNHits populates each KNN hit with its sort value and
// its requested fields.
//
// When the request's sort order needs field values, a doc value reader is
// opened over those fields and each hit's doc values are fed to
// req.Sort.UpdateVisitor before req.Sort.Value computes the hit's sort key.
// LoadAndHighlightFields is then invoked for every hit (with an empty index
// name and a nil highlighter). The first error encountered is returned.
func addSortAndFieldsToKNNHits(req *SearchRequest, knnHits []*search.DocumentMatch, reader index.IndexReader) (err error) {
	sortFields := req.Sort.RequiredFields()
	needDocValues := len(sortFields) > 0

	var (
		docValues index.DocValueReader
		visit     index.DocValueVisitor
	)
	if needDocValues {
		if docValues, err = reader.DocValueReader(sortFields); err != nil {
			return err
		}
		visit = func(field string, term []byte) {
			req.Sort.UpdateVisitor(field, term)
		}
	}

	for pos := range knnHits {
		match := knnHits[pos]
		if needDocValues {
			// feed this hit's doc values to the sort order first
			if err = docValues.VisitDocValues(match.IndexInternalID, visit); err != nil {
				return err
			}
		}
		req.Sort.Value(match)
		if err, _ = LoadAndHighlightFields(match, req, "", reader, nil); err != nil {
			return err
		}
	}
	return nil
}

func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) ([]*search.DocumentMatch, error) {
KNNQuery, kArray, sumOfK, err := createKNNQuery(req)
if err != nil {
Expand All @@ -266,6 +296,14 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea
if !preSearch {
knnHits = finalizeKNNResults(req, knnHits, len(req.KNN))
}
// at this point, irrespective of whether it is a presearch or not,
// the knn hits are populated with Sort and Fields.
// it must be ensured downstream that the Sort and Fields are not
// re-evaluated, for these hits.
err = addSortAndFieldsToKNNHits(req, knnHits, reader)
if err != nil {
return nil, err
}
return knnHits, nil
}

Expand Down Expand Up @@ -321,7 +359,7 @@ func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch, num
return knnHits
}

func mergeKNNDocumentMatches(req *SearchRequest, knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) {
func mergeKNNDocumentMatches(req *SearchRequest, knnHits []*search.DocumentMatch) []*search.DocumentMatch {
kArray := make([]int64, len(req.KNN))
for i, knnReq := range req.KNN {
kArray[i] = knnReq.K
Expand All @@ -334,7 +372,7 @@ func mergeKNNDocumentMatches(req *SearchRequest, knnHits []*search.DocumentMatch
// fixup the document, since this was already done in the first phase.
// hence error is always nil.
mergedKNNhits, _ := knnStore.Final(nil)
return validateAndDistributeKNNHits(finalizeKNNResults(req, mergedKNNhits, len(req.KNN)), indexes)
return finalizeKNNResults(req, mergedKNNhits, len(req.KNN))
}

// when we are setting KNN hits in the preSearchData, we need to make sure that
Expand Down Expand Up @@ -383,10 +421,102 @@ func requestHasKNN(req *SearchRequest) bool {
return len(req.KNN) > 0
}

// isKNNrequestSatisfiedByPreSearch reports whether a request can be answered
// from the presearch phase alone, bypassing the actual search entirely.
//
// This holds only when both conditions are met:
//   - req.Query is a match_none query; any other query has to be executed
//     in phase 2.
//   - req.Facets is nil; facet results have to be computed in phase 2,
//     because the merge that follows phase 1 removes some documents and
//     facet counts calculated during phase 1 would therefore be incorrect.
func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool {
	switch req.Query.(type) {
	case *query.MatchNoneQuery:
		// match_none query: the presearch result suffices unless facets
		// were requested
		return req.Facets == nil
	default:
		// a real query still has to be run in phase 2
		return false
	}
}

// constructKnnPresearchData stores the presearch KNN hits into mergedOut,
// per index, under search.KnnPreSearchDataKey.
//
// The hits are split up by validateAndDistributeKNNHits; if that fails, the
// error is returned along with a nil map. mergedOut must already contain an
// inner map for every index name, since entries are written into it directly.
func constructKnnPresearchData(mergedOut map[string]map[string]interface{}, preSearchResult *SearchResult,
	indexes []Index) (map[string]map[string]interface{}, error) {

	knnHits := []*search.DocumentMatch(preSearchResult.Hits)
	hitsByIndex, err := validateAndDistributeKNNHits(knnHits, indexes)
	if err != nil {
		return nil, err
	}
	for pos := range indexes {
		name := indexes[pos].Name()
		mergedOut[name][search.KnnPreSearchDataKey] = hitsByIndex[name]
	}
	return mergedOut, nil
}

// constructKNNSearchResult builds the final search result out of the merged
// KNN hits when the request is satisfied by the presearch phase, so that the
// actual search can be bypassed.
//
// The steps, applied to preSearchResult.Hits in place, are:
//  1. if SearchAfter or SearchBefore is set, keep only the hits that sort
//     after (respectively before) the supplied key;
//  2. sort the remaining hits with the requested sort order;
//  3. skip the first req.From hits;
//  4. trim to at most req.Size hits (only when req.Size > 0).
//
// The modified preSearchResult is returned.
func constructKNNSearchResult(req *SearchRequest, preSearchResult *SearchResult) *SearchResult {
	if req.SearchAfter != nil || req.SearchBefore != nil {
		// whichever pagination key was supplied; SearchAfter wins if both are set
		searchKey := req.SearchAfter
		if searchKey == nil {
			searchKey = req.SearchBefore
		}
		// the dummy document must be allocated: assigning fields through a
		// nil *search.DocumentMatch panics
		dummyDoc := &search.DocumentMatch{Sort: searchKey}
		for pos, ss := range req.Sort {
			if pos >= len(searchKey) {
				// key is shorter than the sort order; nothing left to read
				break
			}
			if ss.RequiresDocID() {
				dummyDoc.ID = searchKey[pos]
			}
			if ss.RequiresScoring() {
				if score, err := strconv.ParseFloat(searchKey[pos], 64); err == nil {
					dummyDoc.Score = score
				}
			}
		}
		// these do not change between hits, so compute them once
		cachedScoring := req.Sort.CacheIsScore()
		cachedDesc := req.Sort.CacheDescending()
		numDocs := 0
		for _, hit := range preSearchResult.Hits {
			// give the dummy the same hit number as the hit under comparison
			dummyDoc.HitNumber = hit.HitNumber
			cmp := req.Sort.Compare(cachedScoring, cachedDesc, hit, dummyDoc)
			if (req.SearchAfter != nil && cmp > 0) || (req.SearchBefore != nil && cmp < 0) {
				// compact the surviving hits towards the front of the slice
				preSearchResult.Hits[numDocs] = hit
				numDocs++
			}
		}
		preSearchResult.Hits = preSearchResult.Hits[:numDocs]
	}
	sortFunc := req.SortFunc()
	// sort all hits with the requested order
	if len(req.Sort) > 0 {
		sorter := newSearchHitSorter(req.Sort, preSearchResult.Hits)
		sortFunc(sorter)
	}
	// now skip over the correct From
	if req.From > 0 && len(preSearchResult.Hits) > req.From {
		preSearchResult.Hits = preSearchResult.Hits[req.From:]
	} else if req.From > 0 {
		preSearchResult.Hits = search.DocumentMatchCollection{}
	}
	// now trim to the correct size
	if req.Size > 0 && len(preSearchResult.Hits) > req.Size {
		preSearchResult.Hits = preSearchResult.Hits[0:req.Size]
	}
	return preSearchResult
}

// addKnnToDummyRequest copies the KNN-related settings of realReq onto
// dummyReq: the KNN requests themselves, plus Explain, Fields and Sort.
// The dummy request's KNN operator is always set to knnOperatorOr,
// regardless of the operator on realReq.
func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
	// KNN settings
	dummyReq.KNNOperator = knnOperatorOr
	dummyReq.KNN = realReq.KNN

	// settings applied to the resulting hits
	dummyReq.Sort = realReq.Sort
	dummyReq.Fields = realReq.Fields
	dummyReq.Explain = realReq.Explain
}

// the preSearchData for KNN is a list of DocumentMatch objects
Expand Down
Loading

0 comments on commit c227083

Please sign in to comment.