diff --git a/index_alias_impl.go b/index_alias_impl.go index 3c7cdcd32..e0d2d6948 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -16,6 +16,7 @@ package bleve import ( "context" + "fmt" "sync" "time" @@ -31,6 +32,10 @@ type indexAliasImpl struct { indexes []Index mutex sync.RWMutex open bool + // if all the indexes in tha alias have the same mapping + // then the user can set the mapping here to avoid + // checking the mapping of each index in the alias + mapping mapping.IndexMapping } // NewIndexAlias creates a new IndexAlias over the provided @@ -168,7 +173,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // indicates that this index alias is set as an Index // in another alias, so we need to do a preSearch search // and NOT a real search - return preSearchDataSearch(ctx, req, i.indexes...) + flags := &preSearchFlags{ + knn: requestHasKNN(req), // set knn flag if the request has KNN + } + return preSearchDataSearch(ctx, req, flags, i.indexes...) } // at this point we know we are doing a real search @@ -182,12 +190,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // if necessary var preSearchData map[string]map[string]interface{} if req.PreSearchData != nil { - if requestHasKNN(req) { - var err error - preSearchData, err = redistributeKNNPreSearchData(req, i.indexes) - if err != nil { - return nil, err - } + var err error + preSearchData, err = redistributePreSearchData(req, i.indexes) + if err != nil { + return nil, err } } @@ -208,9 +214,10 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest // - the request requires preSearch var preSearchDuration time.Duration var sr *SearchResult - if req.PreSearchData == nil && preSearchRequired(req) { + flags := preSearchRequired(req, i.mapping) + if req.PreSearchData == nil && flags != nil { searchStart := time.Now() - preSearchResult, err := preSearch(ctx, req, i.indexes...) + preSearchResult, err := preSearch(ctx, req, flags, i.indexes...) if err != nil { return nil, err } @@ -221,17 +228,17 @@ func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest return preSearchResult, nil } // finalize the preSearch result now - finalizePreSearchResult(req, preSearchResult) + finalizePreSearchResult(req, flags, preSearchResult) // if there are no errors, then merge the data in the preSearch result // and construct the preSearchData to be used in the actual search // if the request is satisfied by the preSearch result, then we can // directly return the preSearch result as the final result - if requestSatisfiedByPreSearch(req) { + if requestSatisfiedByPreSearch(req, flags) { sr = finalizeSearchResult(req, preSearchResult) // no need to run the 2nd phase MultiSearch(..) } else { - preSearchData, err = constructPreSearchData(req, preSearchResult, i.indexes) + preSearchData, err = constructPreSearchData(req, flags, preSearchResult, i.indexes) if err != nil { return nil, err } @@ -352,6 +359,20 @@ func (i *indexAliasImpl) Close() error { return nil } +// SetIndexMapping sets the mapping for the alias and must be used +// ONLY when all the indexes in the alias have the same mapping. +// This is to avoid checking the mapping of each index in the alias +// when executing a search request. +func (i *indexAliasImpl) SetIndexMapping(m mapping.IndexMapping) error { + i.mutex.Lock() + defer i.mutex.Unlock() + if !i.open { + return ErrorIndexClosed + } + i.mapping = m + return nil +} + func (i *indexAliasImpl) Mapping() mapping.IndexMapping { i.mutex.RLock() defer i.mutex.RUnlock() @@ -360,6 +381,11 @@ func (i *indexAliasImpl) Mapping() mapping.IndexMapping { return nil } + // if the mapping is already set, return it + if i.mapping != nil { + return i.mapping + } + err := i.isAliasToSingleIndex() if err != nil { return nil @@ -520,21 +546,35 @@ type asyncSearchResult struct { Err error } -func preSearchRequired(req *SearchRequest) bool { - return requestHasKNN(req) +// preSearchFlags is a struct to hold flags indicating why preSearch is required +type preSearchFlags struct { + knn bool +} + +// preSearchRequired checks if preSearch is required and returns the presearch flags struct +// indicating which preSearch is required +func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags { + // Check for KNN query + knn := requestHasKNN(req) + if knn { + return &preSearchFlags{ + knn: knn, + } + } + return nil } -func preSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) { +func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) { // create a dummy request with a match none query // since we only care about the preSearchData in PreSearch dummyRequest := &SearchRequest{ Query: query.NewMatchNoneQuery(), } newCtx := context.WithValue(ctx, search.PreSearchKey, true) - if requestHasKNN(req) { + if flags.knn { addKnnToDummyRequest(dummyRequest, req) } - return preSearchDataSearch(newCtx, dummyRequest, indexes...) + return preSearchDataSearch(newCtx, dummyRequest, flags, indexes...) } // if the request is satisfied by just the preSearch result, @@ -585,20 +625,26 @@ func finalizeSearchResult(req *SearchRequest, preSearchResult *SearchResult) *Se return preSearchResult } -func requestSatisfiedByPreSearch(req *SearchRequest) bool { - if requestHasKNN(req) && isKNNrequestSatisfiedByPreSearch(req) { +func requestSatisfiedByPreSearch(req *SearchRequest, flags *preSearchFlags) bool { + if flags == nil { + return false + } + if flags.knn && isKNNrequestSatisfiedByPreSearch(req) { return true } return false } -func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) { +func constructPreSearchData(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult, indexes []Index) (map[string]map[string]interface{}, error) { + if flags == nil || preSearchResult == nil { + return nil, fmt.Errorf("invalid input, flags: %v, preSearchResult: %v", flags, preSearchResult) + } mergedOut := make(map[string]map[string]interface{}, len(indexes)) for _, index := range indexes { mergedOut[index.Name()] = make(map[string]interface{}) } var err error - if requestHasKNN(req) { + if flags.knn { mergedOut, err = constructKnnPreSearchData(mergedOut, preSearchResult, indexes) if err != nil { return nil, err @@ -607,7 +653,7 @@ func constructPreSearchData(req *SearchRequest, preSearchResult *SearchResult, i return mergedOut, nil } -func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*SearchResult, error) { +func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) { asyncResults := make(chan *asyncSearchResult, len(indexes)) // run search on each index in separate go routine var waitGroup sync.WaitGroup @@ -638,7 +684,7 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Ind if prp == nil { // first valid preSearch result // create a new preSearch result processor - prp = createPreSearchResultProcessor(req) + prp = createPreSearchResultProcessor(req, flags) } prp.add(asr.Result, asr.Name) if sr == nil { @@ -684,6 +730,45 @@ func preSearchDataSearch(ctx context.Context, req *SearchRequest, indexes ...Ind return sr, nil } +// redistributePreSearchData redistributes the preSearchData sent in the search request to an index alias +// which would happen in the case of an alias tree and depending on the level of the tree, the preSearchData +// needs to be redistributed to the indexes at that level +func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) { + rv := make(map[string]map[string]interface{}) + for _, index := range indexes { + rv[index.Name()] = make(map[string]interface{}) + } + if knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch); ok { + // the preSearchData for KNN is a list of DocumentMatch objects + // that need to be redistributed to the right index. + // This is used only in the case of an alias tree, where the indexes + // are at the leaves of the tree, and the master alias is at the root. + // At each level of the tree, the preSearchData needs to be redistributed + // to the indexes/aliases at that level. Because the preSearchData is + // specific to each final index at the leaf. + segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes) + if err != nil { + return nil, err + } + for _, index := range indexes { + rv[index.Name()][search.KnnPreSearchDataKey] = segregatedKnnHits[index.Name()] + } + } + return rv, nil +} + +// finalizePreSearchResult finalizes the preSearch result by applying the finalization steps +// specific to the preSearch flags +func finalizePreSearchResult(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult) { + // if flags is nil then return + if flags == nil { + return + } + if flags.knn { + preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits) + } +} + // hitsInCurrentPage returns the hits in the current page // using the From and Size parameters in the request func hitsInCurrentPage(req *SearchRequest, hits []*search.DocumentMatch) []*search.DocumentMatch { diff --git a/pre_search.go b/pre_search.go index c8c55bfbc..eebf824ab 100644 --- a/pre_search.go +++ b/pre_search.go @@ -26,6 +26,8 @@ type preSearchResultProcessor interface { finalize(*SearchResult) } +// ----------------------------------------------------------------------------- +// KNN preSearchResultProcessor for handling KNN presearch results type knnPreSearchResultProcessor struct { addFn func(sr *SearchResult, indexName string) finalizeFn func(sr *SearchResult) @@ -44,16 +46,49 @@ func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) { } // ----------------------------------------------------------------------------- +// Master struct that can hold any number of presearch result processors +type compositePreSearchResultProcessor struct { + presearchResultProcessors []preSearchResultProcessor +} + +// Implements the add method, which forwards to all the internal processors +func (m *compositePreSearchResultProcessor) add(sr *SearchResult, indexName string) { + for _, p := range m.presearchResultProcessors { + p.add(sr, indexName) + } +} -func finalizePreSearchResult(req *SearchRequest, preSearchResult *SearchResult) { - if requestHasKNN(req) { - preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits) +// Implements the finalize method, which forwards to all the internal processors +func (m *compositePreSearchResultProcessor) finalize(sr *SearchResult) { + for _, p := range m.presearchResultProcessors { + p.finalize(sr) } } -func createPreSearchResultProcessor(req *SearchRequest) preSearchResultProcessor { - if requestHasKNN(req) { - return newKnnPreSearchResultProcessor(req) +// ----------------------------------------------------------------------------- +// Function to create the appropriate preSearchResultProcessor(s) +func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) preSearchResultProcessor { + // return nil for invalid input + if flags == nil || req == nil { + return nil + } + var processors []preSearchResultProcessor + // Add KNN processor if the request has KNN + if flags.knn { + if knnProcessor := newKnnPreSearchResultProcessor(req); knnProcessor != nil { + processors = append(processors, knnProcessor) + } + } + // Return based on the number of processors, optimizing for the common case of 1 processor + // If there are no processors, return nil + switch len(processors) { + case 0: + return nil + case 1: + return processors[0] + default: + return &compositePreSearchResultProcessor{ + presearchResultProcessors: processors, + } } - return &knnPreSearchResultProcessor{} // equivalent to nil } diff --git a/search.go b/search.go index 7861d24b8..402109e05 100644 --- a/search.go +++ b/search.go @@ -589,3 +589,8 @@ func (r *SearchRequest) SortFunc() func(data sort.Interface) { return sort.Sort } + +func isMatchNoneQuery(q query.Query) bool { + _, ok := q.(*query.MatchNoneQuery) + return ok +} diff --git a/search/query/query.go b/search/query/query.go index d263a0e54..9cbf5c53b 100644 --- a/search/query/query.go +++ b/search/query/query.go @@ -423,3 +423,57 @@ func DumpQuery(m mapping.IndexMapping, query Query) (string, error) { data, err := json.MarshalIndent(q, "", " ") return string(data), err } + +// FieldSet represents a set of queried fields. +type FieldSet map[string]struct{} + +// ExtractFields returns a set of fields referenced by the query. +// The returned set may be nil if the query does not explicitly reference any field +// and the DefaultSearchField is unset in the index mapping. +func ExtractFields(q Query, m mapping.IndexMapping, fs FieldSet) (FieldSet, error) { + if q == nil || m == nil { + return fs, nil + } + var err error + switch q := q.(type) { + case FieldableQuery: + f := q.Field() + if f == "" { + f = m.DefaultSearchField() + } + if f != "" { + if fs == nil { + fs = make(FieldSet) + } + fs[f] = struct{}{} + } + case *QueryStringQuery: + var expandedQuery Query + expandedQuery, err = expandQuery(m, q) + if err == nil { + fs, err = ExtractFields(expandedQuery, m, fs) + } + case *BooleanQuery: + for _, subq := range []Query{q.Must, q.Should, q.MustNot} { + fs, err = ExtractFields(subq, m, fs) + if err != nil { + break + } + } + case *ConjunctionQuery: + for _, subq := range q.Conjuncts { + fs, err = ExtractFields(subq, m, fs) + if err != nil { + break + } + } + case *DisjunctionQuery: + for _, subq := range q.Disjuncts { + fs, err = ExtractFields(subq, m, fs) + if err != nil { + break + } + } + } + return fs, err +} diff --git a/search/query/query_test.go b/search/query/query_test.go index 0028c956b..60c1fa374 100644 --- a/search/query/query_test.go +++ b/search/query/query_test.go @@ -16,6 +16,7 @@ package query import ( "reflect" + "sort" "strings" "testing" "time" @@ -785,3 +786,257 @@ func TestParseEmptyQuery(t *testing.T) { t.Errorf("[2] Expected %#v, got %#v", expect, rv) } } + +func TestExtractFields(t *testing.T) { + testQueries := []struct { + query string + expFields []string + }{ + { + query: `{"term":"water","field":"desc"}`, + expFields: []string{"desc"}, + }, + { + query: `{ + "must": { + "conjuncts": [ + { + "match": "water", + "prefix_length": 0, + "fuzziness": 0 + } + ] + }, + "should": { + "disjuncts": [ + { + "match": "beer", + "prefix_length": 0, + "fuzziness": 0 + } + ], + "min": 0 + }, + "must_not": { + "disjuncts": [ + { + "match": "light", + "prefix_length": 0, + "fuzziness": 0 + } + ], + "min": 0 + } + }`, + expFields: []string{"_all"}, + }, + { + query: `{ + "must": { + "conjuncts": [ + { + "match": "water", + "prefix_length": 0, + "field": "desc", + "fuzziness": 0 + } + ] + }, + "should": { + "disjuncts": [ + { + "match": "beer", + "prefix_length": 0, + "field": "desc", + "fuzziness": 0 + } + ], + "min": 0 + }, + "must_not": { + "disjuncts": [ + { + "match": "light", + "prefix_length": 0, + "field": "genre", + "fuzziness": 0 + } + ], + "min": 0 + } + }`, + expFields: []string{"desc", "genre"}, + }, + { + query: ` + { + "conjuncts": [ + { + "conjuncts": [ + { + "conjuncts": [ + { + "conjuncts": [ + { + "field": "date", + "start": "2002-09-05T08:09:00Z", + "end": "2007-03-01T03:52:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 1260295, + "max": 3917314, + "inclusive_min": true, + "inclusive_max": true + } + ] + }, + { + "conjuncts": [ + { + "field": "date2", + "start": "2004-08-21T18:30:00Z", + "end": "2006-03-24T08:08:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 165449, + "max": 3847517, + "inclusive_min": true, + "inclusive_max": true + } + ] + } + ] + }, + { + "conjuncts": [ + { + "conjuncts": [ + { + "field": "date", + "start": "2004-09-02T22:15:00Z", + "end": "2008-06-22T15:06:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number2", + "min": 876843, + "max": 3363351, + "inclusive_min": true, + "inclusive_max": true + } + ] + }, + { + "conjuncts": [ + { + "field": "date", + "start": "2000-12-03T21:35:00Z", + "end": "2008-02-07T05:00:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 2021479, + "max": 4763404, + "inclusive_min": true, + "inclusive_max": true + } + ] + } + ] + } + ] + }, + { + "conjuncts": [ + { + "conjuncts": [ + { + "field": "date3", + "start": "2000-03-13T07:13:00Z", + "end": "2005-09-19T09:33:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 883125, + "max": 4817433, + "inclusive_min": true, + "inclusive_max": true + } + ] + }, + { + "conjuncts": [ + { + "field": "date", + "start": "2002-08-10T22:42:00Z", + "end": "2008-02-10T23:19:00Z", + "inclusive_start": true, + "inclusive_end": true + }, + { + "field": "number", + "min": 896115, + "max": 3897074, + "inclusive_min": true, + "inclusive_max": true + } + ] + } + ] + } + ] + }`, + expFields: []string{"date", "number", "date2", "number2", "date3"}, + }, + { + query: `{ + "query" : "hardworking people" + }`, + expFields: []string{"_all"}, + }, + { + query: `{ + "query" : "text:hardworking people" + }`, + expFields: []string{"text", "_all"}, + }, + { + query: `{ + "query" : "text:\"hardworking people\"" + }`, + expFields: []string{"text"}, + }, + } + + m := mapping.NewIndexMapping() + for i, test := range testQueries { + q, err := ParseQuery([]byte(test.query)) + if err != nil { + t.Fatal(err) + } + fields, err := ExtractFields(q, m, nil) + if err != nil { + t.Fatal(err) + } + var fieldsSlice []string + for k := range fields { + fieldsSlice = append(fieldsSlice, k) + } + sort.Strings(test.expFields) + sort.Strings(fieldsSlice) + if !reflect.DeepEqual(fieldsSlice, test.expFields) { + t.Errorf("Test %d: expected %v, got %v", i, test.expFields, fieldsSlice) + } + } +} diff --git a/search_knn.go b/search_knn.go index 309b36593..e5fd595d4 100644 --- a/search_knn.go +++ b/search_knn.go @@ -381,7 +381,7 @@ func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, rea continue } - if _, ok := filterQ.(*query.MatchNoneQuery); ok { + if isMatchNoneQuery(filterQ) { // Filtering required since no hits are eligible. requiresFiltering[idx] = true // a match none query just means none the documents are eligible @@ -559,7 +559,7 @@ func requestHasKNN(req *SearchRequest) bool { func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool { // if req.Query is not match_none => then we need to go to phase 2 // to perform the actual query. - if _, ok := req.Query.(*query.MatchNoneQuery); !ok { + if !isMatchNoneQuery(req.Query) { return false } // req.Query is a match_none query @@ -598,41 +598,6 @@ func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) { dummyReq.Sort = realReq.Sort } -// the preSearchData for KNN is a list of DocumentMatch objects -// that need to be redistributed to the right index. -// This is used only in the case of an alias tree, where the indexes -// are at the leaves of the tree, and the master alias is at the root. -// At each level of the tree, the preSearchData needs to be redistributed -// to the indexes/aliases at that level. Because the preSearchData is -// specific to each final index at the leaf. -func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) { - knnHits, ok := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch) - if !ok { - return nil, fmt.Errorf("request does not have knn preSearchData for redistribution") - } - segregatedKnnHits, err := validateAndDistributeKNNHits(knnHits, indexes) - if err != nil { - return nil, err - } - - rv := make(map[string]map[string]interface{}) - for _, index := range indexes { - rv[index.Name()] = make(map[string]interface{}) - } - - for _, index := range indexes { - for k, v := range req.PreSearchData { - switch k { - case search.KnnPreSearchDataKey: - rv[index.Name()][k] = segregatedKnnHits[index.Name()] - default: - rv[index.Name()][k] = v - } - } - } - return rv, nil -} - func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor { kArray := make([]int64, len(req.KNN)) for i, knnReq := range req.KNN { diff --git a/search_no_knn.go b/search_no_knn.go index bb72e15a9..c91980589 100644 --- a/search_no_knn.go +++ b/search_no_knn.go @@ -187,7 +187,7 @@ func requestHasKNN(req *SearchRequest) bool { func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) { } -func redistributeKNNPreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) { +func validateAndDistributeKNNHits(knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) { return nil, nil }