diff --git a/doc_test.go b/doc_test.go index ffc8da9..3a4ebd1 100644 --- a/doc_test.go +++ b/doc_test.go @@ -179,7 +179,11 @@ func (s *stubField) Compose(field string, length int, freq index.TokenFrequencie // ----------------------------------------------------------------------------- type stubSynonymField struct { - name string + name string + analyzer string + input []string + synonyms []string + synonymMap map[string][]string } @@ -200,7 +204,18 @@ func (s *stubSynonymField) EncodedFieldType() byte { } func (s *stubSynonymField) Analyze() { - + var analyzedInput []string + if len(s.input) > 0 { + analyzedInput = make([]string, 0, len(s.input)) + for _, term := range s.input { + analyzedInput = append(analyzedInput, analyzeStubTerm(term, s.analyzer)) + } + } + analyzedSynonyms := make([]string, 0, len(s.synonyms)) + for _, syn := range s.synonyms { + analyzedSynonyms = append(analyzedSynonyms, analyzeStubTerm(syn, s.analyzer)) + } + s.synonymMap = processSynonymData(analyzedInput, analyzedSynonyms) } func (s *stubSynonymField) Options() index.FieldIndexingOptions { @@ -219,30 +234,46 @@ func (s *stubSynonymField) NumPlainTextBytes() uint64 { return 0 } -func (sf *stubSynonymField) VisitSynonymDefinitions(visitor func(term string, synonyms []string)) { +func (sf *stubSynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) { for term, synonyms := range sf.synonymMap { visitor(term, synonyms) } } -func analyzeStubTerm(term string) string { +func processSynonymData(input []string, synonyms []string) map[string][]string { + var synonymMap map[string][]string + if len(input) > 0 { + // Map each term to the same list of synonyms. + synonymMap = make(map[string][]string, len(input)) + for _, term := range input { + synonymMap[term] = append([]string(nil), synonyms...) // Avoid sharing slices. + } + } else { + synonymMap = make(map[string][]string, len(synonyms)) + // Precompute a map where each synonym points to all other synonyms. + for i, elem := range synonyms { + synonymMap[elem] = make([]string, 0, len(synonyms)-1) + for j, otherElem := range synonyms { + if i != j { + synonymMap[elem] = append(synonymMap[elem], otherElem) + } + } + } + } + return synonymMap +} + +func analyzeStubTerm(term string, analyzer string) string { lowerCaseTerm := strings.ToLower(term) return lowerCaseTerm } -func newStubSynonymField(name, analyzer string, synonymMap map[string][]string) index.SynonymField { - analyzedSynonymDefs := make(map[string][]string, len(synonymMap)) - for term, synonyms := range synonymMap { - analyzedTerm := analyzeStubTerm(term) - analyzedSynonyms := make([]string, 0, len(synonyms)) - for _, syn := range synonyms { - analyzedSynonyms = append(analyzedSynonyms, analyzeStubTerm(syn)) - } - analyzedSynonymDefs[analyzedTerm] = analyzedSynonyms - } +func newStubSynonymField(name string, analyzer string, input []string, synonyms []string) index.SynonymField { return &stubSynonymField{ - name: name, - synonymMap: analyzedSynonymDefs, + name: name, + analyzer: analyzer, + input: input, + synonyms: synonyms, } } diff --git a/section_synonym.go b/section_synonym.go index 6828028..ef237bc 100644 --- a/section_synonym.go +++ b/section_synonym.go @@ -143,7 +143,7 @@ func (so *synonymIndexOpaque) process(field index.SynonymField, fieldID uint16, termSynMap := so.SynonymTermToID[tid] - field.VisitSynonymDefinitions(func(term string, synonyms []string) { + field.IterateSynonyms(func(term string, synonyms []string) { pid := thesaurus[term] - 1 bs := so.Synonyms[pid] @@ -183,7 +183,7 @@ func (so *synonymIndexOpaque) realloc() { termSynMap := so.SynonymTermToID[thesaurusID] - synField.VisitSynonymDefinitions(func(term string, synonyms []string) { + synField.IterateSynonyms(func(term string, synonyms []string) { _, exists := thesaurus[term] if !exists { pidNext++ diff --git a/thesaurus_test.go b/thesaurus_test.go index a545c2f..f4e6b99 100644 --- a/thesaurus_test.go +++ b/thesaurus_test.go @@ -27,33 +27,15 @@ import ( segment "github.com/blevesearch/scorch_segment_api/v2" ) -func createEquivalentSynonymMap(input []string, resultMap map[string][]string) map[string][]string { - if resultMap == nil { - resultMap = make(map[string][]string) - } - for _, elem := range input { - for _, otherElem := range input { - if elem != otherElem { - resultMap[elem] = append(resultMap[elem], otherElem) - } - } - } - return resultMap -} - -func buildTestSynonymDocument(id string, collection string, terms []string, synonyms []string) index.Document { - var synonymMap map[string][]string - if terms == nil { - numEntries := len(synonyms) * (len(synonyms) - 1) - synonymMap = make(map[string][]string, numEntries) - synonymMap = createEquivalentSynonymMap(synonyms, synonymMap) - } else { - synonymMap = make(map[string][]string, len(terms)) - for _, term := range terms { - synonymMap[term] = synonyms - } - } - synDoc := newStubSynonymDocument(id, newStubSynonymField(collection, "standard", synonymMap)) +func buildTestSynonymDocument(id string, synonymSource string, terms []string, synonyms []string) index.Document { + // Create the synonym document using stubs. + stubAnalyzer := "standard" + // Create the synonym field. + synField := newStubSynonymField(synonymSource, stubAnalyzer, terms, synonyms) + // Analyze the synonyms. + synField.Analyze() + // Create the synonym document. + synDoc := newStubSynonymDocument(id, synField) synDoc.AddIDField() return synDoc } @@ -122,7 +104,7 @@ func checkWithDeletes(except *roaring.Bitmap, collectionName string, testSynonym if len(synonyms) != len(expectedSynonyms) { return errors.New("unexpected number of synonyms, expected: " + strconv.Itoa(len(expectedSynonyms)) + " got: " + - strconv.Itoa(len(synonyms)) + " for term: " + term + " when excepting: " + except.String()) + strconv.Itoa(len(synonyms)) + " for term: " + term) } sort.Strings(synonyms) sort.Strings(expectedSynonyms) @@ -167,6 +149,20 @@ type testSynonymDefinition struct { synonyms []string } +func createEquivalentSynonymMap(input []string, resultMap map[string][]string) map[string][]string { + if resultMap == nil { + resultMap = make(map[string][]string) + } + for _, elem := range input { + for _, otherElem := range input { + if elem != otherElem { + resultMap[elem] = append(resultMap[elem], otherElem) + } + } + } + return resultMap +} + func createExpectedSynonymMap(input []testSynonymDefinition) map[string][]string { rv := make(map[string][]string) for _, testSynonymDefinition := range input {