diff --git a/go.mod b/go.mod
index e82187e..155e222 100644
--- a/go.mod
+++ b/go.mod
@@ -4,7 +4,7 @@ go 1.21
 
 require (
 	github.com/RoaringBitmap/roaring v1.9.3
-	github.com/blevesearch/bleve_index_api v1.1.12
+	github.com/blevesearch/bleve_index_api v1.2.0
 )
 
 require (
diff --git a/go.sum b/go.sum
index 433da92..9b45c05 100644
--- a/go.sum
+++ b/go.sum
@@ -2,8 +2,8 @@ github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4
 github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
 github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
 github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
-github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+5kexNy1RXfegY=
-github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
+github.com/blevesearch/bleve_index_api v1.2.0 h1:/DXMMWBwx/UmGKM1xDhTwDoJI5yQrG6rqRWPFcOgUVo=
+github.com/blevesearch/bleve_index_api v1.2.0/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
diff --git a/segment.go b/segment.go
index 8e4a3d9..a6d1183 100644
--- a/segment.go
+++ b/segment.go
@@ -178,3 +178,59 @@ type FieldStats interface {
 	Aggregate(stats FieldStats)
 	Fetch() map[string]map[string]uint64
 }
+
+// ThesaurusSegment extends Segment with by-name access to a thesaurus held within that segment of the index.
+type ThesaurusSegment interface {
+	Segment // embedded: every ThesaurusSegment also provides the full Segment method set
+	// Thesaurus returns the Thesaurus with the specified name, or an error.
+	Thesaurus(name string) (Thesaurus, error) // NOTE(review): result for an unknown name (nil vs. error) is not specified here; confirm
+}
+
+// Thesaurus encapsulates a structured collection of terms and their associated synonyms.
+type Thesaurus interface {
+	// SynonymsList retrieves the list of synonyms for the specified term. The `except` bitmap
+	// excludes specific synonyms, such as those originating from deleted documents. The `prealloc`
+	// parameter lets the caller pass in a previously obtained SynonymsList so its memory can be reused.
+	SynonymsList(term []byte, except *roaring.Bitmap, prealloc SynonymsList) (SynonymsList, error)
+
+	// AutomatonIterator creates an iterator over the thesaurus keys/terms accepted by the provided automaton.
+	// The iteration is constrained to the key range [startKeyInclusive, endKeyExclusive).
+	// The keys iterated are the terms that have a SynonymsList associated with them in the thesaurus.
+	AutomatonIterator(a Automaton, startKeyInclusive, endKeyExclusive []byte) ThesaurusIterator
+
+	// Contains reports whether the given key exists in the thesaurus, or an error if the lookup fails.
+	Contains(key []byte) (bool, error)
+}
+
+// ThesaurusIterator iterates over the terms (keys) of a thesaurus.
+type ThesaurusIterator interface {
+	// Next returns the next entry in the thesaurus or an error if iteration fails.
+	Next() (*index.ThesaurusEntry, error) // NOTE(review): end-of-iteration signal (presumably a nil entry) is not specified here; confirm
+}
+
+// SynonymsList represents the list of synonyms for a single term.
+type SynonymsList interface {
+	// Iterator returns an iterator to traverse the list of synonyms.
+	// The `prealloc` parameter can be used to reuse an existing iterator's memory.
+	Iterator(prealloc SynonymsIterator) SynonymsIterator
+
+	Size() int // NOTE(review): undocumented; presumably the in-memory footprint in bytes; confirm with implementations
+}
+
+// SynonymsIterator provides a mechanism to iterate over a list of synonyms.
+type SynonymsIterator interface {
+	// Next returns the next synonym in the list or an error if iteration fails.
+	Next() (Synonym, error) // NOTE(review): end-of-iteration signal (presumably a nil Synonym) is not specified here; confirm
+
+	Size() int // NOTE(review): undocumented; presumably the in-memory footprint in bytes; confirm with implementations
+}
+
+// Synonym represents a single synonym for a term in the thesaurus.
+type Synonym interface {
+	// Number returns the document number from which the synonym originates.
+	Number() uint32
+	// Term returns the textual representation of the synonym.
+	Term() string
+
+	Size() int // NOTE(review): undocumented; presumably the in-memory footprint in bytes; confirm with implementations
+}