-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy paththesaurus.go
159 lines (135 loc) · 4.02 KB
/
thesaurus.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zap
import (
"bytes"
"fmt"
"github.com/RoaringBitmap/roaring"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
"github.com/blevesearch/vellum"
)
// Thesaurus is the zap representation of a Thesaurus.
//
// The zero value is usable: with no FST attached, every lookup is a miss.
type Thesaurus struct {
	// sb is the owning segment; it is handed to every SynonymsList
	// initialised by this thesaurus.
	sb *SegmentBase

	// name and fieldID are not read by the methods in this file.
	// NOTE(review): presumably the thesaurus name and the ID of the field
	// it is stored under - confirm against the loader.
	name    string
	fieldID uint16

	// synIDTermMap is shared with every SynonymsList initialised by this
	// thesaurus.
	// NOTE(review): presumably maps a synonym ID to its term bytes - confirm.
	synIDTermMap map[uint32][]byte

	// fst backs Contains and AutomatonIterator; nil means there are no terms.
	fst *vellum.FST

	// fstReader performs the per-term lookups in synonymsList; nil means
	// every term is treated as absent.
	fstReader *vellum.Reader
}
// emptyThesaurus represents an immutable, empty Thesaurus: it has no FST
// and no FST reader, so every lookup against it is a miss.
var emptyThesaurus = new(Thesaurus)
// SynonymsList returns the synonyms list for the specified term.
//
// except is forwarded unchanged to the internal lookup. prealloc, when it
// holds a non-nil *SynonymsList, is reused for the result instead of
// allocating a new list; any other value (including nil) is ignored.
//
// On failure the returned list is a true nil interface, so callers may
// safely compare it against nil as well as checking the error.
func (t *Thesaurus) SynonymsList(term []byte, except *roaring.Bitmap, prealloc segment.SynonymsList) (segment.SynonymsList, error) {
	// A failed assertion leaves preallocSL as a nil *SynonymsList, which is
	// exactly the "nothing to reuse" signal synonymsList expects.
	preallocSL, _ := prealloc.(*SynonymsList)

	sl, err := t.synonymsList(term, except, preallocSL)
	if err != nil {
		// Return a literal nil rather than the typed nil pointer: a nil
		// *SynonymsList stored in the interface would compare != nil.
		return nil, err
	}
	return sl, nil
}
// synonymsList looks term up in the thesaurus FST and returns its synonyms
// list, reusing rv when it is a usable (non-nil, non-sentinel) list.
//
// When the thesaurus has no FST reader, or the term is not present, the
// result is emptySynonymsList if there is nothing to reuse, otherwise rv
// re-initialised to an empty state. A lookup failure is returned as an error
// that wraps the underlying vellum error.
func (t *Thesaurus) synonymsList(term []byte, except *roaring.Bitmap, rv *SynonymsList) (*SynonymsList, error) {
	if t.fstReader != nil {
		synonymsOffset, exists, err := t.fstReader.Get(term)
		if err != nil {
			// %w keeps the vellum error reachable via errors.Is / errors.As
			// while producing the same message text as before.
			return nil, fmt.Errorf("vellum err: %w", err)
		}
		if exists {
			return t.synonymsListFromOffset(synonymsOffset, except, rv)
		}
	}

	// Miss: either there is no FST to consult or the term is absent.
	if rv == nil || rv == emptySynonymsList {
		// Nothing to recycle, so hand back the shared sentinel.
		return emptySynonymsList, nil
	}
	return t.synonymsListInit(rv, except), nil
}
// synonymsListFromOffset prepares rv (allocating a fresh list when rv is nil
// or the empty sentinel) and then has it read its contents starting at
// synonymsOffset. A read failure is returned as-is together with a nil list.
func (t *Thesaurus) synonymsListFromOffset(synonymsOffset uint64, except *roaring.Bitmap, rv *SynonymsList) (*SynonymsList, error) {
	rv = t.synonymsListInit(rv, except)
	if err := rv.read(synonymsOffset, t); err != nil {
		return nil, err
	}
	return rv, nil
}
// synonymsListInit returns a SynonymsList reset to an empty state and bound
// to this thesaurus.
//
// When rv is nil or the shared empty sentinel, a new list with a fresh
// (empty) byte reader is allocated. Otherwise rv is recycled in place: its
// existing synonyms container and byte reader are cleared and kept, and
// every other field is zeroed.
func (t *Thesaurus) synonymsListInit(rv *SynonymsList, except *roaring.Bitmap) *SynonymsList {
	if rv == nil || rv == emptySynonymsList {
		rv = &SynonymsList{buffer: bytes.NewReader(nil)}
	} else {
		// Hold on to the reusable parts before wiping the struct.
		kept, reader := rv.synonyms, rv.buffer
		if kept != nil {
			kept.Clear()
		}
		if reader != nil {
			reader.Reset(nil)
		}
		// Zero everything else, then re-attach the recycled parts.
		*rv = SynonymsList{synonyms: kept, buffer: reader}
	}

	// Bind the list to this thesaurus and the caller's exclusion bitmap.
	rv.sb, rv.except, rv.synIDTermMap = t.sb, except, t.synIDTermMap
	return rv
}
// Contains reports whether key is present in the thesaurus FST.
// A thesaurus without an FST contains nothing, so it reports false with a
// nil error.
func (t *Thesaurus) Contains(key []byte) (bool, error) {
	if t.fst == nil {
		return false, nil
	}
	return t.fst.Contains(key)
}
// AutomatonIterator returns an iterator which only visits terms
// matching the vellum automaton and the start/end key range.
//
// A thesaurus without an FST yields the shared empty iterator. If the FST
// search finds nothing (vellum.ErrIteratorDone) the returned iterator is
// simply exhausted; any other search error is stored on the iterator and
// reported by its Next method.
func (t *Thesaurus) AutomatonIterator(a segment.Automaton,
	startKeyInclusive, endKeyExclusive []byte) segment.ThesaurusIterator {
	if t.fst == nil {
		return emptyThesaurusIterator
	}

	rv := &ThesaurusIterator{t: t}
	itr, err := t.fst.Search(a, startKeyInclusive, endKeyExclusive)
	switch {
	case err == nil:
		rv.itr = itr
	case err != vellum.ErrIteratorDone:
		rv.err = err
	}
	return rv
}
// emptyThesaurusIterator is the shared iterator returned for a thesaurus
// that has no FST; it has no underlying vellum iterator, so Next yields
// no entries.
var emptyThesaurusIterator = new(ThesaurusIterator)
// ThesaurusIterator is an iterator over the terms of a Thesaurus.
type ThesaurusIterator struct {
	// t is the thesaurus this iterator was created from.
	t *Thesaurus

	// itr is the underlying vellum iterator; nil means nothing to visit.
	itr vellum.Iterator

	// err holds the search error or the result of the most recent advance
	// of itr; vellum.ErrIteratorDone marks exhaustion.
	err error

	// entry is the single entry value reused for every call to Next.
	entry index.ThesaurusEntry
}
// Next returns the next entry in the thesaurus.
//
// It returns (nil, nil) once the iterator is exhausted or when there is no
// underlying iterator, and (nil, err) if a previous step failed. The
// returned pointer refers to the iterator's internal entry, which is
// overwritten by the following call to Next.
func (i *ThesaurusIterator) Next() (*index.ThesaurusEntry, error) {
	if i.err == vellum.ErrIteratorDone {
		return nil, nil
	}
	if i.err != nil {
		return nil, i.err
	}
	if i.itr == nil {
		return nil, nil
	}

	key, _ := i.itr.Current()
	i.entry.Term = string(key)
	// Advance now; the outcome is reported by the following call.
	i.err = i.itr.Next()
	return &i.entry, nil
}