forked from blevesearch/vellum
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmerge_iterator.go
188 lines (172 loc) · 4.76 KB
/
merge_iterator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package vellum
import (
"bytes"
)
// MergeFunc is used to choose the new value for a key when merging a slice
// of iterators and the same key is observed with multiple values.
// Values presented to the MergeFunc will be in the same order as the
// original slice creating the MergeIterator. This allows some MergeFunc
// implementations to prioritize one iterator over another.
//
// The MergeIterator only invokes the MergeFunc when two or more of its
// iterators are positioned on the same key. The slice it passes is a scratch
// buffer that is truncated and refilled on every merge, so implementations
// should not hold on to it after returning.
type MergeFunc func([]uint64) uint64
// MergeIterator implements the Iterator interface by traversing a slice
// of iterators and merging the contents of them. If the same key exists
// in multiple underlying iterators, a user-provided MergeFunc will be
// invoked to choose the new value.
type MergeIterator struct {
// itrs holds the underlying iterators being merged.
itrs []Iterator
// f resolves the value when several iterators sit on the same key.
f MergeFunc
// currKs and currVs cache the most recently read Current() key and value
// of each iterator; they are indexed in parallel with itrs.
currKs [][]byte
currVs []uint64
// lowK is the lowest key found among currKs, and lowV the value reported
// for it (merged through f when more than one iterator is at lowK).
lowK []byte
lowV uint64
// lowIdxs lists the indexes into itrs of the iterators currently at lowK.
lowIdxs []int
// mergeV is a reusable scratch buffer for the values handed to f.
mergeV []uint64
}
// NewMergeIterator builds a MergeIterator on top of itrs, using f to pick
// the value whenever several of the iterators are positioned on the same key.
// The iterator is primed from the current position of every element of itrs.
// If that leaves it without a key, the iterator is still returned, together
// with ErrIteratorDone.
func NewMergeIterator(itrs []Iterator, f MergeFunc) (*MergeIterator, error) {
	n := len(itrs)
	merged := &MergeIterator{
		itrs:    itrs,
		f:       f,
		currKs:  make([][]byte, n),
		currVs:  make([]uint64, n),
		lowIdxs: make([]int, 0, n),
		mergeV:  make([]uint64, 0, n),
	}
	merged.init()
	if merged.lowK != nil {
		return merged, nil
	}
	return merged, ErrIteratorDone
}
// init caches the current key and value of every underlying iterator and
// then computes the initial lowest key.
func (m *MergeIterator) init() {
	for i := range m.itrs {
		k, v := m.itrs[i].Current()
		m.currKs[i], m.currVs[i] = k, v
	}
	m.updateMatches()
}
// updateMatches recomputes lowK, lowIdxs and lowV from the cached per-iterator
// keys and values. It does nothing when there are no underlying iterators.
func (m *MergeIterator) updateMatches() {
	if len(m.itrs) == 0 {
		return
	}

	// Start with iterator 0 as the candidate, even if its key is nil; any
	// later non-nil key replaces a nil candidate.
	m.lowK = m.currKs[0]
	m.lowIdxs = append(m.lowIdxs[:0], 0)

	for idx := 1; idx < len(m.itrs); idx++ {
		candidate := m.currKs[idx]
		if candidate == nil {
			// nil key: this iterator contributes nothing
			continue
		}
		order := bytes.Compare(candidate, m.lowK)
		switch {
		case m.lowK == nil || order < 0:
			// strictly lower key (or first real key): restart the match set
			m.lowK = candidate
			m.lowIdxs = append(m.lowIdxs[:0], idx)
		case order == 0:
			// same key as the current low: remember this iterator too
			m.lowIdxs = append(m.lowIdxs, idx)
		}
	}

	switch len(m.lowIdxs) {
	case 0:
		// nothing matched; leave lowV untouched
	case 1:
		m.lowV = m.currVs[m.lowIdxs[0]]
	default:
		// several iterators share the low key: let the MergeFunc decide,
		// presenting the values in iterator order
		m.mergeV = m.mergeV[:0]
		for _, idx := range m.lowIdxs {
			m.mergeV = append(m.mergeV, m.currVs[idx])
		}
		m.lowV = m.f(m.mergeV)
	}
}
// Current returns the key and value this iterator is positioned at, i.e. the
// lowest key last computed across the underlying iterators and the value
// chosen for it. If the iterator is not pointing at a valid entry (because
// construction, Next or Seek returned an error previously), it may return
// nil,0.
func (m *MergeIterator) Current() ([]byte, uint64) {
	key, val := m.lowK, m.lowV
	return key, val
}
// Next moves this iterator to the next key/value pair. Every underlying
// iterator that was positioned on the current lowest key is advanced, then
// the lowest key is recomputed. ErrIteratorDone is returned when no key
// remains; any other error from an underlying iterator is returned as is.
func (m *MergeIterator) Next() error {
	for _, idx := range m.lowIdxs {
		itr := m.itrs[idx]
		// an exhausted underlying iterator is not an error for the merge
		if err := itr.Next(); err != nil && err != ErrIteratorDone {
			return err
		}
		m.currKs[idx], m.currVs[idx] = itr.Current()
	}

	m.updateMatches()
	if m.lowK != nil {
		return nil
	}
	return ErrIteratorDone
}
// Seek advances this iterator to the specified key/value pair by seeking
// every underlying iterator to key. If the key is not present, Current()
// will return the next largest key. If this seek operation would go past
// the last key, then ErrIteratorDone is returned. Any other error reported
// by an underlying iterator is returned immediately.
func (m *MergeIterator) Seek(key []byte) error {
	for i, itr := range m.itrs {
		err := itr.Seek(key)
		// an underlying iterator running out of keys is not an error for
		// the merge as a whole
		if err != nil && err != ErrIteratorDone {
			return err
		}
		// Refresh the cached position of this iterator. Without this,
		// updateMatches would pick the lowest key from the pre-seek
		// positions and Current() would report stale data.
		m.currKs[i], m.currVs[i] = itr.Current()
	}
	m.updateMatches()
	if m.lowK == nil {
		return ErrIteratorDone
	}
	return nil
}
// Close closes every underlying iterator, continuing past failures so that
// all of them get closed. The first error encountered, if any, is returned.
func (m *MergeIterator) Close() error {
	var first error
	for _, itr := range m.itrs {
		err := itr.Close()
		if err != nil && first == nil {
			// remember only the earliest failure
			first = err
		}
	}
	return first
}
// MergeMin is a MergeFunc that returns the smallest of vals.
// vals must contain at least one element; indexing an empty slice panics.
func MergeMin(vals []uint64) uint64 {
	lowest := vals[0]
	for i := 1; i < len(vals); i++ {
		if vals[i] < lowest {
			lowest = vals[i]
		}
	}
	return lowest
}
// MergeMax is a MergeFunc that returns the largest of vals.
// For an empty or nil slice it returns 0, the smallest uint64, instead of
// panicking on an out-of-range index.
func MergeMax(vals []uint64) uint64 {
	// 0 is the identity for max over uint64, so starting from it gives the
	// same answer as starting from vals[0] whenever vals is non-empty.
	var rv uint64
	for _, v := range vals {
		if v > rv {
			rv = v
		}
	}
	return rv
}
// MergeSum is a MergeFunc that returns the sum of vals.
// For an empty or nil slice it returns 0 (the empty sum) instead of
// panicking on an out-of-range index. The addition is plain uint64
// arithmetic, so a sum that exceeds the uint64 range wraps around.
func MergeSum(vals []uint64) uint64 {
	var rv uint64
	for _, v := range vals {
		rv += v
	}
	return rv
}