# Patch summary (module github.com/blevesearch/vellum): expose an edit
# distance through the automaton and iterator interfaces.
#
# These '#' lines are a preamble placed before the first 'diff --git' header;
# they are not part of any hunk.
#
# NOTE(review): the line breaks and indentation of the diff text below appear
# to have been collapsed. The diff text itself is left untouched, because
# context lines must match the target files exactly and the original
# whitespace cannot be recovered from this view.
#
# .github/workflows/tests.yml
#   The go-version matrix changes from [1.17.x, 1.18.x, 1.19.x] to
#   [1.20.x, 1.21.x, 1.22.x].
#
# automaton.go
#   Adds the FuzzyAutomaton interface: it embeds Automaton and adds
#   EditDistance(int) uint8.
#
# fst_iterator.go
#   Adds the FuzzyIterator interface: it embeds Iterator and adds
#   EditDistance() uint8.
#   FSTIterator gains an editDistance uint8 field and an EditDistance()
#   method that returns it.
#   In the "in final state greater than start key" branch (cmp > 0), when
#   i.aut implements FuzzyAutomaton, i.editDistance is set to
#   fa.EditDistance(autCurr) before the function returns nil.
#   NOTE(review): no other assignment to editDistance is visible in these
#   hunks (for example in Reset) -- confirm whether a value from a previous
#   match can be observed after the iterator is reused.
#
# go.mod
#   The go directive changes from 1.18 to 1.21.
#   NOTE(review): the CI matrix above still lists 1.20.x -- confirm that
#   testing below the declared go version is intended.
#
# levenshtein/dfa.go
#   Adds DFA.EditDistance(stateId int) uint8, which returns
#   d.distances[stateId].distance().
#   Changes the "///" comment prefixes on initialState, distance, numStates
#   and transition to "//".
#
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1acdd98..e0db47e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ jobs: test: strategy: matrix: - go-version: [1.17.x, 1.18.x, 1.19.x] + go-version: [1.20.x, 1.21.x, 1.22.x] platform: [ubuntu-latest, macos-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: diff --git a/automaton.go b/automaton.go index 70398f2..4014797 100644 --- a/automaton.go +++ b/automaton.go @@ -83,3 +83,8 @@ func (m *AlwaysMatch) Accept(int, byte) int { // creating an alwaysMatchAutomaton to avoid unnecessary repeated allocations. var alwaysMatchAutomaton = &AlwaysMatch{} + +type FuzzyAutomaton interface { + Automaton + EditDistance(int) uint8 +} diff --git a/fst_iterator.go b/fst_iterator.go index 2c6b0d6..f5c374e 100644 --- a/fst_iterator.go +++ b/fst_iterator.go @@ -44,6 +44,11 @@ type Iterator interface { Close() error } +type FuzzyIterator interface { + Iterator + EditDistance() uint8 +} + // FSTIterator is a structure for iterating key/value pairs in this FST in // lexicographic order. Iterators should be constructed with the FSTIterator // method on the parent FST structure. @@ -61,6 +66,8 @@ type FSTIterator struct { autStatesStack []int nextStart []byte + + editDistance uint8 } func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte, @@ -74,6 +81,10 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte, return rv, nil } +func (i *FSTIterator) EditDistance() uint8 { + return i.editDistance +} + // Reset resets the Iterator' internal state to allow for iterator // reuse (e.g. pooling). 
func (i *FSTIterator) Reset(f *FST, @@ -206,6 +217,9 @@ OUTER: cmp := bytes.Compare(i.keysStack, i.nextStart) if cmp > 0 { + if fa, ok := i.aut.(FuzzyAutomaton); ok { + i.editDistance = fa.EditDistance(autCurr) + } // in final state greater than start key return nil } diff --git a/go.mod b/go.mod index d1059a8..1e97f08 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/blevesearch/vellum -go 1.18 +go 1.21 require ( github.com/bits-and-blooms/bitset v1.2.0 diff --git a/levenshtein/dfa.go b/levenshtein/dfa.go index d0e43ca..3fc26ed 100644 --- a/levenshtein/dfa.go +++ b/levenshtein/dfa.go @@ -28,23 +28,27 @@ type DFA struct { ed uint8 } -/// Returns the initial state +// Returns the initial state func (d *DFA) initialState() int { return d.initState } -/// Returns the Levenshtein distance associated to the -/// current state. +// Returns the Levenshtein distance associated to the +// current state. func (d *DFA) distance(stateId int) Distance { return d.distances[stateId] } -/// Returns the number of states in the `DFA`. +func (d *DFA) EditDistance(stateId int) uint8 { + return d.distances[stateId].distance() +} + +// Returns the number of states in the `DFA`. func (d *DFA) numStates() int { return len(d.transitions) } -/// Returns the destination state reached after consuming a given byte. +// Returns the destination state reached after consuming a given byte. func (d *DFA) transition(fromState int, b uint8) int { return int(d.transitions[fromState][b]) }