Skip to content

Commit

Permalink
MB-61640: Fuzzy Dynamic Scoring (#20)
Browse files Browse the repository at this point in the history
Co-authored-by: Abhinav Dangeti <[email protected]>
  • Loading branch information
Likith101 and abhinavdangeti authored Nov 20, 2024
1 parent d984e35 commit ad113db
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ jobs:
test:
strategy:
matrix:
go-version: [1.17.x, 1.18.x, 1.19.x]
go-version: [1.20.x, 1.21.x, 1.22.x]
platform: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
Expand Down
5 changes: 5 additions & 0 deletions automaton.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ func (m *AlwaysMatch) Accept(int, byte) int {

// creating an alwaysMatchAutomaton to avoid unnecessary repeated allocations.
var alwaysMatchAutomaton = &AlwaysMatch{}

// FuzzyAutomaton is an Automaton that can additionally report an edit
// distance for one of its states.
type FuzzyAutomaton interface {
Automaton
// EditDistance returns the edit distance for the given automaton state.
// FSTIterator calls it with its autCurr value on reaching a final state
// past the start key.
// NOTE(review): the valid range of the state argument is defined by the
// implementation; confirm against each implementer.
EditDistance(int) uint8
}
14 changes: 14 additions & 0 deletions fst_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ type Iterator interface {
Close() error
}

// FuzzyIterator is an Iterator that additionally exposes an edit distance.
type FuzzyIterator interface {
Iterator
// EditDistance returns the edit distance the iterator has recorded.
// NOTE(review): presumably this refers to the entry the iterator is
// currently positioned on; confirm against the implementation.
EditDistance() uint8
}

// FSTIterator is a structure for iterating key/value pairs in this FST in
// lexicographic order. Iterators should be constructed with the FSTIterator
// method on the parent FST structure.
Expand All @@ -61,6 +66,8 @@ type FSTIterator struct {
autStatesStack []int

nextStart []byte

editDistance uint8
}

func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
Expand All @@ -74,6 +81,10 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
return rv, nil
}

// EditDistance returns the edit distance most recently recorded by the
// iterator. The field is written only when the iterator's automaton
// implements FuzzyAutomaton; with any other automaton it keeps its previous
// value.
// NOTE(review): the visible code does not show the field being cleared when
// the iterator is reset or reused; confirm whether a stale value can leak.
func (i *FSTIterator) EditDistance() uint8 {
return i.editDistance
}

// Reset resets the Iterator's internal state to allow for iterator
// reuse (e.g. pooling).
func (i *FSTIterator) Reset(f *FST,
Expand Down Expand Up @@ -206,6 +217,9 @@ OUTER:

cmp := bytes.Compare(i.keysStack, i.nextStart)
if cmp > 0 {
if fa, ok := i.aut.(FuzzyAutomaton); ok {
i.editDistance = fa.EditDistance(autCurr)
}
// in final state greater than start key
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/blevesearch/vellum

go 1.18
go 1.21

require (
github.com/bits-and-blooms/bitset v1.2.0
Expand Down
14 changes: 9 additions & 5 deletions levenshtein/dfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,27 @@ type DFA struct {
ed uint8
}

/// Returns the initial state
// initialState returns the initial state of the DFA (d.initState).
func (d *DFA) initialState() int {
return d.initState
}

/// Returns the Levenshtein distance associated to the
/// current state.
// distance returns the Levenshtein distance associated with the given
// state, as stored in d.distances. stateId is used as-is to index
// d.distances; no range check is performed here.
func (d *DFA) distance(stateId int) Distance {
return d.distances[stateId]
}

/// Returns the number of states in the `DFA`.
// EditDistance returns the edit distance for the given state as a uint8,
// obtained by calling distance() on the entry stored in d.distances.
// stateId is used as-is to index d.distances; no range check is performed
// here.
func (d *DFA) EditDistance(stateId int) uint8 {
return d.distances[stateId].distance()
}

// numStates returns the number of states in the DFA, taken as the length of
// the transition table.
func (d *DFA) numStates() int {
return len(d.transitions)
}

/// Returns the destination state reached after consuming a given byte.
// transition returns the destination state reached from fromState after
// consuming byte b. The transition table entry is converted to int before
// being returned; neither index is range-checked here.
func (d *DFA) transition(fromState int, b uint8) int {
return int(d.transitions[fromState][b])
}
Expand Down

0 comments on commit ad113db

Please sign in to comment.