Skip to content

Commit

Permalink
MB-61640: Fuzzy Dynamic Scoring
Browse files Browse the repository at this point in the history
 - Allow edit distances for terms to be read
  • Loading branch information
Likith101 committed Oct 22, 2024
1 parent d984e35 commit f287967
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 5 deletions.
5 changes: 5 additions & 0 deletions automaton.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,8 @@ func (m *AlwaysMatch) Accept(int, byte) int {

// alwaysMatchAutomaton is a package-level AlwaysMatch instance, created once
// to avoid unnecessary repeated allocations.
var alwaysMatchAutomaton = &AlwaysMatch{}

// FuzzyAutomaton is an Automaton that can additionally report an edit
// distance for one of its states.
type FuzzyAutomaton interface {
Automaton
// EditDistance returns the edit distance associated with the given
// automaton state.
// NOTE(review): presumably the argument must be a state previously produced
// by this automaton — confirm against implementations.
EditDistance(int) uint8
}
14 changes: 14 additions & 0 deletions fst_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ type Iterator interface {
Close() error
}

// FuzzyIterator is an Iterator that additionally exposes a distance value.
type FuzzyIterator interface {
Iterator
// Distance returns the distance tracked by the iterator.
// NOTE(review): presumably the edit distance of the key the iterator is
// currently positioned on — confirm against implementations.
Distance() uint8
}

// FSTIterator is a structure for iterating key/value pairs in this FST in
// lexicographic order. Iterators should be constructed with the FSTIterator
// method on the parent FST structure.
Expand All @@ -61,6 +66,8 @@ type FSTIterator struct {
autStatesStack []int

nextStart []byte

keysDistance uint8
}

func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
Expand All @@ -74,6 +81,10 @@ func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte,
return rv, nil
}

// Distance returns the value currently held in the iterator's keysDistance
// field. It performs no computation of its own; it only exposes whatever
// distance was last recorded on the iterator.
func (i *FSTIterator) Distance() uint8 {
	recorded := i.keysDistance
	return recorded
}

// Reset resets the Iterator's internal state to allow for iterator
// reuse (e.g. pooling).
func (i *FSTIterator) Reset(f *FST,
Expand Down Expand Up @@ -206,6 +217,9 @@ OUTER:

cmp := bytes.Compare(i.keysStack, i.nextStart)
if cmp > 0 {
if fa, ok := i.aut.(FuzzyAutomaton); ok {
i.keysDistance = fa.EditDistance(autCurr)
}
// in final state greater than start key
return nil
}
Expand Down
14 changes: 9 additions & 5 deletions levenshtein/dfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,27 @@ type DFA struct {
ed uint8
}

/// Returns the initial state
// initialState reports the state the DFA starts in, i.e. the value stored
// in d.initState.
func (d *DFA) initialState() int {
	start := d.initState
	return start
}

/// Returns the Levenshtein distance associated to the
/// current state.
// distance looks up the Levenshtein distance recorded for the state stateId.
// stateId is used directly as an index into d.distances, so an out-of-range
// value panics.
func (d *DFA) distance(stateId int) Distance {
	recorded := d.distances[stateId]
	return recorded
}

/// Returns the number of states in the `DFA`.
// EditDistance returns, as a uint8, the distance recorded for the state
// stateId. stateId is used directly as an index into d.distances, so an
// out-of-range value panics.
func (d *DFA) EditDistance(stateId int) uint8 {
	recorded := d.distances[stateId]
	return recorded.distance()
}

// numStates reports how many states the DFA has, measured as the length of
// its transition table.
func (d *DFA) numStates() int {
	count := len(d.transitions)
	return count
}

/// Returns the destination state reached after consuming a given byte.
// transition returns the destination state reached from fromState after
// consuming the byte b. The result is read from the transition table and
// converted to int; fromState must be a valid index into d.transitions.
func (d *DFA) transition(fromState int, b uint8) int {
	next := d.transitions[fromState][b]
	return int(next)
}
Expand Down

0 comments on commit f287967

Please sign in to comment.