Skip to content

Commit

Permalink
fix two bugs found by fuzz tests
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Nov 29, 2023
1 parent 75bc109 commit 42ff8ba
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Fixed
~~~~~
* fixed a bug in the hashmap grow function that caused a crash in the
Damerau-Levenshtein implementation
* fixed incorrect flagging of similar characters in Jaro similarity
* fixed wraparound in Longest Common Subsequence

[0.3.1] - 2023-11-29
^^^^^^^^^^^^^^^^^^^^
Expand Down
8 changes: 8 additions & 0 deletions src/distance/indel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -798,4 +798,12 @@ mod tests {
test_distance("Иванко".chars(), "Петрунко".chars(), None, None)
);
}

/// Regression case for the Indel distance (named as a fuzzing regression).
///
/// "ab" and "ac" share their first character and differ in the second;
/// the assertion pins the reported distance to `Some(2)`.
#[test]
fn fuzzing_regressions() {
assert_eq!(
Some(2),
// NOTE(review): the two trailing `None`s are presumably the optional
// score cutoff / score hint of the `test_distance` helper — confirm.
test_distance("ab".chars(), "ac".chars(), None, None)
);
}
}
43 changes: 38 additions & 5 deletions src/distance/jaro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ struct FlaggedCharsWord {
}

impl FlaggedCharsWord {
const fn count_common_chars(&self) -> usize {
fn count_common_chars(&self) -> usize {
debug_assert_eq!(self.p_flag.count_ones(), self.t_flag.count_ones());

self.p_flag.count_ones() as usize
}
}
Expand All @@ -41,6 +43,17 @@ struct FlaggedCharsMultiword {

impl FlaggedCharsMultiword {
fn count_common_chars(&self) -> usize {
debug_assert_eq!(
self.p_flag
.iter()
.map(|x| x.count_ones() as usize)
.sum::<usize>(),
self.t_flag
.iter()
.map(|x| x.count_ones() as usize)
.sum::<usize>()
);

if self.p_flag.len() < self.t_flag.len() {
self.p_flag.iter().map(|x| x.count_ones() as usize).sum()
} else {
Expand Down Expand Up @@ -198,22 +211,22 @@ fn flag_similar_characters_step<CharT>(
return;
}
if pm_j[1] != 0 {
flagged.p_flag[word] |= blsi_u64(pm_j[1]);
flagged.p_flag[word + 1] |= blsi_u64(pm_j[1]);
flagged.t_flag[j_word] |= 1_u64 << j_pos;
return;
}
if pm_j[2] != 0 {
flagged.p_flag[word] |= blsi_u64(pm_j[2]);
flagged.p_flag[word + 2] |= blsi_u64(pm_j[2]);
flagged.t_flag[j_word] |= 1_u64 << j_pos;
return;
}
if pm_j[3] != 0 {
flagged.p_flag[word] |= blsi_u64(pm_j[3]);
flagged.p_flag[word + 3] |= blsi_u64(pm_j[3]);
flagged.t_flag[j_word] |= 1_u64 << j_pos;
return;
}

word += 3;
word += 4;
}
}

Expand Down Expand Up @@ -266,6 +279,7 @@ where

for (j, ch2) in s2.enumerate() {
flag_similar_characters_step(pm, ch2, &mut flagged, j, &bound_mask);
flagged.count_common_chars();

if j + bound + 1 < len1 {
bound_mask.last_mask = (bound_mask.last_mask << 1) | 1;
Expand Down Expand Up @@ -337,6 +351,7 @@ where

let mut transpositions = 0;
let mut s2_pos = 0_usize;

while flagged_chars != 0 {
while t_flag == 0 {
text_word += 1;
Expand Down Expand Up @@ -1120,4 +1135,22 @@ mod tests {
0.0001
);
}

/// Regression case for the Jaro distance (named as a fuzzing regression).
///
/// Compares a long `s1` against a shorter `s2`; both literals mix ASCII with
/// non-ASCII characters (Hangul in `s1`, Cyrillic in `s2`).
#[test]
fn fuzzing_regressions() {
{
// The trailing `\` on each line continues the string literal onto the next line.
let s1 = "afddddddddddddddddddddddddddddddddddddddddadacccccccdddddddddd%,ccaa{1}ccccdccccccccccccccccccccc\
cccccccccccccccccccccccccccccccccccccccccccccccczcecccccccccccccccccccccccccccccccccccccccccccccc\
cccccccccdddddddd디ccc디Gcddddccccccccccccccccccccccccccccccccccccccccccccccccccccccaccccccccccccc\
ccccccccccccccccccccccccccccccccccccccccccccea,ccccccccccccccccccccccccccccccccccccccc";
let s2 = "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddd\
dddddddddddddddddddddddddddddf,ccczюec*ceч;e,";

// NOTE(review): assuming `assert_delta!` takes (expected, actual, tolerance),
// this checks the result against 0.1 with a very wide tolerance of 0.32144.
// Possibly the expected value and the tolerance were swapped — confirm
// against the macro definition and the true distance for this input.
assert_delta!(
Some(0.1),
_test_distance(s1.chars(), s2.chars(), None, None),
0.32144
);
}
}
}
21 changes: 14 additions & 7 deletions src/distance/lcs_seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,13 +452,12 @@ where
let affix = remove_common_affix(s1, len1, s2, len2);
let mut lcs_sim = affix.prefix_len + affix.suffix_len;
if affix.len1 != 0 && affix.len2 != 0 {
lcs_sim += mbleven2018(
affix.s1,
affix.len1,
affix.s2,
affix.len2,
score_cutoff - lcs_sim,
)?;
let adjusted_cutoff = if score_cutoff >= lcs_sim {
score_cutoff - lcs_sim
} else {
0
};
lcs_sim += mbleven2018(affix.s1, affix.len1, affix.s2, affix.len2, adjusted_cutoff)?;
}

if lcs_sim >= score_cutoff {
Expand Down Expand Up @@ -1216,4 +1215,12 @@ mod tests {
test_distance("Иванко".chars(), "Петрунко".chars(), None, None)
);
}

/// Regression case for the LCS-based distance (named as a fuzzing regression).
///
/// "ab" and "ac" have a one-character common prefix and differ afterwards;
/// the assertion pins the reported distance to `Some(1)`.
#[test]
fn fuzzing_regressions() {
assert_eq!(
Some(1),
// NOTE(review): presumably exercises the path where a common prefix is
// stripped before the remaining strings are compared — confirm.
test_distance("ab".chars(), "ac".chars(), None, None)
);
}
}

0 comments on commit 42ff8ba

Please sign in to comment.