Skip to content

Commit

Permalink
clippy clean
Browse files Browse the repository at this point in the history
  • Loading branch information
jean-pierreBoth committed Jul 5, 2024
1 parent 00e8fb5 commit a7fb9a9
Show file tree
Hide file tree
Showing 13 changed files with 77 additions and 101 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "hnsw_rs"
version = "0.3.0"
version = "0.3.1"
authors = ["[email protected]"]
description = "Ann based on Hierarchical Navigable Small World Graphs from Yu.A. Malkov and D.A Yashunin"
license = "MIT/Apache-2.0"
Expand Down Expand Up @@ -95,7 +95,7 @@ anndists = { version = "0.1.2" }
hdf5 = { version = "0.8" }
ndarray = { version = "0.15" }
skiplist = { version = "0.5" }
tempfile = "3"
tempfile = { version = "3" }


[features]
Expand Down
4 changes: 3 additions & 1 deletion examples/ann-glove25-angular.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::needless_range_loop)]

use cpu_time::ProcessTime;
use std::time::{Duration, SystemTime};

Expand Down Expand Up @@ -180,7 +182,7 @@ pub fn search<Dist>(
let recall = knn_neighbours_d.iter().filter(|d| *d <= &max_dist).count();
recalls.push(recall);
let mut ratio = 0.;
if knn_neighbours_d.len() >= 1 {
if !knn_neighbours_d.is_empty() {
ratio = knn_neighbours_d[knn_neighbours_d.len() - 1] / max_dist;
}
last_distances_ratio.push(ratio);
Expand Down
4 changes: 3 additions & 1 deletion examples/ann-mnist-784-euclidean.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::needless_range_loop)]

use cpu_time::ProcessTime;
use std::time::{Duration, SystemTime};

Expand Down Expand Up @@ -132,7 +134,7 @@ pub fn main() {
.count();
recalls.push(recall);
let mut ratio = 0.;
if knn_neighbours_dist.len() >= 1 {
if !knn_neighbours_dist.is_empty() {
ratio = knn_neighbours_dist[knn_neighbours_dist.len() - 1] / max_dist;
}
last_distances_ratio.push(ratio);
Expand Down
4 changes: 3 additions & 1 deletion examples/ann-sift1m-128-euclidean.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(clippy::needless_range_loop)]

use cpu_time::ProcessTime;
use env_logger::Builder;
use std::time::{Duration, SystemTime};
Expand Down Expand Up @@ -159,7 +161,7 @@ pub fn search<Dist>(
let recall = knn_neighbours_d.iter().filter(|d| *d <= &max_dist).count();
recalls.push(recall);
let mut ratio = 0.;
if knn_neighbours_d.len() >= 1 {
if !knn_neighbours_d.is_empty() {
ratio = knn_neighbours_d[knn_neighbours_d.len() - 1] / max_dist;
}
last_distances_ratio.push(ratio);
Expand Down
4 changes: 1 addition & 3 deletions examples/levensthein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,9 @@ fn main() {
}
words.push(String::from("abcdj"));
//
let mut i = 0;
for w in &words {
for (i, w) in words.iter().enumerate() {
let vec: Vec<u16> = w.chars().map(|c| c as u16).collect();
hns.insert((&vec, i));
i = i + 1;
}
// create a filter
let mut filter: Vec<usize> = Vec::new();
Expand Down
8 changes: 4 additions & 4 deletions examples/random.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#![allow(clippy::needless_range_loop)]
#![allow(clippy::range_zip_with_len)]

use cpu_time::ProcessTime;
use rand::distributions::Uniform;
use rand::prelude::*;
Expand All @@ -16,10 +19,7 @@ fn main() {
let unif = Uniform::<f32>::new(0., 1.);
let mut data = Vec::with_capacity(nb_elem);
for _ in 0..nb_elem {
let column = (0..dim)
.into_iter()
.map(|_| rng.sample(unif))
.collect::<Vec<f32>>();
let column = (0..dim).map(|_| rng.sample(unif)).collect::<Vec<f32>>();
data.push(column);
}
// give an id to each data
Expand Down
19 changes: 8 additions & 11 deletions src/datamap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,8 @@ mod tests {
let ef_construct = 25;
let nb_connection = 10;
let hnsw = Hnsw::<f32, DistL1>::new(nb_connection, nbcolumn, 16, ef_construct, DistL1 {});
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
Expand Down Expand Up @@ -424,28 +424,25 @@ mod tests {
let ef_construct = 25;
let nb_connection = 10;
let hnsw = Hnsw::<u32, DistL1>::new(nb_connection, nbcolumn, 16, ef_construct, DistL1 {});
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
// dump in a file. Must take care of name as tests runs in // !!!
let fname = "mmap_order_test";
let directory = tempfile::tempdir().unwrap();
let _res = hnsw.file_dump(directory.path(), &fname);
let _res = hnsw.file_dump(directory.path(), fname);
// now we have check that datamap seems ok, test reload of hnsw with mmap
let datamap: DataMap =
DataMap::from_hnswdump::<u32>(directory.path(), &fname.to_string()).unwrap();
let datamap: DataMap = DataMap::from_hnswdump::<u32>(directory.path(), fname).unwrap();
// testing type check
assert!(datamap.check_data_type::<u32>());
assert!(!datamap.check_data_type::<f32>());
info!("Datamap iteration order checking");
let keys = datamap.get_dataid_iter();
let mut ukey = 0usize;
for dataid in keys {
for (i, dataid) in keys.enumerate() {
let v = datamap.get_data::<u32>(dataid).unwrap();
assert_eq!(v, &data[*dataid], "dataid = {}, ukey = {}", dataid, ukey);
ukey += 1;
assert_eq!(v, &data[*dataid], "dataid = {}, ukey = {}", dataid, i);
}
// rm files generated!
let _ = std::fs::remove_file("mmap_order_test.hnsw.data");
Expand Down
4 changes: 2 additions & 2 deletions src/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ mod tests {
let ef_construct = 25;
let nb_connection = 10;
let hnsw = Hnsw::<f32, DistL1>::new(nb_connection, nbcolumn, 16, ef_construct, DistL1 {});
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
Expand Down
38 changes: 15 additions & 23 deletions src/hnsw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1701,25 +1701,21 @@ mod tests {
ef_construct,
dist::DistL1 {},
);
for i in 0..data.len() {
hns.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hns.insert((d, i));
}
let cpu_time = start.elapsed();
println!(" test_insert_iter_point time inserting {:?}", cpu_time);

hns.dump_layer_info();
// now check iteration
let mut ptiter = hns.get_point_indexation().into_iter();
let ptiter = hns.get_point_indexation().into_iter();
let mut nb_dumped = 0;
loop {
if let Some(_point) = ptiter.next() {
// println!("point : {:?}", _point.p_id);
nb_dumped += 1;
} else {
break;
}
} // end while
//
for _point in ptiter {
// println!("point : {:?}", _point.p_id);
nb_dumped += 1;
}
//
assert_eq!(nb_dumped, nbcolumn);
} // end of test_iter_point

Expand Down Expand Up @@ -1753,8 +1749,8 @@ mod tests {
ef_construct,
dist::DistL1 {},
);
for i in 0..data.len() {
hns.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hns.insert((d, i));
}
let cpu_time = start.elapsed();
println!(" test_insert_iter_point time inserting {:?}", cpu_time);
Expand All @@ -1763,17 +1759,13 @@ mod tests {
// now check iteration
let layer_num = 0;
let nbpl = hns.get_point_indexation().get_layer_nb_point(layer_num);
let mut layer_iter = hns.get_point_indexation().get_layer_iterator(layer_num);
let layer_iter = hns.get_point_indexation().get_layer_iterator(layer_num);
//
let mut nb_dumped = 0;
loop {
if let Some(_point) = layer_iter.next() {
// println!("point : {:?}", _point.p_id);
nb_dumped += 1;
} else {
break;
}
} // end while
for _point in layer_iter {
// println!("point : {:?}", _point.p_id);
nb_dumped += 1;
}
println!(
"test_iter_layerpoint : nb point in layer {} , nb found {}",
nbpl, nb_dumped
Expand Down
28 changes: 14 additions & 14 deletions src/hnswio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1372,7 +1372,7 @@ mod tests {

/// L1 (Manhattan) distance between two equal-length slices.
///
/// Used in tests as a caller-supplied distance function for `DistFn`.
/// If the slices differ in length, `zip` silently truncates to the
/// shorter one (matches the original behavior).
fn my_fn(v1: &[f32], v2: &[f32]) -> f32 {
    // Sum of absolute component-wise differences; no intermediate
    // binding or cast needed — `sum()` already yields f32.
    v1.iter().zip(v2.iter()).map(|(a, b)| (a - b).abs()).sum()
}

#[test]
Expand Down Expand Up @@ -1404,8 +1404,8 @@ mod tests {
ef_construct,
dist::DistL1 {},
);
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
Expand Down Expand Up @@ -1454,8 +1454,8 @@ mod tests {
ef_construct,
mydist,
);
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
Expand Down Expand Up @@ -1502,8 +1502,8 @@ mod tests {
ef_construct,
dist::DistL1 {},
);
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
Expand Down Expand Up @@ -1554,8 +1554,8 @@ mod tests {
ef_construct,
dist::DistL1 {},
);
for i in 0..data.len() {
hnsw.insert((&data[i], i));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i));
}
// some loggin info
hnsw.dump_layer_info();
Expand Down Expand Up @@ -1593,8 +1593,8 @@ mod tests {
first_with_mmap
);
let nb_in = hnsw.get_nb_point();
for i in 0..data.len() {
hnsw.insert((&data[i], i + nb_in));
for (i, d) in data.iter().enumerate() {
hnsw.insert((d, i + nb_in));
}
//
let search_res = hnsw.search(&first, 5, ef_construct);
Expand Down Expand Up @@ -1625,7 +1625,7 @@ mod tests {
//
// TODO: redump and care about mmapped file, so we do not overwrite
//
let dump_init = DumpInit::new(directory.path(), &fname, false);
let dump_init = DumpInit::new(directory.path(), fname, false);
info!("will use basename : {}", dump_init.get_basename());
let res = hnsw.file_dump(directory.path(), dump_init.get_basename());
if res.is_err() {
Expand Down Expand Up @@ -1663,8 +1663,8 @@ mod tests {
Hnsw::<f32, dist::DistL1>::new(nb_connection, 0, 16, ef_construct, dist::DistL1 {});
let fname = "empty_db";
let directory = tempfile::tempdir()?;
let _res = hnsw.file_dump(directory.path(), &fname);
let mut reloader = HnswIo::new(directory.path(), &fname);
let _res = hnsw.file_dump(directory.path(), fname);
let mut reloader = HnswIo::new(directory.path(), fname);
let hnsw_loaded_res = reloader.load_hnsw::<f32, DistL1>();
assert!(hnsw_loaded_res.is_err());
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion tests/deallocation_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ fn main() {
if counter % 1_000_000 == 0 {
println!("counter : {}", counter)
}
counter = counter + 1;
counter += 1;
}
}
28 changes: 9 additions & 19 deletions tests/filtertest.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#![allow(clippy::needless_range_loop)]
#![allow(clippy::range_zip_with_len)]

use anndists::dist::*;
use hnsw_rs::prelude::*;
use rand::{distributions::Uniform, Rng};
Expand All @@ -19,19 +22,14 @@ fn generate_random_string(len: usize) -> String {
fn search_closure_filter(
word: &str,
hns: &Hnsw<u16, DistLevenshtein>,
words: &Vec<String>,
filter_vector: &Vec<usize>,
words: &[String],
filter_vector: &[usize],
) {
// transform string to u16 values
let vec: Vec<u16> = word.chars().map(|c| c as u16).collect();
// now create a closure using this filter_vector
// here we can off course implement more advanced filter logic
let filter = |id: &usize| -> bool {
match filter_vector.binary_search(id) {
Ok(_) => true,
Err(_) => false,
}
};
let filter = |id: &usize| -> bool { filter_vector.binary_search(id).is_ok() };

// Now let us do the search by using the defined clojure, which in turn uses our vector
// ids not in the vector will not be indluced in the search results
Expand Down Expand Up @@ -65,11 +63,9 @@ fn filter_levenstein() {
words.push(tw);
}

let mut i = 0;
for w in &words {
for (i, w) in words.iter().enumerate() {
let vec: Vec<u16> = w.chars().map(|c| c as u16).collect();
hns.insert((&vec, i));
i = i + 1;
if i % 1000 == 0 {
println!("Inserting: {:?}", i);
}
Expand Down Expand Up @@ -158,10 +154,7 @@ fn filter_l2() {
let unif = Uniform::<f32>::new(0., 1.);
let mut data = Vec::with_capacity(nb_elem);
for _ in 0..nb_elem {
let column = (0..dim)
.into_iter()
.map(|_| rng.sample(unif))
.collect::<Vec<f32>>();
let column = (0..dim).map(|_| rng.sample(unif)).collect::<Vec<f32>>();
data.push(column);
}
// give an id to each data
Expand All @@ -176,10 +169,7 @@ fn filter_l2() {
//
let ef_search = 30;
let knbn = 10;
let vec_tosearch = (0..dim)
.into_iter()
.map(|_| rng.sample(unif))
.collect::<Vec<f32>>();
let vec_tosearch = (0..dim).map(|_| rng.sample(unif)).collect::<Vec<f32>>();
//
// Create a sorted vector of ids
// the ids in the vector will be used as a filter
Expand Down
Loading

0 comments on commit a7fb9a9

Please sign in to comment.