diff --git a/Cargo.toml b/Cargo.toml index 0bf1e32..a8a2eb6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hnsw_rs" -version = "0.3.0" +version = "0.3.1" authors = ["jeanpierre.both@gmail.com"] description = "Ann based on Hierarchical Navigable Small World Graphs from Yu.A. Malkov and D.A Yashunin" license = "MIT/Apache-2.0" @@ -95,7 +95,7 @@ anndists = { version = "0.1.2" } hdf5 = { version = "0.8" } ndarray = { version = "0.15" } skiplist = { version = "0.5" } -tempfile = "3" +tempfile = { version = "3" } [features] diff --git a/examples/ann-glove25-angular.rs b/examples/ann-glove25-angular.rs index 791df06..1d8bc46 100644 --- a/examples/ann-glove25-angular.rs +++ b/examples/ann-glove25-angular.rs @@ -1,3 +1,5 @@ +#![allow(clippy::needless_range_loop)] + use cpu_time::ProcessTime; use std::time::{Duration, SystemTime}; @@ -180,7 +182,7 @@ pub fn search( let recall = knn_neighbours_d.iter().filter(|d| *d <= &max_dist).count(); recalls.push(recall); let mut ratio = 0.; - if knn_neighbours_d.len() >= 1 { + if !knn_neighbours_d.is_empty() { ratio = knn_neighbours_d[knn_neighbours_d.len() - 1] / max_dist; } last_distances_ratio.push(ratio); diff --git a/examples/ann-mnist-784-euclidean.rs b/examples/ann-mnist-784-euclidean.rs index b40f261..e44926b 100644 --- a/examples/ann-mnist-784-euclidean.rs +++ b/examples/ann-mnist-784-euclidean.rs @@ -1,3 +1,5 @@ +#![allow(clippy::needless_range_loop)] + use cpu_time::ProcessTime; use std::time::{Duration, SystemTime}; @@ -132,7 +134,7 @@ pub fn main() { .count(); recalls.push(recall); let mut ratio = 0.; - if knn_neighbours_dist.len() >= 1 { + if !knn_neighbours_dist.is_empty() { ratio = knn_neighbours_dist[knn_neighbours_dist.len() - 1] / max_dist; } last_distances_ratio.push(ratio); diff --git a/examples/ann-sift1m-128-euclidean.rs b/examples/ann-sift1m-128-euclidean.rs index 4ed2bbb..f1d0040 100644 --- a/examples/ann-sift1m-128-euclidean.rs +++ b/examples/ann-sift1m-128-euclidean.rs @@ -1,3 +1,5 @@ +#![allow(clippy::needless_range_loop)] + use cpu_time::ProcessTime; use env_logger::Builder; use std::time::{Duration, SystemTime}; @@ -159,7 +161,7 @@ pub fn search( let recall = knn_neighbours_d.iter().filter(|d| *d <= &max_dist).count(); recalls.push(recall); let mut ratio = 0.; - if knn_neighbours_d.len() >= 1 { + if !knn_neighbours_d.is_empty() { ratio = knn_neighbours_d[knn_neighbours_d.len() - 1] / max_dist; } last_distances_ratio.push(ratio); diff --git a/examples/levensthein.rs b/examples/levensthein.rs index b44743d..cd597d1 100644 --- a/examples/levensthein.rs +++ b/examples/levensthein.rs @@ -31,11 +31,9 @@ fn main() { } words.push(String::from("abcdj")); // - let mut i = 0; - for w in &words { + for (i, w) in words.iter().enumerate() { let vec: Vec = w.chars().map(|c| c as u16).collect(); hns.insert((&vec, i)); - i = i + 1; } // create a filter let mut filter: Vec = Vec::new(); diff --git a/examples/random.rs b/examples/random.rs index 58688c2..5f259fd 100644 --- a/examples/random.rs +++ b/examples/random.rs @@ -1,3 +1,6 @@ +#![allow(clippy::needless_range_loop)] +#![allow(clippy::range_zip_with_len)] + use cpu_time::ProcessTime; use rand::distributions::Uniform; use rand::prelude::*; @@ -16,10 +19,7 @@ fn main() { let unif = Uniform::::new(0., 1.); let mut data = Vec::with_capacity(nb_elem); for _ in 0..nb_elem { - let column = (0..dim) - .into_iter() - .map(|_| rng.sample(unif)) - .collect::>(); + let column = (0..dim).map(|_| rng.sample(unif)).collect::>(); data.push(column); } // give an id to each data diff --git a/src/datamap.rs b/src/datamap.rs index b3b2525..833e182 100644 --- a/src/datamap.rs +++ b/src/datamap.rs @@ -358,8 +358,8 @@ mod tests { let ef_construct = 25; let nb_connection = 10; let hnsw = Hnsw::::new(nb_connection, nbcolumn, 16, ef_construct, DistL1 {}); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); @@ -424,28 +424,25 @@ mod tests { let ef_construct = 25; let nb_connection = 10; let hnsw = Hnsw::::new(nb_connection, nbcolumn, 16, ef_construct, DistL1 {}); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); // dump in a file. Must take care of name as tests runs in // !!! let fname = "mmap_order_test"; let directory = tempfile::tempdir().unwrap(); - let _res = hnsw.file_dump(directory.path(), &fname); + let _res = hnsw.file_dump(directory.path(), fname); // now we have check that datamap seems ok, test reload of hnsw with mmap - let datamap: DataMap = - DataMap::from_hnswdump::(directory.path(), &fname.to_string()).unwrap(); + let datamap: DataMap = DataMap::from_hnswdump::(directory.path(), fname).unwrap(); // testing type check assert!(datamap.check_data_type::()); assert!(!datamap.check_data_type::()); info!("Datamap iteration order checking"); let keys = datamap.get_dataid_iter(); - let mut ukey = 0usize; - for dataid in keys { + for (i, dataid) in keys.enumerate() { let v = datamap.get_data::(dataid).unwrap(); - assert_eq!(v, &data[*dataid], "dataid = {}, ukey = {}", dataid, ukey); - ukey += 1; + assert_eq!(v, &data[*dataid], "dataid = {}, ukey = {}", dataid, i); } // rm files generated! let _ = std::fs::remove_file("mmap_order_test.hnsw.data"); diff --git a/src/flatten.rs b/src/flatten.rs index da61c5b..2298306 100644 --- a/src/flatten.rs +++ b/src/flatten.rs @@ -167,8 +167,8 @@ mod tests { let ef_construct = 25; let nb_connection = 10; let hnsw = Hnsw::::new(nb_connection, nbcolumn, 16, ef_construct, DistL1 {}); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); diff --git a/src/hnsw.rs b/src/hnsw.rs index d26dd90..e423cec 100644 --- a/src/hnsw.rs +++ b/src/hnsw.rs @@ -1701,25 +1701,21 @@ mod tests { ef_construct, dist::DistL1 {}, ); - for i in 0..data.len() { - hns.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hns.insert((d, i)); } let cpu_time = start.elapsed(); println!(" test_insert_iter_point time inserting {:?}", cpu_time); hns.dump_layer_info(); // now check iteration - let mut ptiter = hns.get_point_indexation().into_iter(); + let ptiter = hns.get_point_indexation().into_iter(); let mut nb_dumped = 0; - loop { - if let Some(_point) = ptiter.next() { - // println!("point : {:?}", _point.p_id); - nb_dumped += 1; - } else { - break; - } - } // end while - // + for _point in ptiter { + // println!("point : {:?}", _point.p_id); + nb_dumped += 1; + } + // assert_eq!(nb_dumped, nbcolumn); } // end of test_iter_point @@ -1753,8 +1749,8 @@ mod tests { ef_construct, dist::DistL1 {}, ); - for i in 0..data.len() { - hns.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hns.insert((d, i)); } let cpu_time = start.elapsed(); println!(" test_insert_iter_point time inserting {:?}", cpu_time); @@ -1763,17 +1759,13 @@ mod tests { // now check iteration let layer_num = 0; let nbpl = hns.get_point_indexation().get_layer_nb_point(layer_num); - let mut layer_iter = hns.get_point_indexation().get_layer_iterator(layer_num); + let layer_iter = hns.get_point_indexation().get_layer_iterator(layer_num); // let mut nb_dumped = 0; - loop { - if let Some(_point) = layer_iter.next() { - // println!("point : {:?}", _point.p_id); - nb_dumped += 1; - } else { - break; - } - } // end while + for _point in layer_iter { + // println!("point : {:?}", _point.p_id); + nb_dumped += 1; + } println!( "test_iter_layerpoint : nb point in layer {} , nb found {}", nbpl, nb_dumped diff --git a/src/hnswio.rs b/src/hnswio.rs index de8784b..9b150ee 100644 --- a/src/hnswio.rs +++ b/src/hnswio.rs @@ -1372,7 +1372,7 @@ mod tests { fn my_fn(v1: &[f32], v2: &[f32]) -> f32 { let norm_l1: f32 = v1.iter().zip(v2.iter()).map(|t| (*t.0 - *t.1).abs()).sum(); - norm_l1 as f32 + norm_l1 } #[test] @@ -1404,8 +1404,8 @@ mod tests { ef_construct, dist::DistL1 {}, ); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); @@ -1454,8 +1454,8 @@ mod tests { ef_construct, mydist, ); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); @@ -1502,8 +1502,8 @@ mod tests { ef_construct, dist::DistL1 {}, ); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); @@ -1554,8 +1554,8 @@ mod tests { ef_construct, dist::DistL1 {}, ); - for i in 0..data.len() { - hnsw.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i)); } // some loggin info hnsw.dump_layer_info(); @@ -1593,8 +1593,8 @@ mod tests { first_with_mmap ); let nb_in = hnsw.get_nb_point(); - for i in 0..data.len() { - hnsw.insert((&data[i], i + nb_in)); + for (i, d) in data.iter().enumerate() { + hnsw.insert((d, i + nb_in)); } // let search_res = hnsw.search(&first, 5, ef_construct); @@ -1625,7 +1625,7 @@ mod tests { // // TODO: redump and care about mmapped file, so we do not overwrite // - let dump_init = DumpInit::new(directory.path(), &fname, false); + let dump_init = DumpInit::new(directory.path(), fname, false); info!("will use basename : {}", dump_init.get_basename()); let res = hnsw.file_dump(directory.path(), dump_init.get_basename()); if res.is_err() { @@ -1663,8 +1663,8 @@ mod tests { Hnsw::::new(nb_connection, 0, 16, ef_construct, dist::DistL1 {}); let fname = "empty_db"; let directory = tempfile::tempdir()?; - let _res = hnsw.file_dump(directory.path(), &fname); - let mut reloader = HnswIo::new(directory.path(), &fname); + let _res = hnsw.file_dump(directory.path(), fname); + let mut reloader = HnswIo::new(directory.path(), fname); let hnsw_loaded_res = reloader.load_hnsw::(); assert!(hnsw_loaded_res.is_err()); Ok(()) diff --git a/tests/deallocation_test.rs b/tests/deallocation_test.rs index 3563bcc..8df3c84 100644 --- a/tests/deallocation_test.rs +++ b/tests/deallocation_test.rs @@ -29,6 +29,6 @@ fn main() { if counter % 1_000_000 == 0 { println!("counter : {}", counter) } - counter = counter + 1; + counter += 1; } } diff --git a/tests/filtertest.rs b/tests/filtertest.rs index ea8ef60..a100cdf 100644 --- a/tests/filtertest.rs +++ b/tests/filtertest.rs @@ -1,3 +1,6 @@ +#![allow(clippy::needless_range_loop)] +#![allow(clippy::range_zip_with_len)] + use anndists::dist::*; use hnsw_rs::prelude::*; use rand::{distributions::Uniform, Rng}; @@ -19,19 +22,14 @@ fn generate_random_string(len: usize) -> String { fn search_closure_filter( word: &str, hns: &Hnsw, - words: &Vec, - filter_vector: &Vec, + words: &[String], + filter_vector: &[usize], ) { // transform string to u16 values let vec: Vec = word.chars().map(|c| c as u16).collect(); // now create a closure using this filter_vector // here we can off course implement more advanced filter logic - let filter = |id: &usize| -> bool { - match filter_vector.binary_search(id) { - Ok(_) => true, - Err(_) => false, - } - }; + let filter = |id: &usize| -> bool { filter_vector.binary_search(id).is_ok() }; // Now let us do the search by using the defined clojure, which in turn uses our vector // ids not in the vector will not be indluced in the search results @@ -65,11 +63,9 @@ fn filter_levenstein() { words.push(tw); } - let mut i = 0; - for w in &words { + for (i, w) in words.iter().enumerate() { let vec: Vec = w.chars().map(|c| c as u16).collect(); hns.insert((&vec, i)); - i = i + 1; if i % 1000 == 0 { println!("Inserting: {:?}", i); } @@ -158,10 +154,7 @@ fn filter_l2() { let unif = Uniform::::new(0., 1.); let mut data = Vec::with_capacity(nb_elem); for _ in 0..nb_elem { - let column = (0..dim) - .into_iter() - .map(|_| rng.sample(unif)) - .collect::>(); + let column = (0..dim).map(|_| rng.sample(unif)).collect::>(); data.push(column); } // give an id to each data @@ -176,10 +169,7 @@ fn filter_l2() { // let ef_search = 30; let knbn = 10; - let vec_tosearch = (0..dim) - .into_iter() - .map(|_| rng.sample(unif)) - .collect::>(); + let vec_tosearch = (0..dim).map(|_| rng.sample(unif)).collect::>(); // // Create a sorted vector of ids // the ids in the vector will be used as a filter diff --git a/tests/serpar.rs b/tests/serpar.rs index 8db352d..92bff79 100644 --- a/tests/serpar.rs +++ b/tests/serpar.rs @@ -1,3 +1,5 @@ +#![allow(clippy::range_zip_with_len)] + //! some testing utilities. //! run with to get output statistics : cargo test --release -- --nocapture --test test_parallel. //! serial test corresponds to random-10nn-euclidean(k=10) @@ -15,11 +17,7 @@ use serde::{de::DeserializeOwned, Serialize}; pub fn gen_random_vector_f32(nbrow: usize) -> Vec { let mut rng = thread_rng(); let unif = Uniform::::new(0., 1.); - let vec = (0..nbrow) - .into_iter() - .map(|_| rng.sample(unif)) - .collect::>(); - vec + (0..nbrow).map(|_| rng.sample(unif)).collect::>() } /// return nbcolumn vectors of dimension nbrow @@ -28,20 +26,17 @@ pub fn gen_random_matrix_f32(nbrow: usize, nbcolumn: usize) -> Vec> { let unif = Uniform::::new(0., 1.); let mut data = Vec::with_capacity(nbcolumn); for _ in 0..nbcolumn { - let column = (0..nbrow) - .into_iter() - .map(|_| rng.sample(unif)) - .collect::>(); + let column = (0..nbrow).map(|_| rng.sample(unif)).collect::>(); data.push(column); } - return data; + data } fn brute_force_neighbours( nb_neighbours: usize, refdata: &PointIndexation, distance: PointDistance, - data: &Vec, + data: &[T], ) -> OrderedSkipList { let mut neighbours = OrderedSkipList::::with_capacity(refdata.get_nb_point()); @@ -107,8 +102,8 @@ mod tests { hns.parallel_insert(&data_with_id); } else { println!("serial insertion"); - for i in 0..data.len() { - hns.insert((&data[i], i)); + for (i, d) in data.iter().enumerate() { + hns.insert((d, i)); } } let mut cpu_time: Duration = start.elapsed(); @@ -179,8 +174,7 @@ mod tests { // let mean_recall = (recalls.iter().sum::() as f32) / ((knbn * recalls.len()) as f32); - let mean_search_time = - (search_times.iter().sum::() as f32) / (search_times.len() as f32); + let mean_search_time = (search_times.iter().sum::()) / (search_times.len() as f32); println!( "\n mean fraction (of knbn) returned by search {:?} ", (nb_returned.iter().sum::() as f32) / ((nb_returned.len() * knbn) as f32) @@ -206,8 +200,8 @@ mod tests { // // let mut data = gen_random_matrix_f32(dim, nb_elem); - for i in 0..data.len() { - l2_normalize(&mut data[i]); + for v in &mut data { + l2_normalize(v); } let data_with_id = data.iter().zip(0..data.len()).collect::>(); let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize); @@ -318,8 +312,7 @@ mod tests { // let mean_recall = (recalls.iter().sum::() as f32) / ((knbn * recalls.len()) as f32); - let mean_search_time = - (search_times.iter().sum::() as f32) / (search_times.len() as f32); + let mean_search_time = (search_times.iter().sum::()) / (search_times.len() as f32); println!( "\n nb search {:?} recall rate is {:?} search time inverse {:?} ", nbtest,