Skip to content

Commit

Permalink
Merge branch 'bwsw-master'
Browse files Browse the repository at this point in the history
  • Loading branch information
jean-pierreBoth committed Jun 10, 2024
2 parents 9df7e2a + db338c6 commit 00e8fb5
Show file tree
Hide file tree
Showing 17 changed files with 715 additions and 731 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ anndists = { version = "0.1.2" }
hdf5 = { version = "0.8" }
ndarray = { version = "0.15" }
skiplist = { version = "0.5" }
tempfile = "3"


[features]
Expand Down
7 changes: 4 additions & 3 deletions examples/ann-glove25-angular.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ use std::time::{Duration, SystemTime};

use anndists::dist::*;
use hnsw_rs::prelude::*;
use log::info;

mod utils;

Expand All @@ -64,12 +65,12 @@ pub fn main() {
// run bench
let nb_elem = anndata.train_data.len();
let knbn_max = anndata.test_distances.dim().1;
log::info!(
" train size : {}, test size : {}",
info!(
"Train size : {}, test size : {}",
nb_elem,
anndata.test_data.len()
);
log::info!(" nb neighbours answers for test data : {} \n\n", knbn_max);
info!("Nb neighbours answers for test data : {} \n\n", knbn_max);
//
let max_nb_connection = 48;
let ef_c = 800;
Expand Down
7 changes: 4 additions & 3 deletions examples/ann-mnist-784-euclidean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use std::time::{Duration, SystemTime};

use anndists::dist::*;
use hnsw_rs::prelude::*;
use log::info;

mod utils;
use utils::*;
Expand All @@ -33,12 +34,12 @@ pub fn main() {
let anndata = annhdf5::AnnBenchmarkData::new(fname).unwrap();
let knbn_max = anndata.test_distances.dim().1;
let nb_elem = anndata.train_data.len();
log::info!(
" train size : {}, test size : {}",
info!(
"Train size : {}, test size : {}",
nb_elem,
anndata.test_data.len()
);
log::info!(" nb neighbours answers for test data : {} \n\n", knbn_max);
info!("Nb neighbours answers for test data : {}", knbn_max);
//
let max_nb_connection = 24;
let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
Expand Down
7 changes: 4 additions & 3 deletions examples/ann-sift1m-128-euclidean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use env_logger::Builder;
use std::time::{Duration, SystemTime};

use anndists::dist::*;
use log::info;

// search in parallel mode, 8 core i7-10875H @2.3GHz, time for 100 neighbours

Expand Down Expand Up @@ -42,12 +43,12 @@ pub fn main() {
// run bench
let knbn_max = anndata.test_distances.dim().1;
let nb_elem = anndata.train_data.len();
log::info!(
info!(
" train size : {}, test size : {}",
nb_elem,
anndata.test_data.len()
);
log::info!(" nb neighbours answers for test data : {} \n\n", knbn_max);
info!(" nb neighbours answers for test data : {}", knbn_max);
//
let max_nb_connection = 64;
let nb_layer = 16.min((nb_elem as f32).ln().trunc() as usize);
Expand All @@ -64,7 +65,7 @@ pub fn main() {
let mut hnsw = Hnsw::<f32, DistL2>::new(max_nb_connection, nb_elem, nb_layer, ef_c, DistL2 {});
//
let extend_flag = false;
log::info!("extend flag = {:?} ", extend_flag);
info!("extend flag = {:?} ", extend_flag);
hnsw.set_extend_candidates(extend_flag);
//
// parallel insertion
Expand Down
4 changes: 2 additions & 2 deletions examples/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ fn main() {
data.push(column);
}
// give an id to each data
let data_with_id = data.iter().zip(0..data.len()).collect();
let data_with_id = data.iter().zip(0..data.len()).collect::<Vec<_>>();

let ef_c = 200;
let max_nb_connection = 15;
Expand All @@ -50,7 +50,7 @@ fn main() {
start = ProcessTime::now();
begin_t = SystemTime::now();
for _i in 0..data_with_id.len() {
hns.insert(data_with_id[_i]);
hns.insert((data_with_id[_i].0.as_slice(), data_with_id[_i].1))
}
cpu_time = start.elapsed();
println!("\n\n serial hnsw data insertion {:?}", cpu_time);
Expand Down
9 changes: 5 additions & 4 deletions examples/utils/annhdf5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use ndarray::Array2;

use ::hdf5::*;
use log::debug;

// datasets
// . distances (nbobjects, dim) f32 matrix for test objects
Expand Down Expand Up @@ -66,8 +67,8 @@ impl AnnBenchmarkData {
}
let test_distances = res.unwrap();
// a check for row order
log::debug!(
" first 2 distances for first test {:?} {:?} ",
debug!(
"First 2 distances for first test {:?} {:?} ",
test_distances.get((0, 0)).unwrap(),
test_distances.get((0, 1)).unwrap()
);
Expand Down Expand Up @@ -95,8 +96,8 @@ impl AnnBenchmarkData {
panic!("error reading neighbours dataset");
}
let test_neighbours = res.unwrap();
log::debug!(
" first 2 neighbours for first test {:?} {:?} ",
debug!(
"First 2 neighbours for first test {:?} {:?} ",
test_neighbours.get((0, 0)).unwrap(),
test_neighbours.get((0, 1)).unwrap()
);
Expand Down
2 changes: 1 addition & 1 deletion examples/utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
//! hdf5 utilities for examples
pub mod annhdf5;
pub mod annhdf5;
57 changes: 24 additions & 33 deletions src/api.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
//! Api for external language.
//! This file provides a trait to be used as an opaque pointer for C or Julia calls used in file libext.rs
use std::io::prelude::*;
use std::path::PathBuf;
use std::path::Path;

use serde::{de::DeserializeOwned, Serialize};

use crate::hnsw::*;
use crate::hnswio::*;
use anndists::dist::distances::Distance;
use log::info;

pub trait AnnT {
/// type of data vectors
type Val;
///
fn insert_data(&mut self, data: &Vec<Self::Val>, id: usize);
///
fn search_neighbours(&self, data: &Vec<Self::Val>, knbn: usize, ef_s: usize) -> Vec<Neighbour>;
///
fn parallel_insert_data(&mut self, data: &Vec<(&Vec<Self::Val>, usize)>);
///
//
fn insert_data(&mut self, data: &[Self::Val], id: usize);
//
fn search_neighbours(&self, data: &[Self::Val], knbn: usize, ef_s: usize) -> Vec<Neighbour>;
//
fn parallel_insert_data(&mut self, data: &[(&Vec<Self::Val>, usize)]);
//
fn parallel_search_neighbours(
&self,
data: &Vec<Vec<Self::Val>>,
data: &[Vec<Self::Val>],
knbn: usize,
ef_s: usize,
) -> Vec<Vec<Neighbour>>;
Expand All @@ -33,7 +33,7 @@ pub trait AnnT {
/// **We do not overwrite old files if they are currently in use by memory map**
/// If these files already exist, they are not overwritten and a unique filename is generated by concatenating a random number to the filename.
/// The function returns the basename used for the dump
fn file_dump(&self, filename: &String) -> anyhow::Result<String>;
fn file_dump(&self, path: &Path, file_basename: &str) -> anyhow::Result<String>;
}

impl<'b, T, D> AnnT for Hnsw<'b, T, D>
Expand All @@ -42,21 +42,21 @@ where
D: Distance<T> + Send + Sync,
{
type Val = T;
///
fn insert_data(&mut self, data: &Vec<Self::Val>, id: usize) {
//
fn insert_data(&mut self, data: &[Self::Val], id: usize) {
self.insert((data, id));
}
///
fn search_neighbours(&self, data: &Vec<T>, knbn: usize, ef_s: usize) -> Vec<Neighbour> {
//
fn search_neighbours(&self, data: &[T], knbn: usize, ef_s: usize) -> Vec<Neighbour> {
self.search(data, knbn, ef_s)
}
fn parallel_insert_data(&mut self, data: &Vec<(&Vec<Self::Val>, usize)>) {
fn parallel_insert_data(&mut self, data: &[(&Vec<Self::Val>, usize)]) {
self.parallel_insert(data);
}

fn parallel_search_neighbours(
&self,
data: &Vec<Vec<Self::Val>>,
data: &[Vec<Self::Val>],
knbn: usize,
ef_s: usize,
) -> Vec<Vec<Neighbour>> {
Expand All @@ -69,31 +69,22 @@ where
///
///
fn file_dump(&self, filename: &String) -> anyhow::Result<String> {
log::info!("in Hnsw::file_dump");
fn file_dump(&self, path: &Path, file_basename: &str) -> anyhow::Result<String> {
info!("In Hnsw::file_dump");
//
let mut dir = PathBuf::new();
dir.push(".");
// do not overwrite if mmap is active
let overwrite = !self.get_datamap_opt();
let mut dumpinit = DumpInit::new(dir, filename.clone(), overwrite);
let mut dumpinit = DumpInit::new(path, file_basename, overwrite);
let dumpname = dumpinit.get_basename().clone();
//
let res = self.dump(DumpMode::Full, &mut dumpinit);
//
let outgraph = &mut dumpinit.graph_out;
let outdata = &mut dumpinit.data_out;
outgraph.flush().unwrap();
outdata.flush().unwrap();
//
drop(dumpinit.graph_out);
drop(dumpinit.data_out);
//
log::info!("\n end of dump");
dumpinit.flush()?;
info!("End of dump");
if res.is_ok() {
return Ok(dumpname);
Ok(dumpname)
} else {
return Err(anyhow::anyhow!("unexpected error"));
Err(anyhow::anyhow!("unexpected error"))
}
} // end of dump
} // end of impl block AnnT for Hnsw<T,D>
Loading

0 comments on commit 00e8fb5

Please sign in to comment.