Skip to content

Commit

Permalink
Merge pull request #407 from zierf/zig_rust_improvements
Browse files Browse the repository at this point in the history
Rust & Zig: Various Improvements
  • Loading branch information
PEZ authored Feb 8, 2025
2 parents a1ee655 + ed444e1 commit d59797c
Show file tree
Hide file tree
Showing 14 changed files with 175 additions and 176 deletions.
7 changes: 6 additions & 1 deletion fibonacci/rust/src/bin/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ use fibonacci::*;
fn main() -> Result<(), Box<dyn Error>> {
let args: Vec<String> = std::env::args().skip(1).collect();

// try different integer sizes (u32/u64/u128/usize) to see their impact on performance (u8/u16 are too small)
let runs: u32 = args.first().ok_or("missing number argument")?.parse()?;
let sum: u32 = (0..runs).map(fibonacci).sum();

// sum the fibonacci numbers
// (fold can infer the type automatically, sum needs explicit type annotations)
#[allow(clippy::unnecessary_fold)]
let sum = (0..runs).map(fibonacci).fold(0, |acc, n| acc + n);

println!("{}", sum);

Expand Down
11 changes: 7 additions & 4 deletions fibonacci/zig/code.zig
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ pub fn main() !void {

const args = args_cli[1..];

const runs = try std.fmt.parseInt(usize, args[0], 0);
// try different integer sizes (u32/u64/u128/usize) to see their impact on performance
const runs = try std.fmt.parseInt(u32, args[0], 0);

var sum: usize = 0;
const fn_fib = fibonacci(@TypeOf(runs));

for (0..runs) |i| {
sum += fibonacci(i);
var sum: @TypeOf(runs) = 0;

for (0..@intCast(runs)) |i| {
sum += fn_fib(@intCast(i));
}

const stdout = std.io.getStdOut().writer();
Expand Down
14 changes: 9 additions & 5 deletions fibonacci/zig/fibonacci.zig
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
pub fn fibonacci(n: usize) usize {
return switch (n) {
0...1 => n,
else => fibonacci(n - 1) + fibonacci(n - 2),
};
/// Returns a Fibonacci function specialized for the integer type `T`.
///
/// `T` is a comptime parameter; the returned function has the signature
/// `fn (T) T`, so callers pick the integer width once and then call the
/// result like an ordinary function.
pub fn fibonacci(comptime T: type) fn (T) T {
// the function is declared inside an anonymous struct so it can be returned by name
return struct {
/// Naive recursive Fibonacci: returns `n` for 0 and 1,
/// otherwise the sum of the two preceding values.
pub fn fib(n: T) T {
return switch (n) {
// base cases: fib(0) = 0, fib(1) = 1
0...1 => n,
// recursive case
else => fib(n - 1) + fib(n - 2),
};
}
}.fib;
}
5 changes: 3 additions & 2 deletions fibonacci/zig/run.zig
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ pub fn main() !void {
const args = try benchmark.loadArguments(allocator);

// parse program arguments
const number: usize = try std.fmt.parseInt(usize, args.program_args[0], 0);
// try different integer sizes (u32/u64/u128/usize) to see their impact on performance
const number = try std.fmt.parseInt(usize, args.program_args[0], 0);

// perform full benchmark
const context = benchmark.createContext(fibonacci);
const context = benchmark.createContext(fibonacci(@TypeOf(number)));
const stats = (try context.benchmark(allocator, args.warmup_ms, args.run_ms, .{number})).?;

// get last result for success checks
Expand Down
16 changes: 3 additions & 13 deletions levenshtein/rust/src/bin/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,9 @@ fn main() {
std::process::exit(3);
}

// calculate length of longest input string
let mut max_inp_len: usize = 0;

for argument in args.iter() {
max_inp_len = max_inp_len.max(argument.len());
}

// reuse buffer for prev_row and curr_row to minimize allocations
// try different integer sizes (u16/u32/u64/usize) to see their impact on performance
let mut buffer: Vec<u32> = vec![0; (max_inp_len + 1) * 2];

// try different integer sizes (u32/u64/usize) to see their impact on performance
let mut min_distance = u32::MAX;
let mut times = 0;
let mut times: usize = 0;

// compare all pairs of strings
for (i, s1) in args.iter().enumerate() {
Expand All @@ -36,7 +26,7 @@ fn main() {
continue;
}

let distance = levenshtein_distance(s1, s2, &mut buffer);
let distance = levenshtein_distance(s1, s2);
min_distance = min_distance.min(distance);

times += 1;
Expand Down
34 changes: 13 additions & 21 deletions levenshtein/rust/src/bin/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@ use std::error::Error;
use benchmark::{BenchmarkArguments, BenchmarkContext};
use levenshtein::*;

fn calculate_distances<T: CostInteger>(
word_list: &[impl AsRef<str>],
buffer: &mut Vec<T>,
) -> Vec<T> {
fn calculate_distances<T: CostInteger>(word_list: &[String]) -> Result<Vec<T>, &'static str> {
let list_length = word_list.len();
let mut results = Vec::with_capacity((list_length * (list_length - 1)) / 2);

Expand All @@ -16,12 +13,12 @@ fn calculate_distances<T: CostInteger>(
let (_, cmp_words) = word_list.split_at(i + 1);

for word_b in cmp_words.iter() {
let distance = levenshtein_distance(word_a, word_b, buffer);
let distance = levenshtein_distance(word_a, word_b);
results.push(distance);
}
}

results
Ok(results)
}

fn main() -> Result<(), Box<dyn Error>> {
Expand All @@ -43,27 +40,22 @@ fn main() -> Result<(), Box<dyn Error>> {
std::process::exit(3);
}

// calculate length of longest input string
let max_inp_len: usize = word_list
.iter()
.map(|word| word.len())
.max()
.ok_or("empty word list")?;

// reuse buffer for prev_row and curr_row to minimize allocations
// try different integer sizes (u32/u64/usize) to see their impact on performance
let mut buffer: Vec<u32> = vec![0; (max_inp_len + 1) * 2];

// perform full benchmark
let mut context = BenchmarkContext::new(move || calculate_distances(&word_list, &mut buffer));
// try different integer sizes (u32/u64/usize) to see their impact on performance
let mut context = BenchmarkContext::new(move || calculate_distances::<u32>(&word_list));
let stats = context
.benchmark(args.run_ms, args.warmup_ms)
.ok_or("no benchmark result")?;

// get last result for success checks
let last_result = stats.last_result().ok_or("empty result list")?;

// sum the distances outside the benchmarked function
let last_result = stats
.last_result()
.ok_or("empty result list")?
.as_ref()
.map_err(|&msg| msg)?;

// sum the list of distances outside the benchmarked function
// (fold can infer the type automatically, sum needs explicit type annotations)
#[allow(clippy::unnecessary_fold)]
let sum = last_result.iter().fold(0, |acc, n| acc + n);

Expand Down
21 changes: 5 additions & 16 deletions levenshtein/rust/src/levenshtein.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@ impl<T: Unsigned + Integer + Copy + UpperBounded + FromPrimitive + ToPrimitive +
///
/// Space Complexity: O(min(m,n)) - only uses two rows instead of full matrix
/// Time Complexity: O(m*n) - where m and n are the lengths of the input strings
pub fn levenshtein_distance<T: CostInteger>(
s1: &impl AsRef<str>,
s2: &impl AsRef<str>,
buffer: &mut Vec<T>,
) -> T {
pub fn levenshtein_distance<T: CostInteger>(s1: &impl AsRef<str>, s2: &impl AsRef<str>) -> T {
let s1: &str = s1.as_ref();
let s2: &str = s2.as_ref();

Expand All @@ -39,19 +35,12 @@ pub fn levenshtein_distance<T: CostInteger>(
let m = s1.len();

let row_elements = m + 1;
let req_size = row_elements * 2;

if buffer.len() < req_size {
buffer.extend((buffer.len()..req_size).map(|_| T::zero()));
}

// use two rows instead of full matrix for space optimization
let (mut prev_row, mut curr_row) = buffer[0..req_size].split_at_mut(row_elements);

// Initialize the previous row
for (i, v) in prev_row.iter_mut().enumerate() {
*v = T::from_usize(i).expect("init number");
}
let mut prev_row: Vec<T> = (0..row_elements)
.map(|i| T::from_usize(i).expect("init value"))
.collect();
let mut curr_row: Vec<T> = vec![T::zero(); row_elements];

// main computation loop
for (j, ch2) in s2.iter().enumerate() {
Expand Down
19 changes: 7 additions & 12 deletions levenshtein/zig/code.zig
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,22 @@ pub fn main() !void {

const args = args_cli[1..];

// Calculate length of longest input string
var max_inp_len: usize = 0;
// try different integer sizes (u32/u64/u128/usize) to see their impact on performance
const distance_type = u32;

for (args) |argument| {
max_inp_len = @max(max_inp_len, argument.len);
}

// Reuse prev and curr row to minimize allocations
const buffer = try allocator.alloc(usize, (max_inp_len + 1) * 2);

var min_distance: usize = std.math.maxInt(usize);
var min_distance: distance_type = std.math.maxInt(distance_type);
var times: usize = 0;

// Compare all pairs of strings
const fn_levenshtein = levenshteinDistance(@TypeOf(min_distance));

// compare all pairs of strings
for (args, 0..args.len) |argA, i| {
for (args, 0..args.len) |argB, j| {
if (i == j) {
continue;
}

const distance = levenshteinDistance(&argA, &argB, &buffer);
const distance = fn_levenshtein(allocator, &argA, &argB);
min_distance = @min(min_distance, distance);

times += 1;
Expand Down
103 changes: 57 additions & 46 deletions levenshtein/zig/levenshtein.zig
Original file line number Diff line number Diff line change
@@ -1,51 +1,62 @@
const std = @import("std");
const Allocator = std.mem.Allocator;

/// Calculates the Levenshtein distance between two strings using Wagner-Fischer algorithm
/// Calculates the Levenshtein distance between two strings using Wagner-Fischer algorithm.
///
/// Space Complexity: O(min(m,n)) - only uses two arrays instead of full matrix
/// Time Complexity: O(m*n) where m and n are the lengths of the input strings
pub fn levenshteinDistance(s1: *const []const u8, s2: *const []const u8, buffer: *const []usize) usize {
// Early termination checks
if (s1.*.len == 0) return s2.*.len;
if (s2.*.len == 0) return s1.*.len;

// Make s1 the shorter string for space optimization
const str1, const str2 = init: {
if (s1.*.len > s2.*.len) {
break :init .{ s2.*, s1.* };
/// Time Complexity: O(m*n) - where m and n are the lengths of the input strings
pub fn levenshteinDistance(comptime T: type) (fn (allocator: Allocator, s1: *const []const u8, s2: *const []const u8) T) {
return struct {
pub fn levenshtein(allocator: Allocator, s1: *const []const u8, s2: *const []const u8) T {
// early termination checks
if (s1.*.len == 0) return @intCast(s2.*.len);
if (s2.*.len == 0) return @intCast(s1.*.len);

// make s1 the shorter string for space optimization
const str1, const str2 = init: {
if (s1.*.len > s2.*.len) {
break :init .{ s2.*, s1.* };
}

break :init .{ s1.*, s2.* };
};

const m = str1.len;
const n = str2.len;

const row_elements = m + 1;

// use two rows instead of full matrix for space optimization
var prev_row: []T = allocator.alloc(T, row_elements) catch unreachable;
var curr_row: []T = allocator.alloc(T, row_elements) catch unreachable;
defer allocator.free(prev_row);
defer allocator.free(curr_row);

// initialize first row
for (0..m + 1) |i| {
prev_row[i] = @intCast(i);
}

// main computation loop
for (str2, 0..n) |ch2, j| {
curr_row[0] = @as(T, @intCast(j)) + 1;

for (str1, 0..m) |ch1, i| {
const cost: T = @intFromBool(ch1 != ch2);

// calculate minimum of three operations
curr_row[i + 1] = @min(
prev_row[i + 1] + 1, // deletion
curr_row[i] + 1, // insertion
prev_row[i] + cost, // substitution
);
}

// swap rows
std.mem.swap([]T, &prev_row, &curr_row);
}

return prev_row[m];
}

break :init .{ s1.*, s2.* };
};

const m = str1.len;
const n = str2.len;

var prev_row = buffer.*[0..(m + 1)];
var curr_row = buffer.*[(m + 1)..];

// Initialize first row
for (0..m + 1) |i| {
prev_row[i] = i;
}

// Main computation loop
for (str2, 0..n) |ch2, j| {
curr_row[0] = j + 1;

for (str1, 0..m) |ch1, i| {
const cost: usize = @intFromBool(ch1 != ch2);

// Calculate minimum of three operations
curr_row[i + 1] = @min(
prev_row[i + 1] + 1, // deletion
curr_row[i] + 1, // insertion
prev_row[i] + cost, // substitution
);
}

// Swap rows
std.mem.swap([]usize, &prev_row, &curr_row);
}

return prev_row[m];
}.levenshtein;
}
Loading

0 comments on commit d59797c

Please sign in to comment.