Merge pull request #407 from zierf/zig_rust_improvements

Rust & Zig: Various Improvements
bddicken · Feb 8, 2025 · d59797c
2 parents a1ee655 + ed444e1
commit d59797c
Show file tree

Hide file tree

Showing 14 changed files with 175 additions and 176 deletions.
diff --git a/fibonacci/rust/src/bin/code.rs b/fibonacci/rust/src/bin/code.rs
@@ -5,8 +5,13 @@ use fibonacci::*;
 fn main() -> Result<(), Box<dyn Error>> {
     let args: Vec<String> = std::env::args().skip(1).collect();
 
+    // try different integer sizes (u32/u64/u128/usize) to see their impact on performance (u8/u16 are too small)
     let runs: u32 = args.first().ok_or("missing number argument")?.parse()?;
-    let sum: u32 = (0..runs).map(fibonacci).sum();
+
+    // sum the fibonacci numbers
+    // (fold can infer the type automatically, sum needs explicit type annotations)
+    #[allow(clippy::unnecessary_fold)]
+    let sum = (0..runs).map(fibonacci).fold(0, |acc, n| acc + n);
 
     println!("{}", sum);
 

diff --git a/fibonacci/zig/code.zig b/fibonacci/zig/code.zig
@@ -17,12 +17,15 @@ pub fn main() !void {
 
     const args = args_cli[1..];
 
-    const runs = try std.fmt.parseInt(usize, args[0], 0);
+    // try different integer sizes (u32/u64/u128/usize) to see their impact on performance
+    const runs = try std.fmt.parseInt(u32, args[0], 0);
 
-    var sum: usize = 0;
+    const fn_fib = fibonacci(@TypeOf(runs));
 
-    for (0..runs) |i| {
-        sum += fibonacci(i);
+    var sum: @TypeOf(runs) = 0;
+
+    for (0..@intCast(runs)) |i| {
+        sum += fn_fib(@intCast(i));
     }
 
     const stdout = std.io.getStdOut().writer();

diff --git a/fibonacci/zig/fibonacci.zig b/fibonacci/zig/fibonacci.zig
@@ -1,6 +1,10 @@
-pub fn fibonacci(n: usize) usize {
-    return switch (n) {
-        0...1 => n,
-        else => fibonacci(n - 1) + fibonacci(n - 2),
-    };
+pub fn fibonacci(comptime T: type) fn (T) T {
+    return struct {
+        pub fn fib(n: T) T {
+            return switch (n) {
+                0...1 => n,
+                else => fib(n - 1) + fib(n - 2),
+            };
+        }
+    }.fib;
 }
diff --git a/fibonacci/zig/run.zig b/fibonacci/zig/run.zig
@@ -12,10 +12,11 @@ pub fn main() !void {
     const args = try benchmark.loadArguments(allocator);
 
     // parse program arguments
-    const number: usize = try std.fmt.parseInt(usize, args.program_args[0], 0);
+    // try different integer sizes (u32/u64/u128/usize) to see their impact on performance
+    const number = try std.fmt.parseInt(usize, args.program_args[0], 0);
 
     // perform full benchmark
-    const context = benchmark.createContext(fibonacci);
+    const context = benchmark.createContext(fibonacci(@TypeOf(number)));
     const stats = (try context.benchmark(allocator, args.warmup_ms, args.run_ms, .{number})).?;
 
     // get last result for success checks

diff --git a/levenshtein/rust/src/bin/code.rs b/levenshtein/rust/src/bin/code.rs
@@ -15,19 +15,9 @@ fn main() {
         std::process::exit(3);
     }
 
-    // calculate length of longest input string
-    let mut max_inp_len: usize = 0;
-
-    for argument in args.iter() {
-        max_inp_len = max_inp_len.max(argument.len());
-    }
-
-    // reuse buffer for prev_row and curr_row to minimize allocations
-    // try different integer sizes (u16/u32/u64/usize) to see their impact on performance
-    let mut buffer: Vec<u32> = vec![0; (max_inp_len + 1) * 2];
-
+    // try different integer sizes (u32/u64/usize) to see their impact on performance
     let mut min_distance = u32::MAX;
-    let mut times = 0;
+    let mut times: usize = 0;
 
     // compare all pairs of strings
     for (i, s1) in args.iter().enumerate() {
@@ -36,7 +26,7 @@ fn main() {
                 continue;
             }
 
-            let distance = levenshtein_distance(s1, s2, &mut buffer);
+            let distance = levenshtein_distance(s1, s2);
             min_distance = min_distance.min(distance);
 
             times += 1;

diff --git a/levenshtein/rust/src/bin/run.rs b/levenshtein/rust/src/bin/run.rs
@@ -3,10 +3,7 @@ use std::error::Error;
 use benchmark::{BenchmarkArguments, BenchmarkContext};
 use levenshtein::*;
 
-fn calculate_distances<T: CostInteger>(
-    word_list: &[impl AsRef<str>],
-    buffer: &mut Vec<T>,
-) -> Vec<T> {
+fn calculate_distances<T: CostInteger>(word_list: &[String]) -> Result<Vec<T>, &'static str> {
     let list_length = word_list.len();
     let mut results = Vec::with_capacity((list_length * (list_length - 1)) / 2);
 
@@ -16,12 +13,12 @@ fn calculate_distances<T: CostInteger>(
         let (_, cmp_words) = word_list.split_at(i + 1);
 
         for word_b in cmp_words.iter() {
-            let distance = levenshtein_distance(word_a, word_b, buffer);
+            let distance = levenshtein_distance(word_a, word_b);
             results.push(distance);
         }
     }
 
-    results
+    Ok(results)
 }
 
 fn main() -> Result<(), Box<dyn Error>> {
@@ -43,27 +40,22 @@ fn main() -> Result<(), Box<dyn Error>> {
         std::process::exit(3);
     }
 
-    // calculate length of longest input string
-    let max_inp_len: usize = word_list
-        .iter()
-        .map(|word| word.len())
-        .max()
-        .ok_or("empty word list")?;
-
-    // reuse buffer for prev_row and curr_row to minimize allocations
-    // try different integer sizes (u32/u64/usize) to see their impact on performance
-    let mut buffer: Vec<u32> = vec![0; (max_inp_len + 1) * 2];
-
     // perform full benchmark
-    let mut context = BenchmarkContext::new(move || calculate_distances(&word_list, &mut buffer));
+    // try different integer sizes (u32/u64/usize) to see their impact on performance
+    let mut context = BenchmarkContext::new(move || calculate_distances::<u32>(&word_list));
     let stats = context
         .benchmark(args.run_ms, args.warmup_ms)
         .ok_or("no benchmark result")?;
 
     // get last result for success checks
-    let last_result = stats.last_result().ok_or("empty result list")?;
-
-    // sum the distances outside the benchmarked function
+    let last_result = stats
+        .last_result()
+        .ok_or("empty result list")?
+        .as_ref()
+        .map_err(|&msg| msg)?;
+
+    // sum the list of distances outside the benchmarked function
+    // (fold can infer the type automatically, sum needs explicit type annotations)
     #[allow(clippy::unnecessary_fold)]
     let sum = last_result.iter().fold(0, |acc, n| acc + n);
 

diff --git a/levenshtein/rust/src/levenshtein.rs b/levenshtein/rust/src/levenshtein.rs
@@ -17,11 +17,7 @@ impl<T: Unsigned + Integer + Copy + UpperBounded + FromPrimitive + ToPrimitive +
 ///
 /// Space Complexity: O(min(m,n)) - only uses two rows instead of full matrix
 /// Time Complexity: O(m*n) - where m and n are the lengths of the input strings
-pub fn levenshtein_distance<T: CostInteger>(
-    s1: &impl AsRef<str>,
-    s2: &impl AsRef<str>,
-    buffer: &mut Vec<T>,
-) -> T {
+pub fn levenshtein_distance<T: CostInteger>(s1: &impl AsRef<str>, s2: &impl AsRef<str>) -> T {
     let s1: &str = s1.as_ref();
     let s2: &str = s2.as_ref();
 
@@ -39,19 +35,12 @@ pub fn levenshtein_distance<T: CostInteger>(
     let m = s1.len();
 
     let row_elements = m + 1;
-    let req_size = row_elements * 2;
-
-    if buffer.len() < req_size {
-        buffer.extend((buffer.len()..req_size).map(|_| T::zero()));
-    }
 
     // use two rows instead of full matrix for space optimization
-    let (mut prev_row, mut curr_row) = buffer[0..req_size].split_at_mut(row_elements);
-
-    // Initialize the previous row
-    for (i, v) in prev_row.iter_mut().enumerate() {
-        *v = T::from_usize(i).expect("init number");
-    }
+    let mut prev_row: Vec<T> = (0..row_elements)
+        .map(|i| T::from_usize(i).expect("init value"))
+        .collect();
+    let mut curr_row: Vec<T> = vec![T::zero(); row_elements];
 
     // main computation loop
     for (j, ch2) in s2.iter().enumerate() {

diff --git a/levenshtein/zig/code.zig b/levenshtein/zig/code.zig
@@ -17,27 +17,22 @@ pub fn main() !void {
 
     const args = args_cli[1..];
 
-    // Calculate length of longest input string
-    var max_inp_len: usize = 0;
+    // try different integer sizes (u32/u64/u128/usize) to see their impact on performance
+    const distance_type = u32;
 
-    for (args) |argument| {
-        max_inp_len = @max(max_inp_len, argument.len);
-    }
-
-    // Reuse prev and curr row to minimize allocations
-    const buffer = try allocator.alloc(usize, (max_inp_len + 1) * 2);
-
-    var min_distance: usize = std.math.maxInt(usize);
+    var min_distance: distance_type = std.math.maxInt(distance_type);
     var times: usize = 0;
 
-    // Compare all pairs of strings
+    const fn_levenshtein = levenshteinDistance(@TypeOf(min_distance));
+
+    // compare all pairs of strings
     for (args, 0..args.len) |argA, i| {
         for (args, 0..args.len) |argB, j| {
             if (i == j) {
                 continue;
             }
 
-            const distance = levenshteinDistance(&argA, &argB, &buffer);
+            const distance = fn_levenshtein(allocator, &argA, &argB);
             min_distance = @min(min_distance, distance);
 
             times += 1;

diff --git a/levenshtein/zig/levenshtein.zig b/levenshtein/zig/levenshtein.zig
@@ -1,51 +1,62 @@
 const std = @import("std");
+const Allocator = std.mem.Allocator;
 
-/// Calculates the Levenshtein distance between two strings using Wagner-Fischer algorithm
+/// Calculates the Levenshtein distance between two strings using Wagner-Fischer algorithm.
+///
 /// Space Complexity: O(min(m,n)) - only uses two arrays instead of full matrix
-/// Time Complexity: O(m*n) where m and n are the lengths of the input strings
-pub fn levenshteinDistance(s1: *const []const u8, s2: *const []const u8, buffer: *const []usize) usize {
-    // Early termination checks
-    if (s1.*.len == 0) return s2.*.len;
-    if (s2.*.len == 0) return s1.*.len;
-
-    // Make s1 the shorter string for space optimization
-    const str1, const str2 = init: {
-        if (s1.*.len > s2.*.len) {
-            break :init .{ s2.*, s1.* };
+/// Time Complexity: O(m*n) - where m and n are the lengths of the input strings
+pub fn levenshteinDistance(comptime T: type) (fn (allocator: Allocator, s1: *const []const u8, s2: *const []const u8) T) {
+    return struct {
+        pub fn levenshtein(allocator: Allocator, s1: *const []const u8, s2: *const []const u8) T {
+            // early termination checks
+            if (s1.*.len == 0) return @intCast(s2.*.len);
+            if (s2.*.len == 0) return @intCast(s1.*.len);
+
+            // make s1 the shorter string for space optimization
+            const str1, const str2 = init: {
+                if (s1.*.len > s2.*.len) {
+                    break :init .{ s2.*, s1.* };
+                }
+
+                break :init .{ s1.*, s2.* };
+            };
+
+            const m = str1.len;
+            const n = str2.len;
+
+            const row_elements = m + 1;
+
+            // use two rows instead of full matrix for space optimization
+            var prev_row: []T = allocator.alloc(T, row_elements) catch unreachable;
+            var curr_row: []T = allocator.alloc(T, row_elements) catch unreachable;
+            defer allocator.free(prev_row);
+            defer allocator.free(curr_row);
+
+            // initialize first row
+            for (0..m + 1) |i| {
+                prev_row[i] = @intCast(i);
+            }
+
+            // main computation loop
+            for (str2, 0..n) |ch2, j| {
+                curr_row[0] = @as(T, @intCast(j)) + 1;
+
+                for (str1, 0..m) |ch1, i| {
+                    const cost: T = @intFromBool(ch1 != ch2);
+
+                    // calculate minimum of three operations
+                    curr_row[i + 1] = @min(
+                        prev_row[i + 1] + 1, // deletion
+                        curr_row[i] + 1, // insertion
+                        prev_row[i] + cost, // substitution
+                    );
+                }
+
+                // swap rows
+                std.mem.swap([]T, &prev_row, &curr_row);
+            }
+
+            return prev_row[m];
         }
-
-        break :init .{ s1.*, s2.* };
-    };
-
-    const m = str1.len;
-    const n = str2.len;
-
-    var prev_row = buffer.*[0..(m + 1)];
-    var curr_row = buffer.*[(m + 1)..];
-
-    // Initialize first row
-    for (0..m + 1) |i| {
-        prev_row[i] = i;
-    }
-
-    // Main computation loop
-    for (str2, 0..n) |ch2, j| {
-        curr_row[0] = j + 1;
-
-        for (str1, 0..m) |ch1, i| {
-            const cost: usize = @intFromBool(ch1 != ch2);
-
-            // Calculate minimum of three operations
-            curr_row[i + 1] = @min(
-                prev_row[i + 1] + 1, // deletion
-                curr_row[i] + 1, // insertion
-                prev_row[i] + cost, // substitution
-            );
-        }
-
-        // Swap rows
-        std.mem.swap([]usize, &prev_row, &curr_row);
-    }
-
-    return prev_row[m];
+    }.levenshtein;
 }