Skip to content

Commit

Permalink
ObjC in-process: Port levenshtein
Browse files Browse the repository at this point in the history
  • Loading branch information
PEZ committed Feb 8, 2025
1 parent 0e95a94 commit 34d8103
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 4 deletions.
2 changes: 2 additions & 0 deletions languages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ function compile_languages {
compile 'Java' 'jvm' 'javac -cp ../lib/java jvm/*.java'
compile 'Java Native' 'java-native-image' '(cd java-native-image ; native-image -cp ..:../../lib/java --no-fallback -O3 --pgo-instrument -march=native jvm.run) && ./java-native-image/jvm.run -XX:ProfilesDumpFile=java-native-image/run.iprof 10000 2000 $(./check-output.sh -i) && (cd java-native-image ; native-image -cp ..:../../lib/java -O3 --pgo=run.iprof -march=native jvm.run -o run)'
compile 'Fortran' 'fortran' 'gfortran -O3 -J../lib/fortran ../lib/fortran/benchmark.f90 fortran/*.f90 -o fortran/run'
compile 'Objective C' 'objc' 'clang -O3 -I../lib/c -framework Foundation objc/*.m ../lib/c/benchmark.c -o objc/run'
compile 'Racket' 'racket' '(cd racket && raco make run.rkt && raco demod -o run.zo run.rkt && raco exe -o run run.zo)'
compile 'Rust' 'rust' 'cargo build --manifest-path rust/Cargo.toml --release'
compile 'Zig' 'zig' 'zig build --build-file zig/build.zig --prefix ${PWD}/zig/zig-out --cache-dir ${PWD}/zig/.zig-cache --release=fast'
Expand All @@ -29,6 +30,7 @@ function run_languages {
run 'Fortran' './fortran/run' './fortran/run'
run 'Java' './jvm/run.class' 'java -cp .:../lib/java jvm.run'
run 'Java Native' './java-native-image/run' './java-native-image/run'
run 'Objective C' './objc/run' './objc/run'
run "Python" "./py/run.py" "python3.12 ./py/run.py"
run "Python JIT" "./py-jit/run.py" "python3.12 ./py-jit/run.py"
run "Racket" './racket/run' './racket/run'
Expand Down
26 changes: 26 additions & 0 deletions levenshtein/objc/levenshtein.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#import <Foundation/Foundation.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Calculates the Levenshtein distance between two strings using the Wagner-Fischer algorithm.
 *
 * Characters are compared as UTF-16 code units (unichar), so a character that
 * needs a surrogate pair counts as two units.
 *
 * @param s1 The first string to compare.
 * @param s2 The second string to compare.
 * @return The Levenshtein distance between s1 and s2; 0 when the strings are
 *         equal, and the length of the other string when one of them is empty.
 */
NSInteger levenshteinDistance(NSString *s1, NSString *s2);

/**
 * Calculates the Levenshtein distance for each pair of words (i, j) with i < j
 * in the given array.
 *
 * Distances are stored in the order the pairs are visited: i ascending, and
 * for each i, j ascending. For n words that is n * (n - 1) / 2 values.
 *
 * @param words The words to compare with each other.
 * @return A pointer to a malloc-allocated array of NSInteger values
 *         representing the distances. The caller owns the buffer and must
 *         release it with free().
 */
NSInteger* distances(NSArray<NSString *> *words);

#ifdef __cplusplus
}
#endif
92 changes: 92 additions & 0 deletions levenshtein/objc/levenshtein.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#import <Foundation/Foundation.h>

/**
 * Calculates the Levenshtein distance between two strings using the
 * Wagner-Fischer algorithm, comparing UTF-16 code units.
 *
 * Space complexity: O(min(m, n)) - two rolling rows instead of the full matrix.
 * Time complexity: O(m * n), where m and n are the lengths of the input strings.
 *
 * All scratch memory is taken from malloc() and released before returning.
 * No autoreleased objects are created, so calling this function in a tight
 * loop does not grow the caller's autorelease pool.
 *
 * @param s1 The first string to compare.
 * @param s2 The second string to compare.
 * @return The number of single code-unit insertions, deletions and
 *         substitutions needed to turn one string into the other.
 *         The process is aborted if scratch memory cannot be allocated.
 */
NSInteger levenshteinDistance(NSString *s1, NSString *s2) {
    // Early termination checks
    if ([s1 isEqualToString:s2]) return 0;
    if (s1.length == 0) return (NSInteger)s2.length;
    if (s2.length == 0) return (NSInteger)s1.length;

    // Make s1 the shorter string so the rolling rows are as small as possible
    if (s1.length > s2.length) {
        NSString *temp = s1;
        s1 = s2;
        s2 = temp;
    }

    const NSUInteger m = s1.length;
    const NSUInteger n = s2.length;

    // A single allocation holds both rolling rows of (m + 1) cells each
    NSInteger *rows = malloc(2 * (m + 1) * sizeof(NSInteger));

    // Try direct access to the UTF-16 storage first; CoreFoundation returns
    // NULL when the string is not stored that way, in which case we copy.
    const unichar *s1Chars = CFStringGetCharactersPtr((__bridge CFStringRef)s1);
    const unichar *s2Chars = CFStringGetCharactersPtr((__bridge CFStringRef)s2);
    unichar *s1Copy = NULL;
    unichar *s2Copy = NULL;

    if (s1Chars == NULL) {
        s1Copy = malloc(m * sizeof(unichar));
        if (s1Copy != NULL) {
            [s1 getCharacters:s1Copy range:NSMakeRange(0, m)];
        }
        s1Chars = s1Copy;
    }
    if (s2Chars == NULL) {
        s2Copy = malloc(n * sizeof(unichar));
        if (s2Copy != NULL) {
            [s2 getCharacters:s2Copy range:NSMakeRange(0, n)];
        }
        s2Chars = s2Copy;
    }

    // Out of memory: there is no meaningful distance to report
    if (rows == NULL || s1Chars == NULL || s2Chars == NULL) {
        abort();
    }

    NSInteger *prevRow = rows;
    NSInteger *currRow = rows + (m + 1);

    // Initialize first row: distance from the empty prefix of s2
    for (NSUInteger i = 0; i <= m; i++) {
        prevRow[i] = (NSInteger)i;
    }

    // Main computation loop
    for (NSUInteger j = 1; j <= n; j++) {
        currRow[0] = (NSInteger)j;

        for (NSUInteger i = 1; i <= m; i++) {
            NSInteger cost = (s1Chars[i - 1] == s2Chars[j - 1]) ? 0 : 1;

            // Calculate minimum of three operations
            NSInteger deletion = prevRow[i] + 1;
            NSInteger insertion = currRow[i - 1] + 1;
            NSInteger substitution = prevRow[i - 1] + cost;

            currRow[i] = MIN(deletion, MIN(insertion, substitution));
        }

        // Swap rows: the row just filled becomes the previous row
        NSInteger *swap = prevRow;
        prevRow = currRow;
        currRow = swap;
    }

    // After the final swap the answer lives in prevRow
    const NSInteger distance = prevRow[m];

    free(s2Copy);
    free(s1Copy);
    free(rows);

    return distance;
}

/**
 * Calculates the Levenshtein distance for every pair of words (i, j) with
 * i < j, in the order i ascending and, for each i, j ascending.
 *
 * @param words The words to compare with each other.
 * @return A malloc-allocated array holding count * (count - 1) / 2 distances
 *         (at least one slot is always allocated, so a NULL return always
 *         means the allocation failed). The caller must free() the buffer.
 */
NSInteger* distances(NSArray<NSString *> *words) {
    const NSUInteger wordCount = words.count;

    // Only unordered pairs are stored, so half of n * (n - 1) is enough.
    // Guard the subtraction so an empty array cannot wrap around.
    const NSUInteger pairCount =
        (wordCount < 2) ? 0 : (wordCount * (wordCount - 1)) / 2;

    // Never request zero bytes: malloc(0) may legally return NULL, which
    // would be indistinguishable from an allocation failure.
    NSInteger *result = malloc(MAX(pairCount, (NSUInteger)1) * sizeof(NSInteger));
    if (result == NULL) {
        return NULL;
    }

    NSUInteger index = 0;
    for (NSUInteger i = 0; i < wordCount; i++) {
        NSString *first = words[i];
        // Starting at i + 1 visits exactly the pairs with i < j
        for (NSUInteger j = i + 1; j < wordCount; j++) {
            result[index] = levenshteinDistance(first, words[j]);
            index++;
        }
    }
    return result;
}
85 changes: 85 additions & 0 deletions levenshtein/objc/run.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#import <Foundation/Foundation.h>
#import "levenshtein.h"
#import "benchmark.h"

/**
 * Reads words from a file, splitting on newline characters.
 *
 * The file is decoded as UTF-8. Any failure (missing path, unreadable file,
 * wrong encoding) is logged together with its reason and yields an empty array.
 *
 * @param filePath A UTF-8 encoded C-string representing the file path.
 * @return An NSArray of NSString objects containing each non-empty line in the
 *         file, in file order; an empty array on failure.
 */
NSArray<NSString *> *readWords(const char *filePath) {
    // stringWithUTF8String: raises on NULL, so reject it up front
    if (filePath == NULL) {
        NSLog(@"Error reading file: no path given");
        return @[];
    }

    NSString *path = [NSString stringWithUTF8String:filePath];
    if (path == nil) {
        // The bytes were not valid UTF-8, so there is no path to open
        NSLog(@"Error reading file: path is not valid UTF-8");
        return @[];
    }

    NSError *error = nil;
    NSString *fileContents = [NSString stringWithContentsOfFile:path
                                                       encoding:NSUTF8StringEncoding
                                                          error:&error];
    if (!fileContents) {
        // Report why the read failed, not just that it failed
        NSLog(@"Error reading file %@: %@", path, error.localizedDescription);
        return @[];
    }

    // Splitting on the newline set turns "\r\n" into an empty component,
    // which the length filter below removes along with blank lines.
    NSArray<NSString *> *lines =
        [fileContents componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];
    NSPredicate *nonEmpty = [NSPredicate predicateWithFormat:@"length > 0"];
    return [lines filteredArrayUsingPredicate:nonEmpty];
}

/**
 * A struct to pass word data to the benchmark work function.
 *
 * A pointer to an instance is handed to benchmark_run() as the opaque
 * `void *data` argument and cast back inside work().
 */
typedef struct {
// The word list whose pairwise distances are computed on every work() call.
NSArray<NSString *> *words;
} word_data_t;

/**
 * Benchmark callback: computes all pairwise distances for the word list
 * carried in `data` and hands the resulting buffer back through the
 * pointer member of the benchmark result.
 *
 * @param data Pointer to a word_data_t describing the input words.
 * @return A benchmark_result_t whose value.ptr is the array returned by
 *         distances().
 *
 * NOTE(review): every call returns a freshly allocated array, while main()
 * only frees the final one - confirm whether earlier results are released
 * elsewhere.
 */
static benchmark_result_t work(void *data) {
    const word_data_t *input = (const word_data_t *)data;

    benchmark_result_t outcome;
    outcome.value.ptr = distances(input->words);
    return outcome;
}

/**
 * Entry point: runs the Levenshtein benchmark and prints one result line.
 *
 * Arguments: <runMs> <warmupMs> <wordsFile>. The word list is loaded, a
 * warmup pass and a measured pass are executed through benchmark_run(), the
 * distances of the last measured pass are summed, and the formatted
 * statistics are written to stdout.
 *
 * @return 0 on success, 1 on bad usage, an empty word list, or a missing
 *         benchmark result.
 */
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        if (argc < 4) {
            NSLog(@"Usage: %s <runMs> <warmupMs> <wordsFile>", argv[0]);
            return 1;
        }
        int runMs = atoi(argv[1]);
        int warmupMs = atoi(argv[2]);
        const char *wordsFilePath = argv[3];

        NSArray<NSString *> *words = readWords(wordsFilePath);
        if (!words || words.count == 0) {
            NSLog(@"No words to process.");
            return 1;
        }

        // Set up the data for the benchmark.
        word_data_t data = { words };

        // Warmup run (result is ignored)
        benchmark_run(work, &data, warmupMs);

        // Benchmark run.
        benchmark_stats_t stats = benchmark_run(work, &data, runMs);

        // Read the pointer before value.number is assigned below; both are
        // accessed through the same `value` member.
        NSInteger *distancesArray = (NSInteger *)stats.last_result.value.ptr;

        // Expected count based on unique pairs (i < j)
        NSUInteger count = (words.count * (words.count - 1)) / 2;

        // Without a result buffer there is nothing to sum; fail cleanly
        // instead of dereferencing NULL.
        if (count > 0 && distancesArray == NULL) {
            NSLog(@"Benchmark produced no result.");
            return 1;
        }

        NSInteger sum = 0;
        for (NSUInteger i = 0; i < count; i++) {
            sum += distancesArray[i];
        }
        stats.last_result.value.number = sum;

        char buffer[1024];
        benchmark_format_results(stats, buffer, sizeof(buffer));
        printf("%s\n", buffer);

        free(distancesArray);
    }
    return 0;
}
9 changes: 5 additions & 4 deletions lib/c/benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ benchmark_stats_t benchmark_run(benchmark_fn fn, void* data, int run_ms) {
count++;
}

// If this is a check-output run we haven't printed any status dots,
// If this is a check-output run we haven't printed any status dots,
// so no newline should be printed either
if (run_ms > 1) fprintf(stderr, "\n");

Expand Down Expand Up @@ -113,7 +113,8 @@ benchmark_stats_t benchmark_run(benchmark_fn fn, void* data, int run_ms) {

/**
 * Formats benchmark statistics as one comma-separated record:
 * mean, standard deviation, minimum and maximum (each with six decimals),
 * followed by the run count and the numeric value of the last result.
 *
 * @param stats  The statistics to format.
 * @param buffer Destination buffer for the NUL-terminated text.
 * @param size   Capacity of buffer in bytes; output is truncated to fit.
 */
void benchmark_format_results(benchmark_stats_t stats, char* buffer,
                              size_t size) {
  // The cast keeps "%lld" correct whichever integer type backs value.number
  // on the current platform.
  snprintf(buffer, size, "%.6f,%.6f,%.6f,%.6f,%d,%lld",
           stats.mean_ms, stats.std_dev_ms, stats.min_ms, stats.max_ms, stats.runs,
           (long long)stats.last_result.value.number);
}

0 comments on commit 34d8103

Please sign in to comment.