-
Notifications
You must be signed in to change notification settings - Fork 342
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
210 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#import <Foundation/Foundation.h> | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
/** | ||
* Calculates the Levenshtein distance between two strings using the Wagner-Fischer algorithm. | ||
* | ||
* @param s1 The first string to compare. | ||
* @param s2 The second string to compare. | ||
* @return The Levenshtein distance between s1 and s2. | ||
*/ | ||
NSInteger levenshteinDistance(NSString *s1, NSString *s2); | ||
|
||
/** | ||
* Calculates the Levenshtein distance for each unordered pair of words in the given array. | ||
* | ||
* One entry is written per index pair (i, j) with i < j, iterating i first and then j, | ||
* which gives n * (n - 1) / 2 entries for n words. | ||
* | ||
* @param words The strings to compare pairwise. | ||
* @return A pointer to a malloc-allocated array of NSInteger values representing the distances; | ||
*         the caller releases it with free(). | ||
*/ | ||
NSInteger* distances(NSArray<NSString *> *words); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#import <Foundation/Foundation.h> | ||
|
||
/**
 * Computes the Levenshtein (edit) distance between two strings with a rolling
 * two-row form of the Wagner-Fischer dynamic program.
 *
 * Characters are compared as UTF-16 code units (unichar), so a character that
 * needs a surrogate pair counts as two units.
 *
 * Time:  O(m*n), where m and n are the UTF-16 lengths of the inputs.
 * Space: two rows of min(m,n)+1 NSIntegers, plus a temporary UTF-16 copy of
 *        each string whose characters Core Foundation cannot expose directly.
 *
 * @param s1 The first string.
 * @param s2 The second string.
 * @return The minimum number of single-unit insertions, deletions and
 *         substitutions needed to turn one string into the other.
 */
NSInteger levenshteinDistance(NSString *s1, NSString *s2) {
    // Cheap exits: identical strings, or one side empty.
    if ([s1 isEqualToString:s2]) {
        return 0;
    }
    if (s1.length == 0) {
        return s2.length;
    }
    if (s2.length == 0) {
        return s1.length;
    }

    // Keep the shorter string in s1 so the rolling rows are as small as possible.
    if (s2.length < s1.length) {
        NSString *longer = s1;
        s1 = s2;
        s2 = longer;
    }
    const NSUInteger shortLen = s1.length;
    const NSUInteger longLen = s2.length;

    // Try to borrow each string's internal UTF-16 storage; when that is not
    // available, copy the characters into a scratch buffer instead.
    const unichar *shortUnits = CFStringGetCharactersPtr((__bridge CFStringRef)s1);
    NSMutableData *shortCopy = nil;
    if (shortUnits == NULL) {
        shortCopy = [NSMutableData dataWithLength:shortLen * sizeof(unichar)];
        unichar *destination = (unichar *)shortCopy.mutableBytes;
        [s1 getCharacters:destination range:NSMakeRange(0, shortLen)];
        shortUnits = destination;
    }

    const unichar *longUnits = CFStringGetCharactersPtr((__bridge CFStringRef)s2);
    NSMutableData *longCopy = nil;
    if (longUnits == NULL) {
        longCopy = [NSMutableData dataWithLength:longLen * sizeof(unichar)];
        unichar *destination = (unichar *)longCopy.mutableBytes;
        [s2 getCharacters:destination range:NSMakeRange(0, longLen)];
        longUnits = destination;
    }

    // Two rolling rows stand in for the full (m+1) x (n+1) matrix.
    const NSUInteger rowBytes = (shortLen + 1) * sizeof(NSInteger);
    NSMutableData *aboveStorage = [NSMutableData dataWithLength:rowBytes];
    NSMutableData *activeStorage = [NSMutableData dataWithLength:rowBytes];
    NSInteger *above = (NSInteger *)aboveStorage.mutableBytes;
    NSInteger *active = (NSInteger *)activeStorage.mutableBytes;

    // Row 0: distance from the empty prefix of s2 to each prefix of s1.
    for (NSUInteger k = 0; k <= shortLen; k++) {
        above[k] = (NSInteger)k;
    }

    for (NSUInteger row = 1; row <= longLen; row++) {
        const unichar longUnit = longUnits[row - 1];
        active[0] = (NSInteger)row;

        for (NSUInteger col = 1; col <= shortLen; col++) {
            // Start from the diagonal (substitution, free when the units match) ...
            NSInteger best = above[col - 1];
            if (shortUnits[col - 1] != longUnit) {
                best += 1;
            }
            // ... then see whether coming from the left or from above is cheaper.
            const NSInteger fromLeft = active[col - 1] + 1;
            if (fromLeft < best) {
                best = fromLeft;
            }
            const NSInteger fromAbove = above[col] + 1;
            if (fromAbove < best) {
                best = fromAbove;
            }
            active[col] = best;
        }

        // The row just filled becomes the "above" row of the next iteration.
        NSInteger *recycled = above;
        above = active;
        active = recycled;
    }

    // After the final swap the last computed row is in `above`.
    return above[shortLen];
}
|
||
/**
 * Computes the Levenshtein distance for every unordered pair of words.
 *
 * Entries are written for each index pair (i, j) with i < j, iterating i
 * first and then j, so n words produce n * (n - 1) / 2 entries.
 *
 * @param words The strings to compare pairwise.
 * @return A malloc-allocated array holding the distances, which the caller
 *         must release with free(); NULL if the allocation fails. For fewer
 *         than two words the array is valid but contains no meaningful entries.
 */
NSInteger* distances(NSArray<NSString *> *words) {
    const NSUInteger wordCount = words.count;
    // Guard the subtraction: wordCount is unsigned, so 0 - 1 would wrap around.
    const NSUInteger pairCount = (wordCount < 2) ? 0 : (wordCount * (wordCount - 1)) / 2;

    // Allocate at least one slot so that NULL always means "allocation failed"
    // rather than an implementation-defined malloc(0) result.
    NSInteger *result = malloc(MAX(pairCount, (NSUInteger)1) * sizeof(NSInteger));
    if (result == NULL) {
        return NULL;
    }

    NSUInteger index = 0;
    for (NSUInteger i = 0; i + 1 < wordCount; i++) {
        // levenshteinDistance builds its scratch buffers with autoreleasing
        // convenience constructors; drain them once per outer row so they do
        // not pile up for the lifetime of the caller's pool.
        @autoreleasepool {
            NSString *first = words[i];
            for (NSUInteger j = i + 1; j < wordCount; j++) {
                result[index++] = levenshteinDistance(first, words[j]);
            }
        }
    }
    return result;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
#import <Foundation/Foundation.h> | ||
#import "levenshtein.h" | ||
#import "benchmark.h" | ||
|
||
/**
 * Reads words from a UTF-8 text file, splitting on newline characters.
 *
 * Failures (missing path, unreadable file, contents that are not valid UTF-8)
 * are logged and reported as an empty array rather than raised.
 *
 * @param filePath A UTF-8 encoded C-string holding the file path. May be NULL.
 * @return An NSArray of NSString objects containing each non-empty line in the
 *         file, in file order; an empty array if the file cannot be read.
 */
NSArray<NSString *> *readWords(const char *filePath) {
    // +stringWithUTF8String: raises on NULL, so reject that up front.
    if (filePath == NULL) {
        NSLog(@"Error reading file: no path given");
        return @[];
    }

    NSString *path = [NSString stringWithUTF8String:filePath];
    if (path == nil) {
        NSLog(@"Error reading file: path is not valid UTF-8");
        return @[];
    }

    NSError *error = nil;
    NSString *fileContents = [NSString stringWithContentsOfFile:path
                                                       encoding:NSUTF8StringEncoding
                                                          error:&error];
    if (!fileContents) {
        // Include the underlying reason instead of discarding the NSError.
        NSLog(@"Error reading file %@: %@", path, error.localizedDescription);
        return @[];
    }

    // Splitting on every newline character leaves empty strings for blank
    // lines and between the two halves of a CRLF pair; filter those out.
    NSArray<NSString *> *lines =
        [fileContents componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]];
    NSPredicate *nonEmpty = [NSPredicate predicateWithFormat:@"length > 0"];
    return [lines filteredArrayUsingPredicate:nonEmpty];
}
|
||
/** | ||
* A struct to pass word data to the benchmark work function. | ||
* | ||
* main() fills it in and hands its address to benchmark_run as the opaque data pointer; | ||
* work() casts that pointer back to this type. | ||
* | ||
* NOTE(review): the field is an Objective-C object pointer inside a C struct. If this file | ||
* is built with ARC, confirm the compiler in use accepts that (or mark the field | ||
* __unsafe_unretained). | ||
*/ | ||
typedef struct { | ||
// The words whose pairwise distances work() computes via distances(). | ||
NSArray<NSString *> *words; | ||
} word_data_t; | ||
|
||
/**
 * Benchmark callback passed to benchmark_run.
 *
 * Interprets `data` as a word_data_t, computes the pairwise distances for its
 * words, and returns the resulting heap array through the result's pointer
 * slot. Only `value.ptr` is set; other fields of the result are left as-is.
 *
 * NOTE(review): each call obtains a new array from distances() and this
 * callback never frees it; main() frees only the array from the final timed
 * run. Confirm whether benchmark_run releases the intermediate results.
 *
 * @param data Pointer to a word_data_t supplied by the caller of benchmark_run.
 * @return A benchmark_result_t whose value.ptr is the distances array.
 */
static benchmark_result_t work(void *data) {
    const word_data_t *input = (const word_data_t *)data;
    benchmark_result_t outcome;
    outcome.value.ptr = distances(input->words);
    return outcome;
}
|
||
/**
 * Entry point: benchmarks distances() over the words read from a file.
 *
 * Usage: <program> <runMs> <warmupMs> <wordsFile>
 *
 * Runs a warmup pass, then a timed pass, sums the distances from the last
 * timed result, stores that sum in the stats and prints the formatted stats.
 *
 * @return 0 on success; 1 on bad usage, when no words could be read, or when
 *         the benchmark yields no result array.
 */
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        if (argc < 4) {
            NSLog(@"Usage: %s <runMs> <warmupMs> <wordsFile>", argv[0]);
            return 1;
        }
        int runMs = atoi(argv[1]);
        int warmupMs = atoi(argv[2]);
        const char *wordsFilePath = argv[3];

        NSArray<NSString *> *words = readWords(wordsFilePath);
        if (!words || words.count == 0) {
            NSLog(@"No words to process.");
            return 1;
        }

        // Set up the data for the benchmark.
        word_data_t data = { words };

        // Warmup run (result is ignored).
        // NOTE(review): the array produced by the last warmup call is not
        // freed here — confirm whether benchmark_run owns it.
        benchmark_run(work, &data, warmupMs);

        // Benchmark run.
        benchmark_stats_t stats = benchmark_run(work, &data, runMs);

        NSInteger *distancesArray = (NSInteger *)stats.last_result.value.ptr;
        // Expected count based on unique pairs (i < j).
        NSUInteger count = (words.count * (words.count - 1)) / 2;

        // Never walk a missing result; a NULL pointer is only acceptable when
        // there are no pairs to read (a single word).
        if (distancesArray == NULL && count > 0) {
            NSLog(@"Benchmark produced no result.");
            return 1;
        }

        NSInteger sum = 0;
        for (NSUInteger i = 0; i < count; i++) {
            sum += distancesArray[i];
        }
        // Replace the pointer payload with the checksum that gets reported.
        stats.last_result.value.number = sum;

        char buffer[1024];
        benchmark_format_results(stats, buffer, sizeof(buffer));
        printf("%s\n", buffer);

        free(distancesArray);
    }
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters