From f07739a5c1cb26ef566a2cccdd8b0d6eb79c4556 Mon Sep 17 00:00:00 2001 From: psnider Date: Tue, 24 Mar 2015 14:11:09 +0000 Subject: [PATCH] dice_coefficient supports single character strings --- lib/natural/distance/dice_coefficient.js | 34 +++++++++++++++--------- spec/dice_coefficient_spec.js | 5 ++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/lib/natural/distance/dice_coefficient.js b/lib/natural/distance/dice_coefficient.js index 52655d373..c8e85c76e 100644 --- a/lib/natural/distance/dice_coefficient.js +++ b/lib/natural/distance/dice_coefficient.js @@ -48,22 +48,32 @@ var sanitize = function (str) { // Compare two strings, and spit out a number from 0-1 var compare = function (str1, str2) { - var pairs1 = wordLetterPairs(sanitize(str1)); - var pairs2 = wordLetterPairs(sanitize(str2)); + var sanitized_str1 = sanitize(str1); + var sanitized_str2 = sanitize(str2); + var pairs1 = wordLetterPairs(sanitized_str1); + var pairs2 = wordLetterPairs(sanitized_str2); var intersection = 0, union = pairs1.length + pairs2.length; - var i, j, pair1, pair2; - for (i = 0; i < pairs1.length; i++) { - pair1 = pairs1[i]; - for (j = 0; j < pairs2.length; j++) { - pair2 = pairs2[j]; - if (pair1 == pair2) { - intersection ++; - delete pairs2[j]; - break; + if (union === 0) { + if (sanitized_str1 === sanitized_str2) { + return 1; + } else { + return 0; + } + } else { + var i, j, pair1, pair2; + for (i = 0; i < pairs1.length; i++) { + pair1 = pairs1[i]; + for (j = 0; j < pairs2.length; j++) { + pair2 = pairs2[j]; + if (pair1 == pair2) { + intersection ++; + delete pairs2[j]; + break; + } } } + return 2 * intersection / union; } - return 2 * intersection / union; }; module.exports = compare; diff --git a/spec/dice_coefficient_spec.js b/spec/dice_coefficient_spec.js index 9a86ae58b..0e98d4665 100644 --- a/spec/dice_coefficient_spec.js +++ b/spec/dice_coefficient_spec.js @@ -6,6 +6,11 @@ describe('dice', function () { expect(dice('john', 'john')).toBe(1); }); + it('should match single character words', function () { + expect(dice('a', 'a')).toBe(1); + expect(dice('a', 'b')).toBe(0); + }); + it('should handle total mis-matches', function () { expect(dice('john', 'matt')).toBe(0); });