From a29689b282e98ba5ff3f9e872ac82141eedd494b Mon Sep 17 00:00:00 2001
From: Mike Amaral <mike.amaral@gemr.com>
Date: Thu, 30 Apr 2015 11:12:23 -0400
Subject: [PATCH 1/3] Added tests demonstrating the bug where stopword
 filtering was incorrectly case-sensitive.

---
 spec/porter_stemmer_spec.js | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/spec/porter_stemmer_spec.js b/spec/porter_stemmer_spec.js
index dd413f0fb..37592a53a 100644
--- a/spec/porter_stemmer_spec.js
+++ b/spec/porter_stemmer_spec.js
@@ -21,6 +21,7 @@ THE SOFTWARE.
 */
 
 var stemmer = require('../lib/natural/stemmers/porter_stemmer');
+var stopwords = require('../lib/natural/util/stopwords');
 
 describe('porter_stemmer', function() {
 	it('should categorizeGroups', function() {
@@ -179,4 +180,13 @@ describe('porter_stemmer', function() {
 		expect('scoring stinks'.tokenizeAndStem()).toEqual(['score', 'stink']);
 		expect('SCORING STINKS'.tokenizeAndStem()).toEqual(['score', 'stink']);
 	});
+
+	it('should tokenize and stem ignoring stopwords', function() {
+		expect('My dog is very fun TO play with And another thing, he is A poodle.'.tokenizeAndStem()).toEqual(['dog', 'fun', 'plai', 'thing', 'poodl']);
+	});
+
+	it('should tokenize and stem ignoring all capital stopwords', function() {
+		var allCapitalStopwords = stopwords.words.join(' ').toUpperCase();
+		expect(allCapitalStopwords.tokenizeAndStem()).toEqual([]);
+	});
 });

From 6eedbb16a2da6c72f739a7e96aeb23b970ed93b9 Mon Sep 17 00:00:00 2001
From: Mike Amaral <mike.amaral@gemr.com>
Date: Thu, 30 Apr 2015 11:28:09 -0400
Subject: [PATCH 2/3] Ensure we lowercase all tokens before testing against the
 stopwords, and reduce the number of conditionals per function call.

---
 lib/natural/stemmers/stemmer.js | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/lib/natural/stemmers/stemmer.js b/lib/natural/stemmers/stemmer.js
index f1c2d1777..12f4cca59 100644
--- a/lib/natural/stemmers/stemmer.js
+++ b/lib/natural/stemmers/stemmer.js
@@ -40,11 +40,21 @@ module.exports = function() {
 
     stemmer.tokenizeAndStem = function(text, keepStops) {
         var stemmedTokens = [];
-        
-        new Tokenizer().tokenize(text).forEach(function(token) {
-            if(keepStops || stopwords.words.indexOf(token) == -1)
+        var lowercaseText = text.toLowerCase(); // lowercase the whole input once so every token is compared against stopwords.words in lowercase
+        var tokens = new Tokenizer().tokenize(lowercaseText);
+
+        if (keepStops) { // keepStops truthy: stem every token, no stopword lookup at all
+            tokens.forEach(function(token) {
                 stemmedTokens.push(stemmer.stem(token));
-        });
+            });
+        }
+
+        else { // keepStops falsy/omitted: stem only tokens that are absent from stopwords.words
+            tokens.forEach(function(token) {
+                if (stopwords.words.indexOf(token) == -1) // -1 means the token is not in the stopword list
+                    stemmedTokens.push(stemmer.stem(token));
+            });
+        }
         
         return stemmedTokens;
     };

From e99431ba9fc5f34b96d2968018f310e027b61875 Mon Sep 17 00:00:00 2001
From: Mike Amaral <mike.amaral@gemr.com>
Date: Thu, 30 Apr 2015 11:29:23 -0400
Subject: [PATCH 3/3] Added test to ensure that if a true flag is passed in
 indicating we want to keep stopwords, they are properly stemmed and
 included in the output.

---
 spec/porter_stemmer_spec.js | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/spec/porter_stemmer_spec.js b/spec/porter_stemmer_spec.js
index 37592a53a..a2c90bdb9 100644
--- a/spec/porter_stemmer_spec.js
+++ b/spec/porter_stemmer_spec.js
@@ -189,4 +189,8 @@ describe('porter_stemmer', function() {
 		var allCapitalStopwords = stopwords.words.join(' ').toUpperCase();
 		expect(allCapitalStopwords.tokenizeAndStem()).toEqual([]);
 	});
+
+	it('should tokenize and stem including stopwords', function() {
+		expect('My dog is very fun TO play with And another thing, he is A poodle.'.tokenizeAndStem(true)).toEqual(['my', 'dog', 'is', 'veri', 'fun', 'to', 'plai', 'with', 'and', 'anoth', 'thing', 'he', 'is', 'a', 'poodl']);
+	});
 });