Skip to content

Commit

Permalink
Merge pull request NaturalNode#1 from NaturalNode/master
Browse files: browse the repository at this point in the history
Update my fork.
  • Loading branch information
Mike Amaral committed Apr 30, 2015
2 parents fd0340c + a792545 commit 3a5181c
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 12 deletions.
7 changes: 3 additions & 4 deletions lib/natural/classifiers/bayes_classifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

var PorterStemmer = require('../stemmers/porter_stemmer'),
util = require('util'),
Classifier = require('./classifier'),
ApparatusBayesClassifier = require('apparatus').BayesClassifier;
var util = require('util'),
Classifier = require('./classifier'),
ApparatusBayesClassifier = require('apparatus').BayesClassifier;

var BayesClassifier = function(stemmer, smoothing) {
var abc = new ApparatusBayesClassifier();
Expand Down
7 changes: 3 additions & 4 deletions lib/natural/classifiers/logistic_regression_classifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

var PorterStemmer = require('../stemmers/porter_stemmer'),
util = require('util'),
Classifier = require('./classifier'),
ApparatusLogisticRegressionClassifier = require('apparatus').LogisticRegressionClassifier;
var util = require('util'),
Classifier = require('./classifier'),
ApparatusLogisticRegressionClassifier = require('apparatus').LogisticRegressionClassifier;

var LogisticRegressionClassifier = function(stemmer) {
Classifier.call(this, new ApparatusLogisticRegressionClassifier(), stemmer);
Expand Down
18 changes: 14 additions & 4 deletions lib/natural/stemmers/stemmer.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,21 @@ module.exports = function() {

stemmer.tokenizeAndStem = function(text, keepStops) {
var stemmedTokens = [];

new Tokenizer().tokenize(text).forEach(function(token) {
if(keepStops || stopwords.words.indexOf(token) == -1)
var lowercaseText = text.toLowerCase();
var tokens = new Tokenizer().tokenize(lowercaseText);

if (keepStops) {
tokens.forEach(function(token) {
stemmedTokens.push(stemmer.stem(token));
});
});
}

else {
tokens.forEach(function(token) {
if (stopwords.words.indexOf(token) == -1)
stemmedTokens.push(stemmer.stem(token));
});
}

return stemmedTokens;
};
Expand Down
14 changes: 14 additions & 0 deletions spec/porter_stemmer_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ THE SOFTWARE.
*/

var stemmer = require('../lib/natural/stemmers/porter_stemmer');
var stopwords = require('../lib/natural/util/stopwords');

describe('porter_stemmer', function() {
it('should categorizeGroups', function() {
Expand Down Expand Up @@ -179,4 +180,17 @@ describe('porter_stemmer', function() {
expect('scoring stinks'.tokenizeAndStem()).toEqual(['score', 'stink']);
expect('SCORING STINKS'.tokenizeAndStem()).toEqual(['score', 'stink']);
});

// Called with no argument, tokenizeAndStem() is expected to drop stopwords
// regardless of letter case (the input mixes "My", "TO", "And" and "A") and
// to return only the stems of the remaining words.
it('should tokenize and stem ignoring stopwords', function() {
expect('My dog is very fun TO play with And another thing, he is A poodle.'.tokenizeAndStem()).toEqual(['dog', 'fun', 'plai', 'thing', 'poodl']);
});

// Joins the whole stopword list into one upper-cased string and expects an
// empty result, i.e. every stopword must still be filtered out when it is
// written in capitals.
it('should tokenize and stem ignoring all capital stopwords', function() {
var allCapitalStopwords = stopwords.words.join(' ').toUpperCase();
expect(allCapitalStopwords.tokenizeAndStem()).toEqual([]);
});

// Passing true as the argument is expected to keep stopwords: every token of
// the sentence comes back lower-cased and stemmed (e.g. 'very' -> 'veri',
// 'another' -> 'anoth'), in the original word order.
it('should tokenize and stem including stopwords', function() {
expect('My dog is very fun TO play with And another thing, he is A poodle.'.tokenizeAndStem(true)).toEqual(['my', 'dog', 'is', 'veri', 'fun', 'to', 'plai', 'with', 'and', 'anoth', 'thing', 'he', 'is', 'a', 'poodl']);
});
});

0 comments on commit 3a5181c

Please sign in to comment.