Skip to content

Commit

Permalink
Merge pull request #21 from camspiers/stemmer-exception
Browse files Browse the repository at this point in the history
Added php stemmer exception + unit tests
  • Loading branch information
camspiers committed Jan 5, 2014
2 parents 743c646 + 9708383 commit e5e622a
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@
*/
class PhpStemmer implements NormalizerInterface
{
/**
* Language names accepted by the constructor.
*
* The constructor lowercases the requested language and throws an
* \InvalidArgumentException when it is not one of these entries.
*
* @var string[]
*/
protected $availableLanguages = array('danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian',
'italian', 'norwegian', 'porter', 'portuguese', 'romanian', 'russian',
'spanish', 'swedish', 'turkish');

/**
* Charset.
*
Expand All @@ -26,9 +35,7 @@ class PhpStemmer implements NormalizerInterface
protected $charset;

/**
* Lang (danish, dutch, english, finnish, french,
* german, hungarian, italian, norwegian, porter, portuguese,
* romanian, russian, spanish, swedish, turkish)
* Language to stem in (lowercased and validated by the constructor).
*
* @var string
*/
Expand All @@ -40,6 +47,12 @@ class PhpStemmer implements NormalizerInterface
*/
public function __construct($lang, $charset = 'utf-8')
{
    // Language names are matched case-insensitively, so normalise the
    // requested name to lower case before validating and storing it.
    $lang = strtolower($lang);

    // Strict comparison: both sides are strings, so no type juggling
    // should take part in deciding whether a language is supported.
    if (! in_array($lang, $this->availableLanguages, true)) {
        throw new \InvalidArgumentException("Invalid language $lang");
    }

    // The charset is stored upper-cased with '-' replaced by '_'
    // (for example 'utf-8' is kept as 'UTF_8').
    $this->charset = strtoupper(str_replace('-', '_', $charset));
    $this->lang = $lang;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php
namespace Camspiers\StatisticalClassifier\Normalizer\Token;

class PhpStemmerTest extends \PHPUnit_Framework_TestCase
{
    /**
     * @var PhpStemmer
     */
    protected $phpStemmer;

    /**
     * Data sets for testNormalize().
     *
     * Each row holds, in order: the input words, the expected stems, the
     * language, the charset and, optionally, the class name of the exception
     * the row is expected to raise.
     *
     * @return array
     */
    public function normalizeDataProvider()
    {
        return array(
            array(array('optimization'), array('optim'), 'english', 'utf-8'),
            array(array('optimisation'), array('optimis'), 'french', 'utf-8'),
            // 'alien' is not a supported language, so this row expects an
            // exception; its "expected stems" value is never compared.
            array(array('wzhgqkx'), array('wzhgq'), 'alien', 'utf-8', 'InvalidArgumentException'),
        );
    }

    /**
     * Checks that words are stemmed as expected for a language, or that the
     * expected exception is raised for an unsupported language.
     *
     * @covers \Camspiers\StatisticalClassifier\Normalizer\Token\PhpStemmer::__construct
     * @covers \Camspiers\StatisticalClassifier\Normalizer\Token\PhpStemmer::normalize
     * @dataProvider normalizeDataProvider
     *
     * @param array       $words             Words to normalize
     * @param array       $expected          Expected result of normalize()
     * @param string      $lang              Language passed to the stemmer
     * @param string      $charset           Charset passed to the stemmer
     * @param string|bool $expectedException Exception class name, or false when none is expected
     */
    public function testNormalize(array $words, array $expected, $lang, $charset, $expectedException = false)
    {
        if ($expectedException) {
            $this->setExpectedException($expectedException);
        }

        // Build the stemmer before looking for the extension: the language
        // check in the constructor does not call into the stemmer extension,
        // so the invalid-language data set is exercised on every machine
        // instead of being skipped where the extension is missing.
        $stemmer = new PhpStemmer($lang, $charset);

        if (! extension_loaded('stemmer')) {
            $this->markTestSkipped('stemmer PHP extension not available');
        }

        $this->assertEquals($expected, $stemmer->normalize($words));
    }
}

0 comments on commit e5e622a

Please sign in to comment.