From 18938da81b75bcc17315b90610ecc2b488bf0d49 Mon Sep 17 00:00:00 2001 From: "Kyle L. Jensen" Date: Tue, 14 Nov 2023 16:40:24 -0500 Subject: [PATCH] Export the IsStopWord function. Closes #27 --- english/common.go | 2 +- english/english_test.go | 4 ++-- english/stem.go | 2 +- french/common.go | 2 +- french/french_test.go | 2 +- french/stem.go | 2 +- hungarian/common.go | 4 ++-- hungarian/stem.go | 2 +- norwegian/common.go | 2 +- norwegian/norwegian_test.go | 4 ++-- norwegian/stem.go | 2 +- russian/common.go | 2 +- russian/russian_test.go | 2 +- russian/stem.go | 2 +- spanish/common.go | 2 +- spanish/spanish_test.go | 2 +- spanish/stem.go | 2 +- swedish/common.go | 2 +- swedish/stem.go | 2 +- swedish/swedish_test.go | 4 ++-- 20 files changed, 24 insertions(+), 24 deletions(-) diff --git a/english/common.go b/english/common.go index b2ae63a..e33935e 100644 --- a/english/common.go +++ b/english/common.go @@ -203,7 +203,7 @@ func stemSpecialWord(word string) (stemmed string) { // Return `true` if the input `word` is an English stop word. 
// -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", diff --git a/english/english_test.go b/english/english_test.go index be6a906..ebfd4f2 100644 --- a/english/english_test.go +++ b/english/english_test.go @@ -26,7 +26,7 @@ func Test_stopWords(t *testing.T) { "was", } for _, word := range knownTrueStopwords { - if isStopWord(word) == false { + if IsStopWord(word) == false { t.Errorf("Expected %v, to be in stopWords", word) } } @@ -39,7 +39,7 @@ func Test_stopWords(t *testing.T) { "bullschnizzle", } for _, word := range knownFalseStopwords { - if isStopWord(word) == true { + if IsStopWord(word) == true { t.Errorf("Expected %v, to be in stopWords", word) } } diff --git a/english/stem.go b/english/stem.go index 090bd71..7a46d31 100644 --- a/english/stem.go +++ b/english/stem.go @@ -13,7 +13,7 @@ func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words - if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { + if len(word) <= 2 || (stemStopwWords == false && IsStopWord(word)) { return word } diff --git a/french/common.go b/french/common.go index 7468a5d..2ef58b7 100644 --- a/french/common.go +++ b/french/common.go @@ -7,7 +7,7 @@ import ( // Return `true` if the input `word` is a French stop word. 
// -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "au", "aux", "avec", "ce", "ces", "dans", "de", "des", "du", "elle", "en", "et", "eux", "il", "je", "la", "le", "leur", diff --git a/french/french_test.go b/french/french_test.go index 3b63399..d61ff11 100644 --- a/french/french_test.go +++ b/french/french_test.go @@ -14,7 +14,7 @@ func Test_stopWords(t *testing.T) { {"eussiez", true}, {"machine", false}, } - romance.RunWordBoolTest(t, isStopWord, testCases) + romance.RunWordBoolTest(t, IsStopWord, testCases) } // Test isLowerVowel for things we know should be true diff --git a/french/stem.go b/french/stem.go index b9da5b6..e6dbbd7 100644 --- a/french/stem.go +++ b/french/stem.go @@ -13,7 +13,7 @@ func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words - if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { + if len(word) <= 2 || (stemStopwWords == false && IsStopWord(word)) { return word } diff --git a/hungarian/common.go b/hungarian/common.go index 478a5cf..b603f89 100644 --- a/hungarian/common.go +++ b/hungarian/common.go @@ -119,12 +119,12 @@ func isDoubleConsonant(rs []rune) int { return 2 } -// isStopWord returns true it the word is a stop word. +// IsStopWord returns true if the word is a stop word. 
// // # Hungarian stop word list prepared by Anna Tordai // // https://snowballstem.org/algorithms/hungarian/stop.txt -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "a", "ahogy", diff --git a/hungarian/stem.go b/hungarian/stem.go index 0c288ad..49f6161 100644 --- a/hungarian/stem.go +++ b/hungarian/stem.go @@ -48,7 +48,7 @@ func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words - if len(word) <= 2 || (!stemStopwWords && isStopWord(word)) { + if len(word) <= 2 || (!stemStopwWords && IsStopWord(word)) { return word } diff --git a/norwegian/common.go b/norwegian/common.go index 3f2d792..18b2500 100644 --- a/norwegian/common.go +++ b/norwegian/common.go @@ -34,7 +34,7 @@ func isLowerVowel(r rune) bool { // Return `true` if the input `word` is a Norwegian stop word. // -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "ut", "få", "hadde", "hva", "tilbake", "vil", "han", "meget", "men", "vi", "en", "før", "samme", "stille", "inn", "er", "kan", "makt", "ved", "forsøke", "hvis", "part", "rett", diff --git a/norwegian/norwegian_test.go b/norwegian/norwegian_test.go index 69d7017..8bd3b41 100644 --- a/norwegian/norwegian_test.go +++ b/norwegian/norwegian_test.go @@ -23,7 +23,7 @@ func Test_stopWords(t *testing.T) { "ikke", } for _, word := range knownTrueStopwords { - if isStopWord(word) == false { + if IsStopWord(word) == false { t.Errorf("Expected %v, to be in stopWords", word) } } @@ -36,7 +36,7 @@ func Test_stopWords(t *testing.T) { "bullschnizzle", } for _, word := range knownFalseStopwords { - if isStopWord(word) == true { + if IsStopWord(word) == true { t.Errorf("Expected %v, to be in stopWords", word) } } diff --git a/norwegian/stem.go b/norwegian/stem.go index eb01512..659d4ed 100644 --- a/norwegian/stem.go +++ b/norwegian/stem.go @@ -13,7 +13,7 @@ func Stem(word string, stemStopwWords bool) 
string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words - if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { + if len(word) <= 2 || (stemStopwWords == false && IsStopWord(word)) { return word } diff --git a/russian/common.go b/russian/common.go index 00ae46c..a7c09c0 100644 --- a/russian/common.go +++ b/russian/common.go @@ -21,7 +21,7 @@ func isLowerVowel(r rune) bool { // Return `true` if the input `word` is a French stop word. // -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "и", "в", "во", "не", "что", "он", "на", "я", "с", "со", "как", "а", "то", "все", "она", "так", "его", diff --git a/russian/russian_test.go b/russian/russian_test.go index 1494864..ad746e5 100644 --- a/russian/russian_test.go +++ b/russian/russian_test.go @@ -29,7 +29,7 @@ func Test_stopWords(t *testing.T) { {"химическое", false}, {"машиностроение", false}, } - romance.RunWordBoolTest(t, isStopWord, testCases) + romance.RunWordBoolTest(t, IsStopWord, testCases) } func Test_findRegions(t *testing.T) { diff --git a/russian/stem.go b/russian/stem.go index 9a2a4db..b3aaa35 100644 --- a/russian/stem.go +++ b/russian/stem.go @@ -14,7 +14,7 @@ func Stem(word string, stemStopwWords bool) string { w := snowballword.New(word) // Return small words and stop words - if len(w.RS) <= 2 || (stemStopwWords == false && isStopWord(word)) { + if len(w.RS) <= 2 || (stemStopwWords == false && IsStopWord(word)) { return word } diff --git a/spanish/common.go b/spanish/common.go index 2af8212..8825983 100644 --- a/spanish/common.go +++ b/spanish/common.go @@ -95,7 +95,7 @@ func isLowerVowel(r rune) bool { // Return `true` if the input `word` is a Spanish stop word. 
// -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "de", "la", "que", "el", "en", "y", "a", "los", "del", "se", "las", "por", "un", "para", "con", "no", "una", "su", "al", "lo", "como", diff --git a/spanish/spanish_test.go b/spanish/spanish_test.go index 720d767..19b049f 100644 --- a/spanish/spanish_test.go +++ b/spanish/spanish_test.go @@ -13,7 +13,7 @@ func Test_stopWords(t *testing.T) { {"el", true}, {"queso", false}, } - romance.RunWordBoolTest(t, isStopWord, testCases) + romance.RunWordBoolTest(t, IsStopWord, testCases) } // Test isLowerVowel for things we know should be true diff --git a/spanish/stem.go b/spanish/stem.go index 2135aa7..3953fc9 100644 --- a/spanish/stem.go +++ b/spanish/stem.go @@ -20,7 +20,7 @@ func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words - if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { + if len(word) <= 2 || (stemStopwWords == false && IsStopWord(word)) { return word } diff --git a/swedish/common.go b/swedish/common.go index 9cda98e..53e0b56 100644 --- a/swedish/common.go +++ b/swedish/common.go @@ -34,7 +34,7 @@ func isLowerVowel(r rune) bool { // Return `true` if the input `word` is a Swedish stop word. 
// -func isStopWord(word string) bool { +func IsStopWord(word string) bool { switch word { case "och", "det", "att", "i", "en", "jag", "hon", "som", "han", "på", "den", "med", "var", "sig", "för", "så", "till", "är", "men", diff --git a/swedish/stem.go b/swedish/stem.go index 69b042a..3bca57b 100644 --- a/swedish/stem.go +++ b/swedish/stem.go @@ -14,7 +14,7 @@ func Stem(word string, stemStopwWords bool) string { word = strings.ToLower(strings.TrimSpace(word)) // Return small words and stop words - if len(word) <= 2 || (stemStopwWords == false && isStopWord(word)) { + if len(word) <= 2 || (stemStopwWords == false && IsStopWord(word)) { return word } diff --git a/swedish/swedish_test.go b/swedish/swedish_test.go index c83e32f..76ed8cd 100644 --- a/swedish/swedish_test.go +++ b/swedish/swedish_test.go @@ -23,7 +23,7 @@ func Test_stopWords(t *testing.T) { "inte", } for _, word := range knownTrueStopwords { - if isStopWord(word) == false { + if IsStopWord(word) == false { t.Errorf("Expected %v, to be in stopWords", word) } } @@ -36,7 +36,7 @@ func Test_stopWords(t *testing.T) { "bullschnizzle", } for _, word := range knownFalseStopwords { - if isStopWord(word) == true { + if IsStopWord(word) == true { t.Errorf("Expected %v, to be in stopWords", word) } }