Skip to content

Commit

Permalink
expanding a bit
Browse files Browse the repository at this point in the history
  • Loading branch information
Jean-Baptiste-Camps committed Feb 14, 2024
1 parent d8e8c9c commit 5280bce
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions tests/test_main.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,43 @@
import unittest
from superstyl.preproc.tuyau import normalise
import superstyl.preproc.tuyau
import superstyl.preproc.features_extract

class DataLoading(unittest.TestCase):
    """Unit tests for the superstyl.preproc text helpers.

    Covers ``tuyau.normalise`` and ``tuyau.detect_lang`` first, then the
    lower-level ``features_extract.count_words``.
    """

    # First, testing the tuyau features
    def test_normalise(self):
        """Check normalise() with default options, keep_punct and keep_sym."""
        text = " Hello, Mr. 𓀁, how are §§ you; doing?"

        # Default call: no extra options.
        expected_default = "hello mr how are you doing"
        self.assertEqual(
            superstyl.preproc.tuyau.normalise(text),
            expected_default,
        )

        # keep_punct=True
        expected_keeppunct = "Hello, Mr. , how are SSSS you; doing?"
        self.assertEqual(
            superstyl.preproc.tuyau.normalise(text, keep_punct=True),
            expected_keeppunct,
        )

        # keep_sym=True
        expected_keepsym = "Hello, Mr. 𓀁, how are §§ you; doing?"
        self.assertEqual(
            superstyl.preproc.tuyau.normalise(text, keep_sym=True),
            expected_keepsym,
        )

    def test_detect_lang(self):
        """Check detect_lang() returns the expected code for three samples."""
        french = "Bonjour, Monsieur, comment allez-vous?"
        english = "Hello, How do you do good sir?"
        italian = "Buongiorno signore, come sta?"
        # TODO: find something that manages old languages, like fasttext did…
        self.assertEqual(superstyl.preproc.tuyau.detect_lang(french), "fr")
        self.assertEqual(superstyl.preproc.tuyau.detect_lang(english), "en")
        self.assertEqual(superstyl.preproc.tuyau.detect_lang(italian), "it")

    # Now, lower level features,
    # from features_extract
    def test_counts(self):
        """Check count_words() absolute and relative word frequencies."""
        text = "the the the the"
        count_words = superstyl.preproc.features_extract.count_words

        # Absolute counts: one word type occurring four times.
        self.assertEqual(
            count_words(text, feat_list=None, feats="words", n=1, relFreqs=False),
            {'the': 4},
        )
        # Relative frequencies: the single word type accounts for everything.
        self.assertEqual(
            count_words(text, feat_list=None, feats="words", n=1, relFreqs=True),
            {'the': 1.0},
        )

# TODO: add many more tests



if __name__ == '__main__':
Expand Down

0 comments on commit 5280bce

Please sign in to comment.