From 5280bce12c6afdd4cbfa9a75d6cfc3be6a850764 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Camps Date: Wed, 14 Feb 2024 10:38:16 +0100 Subject: [PATCH] expanding a bit --- tests/test_main.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/tests/test_main.py b/tests/test_main.py index fed79151..fb21b7b0 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,15 +1,43 @@ import unittest -from superstyl.preproc.tuyau import normalise +import superstyl.preproc.tuyau +import superstyl.preproc.features_extract class DataLoading(unittest.TestCase): + # First, testing the tuyau features def test_normalise(self): text = " Hello, Mr. 𓀁, how are §§ you; doing?" expected_default = "hello mr how are you doing" - self.assertEqual(normalise(text), expected_default) + self.assertEqual(superstyl.preproc.tuyau.normalise(text), expected_default) expected_keeppunct = "Hello, Mr. , how are SSSS you; doing?" - self.assertEqual(normalise(text, keep_punct=True), expected_keeppunct) + self.assertEqual(superstyl.preproc.tuyau.normalise(text, keep_punct=True), expected_keeppunct) expected_keepsym = "Hello, Mr. 𓀁, how are §§ you; doing?" - self.assertEqual(normalise(text, keep_sym=True), expected_keepsym) + self.assertEqual(superstyl.preproc.tuyau.normalise(text, keep_sym=True), expected_keepsym) + + def test_detect_lang(self): + french = "Bonjour, Monsieur, comment allez-vous?" + english = "Hello, How do you do good sir?" + italian = "Buongiorno signore, come sta?" + #TODO: find something that manages old languages, like fasttext did… + self.assertEqual(superstyl.preproc.tuyau.detect_lang(french), "fr") + self.assertEqual(superstyl.preproc.tuyau.detect_lang(english), "en") + self.assertEqual(superstyl.preproc.tuyau.detect_lang(italian), "it") + + # Now, lower level features, + # from features_extract + def test_counts(self): + text = "the the the the" + superstyl.preproc.features_extract.count_words(text, feat_list=None, feats = "words", n = 1, relFreqs = False) + self.assertEqual( + superstyl.preproc.features_extract.count_words(text, feat_list=None, feats = "words", n = 1, relFreqs = False), + {'the': 4} + ) + self.assertEqual( + superstyl.preproc.features_extract.count_words(text, feat_list=None, feats="words", n=1, relFreqs=True), + {'the': 1.0} + ) + + #TODO: a lot more tests + if __name__ == '__main__':