
Commit

update test cases
Forsythia-olive committed Dec 14, 2024
1 parent 3939631 commit 6c2cfdc
Showing 4 changed files with 91 additions and 51 deletions.
12 changes: 10 additions & 2 deletions src/validate_train_data.py
@@ -17,14 +17,22 @@ def validate_category_distribution(y_train, age_group_thresholds, tolerance):
Returns:
- bool: True if the distribution meets the thresholds with tolerance, False otherwise.
"""
value_counts = y_train.value_counts(normalize=True) # Get proportions
if y_train.empty:
return False

# Get the proportions of categories in y_train
value_counts = y_train.value_counts(normalize=True)

# Loop through each category and its thresholds
for category, (min_threshold, max_threshold) in age_group_thresholds.items():
proportion = value_counts.get(category, 0) # Get proportion for the category

# If the category is missing in y_train (proportion == 0), return False
if proportion == 0:
return False # Missing category should cause failure

# Check if the proportion is within the threshold range with tolerance
if not (min_threshold - tolerance <= proportion <= max_threshold + tolerance):
return False # Return False if the proportion is out of the acceptable range

return True # Return True if all categories meet the criteria
return True
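For context, here is a minimal usage sketch of the updated validate_category_distribution (not part of the commit); the values are adapted from Case 3 of testcases/test_validate_train_data.py below, and the import path assumes the repository layout used in this commit.

# Minimal usage sketch (not part of the commit): values adapted from Case 3
# of testcases/test_validate_train_data.py.
import pandas as pd

from src.validate_train_data import validate_category_distribution

y_train = pd.Series(["A", "A", "B", "B", "B", "C", "C", "C"])
thresholds = {
    "A": (0.1, 0.4),  # "A" is 2/8 = 0.25, inside the band
    "B": (0.2, 0.4),  # "B" is 3/8 = 0.375, inside the band
    "C": (0.2, 0.5),  # "C" is 3/8 = 0.375, inside the band
}

# True: every expected category is present and within its (tolerance-widened) band
print(validate_category_distribution(y_train, thresholds, tolerance=0.05))

# False: category "D" is expected but missing from y_train
thresholds["D"] = (0.1, 0.3)
print(validate_category_distribution(y_train, thresholds, tolerance=0.05))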
71 changes: 44 additions & 27 deletions testcases/test_mean_cross_validation_score.py
@@ -3,6 +3,7 @@
# date: 2024-12-10

import pytest
import unittest
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification
@@ -15,38 +16,54 @@
from src.mean_cross_validation_score import mean_cross_val_scores


def test_mean_cross_val_scores():
# Generate a simple classification dataset
X, y = make_classification(
n_samples=200, n_features=10, n_informative=5, n_redundant=0, random_state=42
)
class test_mean_cross_val_scores(unittest.TestCase):

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
def setUp(self):
# Generate a simple classification dataset
self.X, self.y = make_classification(
n_samples=200, n_features=10, n_informative=5, n_redundant=0, random_state=42
)

# Initialize models to test
rf_model = RandomForestClassifier(random_state=42)
lr_model = LogisticRegression(random_state=42, max_iter=500)
# Split the dataset
self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
self.X, self.y, test_size=0.2, random_state=42
)

# Test RandomForestClassifier
rf_scores = mean_cross_val_scores(rf_model, X_train, y_train, cv=5, scoring="accuracy")
assert isinstance(rf_scores, pd.Series), "Output should be a pandas Series."
assert "test_score" in rf_scores.index, "Expected 'test_score' in Series index."
assert rf_scores["test_score"] > 0.5, "Mean accuracy should be above 0.5 for RandomForestClassifier."
# Initialize models to test
self.rf_model = RandomForestClassifier(random_state=42)
self.lr_model = LogisticRegression(random_state=42, max_iter=500)

# Test LogisticRegression
lr_scores = mean_cross_val_scores(lr_model, X_train, y_train, cv=5, scoring="accuracy")
assert isinstance(lr_scores, pd.Series), "Output should be a pandas Series."
assert "test_score" in lr_scores.index, "Expected 'test_score' in Series index."
assert lr_scores["test_score"] > 0.5, "Mean accuracy should be above 0.5 for LogisticRegression."
def test_rf_model_cross_val_scores(self):
# Test RandomForestClassifier
rf_scores = mean_cross_val_scores(self.rf_model, self.X_train, self.y_train, cv=5, scoring="accuracy")

self.assertIsInstance(rf_scores, pd.Series, "Output should be a pandas Series.")
self.assertIn("test_score", rf_scores.index, "Expected 'test_score' in Series index.")
self.assertGreater(rf_scores["test_score"], 0.5, "Mean accuracy should be above 0.5 for RandomForestClassifier.")

# Validate the standard deviation
assert "test_score" in lr_scores.index, "Expected 'test_score' in Series index."
assert "fit_time" in rf_scores.index, "Expected 'fit_time' in Series index."
def test_lr_model_cross_val_scores(self):
# Test LogisticRegression
lr_scores = mean_cross_val_scores(self.lr_model, self.X_train, self.y_train, cv=5, scoring="accuracy")

self.assertIsInstance(lr_scores, pd.Series, "Output should be a pandas Series.")
self.assertIn("test_score", lr_scores.index, "Expected 'test_score' in Series index.")
self.assertGreater(lr_scores["test_score"], 0.5, "Mean accuracy should be above 0.5 for LogisticRegression.")

# Check for consistent mean vs. computed values
assert np.isfinite(rf_scores["test_score"]), "Mean test_score should be finite."
assert np.isfinite(lr_scores["test_score"]), "Mean test_score should be finite."
def test_standard_deviation_inclusion(self):
# Validate that the returned Series includes the expected summary fields
rf_scores = mean_cross_val_scores(self.rf_model, self.X_train, self.y_train, cv=5, scoring="accuracy")
lr_scores = mean_cross_val_scores(self.lr_model, self.X_train, self.y_train, cv=5, scoring="accuracy")

self.assertIn("test_score", lr_scores.index, "Expected 'test_score' in Series index.")
self.assertIn("fit_time", rf_scores.index, "Expected 'fit_time' in Series index.")

def test_finite_test_scores(self):
# Check that the mean test scores are finite values
rf_scores = mean_cross_val_scores(self.rf_model, self.X_train, self.y_train, cv=5, scoring="accuracy")
lr_scores = mean_cross_val_scores(self.lr_model, self.X_train, self.y_train, cv=5, scoring="accuracy")

self.assertTrue(np.isfinite(rf_scores["test_score"]), "Mean test_score should be finite.")
self.assertTrue(np.isfinite(lr_scores["test_score"]), "Mean test_score should be finite.")

if __name__ == "__main__":
pytest.main(["-v", "testcases/test_mean_cross_validation_score.py"])
unittest.main()
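The assertions above only pin down the interface of mean_cross_val_scores: it returns a pandas Series whose index includes "fit_time" and "test_score", with finite mean test scores. A sketch that satisfies that interface, assuming (not confirming) that the repository's implementation simply averages the output of scikit-learn's cross_validate, looks like this:

# Hypothetical sketch of mean_cross_val_scores, inferred from the assertions
# above; the actual src/mean_cross_validation_score.py may differ.
import pandas as pd
from sklearn.model_selection import cross_validate


def mean_cross_val_scores(model, X_train, y_train, **kwargs):
    """Return the mean of each cross_validate metric as a pandas Series."""
    scores = cross_validate(model, X_train, y_train, **kwargs)
    # cross_validate returns arrays keyed by "fit_time", "score_time" and
    # "test_score"; the column-wise mean is a Series indexed by those names.
    return pd.DataFrame(scores).mean()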
14 changes: 13 additions & 1 deletion testcases/test_validate_train_data.py
@@ -3,7 +3,7 @@
# date: 2024-12-11


#import pytest
import pytest
import pandas as pd
import os
import sys
@@ -23,10 +23,22 @@ def test_validate_category_distribution():

# Case 2: Invalid category distribution (A proportion too high)
y_train = pd.Series(["A", "A", "A", "B", "B", "C", "C", "C"])
thresholds = {
"A": (0.1, 0.3),
"B": (0.2, 0.4),
"C": (0.2, 0.5)
}
assert validate_category_distribution(y_train, thresholds, tolerance) == False, "Distribution exceeding threshold should fail."

# Case 3: Missing category
thresholds = {
"A": (0.1, 0.4),
"B": (0.2, 0.4),
"C": (0.2, 0.5),
"D": (0.1, 0.3) # Adding a missing category 'D'
}
y_train = pd.Series(["A", "A", "B", "B", "B", "C", "C", "C"])
tolerance = 0.05
assert validate_category_distribution(y_train, thresholds, tolerance) == False, "Missing category should fail."

# Case 4: All categories meet thresholds with tolerance
45 changes: 24 additions & 21 deletions testcases/test_write_csv.py
@@ -2,7 +2,7 @@
# author: Forgive Agbesi
# date: 2024-12-11


import pytest
import os
import unittest
import pandas as pd
@@ -22,6 +22,12 @@ def setUp(self):
'Column2': [4, 5, 6]
}
self.sample_df = pd.DataFrame(self.sample_data)

def test_invalid_filename_extension(self):
"""Test that a ValueError is raised if the filename doesn't end with '.csv'."""
filename = 'test_file.txt'
with self.assertRaises(ValueError):
write_csv(self.sample_df, self.test_dir, filename)

def tearDown(self):
"""Clean up by removing the test directory and files created."""
@@ -31,38 +37,35 @@ def tearDown(self):
os.remove(file_path)
os.rmdir(self.test_dir)

def test_empty_dataframe(self):
"""Test that a ValueError is raised if the DataFrame is empty."""
empty_df = pd.DataFrame()
filename = 'test_file.csv'
with self.assertRaises(ValueError):
write_csv(empty_df, self.test_dir, filename)

def test_write_valid_csv(self):
"""Test that the DataFrame is correctly saved as a CSV file."""
filename = 'test_file.csv'
write_csv(self.sample_df, self.test_dir, filename)
self.assertTrue(os.path.exists(os.path.join(self.test_dir, filename)))

def test_invalid_filename_extension(self):
"""Test that a ValueError is raised if the filename doesn't end with '.csv'."""
filename = 'test_file.txt'
with self.assertRaises(ValueError):
write_csv(self.sample_df, self.test_dir, filename)

def test_non_existing_directory(self):
"""Test that a FileNotFoundError is raised if the directory doesn't exist."""
invalid_dir = 'invalid_dir'
filename = 'test_file.csv'
with self.assertRaises(FileNotFoundError):
write_csv(self.sample_df, invalid_dir, filename)

def test_invalid_dataframe(self):
"""Test that a TypeError is raised if the input is not a pandas DataFrame."""
invalid_df = "invalid"
filename = 'test_file.csv'
with self.assertRaises(TypeError):
write_csv(invalid_df, self.test_dir, filename)
def test_empty_dataframe(self):
"""Test that a ValueError is raised if the DataFrame is empty."""
empty_df = pd.DataFrame()
filename = 'test_file.csv'
with self.assertRaises(ValueError):
write_csv(empty_df, self.test_dir, filename)

def test_non_existing_directory(self):
"""Test that a FileNotFoundError is raised if the directory doesn't exist."""
invalid_dir = 'invalid_dir'
filename = 'test_file.csv'
with self.assertRaises(FileNotFoundError):
write_csv(self.sample_df, invalid_dir, filename)

if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
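Taken together, the reordered tests pin down write_csv's expected behaviour: TypeError for non-DataFrame input, ValueError for an empty DataFrame or a filename that does not end in '.csv', FileNotFoundError for a missing directory, and otherwise a CSV written into the target directory. A sketch consistent with those expectations follows; the repository's actual implementation may differ, for example in the index=False choice, which the tests do not constrain.

# Hypothetical sketch of write_csv, inferred from the behaviours asserted in
# the tests above; the repository's real implementation may differ.
import os
import pandas as pd


def write_csv(df, directory, filename):
    """Validate inputs and write df to <directory>/<filename> as a CSV file."""
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df must be a pandas DataFrame.")
    if df.empty:
        raise ValueError("DataFrame must not be empty.")
    if not filename.endswith(".csv"):
        raise ValueError("filename must end with '.csv'.")
    if not os.path.isdir(directory):
        raise FileNotFoundError(f"Directory does not exist: {directory}")
    df.to_csv(os.path.join(directory, filename), index=False)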


