From f714ef4f155deca20f03adb0ebefbfcaa3760c19 Mon Sep 17 00:00:00 2001 From: Diane Napolitano Date: Fri, 22 Mar 2024 15:36:56 -0400 Subject: [PATCH] Cleaning up some docstring formatting and removing unnecessary property --- src/elexsolver/TransitionMatrixSolver.py | 8 +++--- src/elexsolver/TransitionSolver.py | 34 +++++++++++------------- tests/test_transition_matrix_solver.py | 12 ++++----- tests/test_transition_solver.py | 4 +-- 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/elexsolver/TransitionMatrixSolver.py b/src/elexsolver/TransitionMatrixSolver.py index 5e8925e..03da3e2 100644 --- a/src/elexsolver/TransitionMatrixSolver.py +++ b/src/elexsolver/TransitionMatrixSolver.py @@ -21,11 +21,11 @@ def __init__(self, strict: bool = True, lam: float | None = None): """ Parameters ---------- - `strict` : bool, default True - If True, solution will be constrainted so that all coefficients are >= 0, + strict : bool, default True + If `True`, solution will be constrained so that all coefficients are >= 0, <= 1, and the sum of each row equals 1. - `lam` : float, optional - `lam` != 0 will enable L2 regularization (Ridge). + lam : float, optional + `lam != 0` will enable L2 regularization (Ridge). """ super().__init__() self._strict = strict diff --git a/src/elexsolver/TransitionSolver.py b/src/elexsolver/TransitionSolver.py index a2c6db3..bb70949 100644 --- a/src/elexsolver/TransitionSolver.py +++ b/src/elexsolver/TransitionSolver.py @@ -17,17 +17,23 @@ class TransitionSolver(LinearSolver): """ def __init__(self): + """ + After model-fit, `self.coefficients` will contain + the solved coefficients, an np.ndarray matrix of float of shape + (number of columns in `X`) x (number of columns in `Y`). + Each float represents the percent of how much of row x is part of column y. 
+ """ super().__init__() def fit(self, X: np.ndarray, Y: np.ndarray, sample_weight: np.ndarray | None = None): """ Parameters ---------- - `X` : np.ndarray matrix or pandas.DataFrame of int + X : np.ndarray matrix or pandas.DataFrame of int Must have the same number of rows as `Y` but can have any number of columns greater than the number of rows. - `Y` : np.ndarray matrix or pandas.DataFrame of int + Y : np.ndarray matrix or pandas.DataFrame of int Must have the same number of rows as `X` but can have any number of columns greater than the number of rows. - `sample_weight` : list, np.ndarray, or pandas.Series of int, optional + sample_weight : list or np.ndarray or pandas.Series of int, optional Must have the same length (number of rows) as both `X` and `Y`. Returns @@ -42,7 +48,7 @@ def predict(self, X: np.ndarray) -> np.ndarray: """ Parameters ---------- - `X` : np.ndarray matrix or pandas.DataFrame of int + X : np.ndarray matrix or pandas.DataFrame of int Must have the same dimensions as the `X` supplied to `fit()`. Returns @@ -53,19 +59,11 @@ def predict(self, X: np.ndarray) -> np.ndarray: raise RuntimeError("Solver must be fit before prediction can be performed.") return X @ self.coefficients - @property - def betas(self) -> np.ndarray: + def _check_data_type(self, A: np.ndarray): """ - Returns - ------- - The solved coefficients, an np.ndarray matrix of float of shape - (number of columns in `X`) x (number of columns in `Y`). - Each float represents the percent of how much of row x is part of column y. - Will return `None` if `fit()` hasn't been called yet. + Make sure we're starting with count data which we'll standardize to percentages + by calling `self._rescale(A)` later. 
""" - return self.coefficients - - def _check_data_type(self, A: np.ndarray): if not np.all(A.astype("int64") == A): raise ValueError("Matrix must contain integers.") @@ -97,9 +95,9 @@ def _check_and_prepare_weights(self, X: np.ndarray, Y: np.ndarray, weights: np.n Parameters ---------- - `X` : np.ndarray matrix of int (same number of rows as `Y`) - `Y` : np.ndarray matrix of int (same number of rows as `X`) - `weights` : np.ndarray of int of the shape (number of rows in `X` and `Y`, 1), optional + X : np.ndarray matrix of int (same number of rows as `Y`) + Y : np.ndarray matrix of int (same number of rows as `X`) + weights : np.ndarray of int of the shape (number of rows in `X` and `Y`, 1), optional """ if weights is not None: diff --git a/tests/test_transition_matrix_solver.py b/tests/test_transition_matrix_solver.py index 4317f89..7ab73e3 100644 --- a/tests/test_transition_matrix_solver.py +++ b/tests/test_transition_matrix_solver.py @@ -44,7 +44,7 @@ def test_matrix_fit_predict(): tms = TransitionMatrixSolver().fit(X, Y) current_yhat = tms.predict(X) - np.testing.assert_allclose(expected_betas, tms.betas, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(expected_betas, tms.coefficients, rtol=RTOL, atol=ATOL) np.testing.assert_allclose(expected_yhat, current_yhat, rtol=RTOL, atol=ATOL) @@ -76,7 +76,7 @@ def test_matrix_fit_predict_with_weights(): expected_betas = np.array([[0.737329, 0.262671], [0.230589, 0.769411]]) tms = TransitionMatrixSolver().fit(X, Y, sample_weight=weights) - np.testing.assert_allclose(expected_betas, tms.betas, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(expected_betas, tms.coefficients, rtol=RTOL, atol=ATOL) def test_matrix_fit_predict_not_strict(): @@ -105,7 +105,7 @@ def test_matrix_fit_predict_not_strict(): expected_betas = np.array([[0.760451, 0.239558], [0.216624, 0.783369]]) tms = TransitionMatrixSolver(strict=False).fit(X, Y) - np.testing.assert_allclose(expected_betas, tms.betas, rtol=RTOL, atol=ATOL) + 
np.testing.assert_allclose(expected_betas, tms.coefficients, rtol=RTOL, atol=ATOL) def test_ridge_matrix_fit_predict(): @@ -134,7 +134,7 @@ def test_ridge_matrix_fit_predict(): expected_betas = np.array([[0.479416, 0.520584], [0.455918, 0.544082]]) tms = TransitionMatrixSolver(lam=1).fit(X, Y) - np.testing.assert_allclose(expected_betas, tms.betas, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(expected_betas, tms.coefficients, rtol=RTOL, atol=ATOL) def test_matrix_fit_predict_pivoted(): @@ -172,7 +172,7 @@ def test_matrix_fit_predict_pivoted(): ) tms = TransitionMatrixSolver().fit(X, Y) - np.testing.assert_allclose(expected_betas, tms.betas, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(expected_betas, tms.coefficients, rtol=RTOL, atol=ATOL) def test_matrix_fit_predict_bad_dimensions(): @@ -232,7 +232,7 @@ def test_matrix_fit_predict_pandas(): expected_betas = np.array([[0.760428, 0.239572], [0.216642, 0.783358]]) tms = TransitionMatrixSolver().fit(X, Y) - np.testing.assert_allclose(expected_betas, tms.betas, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(expected_betas, tms.coefficients, rtol=RTOL, atol=ATOL) except ImportError: # pass this test through since pandas isn't a requirement for elex-solver diff --git a/tests/test_transition_solver.py b/tests/test_transition_solver.py index 9bf3af5..2adaf03 100644 --- a/tests/test_transition_solver.py +++ b/tests/test_transition_solver.py @@ -21,9 +21,9 @@ def test_superclass_predict(): @patch.object(TransitionSolver, "__abstractmethods__", set()) -def test_superclass_get_betas(): +def test_superclass_get_coefficients(): ts = TransitionSolver() - assert ts.betas is None + assert ts.coefficients is None @patch.object(TransitionSolver, "__abstractmethods__", set())