From 1146430c9c4f9fa56e83958cdbd70f771b98edb7 Mon Sep 17 00:00:00 2001 From: Lorenzo-Perini Date: Thu, 5 Sep 2024 15:28:53 +0200 Subject: [PATCH 1/9] Including Reject Option --- README.rst | 3 + notebooks/Benchmark.ipynb | 23 ++++- pyod/models/base.py | 148 ++++++++++++++++++++++++++++++ pyod/test/test_abod.py | 12 +++ pyod/test/test_ae1svm.py | 12 +++ pyod/test/test_alad.py | 12 +++ pyod/test/test_auto_encoder.py | 10 ++ pyod/test/test_cblof.py | 12 +++ pyod/test/test_cd.py | 12 +++ pyod/test/test_cof.py | 12 +++ pyod/test/test_copod.py | 12 +++ pyod/test/test_deepsvdd.py | 12 +++ pyod/test/test_devnet.py | 12 +++ pyod/test/test_dif.py | 12 +++ pyod/test/test_ecod.py | 12 +++ pyod/test/test_feature_bagging.py | 12 +++ pyod/test/test_gmm.py | 12 +++ pyod/test/test_hbos.py | 12 +++ pyod/test/test_iforest.py | 12 +++ pyod/test/test_inne.py | 12 +++ pyod/test/test_kde.py | 12 +++ pyod/test/test_knn.py | 12 +++ pyod/test/test_kpca.py | 12 +++ pyod/test/test_lmdd.py | 12 +++ pyod/test/test_loci.py | 12 +++ pyod/test/test_loda.py | 12 +++ pyod/test/test_lof.py | 12 +++ pyod/test/test_lscp.py | 12 +++ pyod/test/test_lunar.py | 12 +++ pyod/test/test_mad.py | 12 +++ pyod/test/test_mcd.py | 12 +++ pyod/test/test_mo_gaal.py | 12 +++ pyod/test/test_ocsvm.py | 12 +++ pyod/test/test_pca.py | 12 +++ pyod/test/test_qmcd.py | 12 +++ pyod/test/test_rgraph.py | 12 +++ pyod/test/test_rod.py | 12 +++ pyod/test/test_sampling.py | 12 +++ pyod/test/test_so_gaal.py | 12 +++ pyod/test/test_so_gaal_new.py | 10 ++ pyod/test/test_sod.py | 12 +++ pyod/test/test_sos.py | 12 +++ pyod/test/test_suod.py | 12 +++ pyod/test/test_vae.py | 10 ++ pyod/test/test_xgbod.py | 12 +++ 45 files changed, 671 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 8dbc3c466..7c1e14d91 100644 --- a/README.rst +++ b/README.rst @@ -211,6 +211,7 @@ The full API Reference is available at `PyOD Documentation =1: + warnings.warn("delta must belong to (0,1). 
It's value has been set to 0.1") + delta = 0.1 + + self.rejection_threshold_ = 1- 2*np.exp(-T) + prediction = self.predict(X) + confidence = self.predict_confidence(X) + np.place(confidence, prediction == 0, 1 - confidence[prediction == 0]) + confidence = 2*abs(confidence-.5) + prediction[np.where(confidence<=self.rejection_threshold_)[0]] = -2 + + if return_stats: + expected_rejrate, ub_rejrate, ub_cost = self.compute_rejection_stats(T = T, delta = delta, + c_fp=c_fp, c_fn =c_fn, c_r = c_r) + return prediction, [expected_rejrate, ub_rejrate, ub_cost] + + return prediction + + + def compute_rejection_stats(self, T = 32, delta = 0.1, c_fp = 1, c_fn = 1, c_r = -1, verbose = False): + """Add reject option into the unsupervised detector. + This comes with guarantees: an estimate of the expected + rejection rate (return_rejectrate=True), an upper + bound of the rejection rate (return_ub_rejectrate= True), + and an upper bound on the cost (return_ub_cost=True). + + Parameters + ---------- + T: int, optional(default=32) + It allows to set the rejection threshold to 1-2exp(-T). + The higher the value of T, the more rejections are made. + + delta: float, optional (default = 0.1) + The upper bound rejection rate holds with probability 1-delta. + + c_fp, c_fn, c_r: floats (positive), optional (default = [1,1, contamination]) + costs for false positive predictions (c_fp), false negative + predictions (c_fn) and rejections (c_r). + + verbose: bool, optional (default = False) + If true, it prints the expected rejection rate, the upper bound rejection rate, + and the upper bound of the cost. 
+ + Returns + ------- + expected_rejection_rate: float, the expected rejection rate; + upperbound_rejection_rate: float, the upper bound for the rejection rate + satisfied with probability 1-delta; + upperbound_cost: float, the upper bound for the cost; + """ + + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) + + if c_r <0: + c_r = self.contamination + + if delta<=0 or delta>=1: + delta = 0.1 + + # Computing the expected rejection rate + n = len(self.decision_scores_) + n_gamma_minus1 = int(n * self.contamination) -1 + argsmin = (n_gamma_minus1, n, 1-np.exp(-T)) + argsmax = (n_gamma_minus1, n, np.exp(-T)) + q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmin).root + q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmax).root + expected_reject_rate = q2-q1 + + # Computing the upper bound for the rejection rate + right_mar = (-self.contamination * (n + 2) + n + 1) / n + (T * (n + 2)) / (np.sqrt(2 * n**3 * T)) + right_mar = min(1, right_mar) + left_mar = ( + (2 + n * (1 - self.contamination) * (n + 1)) / n**2 + - np.sqrt( + 0.5 * n**5 * ( + 2 * n * ( + -3 * self.contamination**2 + - 2 * n * (1 - self.contamination)**2 + + 4 * self.contamination - 3 + ) + + T * (n + 2)**2 - 8 + ) + ) / n**4 + ) + left_mar = max(0, left_mar) + add_term = 2 * np.sqrt(np.log(2 / delta) / (2 * n)) + upperbound_rejectrate = right_mar - left_mar + add_term + + # Computing the upper bound for the cost function + n_gamma_minus1 = int(n * self.contamination) -1 + argsmin = (n_gamma_minus1, n, 1-np.exp(-T)) + argsmax = (n_gamma_minus1, n, np.exp(-T)) + q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmin).root + q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmax).root + upperbound_cost = np.min([self.contamination,q1])*c_fp + np.min([1-q2,self.contamination])*c_fn 
+ (q2-q1)*c_r + + if verbose: + print("Expected rejection rate: ", np.round(expected_reject_rate, 4), '%') + print("Upper bound rejection rate: ", np.round(upperbound_rejectrate, 4), '%') + print("Upper bound cost: ", np.round(upperbound_cost, 4)) + + return expected_reject_rate, upperbound_rejectrate, upperbound_cost + def _predict_rank(self, X, normalized=False): """Predict the outlyingness rank of a sample by a fitted model. The method is for outlier detector score combination. diff --git a/pyod/test/test_abod.py b/pyod/test/test_abod.py index b098ba35c..466bd2eb8 100644 --- a/pyod/test/test_abod.py +++ b/pyod/test/test_abod.py @@ -103,6 +103,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_ae1svm.py b/pyod/test/test_ae1svm.py index d8cecd131..d6beeb60f 100644 --- a/pyod/test/test_ae1svm.py +++ b/pyod/test/test_ae1svm.py @@ -102,6 +102,18 @@ def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = 
self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict_score(self): self.clf.fit_predict_score(self.X_test, self.y_test) self.clf.fit_predict_score(self.X_test, self.y_test, diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py index b7969e773..e2c9ee3ed 100644 --- a/pyod/test/test_alad.py +++ b/pyod/test/test_alad.py @@ -119,6 +119,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_auto_encoder.py b/pyod/test/test_auto_encoder.py index 43a2265a9..de251b1ea 100644 --- a/pyod/test/test_auto_encoder.py +++ b/pyod/test/test_auto_encoder.py @@ -99,6 +99,16 @@ def test_prediction_proba_linear_confidence(self): self.assertEqual(confidence.shape, self.y_test.shape) self.assertInRange(confidence, 0, 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + self.assertEqual(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + self.assertInRange(expected_rejrate, 0, 1) 
+ self.assertInRange(ub_rejrate, 0, 1) + self.assertGreaterEqual(ub_cost, 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) self.assertEqual(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_cblof.py b/pyod/test/test_cblof.py index ece8511a7..b4c09e326 100644 --- a/pyod/test/test_cblof.py +++ b/pyod/test/test_cblof.py @@ -115,6 +115,18 @@ def test_prediction_proba_linear_confidence(self): assert_equal(confidence.shape, self.y_test.shape) assert (confidence.min() >= 0) assert (confidence.max() <= 1) + + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) diff --git a/pyod/test/test_cd.py b/pyod/test/test_cd.py index a8b8325d5..c8f37180e 100644 --- a/pyod/test/test_cd.py +++ b/pyod/test/test_cd.py @@ -89,6 +89,18 @@ def test_prediction_labels_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_prediction_proba_linear_confidence(self): pred_proba, confidence = self.clf.predict_proba(self.X_test, 
method='linear', diff --git a/pyod/test/test_cof.py b/pyod/test/test_cof.py index e4e612c43..1fa9a22f9 100644 --- a/pyod/test/test_cof.py +++ b/pyod/test/test_cof.py @@ -99,6 +99,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_copod.py b/pyod/test/test_copod.py index f1a45a0eb..db535faea 100644 --- a/pyod/test/test_copod.py +++ b/pyod/test/test_copod.py @@ -97,6 +97,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_deepsvdd.py b/pyod/test/test_deepsvdd.py index 1c216cf6a..e89896e4c 100644 --- a/pyod/test/test_deepsvdd.py +++ b/pyod/test/test_deepsvdd.py @@ 
-111,6 +111,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_devnet.py b/pyod/test/test_devnet.py index 47a06b0b3..aa8280ebe 100644 --- a/pyod/test/test_devnet.py +++ b/pyod/test/test_devnet.py @@ -105,6 +105,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train, self.y_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_dif.py b/pyod/test/test_dif.py index 5176436f8..901234c23 100644 --- a/pyod/test/test_dif.py +++ b/pyod/test/test_dif.py @@ -102,6 +102,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def 
test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_ecod.py b/pyod/test/test_ecod.py index 11111a219..1d5e90441 100644 --- a/pyod/test/test_ecod.py +++ b/pyod/test/test_ecod.py @@ -97,6 +97,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_feature_bagging.py b/pyod/test/test_feature_bagging.py index 089bc6899..a22683f4e 100644 --- a/pyod/test/test_feature_bagging.py +++ b/pyod/test/test_feature_bagging.py @@ -105,6 +105,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + 
assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_gmm.py b/pyod/test/test_gmm.py index 2d12e4a26..836c5565b 100644 --- a/pyod/test/test_gmm.py +++ b/pyod/test/test_gmm.py @@ -97,6 +97,18 @@ def test_prediction_proba_parameter(self): with assert_raises(ValueError): self.clf.predict_proba(self.X_test, method="something") + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_predict_rank(self): pred_socres = self.clf.decision_function(self.X_test) pred_ranks = self.clf._predict_rank(self.X_test) diff --git a/pyod/test/test_hbos.py b/pyod/test/test_hbos.py index 25bcb3626..55c1648ad 100644 --- a/pyod/test/test_hbos.py +++ b/pyod/test/test_hbos.py @@ -86,6 +86,18 @@ def test_prediction_proba_parameter(self): with assert_raises(ValueError): self.clf.predict_proba(self.X_test, method='something') + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = 
self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_iforest.py b/pyod/test/test_iforest.py index 80dca9b25..8ad4308d3 100644 --- a/pyod/test/test_iforest.py +++ b/pyod/test/test_iforest.py @@ -113,6 +113,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_inne.py b/pyod/test/test_inne.py index bb5428c06..c5c9d6845 100644 --- a/pyod/test/test_inne.py +++ b/pyod/test/test_inne.py @@ -103,6 +103,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert 
(ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_kde.py b/pyod/test/test_kde.py index 1c860c284..dccf08ea1 100644 --- a/pyod/test/test_kde.py +++ b/pyod/test/test_kde.py @@ -101,6 +101,18 @@ def test_prediction_proba_linear_confidence(self): assert confidence.min() >= 0 assert confidence.max() <= 1 + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_knn.py b/pyod/test/test_knn.py index a7c2c92d5..2d028d6c2 100644 --- a/pyod/test/test_knn.py +++ b/pyod/test/test_knn.py @@ -281,6 +281,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git 
a/pyod/test/test_kpca.py b/pyod/test/test_kpca.py index 2c309c9b8..28898b200 100644 --- a/pyod/test/test_kpca.py +++ b/pyod/test/test_kpca.py @@ -98,6 +98,18 @@ def test_prediction_proba_linear_confidence(self): assert confidence.min() >= 0 assert confidence.max() <= 1 + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_lmdd.py b/pyod/test/test_lmdd.py index 3cf5336a4..23acbd159 100644 --- a/pyod/test/test_lmdd.py +++ b/pyod/test/test_lmdd.py @@ -104,6 +104,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_loci.py b/pyod/test/test_loci.py index d96bad611..3ee58b392 100644 --- a/pyod/test/test_loci.py +++ b/pyod/test/test_loci.py @@ -98,6 +98,18 @@ def 
test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_loda.py b/pyod/test/test_loda.py index 17cb200e0..1f2800001 100644 --- a/pyod/test/test_loda.py +++ b/pyod/test/test_loda.py @@ -100,6 +100,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_lof.py b/pyod/test/test_lof.py index 0f3896d5d..42875ead5 100644 --- a/pyod/test/test_lof.py +++ b/pyod/test/test_lof.py @@ -103,6 +103,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = 
self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_lscp.py b/pyod/test/test_lscp.py index e37e7f03d..616e58056 100644 --- a/pyod/test/test_lscp.py +++ b/pyod/test/test_lscp.py @@ -125,6 +125,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_lunar.py b/pyod/test/test_lunar.py index cb5f6eed7..74c0ecfbc 100644 --- a/pyod/test/test_lunar.py +++ b/pyod/test/test_lunar.py @@ -100,6 +100,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, 
[expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_mad.py b/pyod/test/test_mad.py index 5d6bb3967..604adcde7 100644 --- a/pyod/test/test_mad.py +++ b/pyod/test/test_mad.py @@ -113,6 +113,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_mcd.py b/pyod/test/test_mcd.py index c8df2dba2..0ad59973f 100644 --- a/pyod/test/test_mcd.py +++ b/pyod/test/test_mcd.py @@ -115,6 +115,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert 
(ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_mo_gaal.py b/pyod/test/test_mo_gaal.py index 00e5f9ac1..c7ead2f49 100644 --- a/pyod/test/test_mo_gaal.py +++ b/pyod/test/test_mo_gaal.py @@ -109,6 +109,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_ocsvm.py b/pyod/test/test_ocsvm.py index 52ee52b98..89248fecf 100644 --- a/pyod/test/test_ocsvm.py +++ b/pyod/test/test_ocsvm.py @@ -113,6 +113,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) 
assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_pca.py b/pyod/test/test_pca.py index 48621751b..63e9eb8cd 100644 --- a/pyod/test/test_pca.py +++ b/pyod/test/test_pca.py @@ -106,6 +106,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_qmcd.py b/pyod/test/test_qmcd.py index 8c4e62c50..51a48a242 100644 --- a/pyod/test/test_qmcd.py +++ b/pyod/test/test_qmcd.py @@ -104,6 +104,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_rgraph.py b/pyod/test/test_rgraph.py index abd8dd16b..f0b251d49 100644 --- a/pyod/test/test_rgraph.py +++ 
b/pyod/test/test_rgraph.py @@ -111,6 +111,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_rod.py b/pyod/test/test_rod.py index a684fd6ed..451ea7578 100644 --- a/pyod/test/test_rod.py +++ b/pyod/test/test_rod.py @@ -87,6 +87,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_sampling.py b/pyod/test/test_sampling.py index d555af012..19b68931c 100644 --- a/pyod/test/test_sampling.py +++ b/pyod/test/test_sampling.py @@ -104,6 +104,18 @@ def test_prediction_proba_linear_confidence(self): assert confidence.min() >= 0 assert confidence.max() <= 1 + def 
test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_so_gaal.py b/pyod/test/test_so_gaal.py index 115bf6545..ff269e0ee 100644 --- a/pyod/test/test_so_gaal.py +++ b/pyod/test/test_so_gaal.py @@ -107,6 +107,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_so_gaal_new.py b/pyod/test/test_so_gaal_new.py index 94277c132..89651ce51 100644 --- a/pyod/test/test_so_gaal_new.py +++ b/pyod/test/test_so_gaal_new.py @@ -100,6 +100,16 @@ def test_prediction_proba_linear_confidence(self): self.assertEqual(confidence.shape, self.y_test.shape) self.assertInRange(confidence, 0, 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = 
False) + self.assertEqual(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + self.assertInRange(expected_rejrate, 0, 1) + self.assertInRange(ub_rejrate, 0, 1) + self.assertGreaterEqual(ub_cost, 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) self.assertEqual(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_sod.py b/pyod/test/test_sod.py index b12dfdf7f..a081836dd 100644 --- a/pyod/test/test_sod.py +++ b/pyod/test/test_sod.py @@ -125,6 +125,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_sos.py b/pyod/test/test_sos.py index c47adbc6a..125125d73 100644 --- a/pyod/test/test_sos.py +++ b/pyod/test/test_sos.py @@ -98,6 +98,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, 
return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_suod.py b/pyod/test/test_suod.py index 293bbb9e6..6561f1619 100644 --- a/pyod/test/test_suod.py +++ b/pyod/test/test_suod.py @@ -124,6 +124,18 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_vae.py b/pyod/test/test_vae.py index da1f7e3bf..3188ffdd8 100644 --- a/pyod/test/test_vae.py +++ b/pyod/test/test_vae.py @@ -98,6 +98,16 @@ def test_prediction_proba_linear_confidence(self): self.assertEqual(confidence.shape, self.y_test.shape) self.assertInRange(confidence, 0, 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + self.assertEqual(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + self.assertInRange(expected_rejrate, 0, 1) + self.assertInRange(ub_rejrate, 0, 1) + self.assertGreaterEqual(ub_cost, 0) + def 
test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) self.assertEqual(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_xgbod.py b/pyod/test/test_xgbod.py index 74263ed30..133eceb6b 100644 --- a/pyod/test/test_xgbod.py +++ b/pyod/test/test_xgbod.py @@ -124,6 +124,18 @@ def test_prediction_proba(self): # assert (confidence.min() >= 0) # assert (confidence.max() <= 1) + #def test_prediction_with_rejection(self): + # pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + # assert_equal(pred_labels.shape, self.y_test.shape) + + #def test_prediction_with_rejection_stats(self): + # _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + # assert (expected_rejrate >= 0) + # assert (expected_rejrate <= 1) + # assert (ub_rejrate >= 0) + # assert (ub_rejrate <= 1) + # assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train, self.y_train) assert_equal(pred_labels.shape, self.y_train.shape) From 5edee89ecfccbc49324a258c5f0adcb65e776635 Mon Sep 17 00:00:00 2001 From: Lorenzo-Perini Date: Thu, 5 Sep 2024 15:33:19 +0200 Subject: [PATCH 2/9] update 05.09 --- notebooks/Benchmark.ipynb | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/notebooks/Benchmark.ipynb b/notebooks/Benchmark.ipynb index 833704cf1..e12a5778c 100644 --- a/notebooks/Benchmark.ipynb +++ b/notebooks/Benchmark.ipynb @@ -1603,27 +1603,6 @@ }, "outputs": [], "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1642,7 +1621,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - 
"version": "3.10.14" + "version": "3.6.2" } }, "nbformat": 4, From 3842c2c224776b85306cb95ba9b9b4149cc6bbe6 Mon Sep 17 00:00:00 2001 From: Lorenzo-Perini Date: Thu, 5 Sep 2024 16:18:26 +0200 Subject: [PATCH 3/9] adding tests --- pyod/test/test_auto_encoder.py | 6 ++++-- pyod/test/test_so_gaal_new.py | 6 ++++-- pyod/test/test_vae.py | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pyod/test/test_auto_encoder.py b/pyod/test/test_auto_encoder.py index de251b1ea..a68c6de1e 100644 --- a/pyod/test/test_auto_encoder.py +++ b/pyod/test/test_auto_encoder.py @@ -105,8 +105,10 @@ def test_prediction_with_rejection(self): def test_prediction_with_rejection_stats(self): _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) - self.assertInRange(expected_rejrate, 0, 1) - self.assertInRange(ub_rejrate, 0, 1) + self.assertGreaterEqual(expected_rejrate, 0) + self.assertLessEqual(expected_rejrate, 1) + self.assertGreaterEqual(ub_rejrate, 0) + self.assertLessEqual(ub_rejrate, 1) self.assertGreaterEqual(ub_cost, 0) def test_fit_predict(self): diff --git a/pyod/test/test_so_gaal_new.py b/pyod/test/test_so_gaal_new.py index 89651ce51..66fad35ca 100644 --- a/pyod/test/test_so_gaal_new.py +++ b/pyod/test/test_so_gaal_new.py @@ -106,8 +106,10 @@ def test_prediction_with_rejection(self): def test_prediction_with_rejection_stats(self): _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) - self.assertInRange(expected_rejrate, 0, 1) - self.assertInRange(ub_rejrate, 0, 1) + self.assertGreaterEqual(expected_rejrate, 0) + self.assertLessEqual(expected_rejrate, 1) + self.assertGreaterEqual(ub_rejrate, 0) + self.assertLessEqual(ub_rejrate, 1) self.assertGreaterEqual(ub_cost, 0) def test_fit_predict(self): diff --git a/pyod/test/test_vae.py b/pyod/test/test_vae.py index 3188ffdd8..3d5de81ed 100644 --- a/pyod/test/test_vae.py +++ b/pyod/test/test_vae.py @@ -104,8 
+104,10 @@ def test_prediction_with_rejection(self): def test_prediction_with_rejection_stats(self): _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) - self.assertInRange(expected_rejrate, 0, 1) - self.assertInRange(ub_rejrate, 0, 1) + self.assertGreaterEqual(expected_rejrate, 0) + self.assertLessEqual(expected_rejrate, 1) + self.assertGreaterEqual(ub_rejrate, 0) + self.assertLessEqual(ub_rejrate, 1) self.assertGreaterEqual(ub_cost, 0) def test_fit_predict(self): From 0c53f3b5900e6cc93a509d79e42fcdb47e2ef718 Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Fri, 6 Sep 2024 09:48:25 -0700 Subject: [PATCH 4/9] polish with the rejection option --- CHANGES.txt | 3 +- pyod/models/base.py | 187 ++++++++++++++---------------- pyod/test/test_abod.py | 9 +- pyod/test/test_ae1svm.py | 9 +- pyod/test/test_alad.py | 9 +- pyod/test/test_auto_encoder.py | 9 +- pyod/test/test_base_dl.py | 28 ++--- pyod/test/test_cblof.py | 11 +- pyod/test/test_cd.py | 9 +- pyod/test/test_cof.py | 9 +- pyod/test/test_copod.py | 9 +- pyod/test/test_copod_parallel.py | 15 +++ pyod/test/test_deepsvdd.py | 9 +- pyod/test/test_devnet.py | 11 +- pyod/test/test_dif.py | 9 +- pyod/test/test_ecod.py | 9 +- pyod/test/test_ecod_parallel.py | 15 +++ pyod/test/test_feature_bagging.py | 9 +- pyod/test/test_gmm.py | 9 +- pyod/test/test_hbos.py | 9 +- pyod/test/test_iforest.py | 9 +- pyod/test/test_inne.py | 9 +- pyod/test/test_kde.py | 9 +- pyod/test/test_knn.py | 9 +- pyod/test/test_kpca.py | 9 +- pyod/test/test_lmdd.py | 9 +- pyod/test/test_loci.py | 9 +- pyod/test/test_loda.py | 9 +- pyod/test/test_lof.py | 9 +- pyod/test/test_lscp.py | 9 +- pyod/test/test_lunar.py | 9 +- pyod/test/test_mad.py | 9 +- pyod/test/test_mcd.py | 9 +- pyod/test/test_mo_gaal.py | 9 +- pyod/test/test_ocsvm.py | 9 +- pyod/test/test_pca.py | 9 +- pyod/test/test_qmcd.py | 9 +- pyod/test/test_rgraph.py | 9 +- pyod/test/test_rod.py | 9 +- pyod/test/test_sampling.py | 9 +- 
pyod/test/test_so_gaal.py | 9 +- pyod/test/test_so_gaal_new.py | 9 +- pyod/test/test_sod.py | 9 +- pyod/test/test_sos.py | 9 +- pyod/test/test_suod.py | 9 +- pyod/test/test_vae.py | 9 +- pyod/test/test_xgbod.py | 6 +- 47 files changed, 383 insertions(+), 244 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 33d4e70d8..4a7d91bcf 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -196,4 +196,5 @@ v<2.0.2>, <07/01/2024> -- Add AE1SVM. v<2.0.2>, <07/04/2024> -- Moving from TF to Torch -- reimplement ALAD. v<2.0.2>, <07/04/2024> -- Moving from TF to Torch -- reimplement anogan. v<2.0.2>, <07/06/2024> -- Complete of removing all Tensorflow and Keras code. -v<2.0.2>, <07/21/2024> -- Add DevNet. \ No newline at end of file +v<2.0.2>, <07/21/2024> -- Add DevNet. +v<2.0.3>, <09/06/2024> -- Add Reject Option in Unsupervised Anomaly Detection (#605). \ No newline at end of file diff --git a/pyod/models/base.py b/pyod/models/base.py index 4e21abf0a..a5a4880c9 100644 --- a/pyod/models/base.py +++ b/pyod/models/base.py @@ -294,29 +294,29 @@ def predict_confidence(self, X): np.place(confidence, prediction == 0, 1 - confidence[prediction == 0]) return confidence - - def predict_with_rejection(self, X, T = 32, return_stats = False, - delta = 0.1, c_fp = 1, c_fn = 1, c_r = -1): + + def predict_with_rejection(self, X, T=32, return_stats=False, + delta=0.1, c_fp=1, c_fn=1, c_r=-1): """Predict if a particular sample is an outlier or not, allowing the detector to reject (i.e., output = -2) low confidence predictions. Parameters ---------- - X : numpy array of shape (n_samples, n_features) - The input samples. + X : numpy array of shape (n_samples, n_features) + The input samples. - T : int, optional(default=32) - It allows to set the rejection threshold to 1-2exp(-T). - The higher the value of T, the more rejections are made. + T : int, optional(default=32) + It allows to set the rejection threshold to 1-2exp(-T). + The higher the value of T, the more rejections are made. 
- return_stats: bool, optional (default = False) - If true, it returns also three additional float values: - the estimated rejection rate, the upper bound rejection rate, - and the upper bound of the cost. + return_stats: bool, optional (default = False) + If true, it returns also three additional float values: + the estimated rejection rate, the upper bound rejection + rate, and the upper bound of the cost. - delta: float, optional (default = 0.1) - The upper bound rejection rate holds with probability 1-delta. + delta: float, optional (default = 0.1) + The upper bound rejection rate holds with probability 1-delta. c_fp, c_fn, c_r: floats (positive), optional (default = [1,1, contamination]) costs for false positive predictions (c_fp), false negative @@ -324,10 +324,11 @@ def predict_with_rejection(self, X, T = 32, return_stats = False, Returns ------- - outlier_labels : numpy array of shape (n_samples,) - For each observation, it tells whether it should be considered - as an outlier according to the fitted model. 0 stands for inliers, - 1 for outliers and -2 for rejection. + outlier_labels : numpy array of shape (n_samples,) + For each observation, it tells whether it should be + considered as an outlier according to the fitted + model. 0 stands for inliers, 1 for outliers and + -2 for rejection. expected_rejection_rate: float, if return_stats is True; upperbound_rejection_rate: float, if return_stats is True; @@ -335,30 +336,34 @@ def predict_with_rejection(self, X, T = 32, return_stats = False, """ check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) - if c_r <0: - warnings.warn("The cost of rejection must be positive. It has been set to the contamination rate.") + if c_r < 0: + warnings.warn( + "The cost of rejection must be positive. " + "It has been set to the contamination rate.") c_r = self.contamination - if delta<=0 or delta>=1: - warnings.warn("delta must belong to (0,1). 
It's value has been set to 0.1") + if delta <= 0 or delta >= 1: + warnings.warn( + "delta must belong to (0,1). It's value has been set to 0.1") delta = 0.1 - self.rejection_threshold_ = 1- 2*np.exp(-T) + self.rejection_threshold_ = 1 - 2 * np.exp(-T) prediction = self.predict(X) confidence = self.predict_confidence(X) np.place(confidence, prediction == 0, 1 - confidence[prediction == 0]) - confidence = 2*abs(confidence-.5) - prediction[np.where(confidence<=self.rejection_threshold_)[0]] = -2 + confidence = 2 * abs(confidence - .5) + prediction[np.where(confidence <= self.rejection_threshold_)[0]] = -2 if return_stats: - expected_rejrate, ub_rejrate, ub_cost = self.compute_rejection_stats(T = T, delta = delta, - c_fp=c_fp, c_fn =c_fn, c_r = c_r) + expected_rejrate, ub_rejrate, ub_cost = self.compute_rejection_stats( + T=T, delta=delta, + c_fp=c_fp, c_fn=c_fn, c_r=c_r) return prediction, [expected_rejrate, ub_rejrate, ub_cost] - + return prediction - - def compute_rejection_stats(self, T = 32, delta = 0.1, c_fp = 1, c_fn = 1, c_r = -1, verbose = False): + def compute_rejection_stats(self, T=32, delta=0.1, c_fp=1, c_fn=1, c_r=-1, + verbose=False): """Add reject option into the unsupervised detector. This comes with guarantees: an estimate of the expected rejection rate (return_rejectrate=True), an upper @@ -367,20 +372,21 @@ def compute_rejection_stats(self, T = 32, delta = 0.1, c_fp = 1, c_fn = 1, c_r = Parameters ---------- - T: int, optional(default=32) - It allows to set the rejection threshold to 1-2exp(-T). - The higher the value of T, the more rejections are made. + T: int, optional(default=32) + It allows to set the rejection threshold to 1-2exp(-T). + The higher the value of T, the more rejections are made. - delta: float, optional (default = 0.1) - The upper bound rejection rate holds with probability 1-delta. + delta: float, optional (default = 0.1) + The upper bound rejection rate holds with probability 1-delta. 
- c_fp, c_fn, c_r: floats (positive), optional (default = [1,1, contamination]) - costs for false positive predictions (c_fp), false negative - predictions (c_fn) and rejections (c_r). + c_fp, c_fn, c_r: floats (positive), + optional (default = [1,1, contamination]) + costs for false positive predictions (c_fp), + false negative predictions (c_fn) and rejections (c_r). - verbose: bool, optional (default = False) - If true, it prints the expected rejection rate, the upper bound rejection rate, - and the upper bound of the cost. + verbose: bool, optional (default = False) + If true, it prints the expected rejection rate, the upper + bound rejection rate, and the upper bound of the cost. Returns ------- @@ -389,59 +395,67 @@ def compute_rejection_stats(self, T = 32, delta = 0.1, c_fp = 1, c_fn = 1, c_r = satisfied with probability 1-delta; upperbound_cost: float, the upper bound for the cost; """ - + check_is_fitted(self, ['decision_scores_', 'threshold_', 'labels_']) - - if c_r <0: + + if c_r < 0: c_r = self.contamination - - if delta<=0 or delta>=1: + + if delta <= 0 or delta >= 1: delta = 0.1 - + # Computing the expected rejection rate n = len(self.decision_scores_) - n_gamma_minus1 = int(n * self.contamination) -1 - argsmin = (n_gamma_minus1, n, 1-np.exp(-T)) + n_gamma_minus1 = int(n * self.contamination) - 1 + argsmin = (n_gamma_minus1, n, 1 - np.exp(-T)) argsmax = (n_gamma_minus1, n, np.exp(-T)) - q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmin).root - q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmax).root - expected_reject_rate = q2-q1 + q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, + bracket=[0, 1], method='brentq', args=argsmin).root + q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, + bracket=[0, 1], method='brentq', args=argsmax).root + expected_reject_rate = q2 - q1 # Computing the upper bound for the rejection rate - 
right_mar = (-self.contamination * (n + 2) + n + 1) / n + (T * (n + 2)) / (np.sqrt(2 * n**3 * T)) + right_mar = (-self.contamination * (n + 2) + n + 1) / n + ( + T * (n + 2)) / (np.sqrt(2 * n ** 3 * T)) right_mar = min(1, right_mar) left_mar = ( - (2 + n * (1 - self.contamination) * (n + 1)) / n**2 - - np.sqrt( - 0.5 * n**5 * ( + (2 + n * (1 - self.contamination) * (n + 1)) / n ** 2 + - np.sqrt( + 0.5 * n ** 5 * ( 2 * n * ( - -3 * self.contamination**2 - - 2 * n * (1 - self.contamination)**2 - + 4 * self.contamination - 3 - ) - + T * (n + 2)**2 - 8 - ) - ) / n**4 + -3 * self.contamination ** 2 + - 2 * n * (1 - self.contamination) ** 2 + + 4 * self.contamination - 3 + ) + + T * (n + 2) ** 2 - 8 + ) + ) / n ** 4 ) left_mar = max(0, left_mar) add_term = 2 * np.sqrt(np.log(2 / delta) / (2 * n)) upperbound_rejectrate = right_mar - left_mar + add_term # Computing the upper bound for the cost function - n_gamma_minus1 = int(n * self.contamination) -1 - argsmin = (n_gamma_minus1, n, 1-np.exp(-T)) + n_gamma_minus1 = int(n * self.contamination) - 1 + argsmin = (n_gamma_minus1, n, 1 - np.exp(-T)) argsmax = (n_gamma_minus1, n, np.exp(-T)) - q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmin).root - q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, bracket=[0, 1], method='brentq', args=argsmax).root - upperbound_cost = np.min([self.contamination,q1])*c_fp + np.min([1-q2,self.contamination])*c_fn + (q2-q1)*c_r + q1 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, + bracket=[0, 1], method='brentq', args=argsmin).root + q2 = root_scalar(lambda p, k, n, C: binom.cdf(k, n, p) - C, + bracket=[0, 1], method='brentq', args=argsmax).root + upperbound_cost = np.min([self.contamination, q1]) * c_fp + np.min( + [1 - q2, self.contamination]) * c_fn + (q2 - q1) * c_r if verbose: - print("Expected rejection rate: ", np.round(expected_reject_rate, 4), '%') - print("Upper bound rejection rate: ", 
np.round(upperbound_rejectrate, 4), '%') + print("Expected rejection rate: ", + np.round(expected_reject_rate, 4), '%') + print("Upper bound rejection rate: ", + np.round(upperbound_rejectrate, 4), '%') print("Upper bound cost: ", np.round(upperbound_cost, 4)) return expected_reject_rate, upperbound_rejectrate, upperbound_cost - + def _predict_rank(self, X, normalized=False): """Predict the outlyingness rank of a sample by a fitted model. The method is for outlier detector score combination. @@ -518,37 +532,6 @@ def fit_predict_score(self, X, y, scoring='roc_auc_score'): return score - # def score(self, X, y, scoring='roc_auc_score'): - # """Returns the evaluation resulted on the given test data and labels. - # ROC is chosen as the default evaluation metric - # - # :param X: The input samples - # :type X: numpy array of shape (n_samples, n_features) - # - # :param y: Outlier labels of the input samples - # :type y: array, shape (n_samples,) - # - # :param scoring: Evaluation metric - # - # -' roc_auc_score': ROC score - # - 'prc_n_score': Precision @ rank n score - # :type scoring: str, optional (default='roc_auc_score') - # - # :return: Evaluation score - # :rtype: float - # """ - # check_is_fitted(self, ['decision_scores_']) - # if scoring == 'roc_auc_score': - # score = roc_auc_score(y, self.decision_function(X)) - # elif scoring == 'prc_n_score': - # score = precision_n_scores(y, self.decision_function(X)) - # else: - # raise NotImplementedError('PyOD built-in scoring only supports ' - # 'ROC and Precision @ rank n') - # - # print("{metric}: {score}".format(metric=scoring, score=score)) - # - # return score def _set_n_classes(self, y): """Set the number of classes if `y` is presented, which is not diff --git a/pyod/test/test_abod.py b/pyod/test/test_abod.py index 466bd2eb8..a8964e8e1 100644 --- a/pyod/test/test_abod.py +++ b/pyod/test/test_abod.py @@ -104,11 +104,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def 
test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_ae1svm.py b/pyod/test/test_ae1svm.py index d6beeb60f..a4d701356 100644 --- a/pyod/test/test_ae1svm.py +++ b/pyod/test/test_ae1svm.py @@ -103,11 +103,14 @@ def test_fit_predict(self): assert_equal(pred_labels.shape, self.y_train.shape) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_alad.py b/pyod/test/test_alad.py index e2c9ee3ed..5ee1a87de 100644 --- a/pyod/test/test_alad.py +++ b/pyod/test/test_alad.py @@ -120,11 +120,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) 
assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_auto_encoder.py b/pyod/test/test_auto_encoder.py index a68c6de1e..5749c4d75 100644 --- a/pyod/test/test_auto_encoder.py +++ b/pyod/test/test_auto_encoder.py @@ -100,11 +100,14 @@ def test_prediction_proba_linear_confidence(self): self.assertInRange(confidence, 0, 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) self.assertEqual(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) self.assertGreaterEqual(expected_rejrate, 0) self.assertLessEqual(expected_rejrate, 1) self.assertGreaterEqual(ub_rejrate, 0) diff --git a/pyod/test/test_base_dl.py b/pyod/test/test_base_dl.py index 80838a574..137178164 100644 --- a/pyod/test/test_base_dl.py +++ b/pyod/test/test_base_dl.py @@ -40,8 +40,8 @@ def forward(self, x): class DummyDetector(BaseDeepLearningDetector): - def __init__(self, contamination=0.1, epoch_num=1, optimizer_name='adam', - loss_func=None, criterion=None, criterion_name='mse', + def __init__(self, contamination=0.1, epoch_num=1, optimizer_name='adam', + loss_func=None, criterion=None, criterion_name='mse', verbose=1, preprocessing=True, use_compile=False): super(DummyDetector, 
self).__init__(contamination=contamination, epoch_num=epoch_num, @@ -65,14 +65,14 @@ def training_forward(self, batch_data): loss.backward() self.optimizer.step() return loss.item() - + def evaluating_forward(self, batch_data): return np.zeros(batch_data.shape[0]) - + class DummyDetector2(DummyDetector): - def __init__(self, contamination=0.1, epoch_num=1, optimizer_name='adam', - loss_func=None, criterion=None, criterion_name='mse', + def __init__(self, contamination=0.1, epoch_num=1, optimizer_name='adam', + loss_func=None, criterion=None, criterion_name='mse', verbose=1, preprocessing=True, use_compile=False): super(DummyDetector2, self).__init__(contamination=contamination, epoch_num=epoch_num, @@ -104,7 +104,7 @@ def assertHasAttr(self, obj, intended_attr): def assertNotHasAttr(self, obj, intended_attr): self.assertFalse(hasattr(obj, intended_attr)) - + def setUp(self): self.n_train = 100 self.n_test = 50 @@ -112,15 +112,15 @@ def setUp(self): self.X_train, self.X_test, self.y_train, self.y_test = generate_data( n_train=self.n_train, n_test=self.n_test, contamination=self.contamination) - + def test_init(self): dummy_clf = DummyDetector() dummy_clf.fit(self.X_train) self.assertEqual(dummy_clf.contamination, 0.1) self.assertIsInstance(dummy_clf.optimizer, torch.optim.Adam) self.assertIsInstance(dummy_clf.criterion, nn.MSELoss) - - dummy_clf = DummyDetector(contamination=0.2, optimizer_name='sgd', + + dummy_clf = DummyDetector(contamination=0.2, optimizer_name='sgd', loss_func=loss_function, criterion='mae') dummy_clf.fit(self.X_train) self.assertEqual(dummy_clf.contamination, 0.2) @@ -140,7 +140,8 @@ def test_init(self): dummy_clf.fit(self.X_train) self.assertRaises(ValueError, DummyDetector, loss_func=0) self.assertRaises(ValueError, DummyDetector, criterion=0) - self.assertRaises(ValueError, DummyDetector, criterion_name='dummy_criterion') + self.assertRaises(ValueError, DummyDetector, + criterion_name='dummy_criterion') def test_fit_decision_function(self): 
zero_scores = np.zeros(self.n_train) @@ -180,8 +181,9 @@ def test_save_load(self): self.assertTrue(os.path.exists('dummy_clf.txt')) loaded_dummy_clf = DummyDetector.load('dummy_clf.txt') - self.assertEqual(loaded_dummy_clf.decision_function(self.X_train).all(), - zero_scores.all()) + self.assertEqual( + loaded_dummy_clf.decision_function(self.X_train).all(), + zero_scores.all()) os.remove('dummy_clf.txt') diff --git a/pyod/test/test_cblof.py b/pyod/test/test_cblof.py index b4c09e326..ac4159b44 100644 --- a/pyod/test/test_cblof.py +++ b/pyod/test/test_cblof.py @@ -115,13 +115,16 @@ def test_prediction_proba_linear_confidence(self): assert_equal(confidence.shape, self.y_test.shape) assert (confidence.min() >= 0) assert (confidence.max() <= 1) - + def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_cd.py b/pyod/test/test_cd.py index c8f37180e..70a9b259e 100644 --- a/pyod/test/test_cd.py +++ b/pyod/test/test_cd.py @@ -90,11 +90,14 @@ def test_prediction_labels_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = 
self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_cof.py b/pyod/test/test_cof.py index 1fa9a22f9..2e91d4b38 100644 --- a/pyod/test/test_cof.py +++ b/pyod/test/test_cof.py @@ -100,11 +100,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_copod.py b/pyod/test/test_copod.py index db535faea..1a73c250c 100644 --- a/pyod/test/test_copod.py +++ b/pyod/test/test_copod.py @@ -98,11 +98,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert 
(expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_copod_parallel.py b/pyod/test/test_copod_parallel.py index 1cd32f3d0..22f4b094f 100644 --- a/pyod/test/test_copod_parallel.py +++ b/pyod/test/test_copod_parallel.py @@ -109,6 +109,21 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_deepsvdd.py b/pyod/test/test_deepsvdd.py index e89896e4c..94b995a0d 100644 --- a/pyod/test/test_deepsvdd.py +++ b/pyod/test/test_deepsvdd.py @@ -112,11 +112,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_devnet.py b/pyod/test/test_devnet.py index aa8280ebe..9f33b251a 100644 --- 
a/pyod/test/test_devnet.py +++ b/pyod/test/test_devnet.py @@ -22,7 +22,7 @@ from pyod.utils.data import generate_data -class TestDevNet(unittest.TestCase): +class TestDevNet(unittest.TestCase): def setUp(self): self.n_train = 3000 self.n_test = 1500 @@ -106,11 +106,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_dif.py b/pyod/test/test_dif.py index 901234c23..a2ba68f16 100644 --- a/pyod/test/test_dif.py +++ b/pyod/test/test_dif.py @@ -103,11 +103,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_ecod.py b/pyod/test/test_ecod.py index 1d5e90441..4507742c0 100644 --- a/pyod/test/test_ecod.py +++ 
b/pyod/test/test_ecod.py @@ -98,11 +98,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_ecod_parallel.py b/pyod/test/test_ecod_parallel.py index ed85772c0..2ff3c3357 100644 --- a/pyod/test/test_ecod_parallel.py +++ b/pyod/test/test_ecod_parallel.py @@ -109,6 +109,21 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.min() >= 0) assert (confidence.max() <= 1) + def test_prediction_with_rejection(self): + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) + assert_equal(pred_labels.shape, self.y_test.shape) + + def test_prediction_with_rejection_stats(self): + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) + assert (expected_rejrate >= 0) + assert (expected_rejrate <= 1) + assert (ub_rejrate >= 0) + assert (ub_rejrate <= 1) + assert (ub_cost >= 0) + def test_fit_predict(self): pred_labels = self.clf.fit_predict(self.X_train) assert_equal(pred_labels.shape, self.y_train.shape) diff --git a/pyod/test/test_feature_bagging.py b/pyod/test/test_feature_bagging.py index a22683f4e..2c9bc9770 100644 --- a/pyod/test/test_feature_bagging.py +++ b/pyod/test/test_feature_bagging.py @@ -106,11 +106,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) 
def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_gmm.py b/pyod/test/test_gmm.py index 836c5565b..985d7d63f 100644 --- a/pyod/test/test_gmm.py +++ b/pyod/test/test_gmm.py @@ -98,11 +98,14 @@ def test_prediction_proba_parameter(self): self.clf.predict_proba(self.X_test, method="something") def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_hbos.py b/pyod/test/test_hbos.py index 55c1648ad..5e037e96a 100644 --- a/pyod/test/test_hbos.py +++ b/pyod/test/test_hbos.py @@ -87,11 +87,14 @@ def test_prediction_proba_parameter(self): self.clf.predict_proba(self.X_test, method='something') def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) 
assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_iforest.py b/pyod/test/test_iforest.py index 8ad4308d3..72dba2201 100644 --- a/pyod/test/test_iforest.py +++ b/pyod/test/test_iforest.py @@ -114,11 +114,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_inne.py b/pyod/test/test_inne.py index c5c9d6845..68b9a2eba 100644 --- a/pyod/test/test_inne.py +++ b/pyod/test/test_inne.py @@ -104,11 +104,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, 
[expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_kde.py b/pyod/test/test_kde.py index dccf08ea1..432ed6533 100644 --- a/pyod/test/test_kde.py +++ b/pyod/test/test_kde.py @@ -102,11 +102,14 @@ def test_prediction_proba_linear_confidence(self): assert confidence.max() <= 1 def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_knn.py b/pyod/test/test_knn.py index 2d028d6c2..1f23b5235 100644 --- a/pyod/test/test_knn.py +++ b/pyod/test/test_knn.py @@ -282,11 +282,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_kpca.py 
b/pyod/test/test_kpca.py index 28898b200..ec543b9c5 100644 --- a/pyod/test/test_kpca.py +++ b/pyod/test/test_kpca.py @@ -99,11 +99,14 @@ def test_prediction_proba_linear_confidence(self): assert confidence.max() <= 1 def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_lmdd.py b/pyod/test/test_lmdd.py index 23acbd159..b3832ff29 100644 --- a/pyod/test/test_lmdd.py +++ b/pyod/test/test_lmdd.py @@ -105,11 +105,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_loci.py b/pyod/test/test_loci.py index 3ee58b392..1086c428a 100644 --- a/pyod/test/test_loci.py +++ b/pyod/test/test_loci.py @@ -99,11 +99,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def 
test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_loda.py b/pyod/test/test_loda.py index 1f2800001..f74fcd2d0 100644 --- a/pyod/test/test_loda.py +++ b/pyod/test/test_loda.py @@ -101,11 +101,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_lof.py b/pyod/test/test_lof.py index 42875ead5..01b044316 100644 --- a/pyod/test/test_lof.py +++ b/pyod/test/test_lof.py @@ -104,11 +104,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, 
self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_lscp.py b/pyod/test/test_lscp.py index 616e58056..d14685eda 100644 --- a/pyod/test/test_lscp.py +++ b/pyod/test/test_lscp.py @@ -126,11 +126,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_lunar.py b/pyod/test/test_lunar.py index 74c0ecfbc..994562daf 100644 --- a/pyod/test/test_lunar.py +++ b/pyod/test/test_lunar.py @@ -101,11 +101,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + 
ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_mad.py b/pyod/test/test_mad.py index 604adcde7..dde99ce8c 100644 --- a/pyod/test/test_mad.py +++ b/pyod/test/test_mad.py @@ -114,11 +114,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_mcd.py b/pyod/test/test_mcd.py index 0ad59973f..b0e8bd57a 100644 --- a/pyod/test/test_mcd.py +++ b/pyod/test/test_mcd.py @@ -116,11 +116,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_mo_gaal.py b/pyod/test/test_mo_gaal.py index 
c7ead2f49..51ff68a00 100644 --- a/pyod/test/test_mo_gaal.py +++ b/pyod/test/test_mo_gaal.py @@ -110,11 +110,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_ocsvm.py b/pyod/test/test_ocsvm.py index 89248fecf..c11cdda69 100644 --- a/pyod/test/test_ocsvm.py +++ b/pyod/test/test_ocsvm.py @@ -114,11 +114,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_pca.py b/pyod/test/test_pca.py index 63e9eb8cd..c84fd13be 100644 --- a/pyod/test/test_pca.py +++ b/pyod/test/test_pca.py @@ -107,11 +107,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def 
test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_qmcd.py b/pyod/test/test_qmcd.py index 51a48a242..ded2f6aca 100644 --- a/pyod/test/test_qmcd.py +++ b/pyod/test/test_qmcd.py @@ -105,11 +105,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_rgraph.py b/pyod/test/test_rgraph.py index f0b251d49..ebbb5e48e 100644 --- a/pyod/test/test_rgraph.py +++ b/pyod/test/test_rgraph.py @@ -112,11 +112,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) 
assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_rod.py b/pyod/test/test_rod.py index 451ea7578..48fac3239 100644 --- a/pyod/test/test_rod.py +++ b/pyod/test/test_rod.py @@ -88,11 +88,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_sampling.py b/pyod/test/test_sampling.py index 19b68931c..0e1bf6a25 100644 --- a/pyod/test/test_sampling.py +++ b/pyod/test/test_sampling.py @@ -105,11 +105,14 @@ def test_prediction_proba_linear_confidence(self): assert confidence.max() <= 1 def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, 
[expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_so_gaal.py b/pyod/test/test_so_gaal.py index ff269e0ee..e9f0ef354 100644 --- a/pyod/test/test_so_gaal.py +++ b/pyod/test/test_so_gaal.py @@ -108,11 +108,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_so_gaal_new.py b/pyod/test/test_so_gaal_new.py index 66fad35ca..2f96f85f9 100644 --- a/pyod/test/test_so_gaal_new.py +++ b/pyod/test/test_so_gaal_new.py @@ -101,11 +101,14 @@ def test_prediction_proba_linear_confidence(self): self.assertInRange(confidence, 0, 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) self.assertEqual(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) self.assertGreaterEqual(expected_rejrate, 0) 
self.assertLessEqual(expected_rejrate, 1) self.assertGreaterEqual(ub_rejrate, 0) diff --git a/pyod/test/test_sod.py b/pyod/test/test_sod.py index a081836dd..a7fb2cc23 100644 --- a/pyod/test/test_sod.py +++ b/pyod/test/test_sod.py @@ -126,11 +126,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_sos.py b/pyod/test/test_sos.py index 125125d73..94fc92ad2 100644 --- a/pyod/test/test_sos.py +++ b/pyod/test/test_sos.py @@ -99,11 +99,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_suod.py b/pyod/test/test_suod.py index 6561f1619..131ad3e3b 100644 --- a/pyod/test/test_suod.py +++ b/pyod/test/test_suod.py @@ 
-125,11 +125,14 @@ def test_prediction_proba_linear_confidence(self): assert (confidence.max() <= 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) assert_equal(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) assert (expected_rejrate >= 0) assert (expected_rejrate <= 1) assert (ub_rejrate >= 0) diff --git a/pyod/test/test_vae.py b/pyod/test/test_vae.py index 3d5de81ed..fcbcb1eea 100644 --- a/pyod/test/test_vae.py +++ b/pyod/test/test_vae.py @@ -99,11 +99,14 @@ def test_prediction_proba_linear_confidence(self): self.assertInRange(confidence, 0, 1) def test_prediction_with_rejection(self): - pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) + pred_labels = self.clf.predict_with_rejection(self.X_test, + return_stats=False) self.assertEqual(pred_labels.shape, self.y_test.shape) - + def test_prediction_with_rejection_stats(self): - _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) + _, [expected_rejrate, ub_rejrate, + ub_cost] = self.clf.predict_with_rejection(self.X_test, + return_stats=True) self.assertGreaterEqual(expected_rejrate, 0) self.assertLessEqual(expected_rejrate, 1) self.assertGreaterEqual(ub_rejrate, 0) diff --git a/pyod/test/test_xgbod.py b/pyod/test/test_xgbod.py index 133eceb6b..09a9d6c89 100644 --- a/pyod/test/test_xgbod.py +++ b/pyod/test/test_xgbod.py @@ -124,11 +124,11 @@ def test_prediction_proba(self): # assert (confidence.min() >= 0) # assert (confidence.max() <= 1) - #def test_prediction_with_rejection(self): + # def 
test_prediction_with_rejection(self): # pred_labels = self.clf.predict_with_rejection(self.X_test, return_stats = False) # assert_equal(pred_labels.shape, self.y_test.shape) - - #def test_prediction_with_rejection_stats(self): + + # def test_prediction_with_rejection_stats(self): # _, [expected_rejrate, ub_rejrate, ub_cost] = self.clf.predict_with_rejection(self.X_test, return_stats = True) # assert (expected_rejrate >= 0) # assert (expected_rejrate <= 1) From d5caaec33cd52118842c7b29b02965cb23218f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=92=D0=B0=D1=81?= =?UTF-8?q?=D0=B8=D0=BB=D1=8C=D0=B5=D0=B2?= Date: Mon, 16 Dec 2024 18:57:19 +0300 Subject: [PATCH 5/9] Fix references --- docs/index.rst | 2 +- docs/zreferences.bib | 12 +++++++++++- pyod/models/ecod.py | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 4d4697aff..80c8bb10b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -215,7 +215,7 @@ Proximity-Based SOD Subspace Outlier Detection Proximity-Based ROD Rotation-based Outlier Detection 2020 :class:`pyod.models.rod.ROD` :cite:`a-almardeny2020novel` Outlier Ensembles IForest Isolation Forest 2008 :class:`pyod.models.iforest.IForest` :cite:`a-liu2008isolation,a-liu2012isolation` Outlier Ensembles INNE Isolation-based Anomaly Detection Using Nearest-Neighbor Ensembles 2018 :class:`pyod.models.inne.INNE` :cite:`a-bandaragoda2018isolation` -Outlier Ensembles DIF Deep Isolation Forest for Anomaly Detection 2023 :class:`pyod.models.dif.DIF` :cite:`a-Xu2023Deep` +Outlier Ensembles DIF Deep Isolation Forest for Anomaly Detection 2023 :class:`pyod.models.dif.DIF` :cite:`a-xu2023dif` Outlier Ensembles FB Feature Bagging 2005 :class:`pyod.models.feature_bagging.FeatureBagging` :cite:`a-lazarevic2005feature` Outlier Ensembles LSCP LSCP: Locally Selective Combination of Parallel Outlier Ensembles 2019 :class:`pyod.models.lscp.LSCP` :cite:`a-zhao2019lscp` Outlier Ensembles 
XGBOD Extreme Boosting Based Outlier Detection **(Supervised)** 2018 :class:`pyod.models.xgbod.XGBOD` :cite:`a-zhao2018xgbod` diff --git a/docs/zreferences.bib b/docs/zreferences.bib index 6dd2a4d90..15a450c02 100644 --- a/docs/zreferences.bib +++ b/docs/zreferences.bib @@ -517,4 +517,14 @@ @inproceedings{pang2019deep booktitle={Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery \& data mining}, pages={353--362}, year={2019} -} \ No newline at end of file +} + +@inproceedings{hashemi2019filter, + title={Filtering Approaches for Dealing with Noise in Anomaly Detection}, + url={http://dx.doi.org/10.1109/CDC40024.2019.9029258}, + DOI={10.1109/cdc40024.2019.9029258}, + booktitle={2019 IEEE 58th Conference on Decision and Control (CDC)}, + publisher={IEEE}, + author={Hashemi, Navid and German, Eduardo Verdugo and Pena Ramirez, Jonatan and Ruths, Justin}, + year={2019}, + month=dec, pages={5356--5361} } diff --git a/pyod/models/ecod.py b/pyod/models/ecod.py index 6b758f811..c1a3b3c59 100644 --- a/pyod/models/ecod.py +++ b/pyod/models/ecod.py @@ -55,7 +55,7 @@ class ECOD(BaseDetector): Cumulative Distribution Functions (ECOD) ECOD is a parameter-free, highly interpretable outlier detection algorithm based on empirical CDF functions. - See :cite:`Li2021ecod` for details. + See :cite:`li2021ecod` for details. 
Parameters ---------- From 3d27f53f9af8beeb0423a96f75d2ad257803e0ce Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Fri, 20 Dec 2024 01:06:08 -0800 Subject: [PATCH 6/9] fix doc issue --- docs/conf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 7513eb57d..434ac7f14 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -50,8 +50,6 @@ 'sphinx.ext.imgmath', 'sphinx.ext.viewcode', 'sphinxcontrib.bibtex', - # 'sphinx.ext.napoleon', - # 'sphinx_rtd_theme', ] bibtex_bibfiles = ['zreferences.bib'] @@ -173,4 +171,7 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None) +} + From e6799bdfdb706a644e365094c702318d675ba2f5 Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Fri, 20 Dec 2024 01:31:19 -0800 Subject: [PATCH 7/9] first set of doc polish --- README.rst | 33 ++++++++++++++++++++++++++------- docs/index.rst | 32 +++++++++++++++++++++++++------- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/README.rst b/README.rst index 7c1e14d91..0400ca30a 100644 --- a/README.rst +++ b/README.rst @@ -57,20 +57,27 @@ Python Outlier Detection (PyOD) Read Me First ^^^^^^^^^^^^^ -Welcome to PyOD, a comprehensive but easy-to-use Python library for detecting anomalies in multivariate data. Whether you're tackling a small-scale project or large datasets, PyOD offers a range of algorithms to suit your needs. +Welcome to PyOD, a well-developed and easy-to-use Python library for detecting anomalies in multivariate data. Whether you are working with a small-scale project or large datasets, PyOD provides a range of algorithms to fit your needs. -* **For time-series outlier detection**, please use `TODS `_. 
+**PyOD Version 2 is now available** (`Paper `_), featuring: -* **For graph outlier detection**, please use `PyGOD `_. +* **Expanded Deep Learning Support**: Integrates 12 modern neural models into a single PyTorch-based framework, bringing the total number of outlier detection methods to 45. +* **Enhanced Performance and Ease of Use**: Models are optimized for efficiency and consistent performance across different datasets. +* **LLM-based Model Selection**: Automated model selection guided by a large language model reduces manual tuning and assists users who may have limited experience with outlier detection. -* **Performance Comparison & Datasets**: We have a 45-page, comprehensive `anomaly detection benchmark paper `_. The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 57 benchmark datasets. +**Additional Resources**: -* **Learn more about anomaly detection** at `Anomaly Detection Resources `_ +* **Time-series Outlier Detection**: `TODS `_ +* **Graph Outlier Detection**: `PyGOD `_ +* **Performance Comparison & Datasets**: Our 45-page `anomaly detection benchmark paper `_ and `ADBench `_, comparing 30 algorithms on 57 datasets +* **PyOD on Distributed Systems**: `PyOD on Databricks `_ +* **Learn More**: `Anomaly Detection Resources `_ -* **PyOD on Distributed Systems**: you can also run `PyOD on databricks `_. +**Check out our latest research in 2025 on LLM-based anomaly detection**: `AD-LLM: Benchmarking Large Language Models for Anomaly Detection `_. ---- + About PyOD ^^^^^^^^^^ @@ -106,7 +113,18 @@ Alternatively, explore `MetaOD `_ for a data **Citing PyOD**: -`PyOD paper `_ is published in `Journal of Machine Learning Research (JMLR) `_ (MLOSS track). 
If you use PyOD in a scientific publication, we would appreciate citations to the following paper:: +If you use PyOD in a scientific publication, we would appreciate citations to the following paper(s): + +`PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection `_ is available as a preprint:: + + @article{zhao2024pyod2, + author = {Chen, Sihan and Qian, Zhuangzhuang and Siu, Wingchun and Hu, Xingcan and Li, Jiaqi and Li, Shawn and Qin, Yuehan and Yang, Tiankai and Xiao, Zhuo and Ye, Wanghao and Zhang, Yichi and Dong, Yushun and Zhao, Yue}, + title = {PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection}, + journal = {arXiv preprint arXiv:2412.12154}, + year = {2024} + } + +`PyOD paper `_ is published in `Journal of Machine Learning Research (JMLR) `_ (MLOSS track):: @article{zhao2019pyod, author = {Zhao, Yue and Nasrullah, Zain and Li, Zheng}, @@ -123,6 +141,7 @@ or:: Zhao, Y., Nasrullah, Z. and Li, Z., 2019. PyOD: A Python Toolbox for Scalable Outlier Detection. Journal of machine learning research (JMLR), 20(96), pp.1-7. + For a broader perspective on anomaly detection, see our NeurIPS papers `ADBench: Anomaly Detection Benchmark Paper `_ and `ADGym: Design Choices for Deep Anomaly Detection `_:: @article{han2022adbench, diff --git a/docs/index.rst b/docs/index.rst index 80c8bb10b..a40a5d7ee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -67,17 +67,23 @@ Welcome to PyOD documentation! Read Me First ^^^^^^^^^^^^^ -Welcome to PyOD, a comprehensive but easy-to-use Python library for detecting anomalies in multivariate data. Whether you're tackling a small-scale project or large datasets, PyOD offers a range of algorithms to suit your needs. +Welcome to PyOD, a comprehensive but easy-to-use Python library for detecting anomalies in multivariate data. 
Whether you are working with a small-scale project or large datasets, PyOD provides a range of algorithms to suit your needs. -* **For time-series outlier detection**, please use `TODS `_. +**PyOD Version 2 is now available** (`Paper `_), featuring: -* **For graph outlier detection**, please use `PyGOD `_. +* **Expanded Deep Learning Support**: Integrates 12 modern neural models into a single PyTorch-based framework, bringing the total number of outlier detection methods to 45. +* **Enhanced Performance and Ease of Use**: Models are optimized for efficiency and consistent performance across different datasets. +* **LLM-based Model Selection**: Automated model selection guided by a large language model reduces manual tuning and assists users who may have limited experience with outlier detection. -* **Performance Comparison & Datasets**: We have a 45-page, comprehensive `anomaly detection benchmark paper `_. The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 57 benchmark datasets. +**Additional Resources**: -* **Learn more about anomaly detection** at `Anomaly Detection Resources `_ +* **Time-series Outlier Detection**: `TODS `_ +* **Graph Outlier Detection**: `PyGOD `_ +* **Performance Comparison & Datasets**: We have a 45-page, comprehensive `anomaly detection benchmark paper `_. The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 57 benchmark datasets. +* **PyOD on Distributed Systems**: You can also run `PyOD on Databricks `_ +* **Learn More**: `Anomaly Detection Resources `_ -* **PyOD on Distributed Systems**: you can also run `PyOD on databricks `_. +**Check out our latest research on LLM-based anomaly detection**: `AD-LLM: Benchmarking Large Language Models for Anomaly Detection `_. ---- @@ -116,7 +122,18 @@ Alternatively, explore `MetaOD `_ for a data **Citing PyOD**: -`PyOD paper `_ is published in `Journal of Machine Learning Research (JMLR) `_ (MLOSS track). 
If you use PyOD in a scientific publication, we would appreciate citations to the following paper:: +If you use PyOD in a scientific publication, we would appreciate citations to the following paper(s): + +`PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection `_ is available as a preprint:: + + @article{zhao2024pyod2, + author = {Chen, Sihan and Qian, Zhuangzhuang and Siu, Wingchun and Hu, Xingcan and Li, Jiaqi and Li, Shawn and Qin, Yuehan and Yang, Tiankai and Xiao, Zhuo and Ye, Wanghao and Zhang, Yichi and Dong, Yushun and Zhao, Yue}, + title = {PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection}, + journal = {arXiv preprint arXiv:2412.12154}, + year = {2024} + } + +`PyOD paper `_ is published in `Journal of Machine Learning Research (JMLR) `_ (MLOSS track):: @article{zhao2019pyod, author = {Zhao, Yue and Nasrullah, Zain and Li, Zheng}, @@ -133,6 +150,7 @@ or:: Zhao, Y., Nasrullah, Z. and Li, Z., 2019. PyOD: A Python Toolbox for Scalable Outlier Detection. Journal of machine learning research (JMLR), 20(96), pp.1-7. 
+ For a broader perspective on anomaly detection, see our NeurIPS papers `ADBench: Anomaly Detection Benchmark Paper `_ and `ADGym: Design Choices for Deep Anomaly Detection `_:: @article{han2022adbench, From 8797387fb94519a3006b52b9d86b7fb674758753 Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Fri, 20 Dec 2024 22:17:36 -0800 Subject: [PATCH 8/9] fix doc issue --- README.rst | 15 ++++++++++----- docs/index.rst | 13 +++++++------ docs/zreferences.bib | 21 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 0400ca30a..5b8f87473 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -Python Outlier Detection (PyOD) -=============================== +Python Outlier Detection (PyOD) V2 +================================== **Deployment & Documentation & Stats & License** @@ -59,7 +59,7 @@ Read Me First Welcome to PyOD, a well-developed and easy-to-use Python library for detecting anomalies in multivariate data. Whether you are working with a small-scale project or large datasets, PyOD provides a range of algorithms to fit your needs. -**PyOD Version 2 is now available** (`Paper `_), featuring: +**PyOD Version 2 is now available** (`Paper `_) [#Chen2024PyOD]_, featuring: * **Expanded Deep Learning Support**: Integrates 12 modern neural models into a single PyTorch-based framework, bringing the total number of outlier detection methods to 45. * **Enhanced Performance and Ease of Use**: Models are optimized for efficiency and consistent performance across different datasets. 
@@ -67,13 +67,14 @@ Welcome to PyOD, a well-developed and easy-to-use Python library for detecting a **Additional Resources**: +* **NLP Anomaly Detection**: `NLP-ADBench `_ provides both NLP anomaly detection datasets and algorithms * **Time-series Outlier Detection**: `TODS `_ * **Graph Outlier Detection**: `PyGOD `_ * **Performance Comparison & Datasets**: Our 45-page `anomaly detection benchmark paper `_ and `ADBench `_, comparing 30 algorithms on 57 datasets * **PyOD on Distributed Systems**: `PyOD on Databricks `_ * **Learn More**: `Anomaly Detection Resources `_ -**Check out our latest research in 2025 on LLM-based anomaly detection**: `AD-LLM: Benchmarking Large Language Models for Anomaly Detection `_. +**Check out our latest research in 2025 on LLM-based anomaly detection** [#Yang2024ad]_: `AD-LLM: Benchmarking Large Language Models for Anomaly Detection `_. ---- @@ -83,7 +84,7 @@ About PyOD PyOD, established in 2017, has become a go-to **Python library** for **detecting anomalous/outlying objects** in multivariate data. This exciting yet challenging field is commonly referred to as `Outlier Detection `_ or `Anomaly Detection `_. -PyOD includes more than 50 detection algorithms, from classical LOF (SIGMOD 2000) to the cutting-edge ECOD and DIF (TKDE 2022 and 2023). Since 2017, PyOD has been successfully used in numerous academic research projects and commercial products with more than `22 million downloads `_. It is also well acknowledged by the machine learning community with various dedicated posts/tutorials, including `Analytics Vidhya `_, `KDnuggets `_, and `Towards Data Science `_. +PyOD includes more than 50 detection algorithms, from classical LOF (SIGMOD 2000) to the cutting-edge ECOD and DIF (TKDE 2022 and 2023). Since 2017, PyOD has been successfully used in numerous academic research projects and commercial products with more than `26 million downloads `_. 
It is also well acknowledged by the machine learning community with various dedicated posts/tutorials, including `Analytics Vidhya `_, `KDnuggets `_, and `Towards Data Science `_. **PyOD is featured for**: @@ -537,6 +538,8 @@ Reference .. [#Cook1977Detection] Cook, R.D., 1977. Detection of influential observation in linear regression. Technometrics, 19(1), pp.15-18. +.. [#Chen2024PyOD] Chen, S., Qian, Z., Siu, W., Hu, X., Li, J., Li, S., Qin, Y., Yang, T., Xiao, Z., Ye, W. and Zhang, Y., 2024. PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection. arXiv preprint arXiv:2412.12154. + .. [#Fang2001Wrap] Fang, K.T. and Ma, C.X., 2001. Wrap-around L2-discrepancy of random sampling, Latin hypercube and uniform designs. Journal of complexity, 17(4), pp.608-624. .. [#Goldstein2012Histogram] Goldstein, M. and Dengel, A., 2012. Histogram-based outlier score (hbos): A fast unsupervised anomaly detection algorithm. In *KI-2012: Poster and Demo Track*\ , pp.59-63. @@ -609,6 +612,8 @@ Reference .. [#Xu2023Deep] Xu, H., Pang, G., Wang, Y., Wang, Y., 2023. Deep isolation forest for anomaly detection. *IEEE Transactions on Knowledge and Data Engineering*. +.. [#Yang2024ad] Yang, T., Nian, Y., Li, S., Xu, R., Li, Y., Li, J., Xiao, Z., Hu, X., Rossi, R., Ding, K. and Hu, X., 2024. AD-LLM: Benchmarking Large Language Models for Anomaly Detection. arXiv preprint arXiv:2412.11142. + .. [#You2017Provable] You, C., Robinson, D.P. and Vidal, R., 2017. Provable self-representation based outlier detection in a union of subspaces. In Proceedings of the IEEE conference on computer vision and pattern recognition. .. [#Zenati2018Adversarially] Zenati, H., Romain, M., Foo, C.S., Lecouat, B. and Chandrasekhar, V., 2018, November. Adversarially learned anomaly detection. In 2018 IEEE International conference on data mining (ICDM) (pp. 727-736). IEEE. 
diff --git a/docs/index.rst b/docs/index.rst index a40a5d7ee..230bde030 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,8 +3,8 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to PyOD documentation! -============================== +Welcome to PyOD V2 documentation! +================================= **Deployment & Documentation & Stats & License** @@ -69,7 +69,7 @@ Read Me First Welcome to PyOD, a comprehensive but easy-to-use Python library for detecting anomalies in multivariate data. Whether you are working with a small-scale project or large datasets, PyOD provides a range of algorithms to suit your needs. -**PyOD Version 2 is now available** (`Paper `_), featuring: +**PyOD Version 2 is now available** (`Paper `_) :cite:`a-chen2024pyod`, featuring: * **Expanded Deep Learning Support**: Integrates 12 modern neural models into a single PyTorch-based framework, bringing the total number of outlier detection methods to 45. * **Enhanced Performance and Ease of Use**: Models are optimized for efficiency and consistent performance across different datasets. @@ -77,13 +77,14 @@ Welcome to PyOD, a comprehensive but easy-to-use Python library for detecting an **Additional Resources**: +* **NLP Anomaly Detection**: `NLP-ADBench `_ provides both NLP anomaly detection datasets and algorithms :cite:`a-li2024nlp` * **Time-series Outlier Detection**: `TODS `_ * **Graph Outlier Detection**: `PyGOD `_ * **Performance Comparison & Datasets**: We have a 45-page, comprehensive `anomaly detection benchmark paper `_. The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 57 benchmark datasets. * **PyOD on Distributed Systems**: You can also run `PyOD on Databricks `_ * **Learn More**: `Anomaly Detection Resources `_ -**Check out our latest research on LLM-based anomaly detection**: `AD-LLM: Benchmarking Large Language Models for Anomaly Detection `_. 
+**Check out our latest research on LLM-based anomaly detection** :cite:`a-yang2024ad`: `AD-LLM: Benchmarking Large Language Models for Anomaly Detection `_. ---- @@ -92,7 +93,7 @@ About PyOD PyOD, established in 2017, has become a go-to **Python library** for **detecting anomalous/outlying objects** in multivariate data. This exciting yet challenging field is commonly referred to as `Outlier Detection `_ or `Anomaly Detection `_. -PyOD includes more than 50 detection algorithms, from classical LOF (SIGMOD 2000) to the cutting-edge ECOD and DIF (TKDE 2022 and 2023). Since 2017, PyOD has been successfully used in numerous academic research projects and commercial products with more than `22 million downloads `_. It is also well acknowledged by the machine learning community with various dedicated posts/tutorials, including `Analytics Vidhya `_, `KDnuggets `_, and `Towards Data Science `_. +PyOD includes more than 50 detection algorithms, from classical LOF (SIGMOD 2000) to the cutting-edge ECOD and DIF (TKDE 2022 and 2023). Since 2017, PyOD has been successfully used in numerous academic research projects and commercial products with more than `26 million downloads `_. It is also well acknowledged by the machine learning community with various dedicated posts/tutorials, including `Analytics Vidhya `_, `KDnuggets `_, and `Towards Data Science `_. **PyOD is featured for**: @@ -176,7 +177,7 @@ For a broader perspective on anomaly detection, see our NeurIPS papers `ADBench: ADBench Benchmark and Datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We just released a 45-page, the most comprehensive `ADBench: Anomaly Detection Benchmark `_ [#Han2022ADBench]_. +We just released a 45-page, the most comprehensive `ADBench: Anomaly Detection Benchmark `_ :cite:`a-han2022adbench`. The fully `open-sourced ADBench `_ compares 30 anomaly detection algorithms on 57 benchmark datasets. 
The organization of **ADBench** is provided below: diff --git a/docs/zreferences.bib b/docs/zreferences.bib index 15a450c02..3181c6f97 100644 --- a/docs/zreferences.bib +++ b/docs/zreferences.bib @@ -528,3 +528,24 @@ @inproceedings{hashemi2019filter author={Hashemi, Navid and German, Eduardo Verdugo and Pena Ramirez, Jonatan and Ruths, Justin}, year={2019}, month=dec, pages={5356--5361} } + +@article{yang2024ad, + title={AD-LLM: Benchmarking Large Language Models for Anomaly Detection}, + author={Yang, Tiankai and Nian, Yi and Li, Shawn and Xu, Ruiyao and Li, Yuangang and Li, Jiaqi and Xiao, Zhuo and Hu, Xiyang and Rossi, Ryan and Ding, Kaize and others}, + journal={arXiv preprint arXiv:2412.11142}, + year={2024} +} + +@article{chen2024pyod, + title={PyOD 2: A Python Library for Outlier Detection with LLM-powered Model Selection}, + author={Chen, Sihan and Qian, Zhuangzhuang and Siu, Wingchun and Hu, Xingcan and Li, Jiaqi and Li, Shawn and Qin, Yuehan and Yang, Tiankai and Xiao, Zhuo and Ye, Wanghao and others}, + journal={arXiv preprint arXiv:2412.12154}, + year={2024} +} + +@article{li2024nlp, + title={NLP-ADBench: NLP Anomaly Detection Benchmark}, + author={Li, Yuangang and Li, Jiaqi and Xiao, Zhuo and Yang, Tiankai and Nian, Yi and Hu, Xiyang and Zhao, Yue}, + journal={arXiv preprint arXiv:2412.04784}, + year={2024} +} From b919bac80417c4ec51a51178321aa0708b74b7a2 Mon Sep 17 00:00:00 2001 From: Yue Zhao Date: Fri, 20 Dec 2024 22:18:26 -0800 Subject: [PATCH 9/9] fix doc issue --- CHANGES.txt | 3 ++- pyod/version.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 4a7d91bcf..1a22c1182 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -197,4 +197,5 @@ v<2.0.2>, <07/04/2024> -- Moving from TF to Torch -- reimplement ALAD. v<2.0.2>, <07/04/2024> -- Moving from TF to Torch -- reimplement anogan. v<2.0.2>, <07/06/2024> -- Complete of removing all Tensorflow and Keras code. v<2.0.2>, <07/21/2024> -- Add DevNet. 
-v<2.0.3>, <09/06/2024> -- Add Reject Option in Unsupervised Anomaly Detection (#605). \ No newline at end of file +v<2.0.3>, <09/06/2024> -- Add Reject Option in Unsupervised Anomaly Detection (#605). +v<2.0.3>, <12/20/2024> -- Massive documentation polish. \ No newline at end of file diff --git a/pyod/version.py b/pyod/version.py index 56995526d..99db0a143 100644 --- a/pyod/version.py +++ b/pyod/version.py @@ -20,4 +20,4 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = '2.0.2' # pragma: no cover +__version__ = '2.0.3' # pragma: no cover