diff --git a/CHANGES.txt b/CHANGES.txt index bf42f9ab8..b104c5879 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -111,6 +111,7 @@ v<0.8.3>, <09/19/2020> -- Add model persistence documentation (save and load). v<0.8.4>, <10/13/2020> -- Fix COPOD code inconsistency (issue #239). v<0.8.4>, <10/24/2020> -- Fix LSCP minor bug (issue #180). v<0.8.4>, <11/02/2020> -- Add support for Tensorflow 2. +v<0.8.4>, <11/12/2020> -- Merge PR #!02 for categorical data generation. diff --git a/examples/generate_data_categorical_example.py b/examples/generate_data_categorical_example.py index cec4d5f89..7926eee78 100644 --- a/examples/generate_data_categorical_example.py +++ b/examples/generate_data_categorical_example.py @@ -11,6 +11,7 @@ import sys import numpy as np import matplotlib.pyplot as plt + # temporary solution for relative imports in case pyod is not installed # if pyod is installed, no need to use the following line @@ -19,16 +20,16 @@ from pyod.utils.data import generate_data_categorical - if __name__ == "__main__": contamination = 0.1 # percentage of outliers # Generate sample data in clusters - X_train, X_test, y_train, y_test = generate_data_categorical(n_train=200, n_test=50, - n_category_in=8, n_category_out=5, - n_informative=1, n_features=1, - contamination=contamination, - shuffle=True, random_state=42) + X_train, X_test, y_train, y_test = generate_data_categorical \ + (n_train=200, n_test=50, + n_category_in=8, n_category_out=5, + n_informative=1, n_features=1, + contamination=contamination, + shuffle=True, random_state=42) # note that visalizing it can only be in 1 dimension! 
cats = list(np.ravel(X_train)) diff --git a/pyod/utils/data.py b/pyod/utils/data.py index 1d30f60ff..5588c1879 100644 --- a/pyod/utils/data.py +++ b/pyod/utils/data.py @@ -494,7 +494,6 @@ def generate_data_categorical(n_train=1000, n_test=500, n_features=2, n_informative=2, n_category_in=2, n_category_out=2, contamination=0.1, shuffle=True, random_state=None): - """Utility function to generate synthesized categorical data. Parameters