Skip to content

Commit

Permalink
improve categorical data generation.
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhao062 committed Nov 12, 2020
1 parent df8cd11 commit 4b62599
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ v<0.8.3>, <09/19/2020> -- Add model persistence documentation (save and load).
v<0.8.4>, <10/13/2020> -- Fix COPOD code inconsistency (issue #239).
v<0.8.4>, <10/24/2020> -- Fix LSCP minor bug (issue #180).
v<0.8.4>, <11/02/2020> -- Add support for Tensorflow 2.
v<0.8.4>, <11/12/2020> -- Merge PR #!02 for categorical data generation.



13 changes: 7 additions & 6 deletions examples/generate_data_categorical_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import sys
import numpy as np
import matplotlib.pyplot as plt

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line

Expand All @@ -19,16 +20,16 @@

from pyod.utils.data import generate_data_categorical


if __name__ == "__main__":
contamination = 0.1 # percentage of outliers

# Generate sample data in clusters
X_train, X_test, y_train, y_test = generate_data_categorical(n_train=200, n_test=50,
n_category_in=8, n_category_out=5,
n_informative=1, n_features=1,
contamination=contamination,
shuffle=True, random_state=42)
X_train, X_test, y_train, y_test = generate_data_categorical \
(n_train=200, n_test=50,
n_category_in=8, n_category_out=5,
n_informative=1, n_features=1,
contamination=contamination,
shuffle=True, random_state=42)

# note that visualizing it is only possible in 1 dimension!
cats = list(np.ravel(X_train))
Expand Down
1 change: 0 additions & 1 deletion pyod/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,6 @@ def generate_data_categorical(n_train=1000, n_test=500, n_features=2,
n_informative=2, n_category_in=2,
n_category_out=2, contamination=0.1,
shuffle=True, random_state=None):

"""Utility function to generate synthesized categorical data.
Parameters
Expand Down

0 comments on commit 4b62599

Please sign in to comment.