Skip to content

Commit

Permalink
Merge pull request #247 from yzhao062/development
Browse files Browse the repository at this point in the history
V0.8.4
  • Loading branch information
yzhao062 authored Nov 17, 2020
2 parents 94c27ef + 4b62599 commit 5c9386f
Show file tree
Hide file tree
Showing 14 changed files with 493 additions and 47 deletions.
4 changes: 4 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ v<0.8.2>, <07/04/2020> -- Add a set of utility functions.
v<0.8.2>, <08/30/2020> -- Add COPOD and MAD algorithm.
v<0.8.3>, <09/01/2020> -- Make decision score consistent.
v<0.8.3>, <09/19/2020> -- Add model persistence documentation (save and load).
v<0.8.4>, <10/13/2020> -- Fix COPOD code inconsistency (issue #239).
v<0.8.4>, <10/24/2020> -- Fix LSCP minor bug (issue #180).
v<0.8.4>, <11/02/2020> -- Add support for Tensorflow 2.
v<0.8.4>, <11/12/2020> -- Merge PR #102 for categorical data generation.



49 changes: 49 additions & 0 deletions examples/generate_data_categorical_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
"""Example of using and visualizing ``generate_data_categorical`` function.
"""
# Author: Yahya Almardeny <[email protected]>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import os
import sys
import numpy as np
import matplotlib.pyplot as plt

# temporary solution for relative imports in case pyod is not installed
# if pyod is installed, no need to use the following line

sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))

from pyod.utils.data import generate_data_categorical

if __name__ == "__main__":
    contamination = 0.1  # percentage of outliers

    # Generate the synthetic categorical train/test data and their labels
    X_train, X_test, y_train, y_test = generate_data_categorical(
        n_train=200, n_test=50,
        n_category_in=8, n_category_out=5,
        n_informative=1, n_features=1,
        contamination=contamination,
        shuffle=True, random_state=42)

    # note that visualizing it can only be in 1 dimension!
    splits = (
        (X_train, y_train, 'Synthetic Categorical Train Data'),
        (X_test, y_test, 'Synthetic Categorical Test Data'),
    )
    for X, y, title in splits:
        cats = list(np.ravel(X))
        labels = list(y)

        # One figure per split: bar chart on the left, line plot on the right.
        fig, axs = plt.subplots(1, 2)
        axs[0].bar(cats, labels)
        axs[1].plot(cats, labels)

        # plt.title() would only label the current (last created) axes;
        # suptitle() puts the heading above the whole two-panel figure.
        fig.suptitle(title)
        plt.show()
19 changes: 14 additions & 5 deletions pyod/models/auto_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@
from __future__ import print_function

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.losses import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
Expand All @@ -20,6 +16,19 @@
from ..utils.stat_models import pairwise_distances_no_broadcast

from .base import BaseDetector
from .base_dl import _get_tensorflow_version

# if tensorflow 2, import from tf directly
if _get_tensorflow_version() == 1:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.losses import mean_squared_error
else:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import mean_squared_error


# noinspection PyUnresolvedReferences,PyPep8Naming,PyTypeChecker
Expand Down Expand Up @@ -78,7 +87,7 @@ class AutoEncoder(BaseDetector):
- 1 = progress bar
- 2 = one line per epoch.
For verbosity >= 1, model summary may be printed.
For verbose >= 1, model summary may be printed.
random_state : random_state: int, RandomState instance or None, optional
(default=None)
Expand Down
27 changes: 27 additions & 0 deletions pyod/models/base_dl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
"""Base class for deep learning models
"""
# Author: Yue Zhao <[email protected]>
# License: BSD 2 clause

from __future__ import division
from __future__ import print_function

import tensorflow

def _get_tensorflow_version():  # pragma: no cover
    """ Utility function to decide the major version of tensorflow, which
    will affect how to import keras models.

    Returns
    -------
    tensorflow version : int
        Major version number of the imported tensorflow module (1 or 2).

    Raises
    ------
    ValueError
        If the major version is neither 1 nor 2.
    """
    # The major version is the text before the first dot of the version
    # string; parse it once and reuse it below.
    major = int(str(tensorflow.__version__).split(".")[0])

    if major not in (1, 2):
        raise ValueError("tensorflow version error")

    return major
3 changes: 2 additions & 1 deletion pyod/models/copod.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ def fit(self, X, y=None):
Fitted estimator.
"""
X = check_array(X)
self._set_n_classes(y=None)
self._set_n_classes(y)
self.X_train = X
self.decision_function(X)
return self

def decision_function(self, X):
"""Predict raw anomaly score of X using the fitted detector.
Expand Down
14 changes: 11 additions & 3 deletions pyod/models/gaal_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,17 @@

import math

import keras
from keras.layers import Input, Dense
from keras.models import Sequential, Model
from .base_dl import _get_tensorflow_version

# if tensorflow 2, import from tf directly
if _get_tensorflow_version() == 1:
import keras
from keras.layers import Input, Dense
from keras.models import Sequential, Model
else:
import tensorflow.keras as keras
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Sequential, Model


# TODO: create a base class for so_gaal and mo_gaal
Expand Down
16 changes: 12 additions & 4 deletions pyod/models/lscp.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,10 +341,18 @@ def _get_local_region(self, X_test_norm):
# keep nearby points which occur at least local_region_threshold times
final_local_region_list = [[]] * X_test_norm.shape[0]
for j in range(X_test_norm.shape[0]):
final_local_region_list[j] = [item for item, count in
collections.Counter(
local_region_list[j]).items() if
count > self.local_region_threshold]
tmp = [item for item, count in collections.Counter(
local_region_list[j]).items() if
count > self.local_region_threshold]
decrease_value = 0
while len(tmp) < 2:
decrease_value = decrease_value + 1
assert decrease_value < self.local_region_threshold
tmp = [item for item, count in
collections.Counter(local_region_list[j]).items() if
count > (self.local_region_threshold - decrease_value)]

final_local_region_list[j] = tmp

return final_local_region_list

Expand Down
15 changes: 11 additions & 4 deletions pyod/models/mo_gaal.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,23 @@

import numpy as np

from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD

from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from .base import BaseDetector
from .gaal_base import create_discriminator
from .gaal_base import create_generator
from .base_dl import _get_tensorflow_version

# if tensorflow 2, import from tf directly
if _get_tensorflow_version() == 1:
from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD
else:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD


class MO_GAAL(BaseDetector):
Expand Down
15 changes: 11 additions & 4 deletions pyod/models/so_gaal.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,23 @@

import numpy as np

from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD

from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from .base import BaseDetector
from .gaal_base import create_discriminator
from .gaal_base import create_generator
from .base_dl import _get_tensorflow_version

# if tensorflow 2, import from tf directly
if _get_tensorflow_version() == 1:
from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD
else:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD


class SO_GAAL(BaseDetector):
Expand Down
38 changes: 23 additions & 15 deletions pyod/models/vae.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,6 @@

import numpy as np

from keras.models import Model
from keras.layers import Lambda, Input, Dense, Dropout
from keras.regularizers import l2
from keras.losses import mse, binary_crossentropy
from keras.utils import plot_model
from keras import backend as K

from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
Expand All @@ -37,6 +30,21 @@
from ..utils.stat_models import pairwise_distances_no_broadcast

from .base import BaseDetector
from .base_dl import _get_tensorflow_version

# if tensorflow 2, import from tf directly
if _get_tensorflow_version() == 1:
from keras.models import Model
from keras.layers import Lambda, Input, Dense, Dropout
from keras.regularizers import l2
from keras.losses import mse, binary_crossentropy
from keras import backend as K
else:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Lambda, Input, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.losses import mse, binary_crossentropy
from tensorflow.keras import backend as K


class VAE(BaseDetector):
Expand Down Expand Up @@ -114,13 +122,13 @@ class VAE(BaseDetector):
If True, apply standardization on the data.
verbose : int, optional (default=1)
Verbosity mode.
verbose mode.
- 0 = silent
- 1 = progress bar
- 2 = one line per epoch.
For verbosity >= 1, model summary may be printed.
For verbose >= 1, model summary may be printed.
random_state : random_state: int, RandomState instance or None, opti
(default=None)
Expand Down Expand Up @@ -172,7 +180,7 @@ def __init__(self, encoder_neurons=None, decoder_neurons=None,
output_activation='sigmoid', loss=mse, optimizer='adam',
epochs=100, batch_size=32, dropout_rate=0.2,
l2_regularizer=0.1, validation_size=0.1, preprocessing=True,
verbosity=1, random_state=None, contamination=0.1,
verbose=1, random_state=None, contamination=0.1,
gamma=1.0, capacity=0.0):
super(VAE, self).__init__(contamination=contamination)
self.encoder_neurons = encoder_neurons
Expand All @@ -187,7 +195,7 @@ def __init__(self, encoder_neurons=None, decoder_neurons=None,
self.l2_regularizer = l2_regularizer
self.validation_size = validation_size
self.preprocessing = preprocessing
self.verbosity = verbosity
self.verbose = verbose
self.random_state = random_state
self.latent_dim = latent_dim
self.gamma = gamma
Expand Down Expand Up @@ -264,7 +272,7 @@ def _build_model(self):
[z_mean, z_log])
# Instantiate encoder
encoder = Model(inputs, [z_mean, z_log, z])
if self.verbosity >= 1:
if self.verbose >= 1:
encoder.summary()

# Build Decoder
Expand All @@ -281,7 +289,7 @@ def _build_model(self):
layer)
# Instatiate decoder
decoder = Model(latent_inputs, outputs)
if self.verbosity >= 1:
if self.verbose >= 1:
decoder.summary()
# Generate outputs
outputs = decoder(encoder(inputs)[2])
Expand All @@ -290,7 +298,7 @@ def _build_model(self):
vae = Model(inputs, outputs)
vae.add_loss(self.vae_loss(inputs, outputs, z_mean, z_log))
vae.compile(optimizer=self.optimizer)
if self.verbosity >= 1:
if self.verbose >= 1:
vae.summary()
return vae

Expand Down Expand Up @@ -335,7 +343,7 @@ def fit(self, X, y=None):
batch_size=self.batch_size,
shuffle=True,
validation_split=self.validation_size,
verbose=self.verbosity).history
verbose=self.verbose).history
# Predict on X itself and calculate the reconstruction error as
# the outlier scores. Noted X_norm was shuffled has to recreate
if self.preprocessing:
Expand Down
Loading

0 comments on commit 5c9386f

Please sign in to comment.