Showing 3 changed files with 397 additions and 0 deletions.
@@ -0,0 +1,187 @@
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_pinball_loss, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from numpy.random import default_rng
from sklearn.neighbors import KernelDensity
from sklearn.neighbors import KNeighborsRegressor, KDTree
from sklearn.kernel_ridge import KernelRidge

# cqr imports
from cqr.cqr import helper
from cqr.nonconformist.nc import RegressorNc
from cqr.nonconformist.cp import IcpRegressor
from cqr.nonconformist.nc import QuantileRegErrFunc

# chr imports
from chr.chr.black_boxes import QNet, QRF
from chr.chr.black_boxes_r import QBART
from chr.chr.methods import CHR

# locally adaptive conformal prediction imports
from cqr.nonconformist.nc import AbsErrorErrFunc
from cqr.nonconformist.nc import RegressorNormalizer
class ConformalBase:
    '''
    Implementation inspired by:
    https://github.com/yromano/cqr/blob/master/cqr_synthetic_data_example_1.ipynb
    '''
    def __init__(self, alpha=0.1):
        self.alpha = alpha

    def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
        '''
        * split the data into a training set and a calibration set
        * y_calibrate contains the residuals
        step 1: fit the underlying model on the training split
        step 2: calibrate on the held-out calibration split
        '''
        raise NotImplementedError()

    def predict(self, x_test):
        '''
        Return both a point prediction and an interval for each test point.
        '''
        raise NotImplementedError()
class QR(ConformalBase):
    '''
    Plain quantile regression: two gradient-boosted models fit the
    alpha/2 and 1 - alpha/2 conditional quantiles (no conformal calibration).
    '''

    def __init__(self, alpha=0.1):
        super().__init__(alpha)

    def fit(self, x_calibrate, y_calibrate):
        self.all_models = {}
        common_params = dict(
            learning_rate=0.05,
            n_estimators=200,
            max_depth=2,
            min_samples_leaf=9,
            min_samples_split=9,
        )
        for alpha_ in [self.alpha / 2, 1 - (self.alpha / 2)]:
            gbr = GradientBoostingRegressor(loss="quantile", alpha=alpha_, **common_params)
            self.all_models["q %1.2f" % alpha_] = \
                gbr.fit(x_calibrate.reshape((-1, 1)), np.array(y_calibrate).ravel())

    def predict(self, x_test):
        # look up the two quantile models by the same keys used in fit,
        # so this also works when alpha != 0.1
        quant_lo = self.all_models["q %1.2f" % (self.alpha / 2)].predict(x_test.reshape((-1, 1)))
        quant_up = self.all_models["q %1.2f" % (1 - self.alpha / 2)].predict(x_test.reshape((-1, 1)))
        return [quant_lo, quant_up]  # [lower, upper], one array per bound
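# A minimal usage sketch (illustrative only, not part of the class above):
# it assumes 1-D synthetic data and uses the mean_pinball_loss imported at the
# top of this file to score each quantile model at its own level.
#
#     rng = default_rng(0)
#     x = rng.uniform(0, 5, 1000)
#     y = np.sin(x) + rng.normal(scale=0.3, size=1000)
#     x_tr, x_te, y_tr, y_te = train_test_split(x, y, test_size=0.3, random_state=0)
#     qr = QR(alpha=0.1)
#     qr.fit(x_tr, y_tr)
#     lo, up = qr.predict(x_te)
#     print(mean_pinball_loss(y_te, lo, alpha=0.05),
#           mean_pinball_loss(y_te, up, alpha=0.95),
#           np.mean((y_te >= lo) & (y_te <= up)))   # empirical coverage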

class CQR(ConformalBase):

    def __init__(self, alpha=0.1):
        super().__init__(alpha)
        n_estimators = 100
        min_samples_leaf = 40
        max_features = 1
        random_state = 0
        # quantile levels are given in percent, e.g. [5, 95] for alpha = 0.1
        quantiles = [alpha * 100 / 2, 100 - (alpha * 100 / 2)]

        # define dictionary for the quantile estimator
        params_qforest = dict()
        params_qforest['n_estimators'] = n_estimators
        params_qforest['min_samples_leaf'] = min_samples_leaf
        params_qforest['max_features'] = max_features
        params_qforest['CV'] = True
        params_qforest['coverage_factor'] = 0.9
        params_qforest['test_ratio'] = 0.1
        params_qforest['random_state'] = random_state
        params_qforest['range_vals'] = 10
        params_qforest['num_vals'] = 4

        quantile_estimator = helper.QuantileForestRegressorAdapter(model=None,
                                                                   fit_params=None,
                                                                   quantiles=quantiles,
                                                                   params=params_qforest)
        nc = RegressorNc(quantile_estimator, QuantileRegErrFunc())
        self.icp = IcpRegressor(nc)

    def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
        '''
        * split the data into a training set and a calibration set
        * y_calibrate contains the residuals
        step 1: fit the quantile forest on the training split
        step 2: calibrate on the held-out calibration split
        '''
        x_train, x_calib, y_train, y_calib = \
            train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
        self.icp.fit(x_train, y_train)
        self.icp.calibrate(x_calib, y_calib)

    def predict(self, x_test):
        '''
        Return the conformalized interval for each test point.
        '''
        return self.icp.predict(x_test, significance=self.alpha)
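# Conceptually, the fit/calibrate/predict pair above implements split CQR
# (Romano et al., 2019). A rough sketch of the score, assuming q_lo_calib and
# q_hi_calib are the fitted quantile estimates on the calibration split and
# q_lo_test, q_hi_test are the estimates at the test points:
#
#     scores = np.maximum(q_lo_calib - y_calib, y_calib - q_hi_calib)
#     Q = np.quantile(scores, (1 - alpha) * (1 + 1.0 / len(y_calib)))
#     lower, upper = q_lo_test - Q, q_hi_test + Q
#
# The IcpRegressor / QuantileRegErrFunc objects are expected to handle this
# bookkeeping internally.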

class CondHist(ConformalBase):

    def __init__(self, alpha=0.1):
        super().__init__(alpha)
        grid_quantiles = np.arange(0.01, 1.0, 0.01)
        self.bbox = QNet(grid_quantiles, 1, no_crossing=True, batch_size=1000, dropout=0.1,
                         num_epochs=10000, learning_rate=0.0005, num_hidden=256, calibrate=0)

    def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
        '''
        * split the data into a training set and a calibration set
        * y_calibrate contains the residuals
        step 1: fit the quantile network on the training split
        step 2: calibrate on the held-out calibration split
        '''
        x_train, x_calib, y_train, y_calib = \
            train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
        self.bbox.fit(x_train, y_train)
        # Initialize and calibrate the CHR method
        self.chr = CHR(self.bbox, ymin=-3, ymax=20, y_steps=200, delta_alpha=0.001, randomize=True)
        self.chr.calibrate(x_calib, y_calib, self.alpha)

    def predict(self, x_test):
        return self.chr.predict(x_test)

class LACP(ConformalBase):

    def __init__(self, alpha=0.1):
        super().__init__(alpha)
        n_estimators = 100
        min_samples_leaf = 40
        max_features = 1
        random_state = 0
        # define the conditional mean estimator as a random forest (used to predict the labels)
        mean_estimator = RandomForestRegressor(n_estimators=n_estimators,
                                               min_samples_leaf=min_samples_leaf,
                                               max_features=max_features,
                                               random_state=random_state)

        # define the MAD estimator as a random forest (used to scale the absolute residuals)
        mad_estimator = RandomForestRegressor(n_estimators=n_estimators,
                                              min_samples_leaf=min_samples_leaf,
                                              max_features=max_features,
                                              random_state=random_state)

        # define a conformal normalizer object that uses the two regression functions;
        # the nonconformity score is the absolute residual, scaled by the MAD model's
        # estimate (roughly |y - mean(x)| / mad(x))
        normalizer = RegressorNormalizer(mean_estimator,
                                         mad_estimator,
                                         AbsErrorErrFunc())

        # define the final locally adaptive conformal object
        nc = RegressorNc(mean_estimator, AbsErrorErrFunc(), normalizer)

        # build the split local conformal object
        self.icp = IcpRegressor(nc)

    def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
        x_train, x_calib, y_train, y_calib = \
            train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
        self.icp.fit(x_train, y_train)
        self.icp.calibrate(x_calib, y_calib)

    def predict(self, x_test):
        return self.icp.predict(x_test, significance=self.alpha)
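For orientation, a minimal sketch of how these wrappers might be driven end to end; the synthetic 1-D data and the assumption that predict() returns one [lower, upper] row per test point (as in nonconformist's IcpRegressor) are illustrative, not part of the commit.

import numpy as np
from numpy.random import default_rng

rng = default_rng(0)
x = rng.uniform(0, 5, size=(2000, 1))
y = np.sin(x[:, 0]) + rng.normal(scale=0.3, size=2000)
x_fit, x_test = x[:1500], x[1500:]
y_fit, y_test = y[:1500], y[1500:]

for Wrapper in [CQR, LACP]:                    # both use the split-conformal fit/calibrate protocol
    model = Wrapper(alpha=0.1)
    model.fit(x_fit, y_fit, frac=0.7, random_state=10)
    intervals = model.predict(x_test)          # assumed: one [lower, upper] row per test point
    covered = (y_test >= intervals[:, 0]) & (y_test <= intervals[:, 1])
    print(Wrapper.__name__, "empirical coverage: %.2f" % covered.mean())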
@@ -0,0 +1,77 @@
# Copyright (c) 2020, Ahmed M. Alaa
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# ---------------------------------------------------------
# Base classes for feedforward, convolutional and recurrent
# neural network (DNN, CNN, RNN) models in pytorch
# ---------------------------------------------------------

from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
from torch.autograd import Variable
import torch.nn.functional as nnf
from torch.utils.data import random_split
from torch.optim import SGD
from torch.distributions import constraints
import torchvision as torchv
import torchvision.transforms as torchvt
from torch import nn
from torch.autograd import grad
import torch.nn.functional as F
import scipy.stats as st

from sklearn.preprocessing import StandardScaler
from copy import deepcopy
import time

from models.base_models import DNN

torch.manual_seed(1)

class MCDP_DNN(DNN):

    def __init__(self,
                 dropout_prob=0.5,
                 dropout_active=True,
                 n_dim=1,
                 num_layers=2,
                 num_hidden=200,
                 output_size=1,
                 activation="ReLU",
                 mode="Regression"):

        # pass the architecture arguments through to the DNN base class
        # (otherwise they would be silently ignored and the defaults used)
        super(MCDP_DNN, self).__init__(n_dim=n_dim,
                                       dropout_prob=dropout_prob,
                                       dropout_active=dropout_active,
                                       num_layers=num_layers,
                                       num_hidden=num_hidden,
                                       output_size=output_size,
                                       activation=activation,
                                       mode=mode)

        self.dropout_prob = dropout_prob
        self.dropout = nn.Dropout(p=dropout_prob)
        self.dropout_active = dropout_active

    def forward(self, X):
        # dropout is applied on top of the base model's output and stays active
        # at prediction time (the module is never switched to eval mode),
        # which Monte Carlo dropout relies on
        _out = self.dropout(self.model(X))
        return _out

    def predict(self, X, alpha=0.1, MC_samples=100):
        z_c = st.norm.ppf(1 - alpha/2)
        X = torch.tensor(X.reshape((-1, self.n_dim))).float()
        # draw MC_samples stochastic forward passes and summarize them
        samples_ = [self.forward(X).detach().numpy() for u in range(MC_samples)]
        pred_sample = np.concatenate(samples_, axis=1)
        pred_mean = np.mean(pred_sample, axis=1)
        # half-width of the (1 - alpha) interval: z_{1-alpha/2} times the MC std
        pred_std = z_c * np.std(pred_sample, axis=1)
        return pred_mean, pred_std
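A small sketch of how MCDP_DNN might be exercised; the toy data, the hyperparameters, and turning the returned mean/half-width pair into an interval are illustrative assumptions (the network builder build_architecture comes from utils.parameters via the DNN base class).

import numpy as np

x = np.random.uniform(-2, 2, 500)
y = x ** 3 + np.random.normal(scale=0.5, size=500)

model = MCDP_DNN(dropout_prob=0.5, n_dim=1)
model.fit(x, y, num_iter=500)                      # fit() is inherited from DNN

pred_mean, half_width = model.predict(x, alpha=0.1, MC_samples=100)
lower, upper = pred_mean - half_width, pred_mean + half_width
print("coverage on the training grid: %.2f" % np.mean((y >= lower) & (y <= upper)))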
@@ -0,0 +1,133 @@
# Copyright (c) 2020, Ahmed M. Alaa
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# ---------------------------------------------------------
# Base classes for feedforward, convolutional and recurrent
# neural network (DNN, CNN, RNN) models in pytorch
# ---------------------------------------------------------

# -------------------------------------
# |  TO DO:                           |
# |  ------                           |
# |  Loss functions file              |
# |  ADD EPOCHS                       |
# |  argument explanation for the DNN |
# |  Exception handling               |
# |  Multiple architectures in RNN    |
# |  cmd arguments                    |
# |  logger, misc and config files    |
# -------------------------------------

from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
from torch.autograd import Variable
import torch.nn.functional as nnf
from torch.utils.data import random_split
from torch.optim import SGD
from torch.distributions import constraints
import torchvision as torchv
import torchvision.transforms as torchvt
from torch import nn
from torch.autograd import grad
import scipy.stats as st

from sklearn.preprocessing import StandardScaler
from copy import deepcopy
import time

from utils.parameters import *

torch.manual_seed(1)

class DNN(nn.Module):

    def __init__(self,
                 n_dim=1,
                 dropout_prob=0.0,
                 dropout_active=False,
                 num_layers=2,
                 num_hidden=200,
                 output_size=1,
                 activation="Tanh",
                 mode="Regression"
                 ):

        super(DNN, self).__init__()

        self.n_dim = n_dim
        self.num_layers = num_layers
        self.num_hidden = num_hidden
        self.mode = mode
        self.activation = activation
        self.device = torch.device('cpu')  # Make this an option
        self.output_size = output_size
        self.dropout_prob = dropout_prob
        self.dropout_active = dropout_active
        self.model = build_architecture(self)
    def fit(self, X, y, learning_rate=1e-3, loss_type="MSE", batch_size=100, num_iter=500, verbosity=False):

        self.X = torch.tensor(X.reshape((-1, self.n_dim))).float()
        self.y = torch.tensor(y).float()

        loss_dict = {"MSE": torch.nn.MSELoss}
        self.loss_fn = loss_dict[loss_type](reduction='mean')
        self.loss_trace = []

        # never sample a batch larger than the data set
        batch_size = np.min((batch_size, X.shape[0]))

        optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)

        for it in range(num_iter):

            # draw a random mini-batch (with replacement)
            batch_idx = np.random.choice(list(range(X.shape[0])), batch_size)

            y_pred = self.model(self.X[batch_idx, :])

            self.loss = self.loss_fn(y_pred.reshape((batch_size, self.n_dim)),
                                     self.y[batch_idx].reshape((batch_size, self.n_dim)))
            self.loss_trace.append(self.loss.detach().numpy())

            if verbosity:
                print("--- Iteration: %d \t--- Loss: %.3f" % (it, self.loss.item()))

            optimizer.zero_grad()   # clear gradients for this training step
            self.loss.backward()    # backpropagation, compute gradients
            optimizer.step()
    def predict(self, X, numpy_output=True):

        X = torch.tensor(X.reshape((-1, self.n_dim))).float()

        if numpy_output:
            prediction = self.model(X).detach().numpy()
        else:
            prediction = self.model(X)

        return prediction
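Finally, a minimal sketch of the fit/predict cycle of the DNN base class; the toy data and hyperparameters are illustrative assumptions, and build_architecture is expected to come from utils.parameters as imported above.

import numpy as np

x = np.linspace(-1, 1, 400)
y = 2.0 * x + 0.1 * np.random.randn(400)

net = DNN(n_dim=1, num_layers=2, num_hidden=200, activation="Tanh")
net.fit(x, y, learning_rate=1e-3, batch_size=100, num_iter=500, verbosity=False)

y_hat = net.predict(x)            # numpy array with one row per input point
print("train MSE: %.4f" % np.mean((y_hat.ravel() - y) ** 2))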