restructuring
zeshanmh committed May 5, 2022
1 parent 797372e commit 33181a3
Showing 3 changed files with 397 additions and 0 deletions.
187 changes: 187 additions & 0 deletions TCP/conformal/baselines.py
@@ -0,0 +1,187 @@
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_pinball_loss, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from numpy.random import default_rng
from sklearn.neighbors import KernelDensity
from sklearn.neighbors import KNeighborsRegressor, KDTree
from sklearn.kernel_ridge import KernelRidge

# cqr imports
from cqr.cqr import helper
from cqr.nonconformist.nc import RegressorNc
from cqr.nonconformist.cp import IcpRegressor
from cqr.nonconformist.nc import QuantileRegErrFunc

# chr imports
from chr.chr.black_boxes import QNet, QRF
from chr.chr.black_boxes_r import QBART
from chr.chr.methods import CHR

# locally adaptive conformal prediction imports
from cqr.nonconformist.nc import AbsErrorErrFunc
from cqr.nonconformist.nc import RegressorNormalizer

class ConformalBase:
'''
    Implementation inspired by:
https://github.com/yromano/cqr/blob/master/cqr_synthetic_data_example_1.ipynb
'''
def __init__(self, alpha=0.1):
self.alpha = alpha

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
'''
* split data into train and calibrate
* y_calibrate contains residuals
step 1: fit model on training data + training residuals
step 2: call calibrate
'''
raise NotImplementedError()

def predict(self, x_test):
'''
We return both predictions and interval for each prediction
'''
raise NotImplementedError()

class QR(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)

def fit(self, x_calibrate, y_calibrate):
self.all_models = {}
common_params = dict(
learning_rate=0.05,
n_estimators=200,
max_depth=2,
min_samples_leaf=9,
min_samples_split=9,
)
        for alpha_ in [self.alpha / 2, 1 - (self.alpha / 2)]:
            gbr = GradientBoostingRegressor(loss="quantile", alpha=alpha_, **common_params)
            # sklearn expects a 1-D target, so flatten y before fitting
            self.all_models["q %1.2f" % alpha_] = \
                gbr.fit(x_calibrate.reshape((-1, 1)), np.array(y_calibrate).ravel())

    def predict(self, x_test):
        # look up the lower/upper quantile models with the same keys used in fit()
        quant_lo = self.all_models["q %1.2f" % (self.alpha / 2)].predict(x_test.reshape((-1, 1)))
        quant_up = self.all_models["q %1.2f" % (1 - self.alpha / 2)].predict(x_test.reshape((-1, 1)))
        return [quant_lo, quant_up]  # TODO: confirm this matches the return format of the other baselines

class CQR(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)
n_estimators = 100
min_samples_leaf = 40
max_features = 1
random_state = 0
        quantiles = [100 * alpha / 2, 100 - (100 * alpha / 2)]  # e.g. [5, 95] for alpha=0.1

# define dictionary for quantile estimator
params_qforest = dict()
params_qforest['n_estimators'] = n_estimators
params_qforest['min_samples_leaf'] = min_samples_leaf
params_qforest['max_features'] = max_features
params_qforest['CV'] = True
params_qforest['coverage_factor'] = 0.9
params_qforest['test_ratio'] = 0.1
params_qforest['random_state'] = random_state
params_qforest['range_vals'] = 10
params_qforest['num_vals'] = 4

quantile_estimator = helper.QuantileForestRegressorAdapter(model=None,
fit_params=None,
quantiles=quantiles,
params=params_qforest)
nc = RegressorNc(quantile_estimator, QuantileRegErrFunc())
self.icp = IcpRegressor(nc)

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
'''
* split data into train and calibrate
* y_calibrate contains residuals
step 1: fit model on training data + training residuals
step 2: call calibrate
'''
x_train, x_calib, y_train, y_calib = \
train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
self.icp.fit(x_train, y_train)
self.icp.calibrate(x_calib, y_calib)

def predict(self, x_test):
'''
We return both predictions and interval for each prediction
'''
return self.icp.predict(x_test, significance=self.alpha)


class CondHist(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)
grid_quantiles = np.arange(0.01,1.0,0.01)
self.bbox = QNet(grid_quantiles, 1, no_crossing=True, batch_size=1000, dropout=0.1,
num_epochs=10000, learning_rate=0.0005, num_hidden=256, calibrate=0)

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
'''
* split data into train and calibrate
* y_calibrate contains residuals
step 1: fit model on training data + training residuals
step 2: call calibrate
'''
x_train, x_calib, y_train, y_calib = \
train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
self.bbox.fit(x_train, y_train)
# Initialize and calibrate the new method
self.chr = CHR(self.bbox, ymin=-3, ymax=20, y_steps=200, delta_alpha=0.001, randomize=True)
self.chr.calibrate(x_calib, y_calib, self.alpha)

def predict(self, x_test):
return self.chr.predict(x_test)

class LACP(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)
n_estimators = 100
min_samples_leaf = 40
max_features = 1
random_state = 0
        # define the conditional mean estimator as a random forest (used to predict the labels)
mean_estimator = RandomForestRegressor(n_estimators=n_estimators,
min_samples_leaf=min_samples_leaf,
max_features=max_features,
random_state=random_state)

# define the MAD estimator as random forests (used to scale the absolute residuals)
mad_estimator = RandomForestRegressor(n_estimators=n_estimators,
min_samples_leaf=min_samples_leaf,
max_features=max_features,
random_state=random_state)

# define a conformal normalizer object that uses the two regression functions.
# The nonconformity score is absolute residual error
normalizer = RegressorNormalizer(mean_estimator,
mad_estimator,
AbsErrorErrFunc())

# define the final local conformal object
nc = RegressorNc(mean_estimator, AbsErrorErrFunc(), normalizer)

# build the split local conformal object
self.icp = IcpRegressor(nc)

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
x_train, x_calib, y_train, y_calib = \
train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
self.icp.fit(x_train, y_train)
self.icp.calibrate(x_calib, y_calib)

def predict(self, x_test):
return self.icp.predict(x_test, significance=self.alpha)
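
A minimal usage sketch of these wrappers on synthetic 1-D data (illustrative only, not part of the commit; the data, shapes, and the interpretation of the returned intervals are assumptions):

# Illustrative usage of the baselines above -- synthetic data, assumed return shapes.
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0, 5, size=2000)
y = np.sin(x) + 0.3 * rng.standard_normal(2000)
x_train, x_test, y_train, y_test = x[:1500], x[1500:], y[:1500], y[1500:]

qr = QR(alpha=0.1)
qr.fit(x_train, y_train)
lo, up = qr.predict(x_test)                 # raw quantile-regression band, no conformal calibration

cqr = CQR(alpha=0.1)
cqr.fit(x_train.reshape(-1, 1), y_train)    # splits internally into proper-training and calibration sets
bands = cqr.predict(x_test.reshape(-1, 1))  # assumed shape (n_test, 2): lower / upper bounds

coverage = np.mean((y_test >= bands[:, 0]) & (y_test <= bands[:, 1]))
print("empirical coverage: %.3f" % coverage)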

77 changes: 77 additions & 0 deletions models/DNN_uncertainty.py
@@ -0,0 +1,77 @@

# Copyright (c) 2020, Ahmed M. Alaa
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# ---------------------------------------------------------
# MC dropout uncertainty wrapper around the base DNN
# model in pytorch
# ---------------------------------------------------------

from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import sys

if not sys.warnoptions:
import warnings
warnings.simplefilter("ignore")

import torch
from torch.autograd import Variable
import torch.nn.functional as nnf
from torch.utils.data import random_split
from torch.optim import SGD
from torch.distributions import constraints
import torchvision as torchv
import torchvision.transforms as torchvt
from torch import nn
from torch.autograd import grad
import torch.nn.functional as F
import scipy.stats as st

from sklearn.preprocessing import StandardScaler
from copy import deepcopy
import time

from models.base_models import DNN

torch.manual_seed(1)


class MCDP_DNN(DNN):

    def __init__(self,
                 dropout_prob=0.5,
                 dropout_active=True,
                 n_dim=1,
                 num_layers=2,
                 num_hidden=200,
                 output_size=1,
                 activation="ReLU",
                 mode="Regression"):

        # pass the architecture arguments through to the base DNN instead of silently dropping them
        super(MCDP_DNN, self).__init__(n_dim=n_dim,
                                       dropout_prob=dropout_prob,
                                       dropout_active=dropout_active,
                                       num_layers=num_layers,
                                       num_hidden=num_hidden,
                                       output_size=output_size,
                                       activation=activation,
                                       mode=mode)

        self.dropout_prob = dropout_prob
        self.dropout = nn.Dropout(p=dropout_prob)
        self.dropout_active = dropout_active


    def forward(self, X):

        # keep dropout active at prediction time so repeated forward passes give MC-dropout samples
        _out = self.dropout(self.model(X))

        return _out


    def predict(self, X, alpha=0.1, MC_samples=100):

        # MC dropout: summarize MC_samples stochastic forward passes
        z_c = st.norm.ppf(1 - alpha / 2)
        X = torch.tensor(X.reshape((-1, self.n_dim))).float()
        samples_ = [self.forward(X).detach().numpy() for _ in range(MC_samples)]
        pred_sample = np.concatenate(samples_, axis=1)
        pred_mean = np.mean(pred_sample, axis=1)
        # note: this is z_{1-alpha/2} * std, i.e. the half-width of a (1 - alpha) normal interval
        pred_std = z_c * np.std(pred_sample, axis=1)

        return pred_mean, pred_std
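
A minimal sketch of how the MC-dropout predictive interval could be used (illustrative only, not part of the commit; the synthetic data and the normal-interval construction are assumptions based on predict() above):

# Illustrative usage of MCDP_DNN -- predict() returns (mean, z * std) per the code above.
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-2, 2, size=(500, 1))
y = x[:, 0] ** 2 + 0.1 * rng.standard_normal(500)

model = MCDP_DNN(dropout_prob=0.5, n_dim=1)
model.fit(x, y, num_iter=500)                        # fit() is inherited from the base DNN class

mean, half_width = model.predict(x, alpha=0.1, MC_samples=100)
lower, upper = mean - half_width, mean + half_width  # approximate 90% predictive interval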
133 changes: 133 additions & 0 deletions models/base_models.py
@@ -0,0 +1,133 @@

# Copyright (c) 2020, Ahmed M. Alaa
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# ---------------------------------------------------------
# Base classes for feedforward, convolutional and recurrent
# neural network (DNN, CNN, RNN) models in pytorch
# ---------------------------------------------------------

# -------------------------------------
# | TO DO: |
# | ------ |
# | Loss functions file |
# | ADD EPOCHS |
# | argument explanation for the DNN |
# | Exception handling |
# | Multiple architectures in RNN |
# | cmd arguments |
# | logger, misc and config files |
# -------------------------------------

from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import sys

if not sys.warnoptions:
import warnings
warnings.simplefilter("ignore")

import torch
from torch.autograd import Variable
import torch.nn.functional as nnf
from torch.utils.data import random_split
from torch.optim import SGD
from torch.distributions import constraints
import torchvision as torchv
import torchvision.transforms as torchvt
from torch import nn
import torchvision.transforms as transforms
from torch.autograd import grad
import scipy.stats as st

from sklearn.preprocessing import StandardScaler
from copy import deepcopy
import time

from utils.parameters import *

torch.manual_seed(1)


class DNN(nn.Module):

def __init__(self,
n_dim=1,
dropout_prob=0.0,
dropout_active=False,
num_layers=2,
num_hidden=200,
output_size=1,
activation="Tanh",
mode="Regression"
):

super(DNN, self).__init__()

self.n_dim = n_dim
self.num_layers = num_layers
self.num_hidden = num_hidden
self.mode = mode
self.activation = activation
self.device = torch.device('cpu') # Make this an option
self.output_size = output_size
self.dropout_prob = dropout_prob
self.dropout_active = dropout_active
self.model = build_architecture(self)


def fit(self, X, y, learning_rate=1e-3, loss_type="MSE", batch_size=100, num_iter=500, verbosity=False):

self.X = torch.tensor(X.reshape((-1, self.n_dim))).float()
self.y = torch.tensor(y).float()

loss_dict = {"MSE": torch.nn.MSELoss}

self.loss_fn = loss_dict[loss_type](reduction='mean')
self.loss_trace = []

batch_size = np.min((batch_size, X.shape[0]))

optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)

        for it in range(num_iter):

            # sample a random mini-batch (with replacement)
            batch_idx = np.random.choice(list(range(X.shape[0])), batch_size)

            y_pred = self.model(self.X[batch_idx, :])

            # compare predictions and targets with matching (batch_size, output_size) shapes
            self.loss = self.loss_fn(y_pred.reshape((batch_size, self.output_size)),
                                     self.y[batch_idx].reshape((batch_size, self.output_size)))

            self.loss_trace.append(self.loss.detach().numpy())

            if verbosity:
                print("--- Iteration: %d \t--- Loss: %.3f" % (it, self.loss.item()))

            optimizer.zero_grad()   # clear gradients for this training step
            self.loss.backward()    # backpropagation, compute gradients
            optimizer.step()        # update the parameters



    def predict(self, X, numpy_output=True):

        X = torch.tensor(X.reshape((-1, self.n_dim))).float()

        if numpy_output:
            prediction = self.model(X).detach().numpy()
        else:
            prediction = self.model(X)

        return prediction
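
A short usage sketch of the base DNN (illustrative only, not part of the commit; the synthetic data and the assumed output shape are my own):

# Illustrative usage of DNN -- fit()/predict() signatures taken from the class above.
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1, 1, size=(1000, 1))
y = 2.0 * x[:, 0] + 0.05 * rng.standard_normal(1000)

net = DNN(n_dim=1, num_layers=2, num_hidden=200, activation="Tanh")
net.fit(x, y, learning_rate=1e-3, batch_size=100, num_iter=500, verbosity=False)

y_hat = net.predict(x)                               # assumed numpy array of shape (1000, 1)
print("final batch loss: %.4f" % float(net.loss_trace[-1]))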


