restructuring
zeshanmh committed May 5, 2022
1 parent 797372e commit 33181a3
Showing 3 changed files with 397 additions and 0 deletions.
187 changes: 187 additions & 0 deletions TCP/conformal/baselines.py
@@ -0,0 +1,187 @@
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_pinball_loss, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from numpy.random import default_rng
from sklearn.neighbors import KernelDensity
from sklearn.neighbors import KNeighborsRegressor, KDTree
from sklearn.kernel_ridge import KernelRidge

# cqr imports
from cqr.cqr import helper
from cqr.nonconformist.nc import RegressorNc
from cqr.nonconformist.cp import IcpRegressor
from cqr.nonconformist.nc import QuantileRegErrFunc

# chr imports
from chr.chr.black_boxes import QNet, QRF
from chr.chr.black_boxes_r import QBART
from chr.chr.methods import CHR

# locally adaptive conformal prediction imports
from cqr.nonconformist.nc import AbsErrorErrFunc
from cqr.nonconformist.nc import RegressorNormalizer

class ConformalBase:
'''
    Implementation inspired by:
https://github.com/yromano/cqr/blob/master/cqr_synthetic_data_example_1.ipynb
'''
def __init__(self, alpha=0.1):
self.alpha = alpha

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
'''
* split data into train and calibrate
* y_calibrate contains residuals
step 1: fit model on training data + training residuals
step 2: call calibrate
'''
raise NotImplementedError()

def predict(self, x_test):
'''
We return both predictions and interval for each prediction
'''
raise NotImplementedError()

class QR(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)

def fit(self, x_calibrate, y_calibrate):
self.all_models = {}
common_params = dict(
learning_rate=0.05,
n_estimators=200,
max_depth=2,
min_samples_leaf=9,
min_samples_split=9,
)
        for alpha_ in [self.alpha / 2, 1 - (self.alpha / 2)]:
            gbr = GradientBoostingRegressor(loss="quantile", alpha=alpha_, **common_params)
            # sklearn expects a 1-D target, so flatten y before fitting
            self.all_models["q %1.2f" % alpha_] = \
                gbr.fit(x_calibrate.reshape((-1, 1)), np.array(y_calibrate).ravel())

    def predict(self, x_test):
        # look up the lower/upper quantile models with the same keys used in fit()
        quant_lo = self.all_models["q %1.2f" % (self.alpha / 2)].predict(x_test.reshape((-1, 1)))
        quant_up = self.all_models["q %1.2f" % (1 - self.alpha / 2)].predict(x_test.reshape((-1, 1)))
        return [quant_lo, quant_up]  # TODO: confirm this matches the return format of the other baselines

class CQR(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)
n_estimators = 100
min_samples_leaf = 40
max_features = 1
random_state = 0
        quantiles = [100 * alpha / 2, 100 - (100 * alpha / 2)]  # e.g. [5, 95] for alpha=0.1

# define dictionary for quantile estimator
params_qforest = dict()
params_qforest['n_estimators'] = n_estimators
params_qforest['min_samples_leaf'] = min_samples_leaf
params_qforest['max_features'] = max_features
params_qforest['CV'] = True
params_qforest['coverage_factor'] = 0.9
params_qforest['test_ratio'] = 0.1
params_qforest['random_state'] = random_state
params_qforest['range_vals'] = 10
params_qforest['num_vals'] = 4

quantile_estimator = helper.QuantileForestRegressorAdapter(model=None,
fit_params=None,
quantiles=quantiles,
params=params_qforest)
nc = RegressorNc(quantile_estimator, QuantileRegErrFunc())
self.icp = IcpRegressor(nc)

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
'''
* split data into train and calibrate
* y_calibrate contains residuals
step 1: fit model on training data + training residuals
step 2: call calibrate
'''
x_train, x_calib, y_train, y_calib = \
train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
self.icp.fit(x_train, y_train)
self.icp.calibrate(x_calib, y_calib)

def predict(self, x_test):
'''
We return both predictions and interval for each prediction
'''
return self.icp.predict(x_test, significance=self.alpha)


class CondHist(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)
grid_quantiles = np.arange(0.01,1.0,0.01)
self.bbox = QNet(grid_quantiles, 1, no_crossing=True, batch_size=1000, dropout=0.1,
num_epochs=10000, learning_rate=0.0005, num_hidden=256, calibrate=0)

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
'''
* split data into train and calibrate
* y_calibrate contains residuals
step 1: fit model on training data + training residuals
step 2: call calibrate
'''
x_train, x_calib, y_train, y_calib = \
train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
self.bbox.fit(x_train, y_train)
# Initialize and calibrate the new method
self.chr = CHR(self.bbox, ymin=-3, ymax=20, y_steps=200, delta_alpha=0.001, randomize=True)
self.chr.calibrate(x_calib, y_calib, self.alpha)

def predict(self, x_test):
return self.chr.predict(x_test)

class LACP(ConformalBase):

def __init__(self, alpha=0.1):
super().__init__(alpha)
n_estimators = 100
min_samples_leaf = 40
max_features = 1
random_state = 0
        # define the conditional mean estimator as a random forest (used to predict the labels)
mean_estimator = RandomForestRegressor(n_estimators=n_estimators,
min_samples_leaf=min_samples_leaf,
max_features=max_features,
random_state=random_state)

# define the MAD estimator as random forests (used to scale the absolute residuals)
mad_estimator = RandomForestRegressor(n_estimators=n_estimators,
min_samples_leaf=min_samples_leaf,
max_features=max_features,
random_state=random_state)

# define a conformal normalizer object that uses the two regression functions.
# The nonconformity score is absolute residual error
normalizer = RegressorNormalizer(mean_estimator,
mad_estimator,
AbsErrorErrFunc())

# define the final local conformal object
nc = RegressorNc(mean_estimator, AbsErrorErrFunc(), normalizer)

# build the split local conformal object
self.icp = IcpRegressor(nc)

def fit(self, x_calibrate, y_calibrate, frac=0.7, random_state=10):
x_train, x_calib, y_train, y_calib = \
train_test_split(x_calibrate, y_calibrate, test_size=1-frac, random_state=random_state)
self.icp.fit(x_train, y_train)
self.icp.calibrate(x_calib, y_calib)

def predict(self, x_test):
return self.icp.predict(x_test, significance=self.alpha)
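
A minimal usage sketch of these wrappers on synthetic 1-D data (illustrative only, not part of the commit; the data, shapes, and the interpretation of the returned intervals are assumptions):

# Illustrative usage of the baselines above -- synthetic data, assumed return shapes.
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0, 5, size=2000)
y = np.sin(x) + 0.3 * rng.standard_normal(2000)
x_train, x_test, y_train, y_test = x[:1500], x[1500:], y[:1500], y[1500:]

qr = QR(alpha=0.1)
qr.fit(x_train, y_train)
lo, up = qr.predict(x_test)                 # raw quantile-regression band, no conformal calibration

cqr = CQR(alpha=0.1)
cqr.fit(x_train.reshape(-1, 1), y_train)    # splits internally into proper-training and calibration sets
bands = cqr.predict(x_test.reshape(-1, 1))  # assumed shape (n_test, 2): lower / upper bounds

coverage = np.mean((y_test >= bands[:, 0]) & (y_test <= bands[:, 1]))
print("empirical coverage: %.3f" % coverage)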

77 changes: 77 additions & 0 deletions models/DNN_uncertainty.py
@@ -0,0 +1,77 @@

# Copyright (c) 2020, Ahmed M. Alaa
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# ---------------------------------------------------------
# MC dropout uncertainty wrapper around the base DNN
# model in pytorch
# ---------------------------------------------------------

from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import sys

if not sys.warnoptions:
import warnings
warnings.simplefilter("ignore")

import torch
from torch.autograd import Variable
import torch.nn.functional as nnf
from torch.utils.data import random_split
from torch.optim import SGD
from torch.distributions import constraints
import torchvision as torchv
import torchvision.transforms as torchvt
from torch import nn
from torch.autograd import grad
import torch.nn.functional as F
import scipy.stats as st

from sklearn.preprocessing import StandardScaler
from copy import deepcopy
import time

from models.base_models import DNN

torch.manual_seed(1)


class MCDP_DNN(DNN):

    def __init__(self,
                 dropout_prob=0.5,
                 dropout_active=True,
                 n_dim=1,
                 num_layers=2,
                 num_hidden=200,
                 output_size=1,
                 activation="ReLU",
                 mode="Regression"):

        # pass the architecture arguments through to the base DNN instead of silently dropping them
        super(MCDP_DNN, self).__init__(n_dim=n_dim,
                                       dropout_prob=dropout_prob,
                                       dropout_active=dropout_active,
                                       num_layers=num_layers,
                                       num_hidden=num_hidden,
                                       output_size=output_size,
                                       activation=activation,
                                       mode=mode)

        self.dropout_prob = dropout_prob
        self.dropout = nn.Dropout(p=dropout_prob)
        self.dropout_active = dropout_active


    def forward(self, X):

        # keep dropout active at prediction time so repeated forward passes give MC-dropout samples
        _out = self.dropout(self.model(X))

        return _out


    def predict(self, X, alpha=0.1, MC_samples=100):

        # MC dropout: summarize MC_samples stochastic forward passes
        z_c = st.norm.ppf(1 - alpha / 2)
        X = torch.tensor(X.reshape((-1, self.n_dim))).float()
        samples_ = [self.forward(X).detach().numpy() for _ in range(MC_samples)]
        pred_sample = np.concatenate(samples_, axis=1)
        pred_mean = np.mean(pred_sample, axis=1)
        # note: this is z_{1-alpha/2} * std, i.e. the half-width of a (1 - alpha) normal interval
        pred_std = z_c * np.std(pred_sample, axis=1)

        return pred_mean, pred_std
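
A minimal sketch of how the MC-dropout predictive interval could be used (illustrative only, not part of the commit; the synthetic data and the normal-interval construction are assumptions based on predict() above):

# Illustrative usage of MCDP_DNN -- predict() returns (mean, z * std) per the code above.
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-2, 2, size=(500, 1))
y = x[:, 0] ** 2 + 0.1 * rng.standard_normal(500)

model = MCDP_DNN(dropout_prob=0.5, n_dim=1)
model.fit(x, y, num_iter=500)                        # fit() is inherited from the base DNN class

mean, half_width = model.predict(x, alpha=0.1, MC_samples=100)
lower, upper = mean - half_width, mean + half_width  # approximate 90% predictive interval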
133 changes: 133 additions & 0 deletions models/base_models.py
@@ -0,0 +1,133 @@

# Copyright (c) 2020, Ahmed M. Alaa
# Licensed under the BSD 3-clause license (see LICENSE.txt)

# ---------------------------------------------------------
# Base classes for feedforward, convolutional and recurrent
# neural network (DNN, CNN, RNN) models in pytorch
# ---------------------------------------------------------

# -------------------------------------
# | TO DO: |
# | ------ |
# | Loss functions file |
# | ADD EPOCHS |
# | argument explanation for the DNN |
# | Exception handling |
# | Multiple architectures in RNN |
# | cmd arguments |
# | logger, misc and config files |
# -------------------------------------

from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
import sys

if not sys.warnoptions:
import warnings
warnings.simplefilter("ignore")

import torch
from torch.autograd import Variable
import torch.nn.functional as nnf
from torch.utils.data import random_split
from torch.optim import SGD
from torch.distributions import constraints
import torchvision as torchv
import torchvision.transforms as torchvt
from torch import nn
import torchvision.transforms as transforms
from torch.autograd import grad
import scipy.stats as st

from sklearn.preprocessing import StandardScaler
from copy import deepcopy
import time

from utils.parameters import *

torch.manual_seed(1)


class DNN(nn.Module):

def __init__(self,
n_dim=1,
dropout_prob=0.0,
dropout_active=False,
num_layers=2,
num_hidden=200,
output_size=1,
activation="Tanh",
mode="Regression"
):

super(DNN, self).__init__()

self.n_dim = n_dim
self.num_layers = num_layers
self.num_hidden = num_hidden
self.mode = mode
self.activation = activation
self.device = torch.device('cpu') # Make this an option
self.output_size = output_size
self.dropout_prob = dropout_prob
self.dropout_active = dropout_active
self.model = build_architecture(self)


def fit(self, X, y, learning_rate=1e-3, loss_type="MSE", batch_size=100, num_iter=500, verbosity=False):

self.X = torch.tensor(X.reshape((-1, self.n_dim))).float()
self.y = torch.tensor(y).float()

loss_dict = {"MSE": torch.nn.MSELoss}

self.loss_fn = loss_dict[loss_type](reduction='mean')
self.loss_trace = []

batch_size = np.min((batch_size, X.shape[0]))

optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)

        for it in range(num_iter):

            # sample a random mini-batch (with replacement)
            batch_idx = np.random.choice(list(range(X.shape[0])), batch_size)

            y_pred = self.model(self.X[batch_idx, :])

            # compare predictions and targets with matching (batch_size, output_size) shapes
            self.loss = self.loss_fn(y_pred.reshape((batch_size, self.output_size)),
                                     self.y[batch_idx].reshape((batch_size, self.output_size)))

            self.loss_trace.append(self.loss.detach().numpy())

            if verbosity:
                print("--- Iteration: %d \t--- Loss: %.3f" % (it, self.loss.item()))

            optimizer.zero_grad()   # clear gradients for this training step
            self.loss.backward()    # backpropagation, compute gradients
            optimizer.step()        # update the parameters



    def predict(self, X, numpy_output=True):

        X = torch.tensor(X.reshape((-1, self.n_dim))).float()

        if numpy_output:
            prediction = self.model(X).detach().numpy()
        else:
            prediction = self.model(X)

        return prediction
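
A short usage sketch of the base DNN (illustrative only, not part of the commit; the synthetic data and the assumed output shape are my own):

# Illustrative usage of DNN -- fit()/predict() signatures taken from the class above.
import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(-1, 1, size=(1000, 1))
y = 2.0 * x[:, 0] + 0.05 * rng.standard_normal(1000)

net = DNN(n_dim=1, num_layers=2, num_hidden=200, activation="Tanh")
net.fit(x, y, learning_rate=1e-3, batch_size=100, num_iter=500, verbosity=False)

y_hat = net.predict(x)                               # assumed numpy array of shape (1000, 1)
print("final batch loss: %.4f" % float(net.loss_trace[-1]))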


