Multifidelity models #24

Open · wants to merge 5 commits into base: master
2 changes: 1 addition & 1 deletion peslearn/constants.py
@@ -51,7 +51,7 @@ def pes(geom_vectors, cartesian=True):
     axis = 0
     g = np.apply_along_axis(cart1d_to_distances1d, axis, g)
     newX = gp.transform_new_X(g, params, Xscaler)
-    E, cov = final.predict(newX, full_cov=False)
+    E, cov = model.predict_f_compiled(newX)
     e = gp.inverse_transform_new_y(E, yscaler)
     #e = e - (insert min energy here)
     #e *= 219474.63  # convert Hartree to cm-1
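A note on the new prediction call: `predict_f_compiled` is not a built-in GPflow method; it is conventionally attached to a trained model as a `tf.function`-compiled wrapper around `predict_f`, so repeated PES evaluations avoid graph retracing. A minimal sketch of how such an attribute is typically created (GPflow 2.x assumed; `model` and `newX` are the names used in the diff above):

    import tensorflow as tf

    # Assuming `model` is a trained gpflow.models.GPR instance.
    # Fixing the input signature lets TensorFlow compile the graph once.
    dim = newX.shape[1]
    model.predict_f_compiled = tf.function(
        model.predict_f,
        input_signature=[tf.TensorSpec(shape=[None, dim], dtype=tf.float64)],
    )
    E, cov = model.predict_f_compiled(newX)  # posterior mean and variance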
13 changes: 13 additions & 0 deletions peslearn/ml/__init__.py
@@ -3,7 +3,20 @@
 from . import neural_network
 from . import preprocessing_helper
 from . import model
+from . import svigp
+from . import mfgp
+from . import mfmodel
+from . import mfnn
+from . import diff_nn
+from . import gpytorch_gpr
+from . import mfgp_nsp
 
 from .gaussian_process import GaussianProcess
 from .data_sampler import DataSampler
 from .neural_network import NeuralNetwork
+from .svigp import SVIGP
+from .mfgp import MFGP
+from .mfmodel import MFModel
+from .gpytorch_gpr import GaussianProcess as GpyGPR
+from .mfgp_nsp import MFGP_NSP
+#from .mfnn.dual import DualNN
5 changes: 5 additions & 0 deletions peslearn/ml/diff_nn/__init__.py
@@ -0,0 +1,5 @@
from . import diff_model
from . import diff_neural_network

from .diff_model import DiffModel
from .diff_neural_network import DiffNeuralNetwork
23 changes: 23 additions & 0 deletions peslearn/ml/diff_nn/diff_model.py
@@ -0,0 +1,23 @@
from ..model import Model
from ...constants import bohr2angstroms
import re

class DiffModel(Model):
    def __init__(self, dataset_path, input_obj, molecule_type=None, molecule=None, der_lvl=0, train_path=None, test_path=None, valid_path=None):
        super().__init__(dataset_path, input_obj, molecule_type, molecule, train_path, test_path, valid_path)
        # Count atoms from the molecule type string, e.g. 'A2B' -> 3 atoms:
        # each capital letter is an element, each number its multiplicity.
        nletters = re.findall(r"[A-Z]", self.molecule_type)
        nnumbers = re.findall(r"\d+", self.molecule_type)
        nnumbers2 = [int(i) for i in nnumbers]
        self.natoms = len(nletters) + sum(nnumbers2) - len(nnumbers2)
        # Assuming Cartesian coordinates, and Cartesian gradients and Hessians in Bohr.
        # Each row of raw_X is laid out as [coordinates | gradient | flattened Hessian].
        ncart = self.natoms*3
        nhess = ncart**2
        if der_lvl == 0:
            pass # energies only; no derivative columns to slice off
        elif der_lvl == 1:
            self.raw_grad = self.raw_X[:, ncart:2*ncart]
            self.raw_X = self.raw_X[:, :ncart]
        elif der_lvl == 2:
            self.raw_hess = self.raw_X[:, 2*ncart:nhess+2*ncart]
            self.raw_grad = self.raw_X[:, ncart:2*ncart]
            self.raw_X = self.raw_X[:, :ncart]
        else:
            raise ValueError(f"Invalid value for `der_lvl`: {der_lvl}")
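To make the slicing above concrete: with der_lvl=2, each dataset row is expected to hold the 3N Cartesian coordinates, then the 3N gradient components, then the (3N)^2 flattened Hessian. A hedged illustration for an A2B molecule (N=3, so 9 + 9 + 81 = 99 columns; the row contents are fake data):

    import numpy as np

    natoms = 3
    ncart = 3 * natoms              # 9 coordinate columns
    nhess = ncart ** 2              # 81 Hessian columns

    row = np.arange(ncart + ncart + nhess, dtype=float)        # one fake data row
    cart = row[:ncart]                                         # Cartesian geometry
    grad = row[ncart:2*ncart]                                  # Cartesian gradient
    hess = row[2*ncart:2*ncart + nhess].reshape(ncart, ncart)  # Hessian matrix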
666 changes: 666 additions & 0 deletions peslearn/ml/diff_nn/diff_neural_network.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions peslearn/ml/diff_nn/utils/__init__.py
@@ -0,0 +1,5 @@
from . import cart_dist
from . import pip
from . import transform_deriv

from .pip import Pip_B
33 changes: 33 additions & 0 deletions peslearn/ml/diff_nn/utils/cart_dist.py
@@ -0,0 +1,33 @@
import numpy as np

def cart_dist_B_2(X, do_hess=False):
    """Interatomic distances and their derivatives w.r.t. Cartesians.

    X : (ndat, 3*natom) array of Cartesian geometries.
    Returns R (ndat, nr) distances, B1 (ndat, nr, nvar) first derivatives,
    and B2 (ndat, nr, nvar, nvar) second derivatives (zeros unless do_hess).
    """
    ndat, nvar = X.shape
    natom = round(nvar/3)
    X = X.reshape((ndat, natom, 3))
    nr = round((natom**2 - natom) / 2)  # number of unique atom pairs
    R = np.zeros((ndat, nr))
    ctr = 0
    for atomi in range(natom):
        for atomj in range(atomi):
            R[:,ctr] = np.linalg.norm(X[:,atomi,:] - X[:,atomj,:], axis=1)
            ctr += 1
    B1 = np.zeros((ndat, nr, nvar))
    B2 = np.zeros((ndat, nr, nvar, nvar))
    for atomi in range(natom):
        for atomj in range(atomi):
            r = round((atomi*(atomi-1))/2) + atomj  # row-major pair index
            # dr/dx_i = (x_i - x_j)/r; dr/dx_j is its negative
            B1[:, r, 3*atomi:3*(atomi+1)] = np.divide((X[:,atomi,:] - X[:,atomj,:]), R[:,r][:,None])
            B1[:, r, 3*atomj:3*(atomj+1)] = -B1[:, r, 3*atomi:3*(atomi+1)]
    if do_hess:
        for atomi in range(natom):
            for atomj in range(atomi):
                r = round((atomi*(atomi-1))/2) + atomj
                v = X[:, atomi, :] - X[:, atomj, :]
                # 3x3 Hessian blocks: vv^T/r^3 - I/r, with signs set by the atom pairing
                matt = np.einsum("ni, nj -> nij", v, v, optimize="optimal")
                matt *= (R[:,r][:,None,None]**-3.0)
                matt -= (np.identity(3) * (R[:,r][:,None,None]**-1.0))
                B2[:, r, 3*atomi:3*(atomi+1), 3*atomi:3*(atomi+1)] = -1.0 * matt
                B2[:, r, 3*atomj:3*(atomj+1), 3*atomj:3*(atomj+1)] = -1.0 * matt
                B2[:, r, 3*atomi:3*(atomi+1), 3*atomj:3*(atomj+1)] = matt
                B2[:, r, 3*atomj:3*(atomj+1), 3*atomi:3*(atomi+1)] = matt
    return R, B1, B2
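A quick sanity check for these analytic derivatives is a central finite-difference comparison on a random geometry. This is an illustrative sketch (seed, step size, and tolerance are arbitrary choices), not part of the PR:

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(1, 9))   # one 3-atom geometry, flattened Cartesians
    R, B1, B2 = cart_dist_B_2(X, do_hess=True)

    h = 1e-6
    for k in range(9):            # compare each dR/dx_k to central differences
        Xp, Xm = X.copy(), X.copy()
        Xp[0, k] += h
        Xm[0, k] -= h
        Rp, _, _ = cart_dist_B_2(Xp)
        Rm, _, _ = cart_dist_B_2(Xm)
        assert np.allclose(B1[0, :, k], (Rp - Rm)[0] / (2*h), atol=1e-6)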
114 changes: 114 additions & 0 deletions peslearn/ml/diff_nn/utils/pip.py
@@ -0,0 +1,114 @@
import numpy as np
import sympy
import re

class Pip_B():
    def __init__(self, fn, nbonds):
        # `transform` returns PIP values from X (n interatomic distances),
        # the degree of each PIP, gradients of the PIPs w.r.t. the distances,
        # and Hessians of the PIPs w.r.t. distance pairs.

        # Read and prep PIP strings
        with open(fn, 'r') as f:
            data = f.read()
        data = re.sub(r'\^', '**', data)

        # Define variables for Sympy
        symbols = " ".join(f"x{i}" for i in range(nbonds))
        variables = sympy.symbols(symbols)
        # Shift variable labels from 1-based (x1..xn) to 0-based (x0..x(n-1))
        for i in range(1, nbonds+1):
            data = re.sub(r'x{}(\D)'.format(i), r'x{}\1'.format(i-1), data)

        # Define PIP equations for Sympy
        self.polys = re.findall(r"\]=(.+)", data)

# Calculate PIP first and second derivatives and associated "lambdified" functions
self.grad = []
self.grad_lambda = []
self.hess = []
self.hess_lambda = []
for p in self.polys:
lil_grad = []
lil_grad_lambda = []
lil_hess = []
lil_hess_lambda = []
for x1 in variables:
d1 = sympy.diff(p, x1)
lil_grad.append(d1)
lil_grad_lambda.append(re.sub(r"x(\d+)", r"X[:,\1]", str(d1)))
liller_hess = []
liller_hess_lambda = []
for x2 in variables:
d1d2 = sympy.diff(d1, x2)
liller_hess.append(d1d2)
liller_hess_lambda.append(re.sub(r"x(\d+)", r"X[:,\1]", str(d1d2)))
lil_hess.append(liller_hess)
lil_hess_lambda.append(liller_hess_lambda)
self.grad.append(lil_grad)
self.grad_lambda.append(lil_grad_lambda)
self.hess.append(lil_hess)
self.hess_lambda.append(lil_hess_lambda)

        # Record which first and second derivatives are nonzero, and which of those are constant
self.grad_not_zero = []
self.grad_not_const = []
self.grad_const = []
self.hess_not_const = []
self.hess_const = []
self.hess_not_zero = []

for pi in range(len(self.polys)):
for xi in range(nbonds):
if self.grad[pi][xi] != 0:
if "x" in str(self.grad[pi][xi]):
self.grad_not_const.append((pi, xi))
else:
self.grad_const.append((pi, xi))
self.grad_not_zero.append((pi, xi))
for xj in range(nbonds):
if self.hess[pi][xi][xj] != 0:
if "x" in str(self.hess[pi][xi][xj]):
self.hess_not_const.append((pi, xi, xj))
else:
self.hess_const.append((pi, xi, xj))
self.hess_not_zero.append((pi, xi, xj))


def transform(self, X, do_hess=False):
ndat, nbonds = X.shape
new_X = np.zeros((ndat, len(self.polys)))
# Evaluate polynomials
        for i, p in enumerate(self.polys): # evaluate each FI
            # convert the FI to a numpy expression over the columns of X,
            # e.g. x1 + x2 becomes X[:,1] + X[:,2]
            eval_string = re.sub(r"(x)(\d+)", r"X[:,\2]", p)
            # evaluate that column's FI from the columns of X
            new_X[:,i] = eval(eval_string)

# Evaluate polynomial derivatives
egrad = np.zeros((ndat, len(self.polys), nbonds))
ehess = np.zeros((ndat, len(self.polys), nbonds, nbonds))

for pi, xi in self.grad_not_const:
egrad[:,pi,xi] = eval(self.grad_lambda[pi][xi])
for pi, xi in self.grad_const:
egrad[:,pi,xi] = float(self.grad[pi][xi])

if do_hess:
for pi, xi, xj in self.hess_not_const:
ehess[:,pi,xi,xj] = eval(self.hess_lambda[pi][xi][xj])
for pi, xi, xj in self.hess_const:
ehess[:,pi,xi,xj] = float(self.hess[pi][xi][xj])

        degrees = []
        for p in self.polys:
            # Check only the first term; assumes every term of each FI polynomial
            # has the same total degree (appears to always hold).
            tmp = p.split('+')[0]
            # total degree = number of occurrences of 'x' plus the extra powers from exponents
            exps = [int(i) - 1 for i in re.findall(r"\*\*(\d+)", tmp)]
            ndegrees = len(re.findall("x", tmp)) + sum(exps)
            degrees.append(ndegrees)

return new_X, degrees, egrad, ehess # PIP values, degrees, B1, and B2
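For context on how these pieces compose: gradients of a PIP with respect to Cartesian coordinates follow from the chain rule, dP/dx = (dP/dr)(dr/dx), so the `egrad` returned here contracts with the `B1` array from cart_dist.py. A hedged sketch (the PIP file path is a placeholder):

    import numpy as np
    from peslearn.ml.diff_nn.utils.cart_dist import cart_dist_B_2
    from peslearn.ml.diff_nn.utils.pip import Pip_B

    X_cart = np.random.default_rng(1).normal(size=(10, 9))  # 10 geometries, 3 atoms
    R, B1, _ = cart_dist_B_2(X_cart)                        # distances and dr/dx

    pip = Pip_B("path/to/pip_file", nbonds=R.shape[1])      # placeholder file path
    P, degrees, dP_dr, _ = pip.transform(R)

    # Chain rule: dP/dx[n,p,k] = sum_r dP/dr[n,p,r] * dr/dx[n,r,k]
    dP_dx = np.einsum("npr, nrk -> npk", dP_dr, B1)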

37 changes: 37 additions & 0 deletions peslearn/ml/diff_nn/utils/transform_deriv.py
@@ -0,0 +1,37 @@
import numpy as np

# Scaling functions (flattened to vectors): begin
def morse(x, alpha):
    # Morse-like transform m(r) = exp(-r/alpha); note the minus sign in the exponent
    return np.exp(-x/alpha)

def morse_B1(x, alpha=1.0):
    # dm/dr = -(1/alpha) exp(-r/alpha)
    return -1.0 * (alpha**-1.0) * morse(x, alpha)

def morse_B2(x, alpha=1.0):
    # d2m/dr2 = (1/alpha^2) exp(-r/alpha)
    return (alpha**-2.0) * morse(x, alpha)

def scale_mean_B1(x, stds):
    # Per-feature factors for mean/std scaling, tiled to a square matrix;
    # the commented alternative below returns inverse stds instead
    return np.tile(np.asarray(stds), (len(stds), 1))
    #return x.std(axis=0)**-1

def scale_mm_B1(x, bmin, bmax):
    # Jacobian of min-max scaling from [xmin, xmax] onto [bmin, bmax]
    xmin = x.min(axis=0)
    xmax = x.max(axis=0)
    return (bmax-bmin)/(xmax-xmin)

def degree_B1(x, degrees):
    # d/dx of degree reduction x**(1/d): (1/d) x**(1/d - 1)
    pwr = np.power(degrees, -1.0) - 1
    return np.divide(np.power(x, pwr), degrees)

def degree_B2(x, degrees):
    # d2/dx2 of x**(1/d): (1/d**2 - 1/d) x**(1/d - 2)
    pwr = np.power(degrees, -1.0) - 2
    factor = np.power(degrees,-2.0) - np.power(degrees,-1.0)
    return np.multiply(np.power(x, pwr), factor)

def dist(x):
    pass

def ddist(x):
    pass
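Since sympy is already a dependency of this PR (see pip.py above), the closed-form derivatives here can be cross-checked symbolically. A minimal sketch, not part of the PR:

    import sympy

    r, alpha, d = sympy.symbols("r alpha d", positive=True)
    m = sympy.exp(-r/alpha)
    print(sympy.diff(m, r))         # -exp(-r/alpha)/alpha   -> morse_B1
    print(sympy.diff(m, r, 2))      # exp(-r/alpha)/alpha**2 -> morse_B2
    print(sympy.diff(r**(1/d), r))  # equivalent to degree_B1's (1/d) r**(1/d - 1)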
16 changes: 10 additions & 6 deletions peslearn/ml/gaussian_process.py
@@ -29,7 +29,7 @@ def set_default_hyperparameters(self):
         Set default hyperparameter space. If none is provided, default is used.
         """
         self.hyperparameter_space = {
-            'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]),
+            #'scale_X': hp.choice('scale_X', ['std', 'mm01', 'mm11', None]),
             'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11', None]),
         }

@@ -75,6 +75,7 @@ def split_train_test(self, params):
         self.ytest = self.y[self.test_indices]
 
     def build_model(self, params, nrestarts=10, maxit=1000, seed=0):
+        params['scale_X'] = 'std'
         print("Hyperparameters: ", params)
         self.split_train_test(params)
         np.random.seed(seed) # make GPy deterministic for a given hyperparameter config
@@ -87,7 +88,11 @@ def build_model(self, params, nrestarts=10, maxit=1000, seed=0):
         ard_val = False
         kernel = RBF(dim, ARD=ard_val) # TODO add HP control of kernel
         self.model = GPRegression(self.Xtr, self.ytr, kernel=kernel, normalizer=False)
-        self.model.optimize_restarts(nrestarts, optimizer="lbfgsb", robust=True, verbose=False, max_iters=maxit, messages=False)
+        #self.model.optimize_restarts(nrestarts, optimizer="lbfgsb", robust=True, verbose=False, max_iters=maxit, messages=False)
+        self.model.optimize(optimizer="lbfgsb", max_iters=maxit, messages=False)
+        #TODO
+        err = self.vet_model(self.model)
+        #TODO
         gc.collect(2) #fixes some memory leak issues with certain BLAS configs

def hyperopt_model(self, params):
@@ -117,8 +122,7 @@ def vet_model(self, model):
print("Full Dataset {}".format(round(hartree2cm * error_full,2)), end=' ')
print("Median error: {}".format(np.round(median_error[0],2)), end=' ')
print("Max 5 errors: {}".format(np.sort(np.round(max_errors.flatten(),1))),'\n')
error_test_invcm = round(hartree2cm * error_test,2)
return error_test_invcm
return error_test

def preprocess(self, params, raw_X, raw_y):
"""
@@ -135,7 +139,6 @@ def preprocess(self, params, raw_X, raw_y):
             raw_X, degrees = interatomics_to_fundinvar(raw_X,path)
             if params['pip']['degree_reduction']:
                 raw_X = degree_reduce(raw_X, degrees)
-
         if params['scale_X']:
             X, Xscaler = general_scaler(params['scale_X'], raw_X)
         else:
@@ -157,7 +160,8 @@ def optimize_model(self):
         self.hyperopt_trials = Trials()
         self.itercount = 1 # keep track of hyperopt iterations
         if self.input_obj.keywords['rseed']:
-            rstate = np.random.RandomState(self.input_obj.keywords['rseed'])
+            rstate = np.random.default_rng(self.input_obj.keywords['rseed'])
+            #rstate = np.random.RandomState(self.input_obj.keywords['rseed'])
         else:
             rstate = None
         best = fmin(self.hyperopt_model,
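Context for the rstate change above: newer hyperopt releases expect a numpy Generator from np.random.default_rng for fmin's rstate argument rather than the legacy RandomState, so this edit tracks that API change. A minimal sketch of the pattern, with a stand-in objective for illustration:

    import numpy as np
    from hyperopt import fmin, tpe, hp, Trials

    space = {'scale_y': hp.choice('scale_y', ['std', 'mm01', 'mm11', None])}
    best = fmin(
        fn=lambda params: 0.0,             # stand-in objective
        space=space,
        algo=tpe.suggest,
        max_evals=5,
        trials=Trials(),
        rstate=np.random.default_rng(42),  # Generator, not legacy RandomState
    )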