Skip to content

Commit

Permalink
Update scalar couplings for BPTI, HEWL, and Ubq (#13)
Browse files Browse the repository at this point in the history
* Bugfix for 4-point water

* Update NMR fit force fields

* Timeseries analysis

* Update scalar couplings for bpti, hewl, ubq

* Fix bracket error in benchmark_targets

* Disable time series analysis by default

* Benchmark targets for Ala6 and Ala7

* Time series analysis for H bond scalar couplings
  • Loading branch information
chapincavender authored Nov 21, 2023
1 parent 53b78f9 commit 70e55c8
Show file tree
Hide file tree
Showing 17 changed files with 736 additions and 1,293 deletions.
230 changes: 159 additions & 71 deletions proteinbenchmark/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,51 @@
from loos.pyloos import Trajectory
from openff.toolkit import Molecule
from openff.units import unit
from pymbar import timeseries

from proteinbenchmark.analysis_parameters import *
from proteinbenchmark.benchmark_targets import benchmark_targets, experimental_datasets
from proteinbenchmark.utilities import list_of_dicts_to_csv


def get_timeseries_mean(correlated_timeseries: numpy.ndarray):
# Get burn-in time, statistical inefficiency, and maximum number of
# uncorrelated samples from pymbar.timeseries
t0, g, Neff_max = timeseries.detect_equilibration(correlated_timeseries, nskip=10)

# Get an uncorrelated sample from the correlated timeseries
truncated_timeseries = correlated_timeseries[t0:]
uncorrelated_sample_indices = timeseries.subsample_correlated_data(
truncated_timeseries, g=g
)
uncorrelated_timeseries = truncated_timeseries[uncorrelated_sample_indices]

# Get mean and standard error of the mean for the correlated, truncated, and
# uncorrelated timeseries
N_correlated = correlated_timeseries.size
correlated_mean = correlated_timeseries.mean()
correlated_sem = correlated_timeseries.std(ddof=1) / numpy.sqrt(N_correlated)
N_truncated = truncated_timeseries.size
truncated_mean = truncated_timeseries.mean()
truncated_sem = truncated_timeseries.std(ddof=1) / numpy.sqrt(N_truncated)
N_uncorrelated = uncorrelated_timeseries.size
uncorrelated_mean = uncorrelated_timeseries.mean()
uncorrelated_sem = uncorrelated_timeseries.std(ddof=1) / numpy.sqrt(N_uncorrelated)

return {
"Statistical Inefficiency": g,
"Correlated Mean": correlated_mean,
"Correlated SEM": correlated_sem,
"Correlated N": N_correlated,
"Truncated Mean": truncated_mean,
"Truncated SEM": truncated_sem,
"Truncated N": N_truncated,
"Uncorrelated Mean": uncorrelated_mean,
"Uncorrelated SEM": uncorrelated_sem,
"Uncorrelated N": N_uncorrelated,
}


def align_trajectory(
topology_path: str,
trajectory_path: str,
Expand Down Expand Up @@ -636,9 +675,10 @@ def compute_scalar_couplings(
dihedrals_path: str,
output_path: str,
karplus: str = "vogeli",
time_series_output_path: str = None,
):
"""
Compute NMR scalar couplings and chi^2 with respect to experimental values.
Compute NMR scalar couplings using a Karplus model.
Parameters
---------
Expand All @@ -647,12 +687,14 @@ def compute_scalar_couplings(
dihedrals_path
The path to the time series of dihedrals.
output_path
The path to write the computed scalar couplings and chi^2 values.
The path to write the computed mean scalar couplings.
karplus
The name of the set of Karplus parameters to use.
time_series_output_path
The path to write the time series of computed scalar couplings.
"""

# Check Karplus parameters
# Get Karplus parameters for scalar couplings associated with phi
karplus = karplus.lower()

if karplus == "vogeli":
Expand All @@ -672,9 +714,12 @@ def compute_scalar_couplings(
"\n case_dft1\n case_dft2"
)

# Get Karplus parameters for scalar couplings associated with psi
karplus_parameters.update(WIRMER_KARPLUS_PARAMETERS)
karplus_parameters.update(DING_KARPLUS_PARAMETERS)
karplus_parameters.update(HENNIG_KARPLUS_PARAMETERS)

# Get Karplus parameters for scalar couplings associated with chi1
karplus_parameters.update(PEREZ_KARPLUS_PARAMETERS)

if karplus != "schmidt":
Expand Down Expand Up @@ -708,6 +753,8 @@ def compute_scalar_couplings(
observable_df.reset_index(drop=True, inplace=True)

# Compute observables
if time_series_output_path is not None:
observable_timeseries = list()
computed_observables = list()

for index, row in observable_df.iterrows():
Expand All @@ -718,27 +765,25 @@ def compute_scalar_couplings(
if observable == "3j_hn_ca":
# Compute sine and cosine of phi and psi
# Reset indices for products, e.g. cos phi * cos psi
phi = numpy.deg2rad(
dihedral_df[
(dihedral_df["Dihedral Name"] == "phi")
& (dihedral_df["Resid"] == observable_resid)
]["Dihedral (deg)"].values
)
karplus_df = dihedral_df[
(dihedral_df["Dihedral Name"] == "phi")
& (dihedral_df["Resid"] == observable_resid)
]
phi = numpy.deg2rad(karplus_df["Dihedral (deg)"].values)

prev_psi = numpy.deg2rad(
dihedral_df[
(dihedral_df["Dihedral Name"] == "psi")
& (dihedral_df["Resid"] == observable_resid - 1)
]["Dihedral (deg)"].values
)
karplus_df = dihedral_df[
(dihedral_df["Dihedral Name"] == "psi")
& (dihedral_df["Resid"] == observable_resid - 1)
]
prev_psi = numpy.deg2rad(karplus_df["Dihedral (deg)"].values)

cos_phi = numpy.cos(phi)
sin_phi = numpy.sin(phi)
cos_psi = numpy.cos(prev_psi)
sin_psi = numpy.sin(prev_psi)

# Compute estimate for scalar coupling
computed_coupling = numpy.mean(
computed_coupling = (
observable_parameters["cos_phi"] * cos_phi
+ observable_parameters["sin_phi"] * sin_phi
+ observable_parameters["cos_psi"] * cos_psi
Expand All @@ -748,10 +793,7 @@ def compute_scalar_couplings(
+ observable_parameters["sin_phi_cos_psi"] * sin_phi * cos_psi
+ observable_parameters["sin_phi_sin_psi"] * sin_phi * sin_psi
+ observable_parameters["C"]
)

# Extrema of 3j_hn_ca Karplus curve from numerical optimization
karplus_extrema = [0.0329976 / unit.second, 1.08915 / unit.second]
).m_as(unit.second**-1)

else:
# Get relevant dihedral angle
Expand All @@ -777,68 +819,76 @@ def compute_scalar_couplings(
dihedral_resname = PEREZ_KARPLUS_RESIDUE_MAP[row["Resname"]]
observable_parameters = observable_parameters[dihedral_resname]

# Compute cos(theta + delta) and cos^2(theta + delta)
# Compute cos(theta + delta)
karplus_angle = numpy.deg2rad(
observable_parameters["delta"] + karplus_df["Dihedral (deg)"].values
)

cos_angle = numpy.cos(karplus_angle)
cos_sq_angle = numpy.square(cos_angle)

# Compute estimate for scalar coupling
# <J> = A <cos^2(theta)> + B <cos(theta)> + C
karplus_A = observable_parameters["A"]
karplus_B = observable_parameters["B"]
karplus_C = observable_parameters["C"]

computed_coupling = numpy.mean(
karplus_A * cos_sq_angle + karplus_B * cos_angle + karplus_C
)

# Get extrema of Karplus curve at
# J(0) = A + B + C
# J(pi) = A - B + C
# J(+/- arccos(-B / 2 A)) = -B^2 / (4 A) + C
karplus_extrema = [
karplus_A + karplus_B + karplus_C,
karplus_A - karplus_B + karplus_C,
]

if numpy.abs(karplus_B / karplus_A) <= 2:
karplus_extrema.append(
-karplus_B * karplus_B / karplus_A / 4 + karplus_C,
)
computed_coupling = (
observable_parameters["A"] * numpy.square(cos_angle)
+ observable_parameters["B"] * cos_angle
+ observable_parameters["C"]
).m_as(unit.second**-1)

# Compute contribution to chi^2
experimental_coupling = row["Experiment"] / unit.second
uncertainty = observable_parameters["sigma"]
chi_sq = numpy.square((computed_coupling - experimental_coupling) / uncertainty)
# Get experimental uncertainty from Karplus model
experiment_uncertainty = observable_parameters["sigma"].m_as(unit.second**-1)

# Truncate experimental coupling to Karplus extrema
experimental_coupling = row["Experiment"] / unit.second
truncated_experimental_coupling = min(
max(experimental_coupling, min(karplus_extrema)),
max(karplus_extrema),
)
truncated_chi_sq = numpy.square(
(computed_coupling - truncated_experimental_coupling) / uncertainty
)
max(experimental_coupling, observable_parameters["minimum"]),
observable_parameters["maximum"],
).m_as(unit.second**-1)

if time_series_output_path is not None:
# Get mean and SEM of correlated, truncated, and uncorrelated timeseries
# for computed scalar coupling
computed_coupling_mean = get_timeseries_mean(computed_coupling)

# Write time series of observable
observable_timeseries.append(
{
"Frame": karplus_df["Frame"],
"Time (ns)": karplus_df["Time (ns)"],
"Observable": observable,
"Resid": observable_resid,
"Resname": row["Resname"],
"Experiment": row["Experiment"],
"Experiment Uncertainty": experiment_uncertainty,
"Truncated Experiment": truncated_experimental_coupling,
"Computed": computed_coupling,
}
)

else:
computed_coupling_mean = {
"Correlated Mean": computed_coupling.mean(),
"Correlated SEM": (
computed_coupling.std(ddof=1) / numpy.sqrt(computed_coupling.size)
),
}

# Write computed means of observable
computed_observables.append(
{
"Uncertainty": uncertainty.m_as(unit.second**-1),
"Computed": computed_coupling.m_as(unit.second**-1),
"Chi^2": chi_sq,
"Truncated Experiment": (
truncated_experimental_coupling.m_as(unit.second**-1)
),
"Truncated Chi^2": truncated_chi_sq,
"Experiment Uncertainty": experiment_uncertainty,
"Truncated Experiment": truncated_experimental_coupling,
**computed_coupling_mean,
}
)

if time_series_output_path is not None:
observable_timeseries_df = pandas.concat(
[pandas.DataFrame(df) for df in observable_timeseries]
).reset_index(drop=True)
observable_timeseries_df.to_csv(time_series_output_path)

scalar_coupling_df = pandas.concat(
[observable_df, pandas.DataFrame(computed_observables)], axis=1
)

scalar_coupling_df.to_csv(output_path)


Expand All @@ -847,6 +897,7 @@ def compute_h_bond_scalar_couplings(
h_bond_geometries_path: str,
output_path: str,
karplus: str = "barfield",
time_series_output_path: str = None,
):
"""
Compute NMR 3J_N_CO scalar couplings and chi^2 with respect to experimental
Expand All @@ -862,6 +913,8 @@ def compute_h_bond_scalar_couplings(
The path to write the computed scalar couplings and chi^2 values.
karplus
The name of the set of Karplus parameters to use.
time_series_output_path
The path to write the time series of computed scalar couplings.
"""

# Check Karplus parameters
Expand Down Expand Up @@ -921,6 +974,8 @@ def compute_h_bond_scalar_couplings(
h_bond_df = pandas.read_csv(h_bond_geometries_path, index_col=0)

# Compute observables
if time_series_output_path is not None:
observable_timeseries = list()
computed_observables = list()

for index, row in observable_df.iterrows():
Expand Down Expand Up @@ -950,7 +1005,7 @@ def compute_h_bond_scalar_couplings(
# 3J(R, theta, phi) = (
# (exp(a R_0) A cos^2 phi + exp(a R_0) B cos phi + exp(a R_0) C)
# * sin^2 theta + exp(a R_0) D cos^2 theta) * exp(-a R)
computed_coupling = numpy.mean(
computed_coupling = (
(
(
karplus_sin_sq_cos_sq * cos_sq_HOCN_dihedral
Expand All @@ -961,21 +1016,54 @@ def compute_h_bond_scalar_couplings(
+ karplus_cos_sq * cos_sq_HOC_angle
)
* exp_HO_distance
)
).m_as(unit.second**-1)

# Get experimental uncertainty from Karplus model
experiment_uncertainty = karplus_parameters["sigma"].m_as(unit.second**-1)

if time_series_output_path is not None:
# Get mean and SEM of correlated, truncated, and uncorrelated timeseries
# for computed scalar coupling
computed_coupling_mean = get_timeseries_mean(computed_coupling)

# Write time series of observable
observable_timeseries.append(
{
"Frame": geometry_df["Frame"],
"Time (ns)": geometry_df["Time (ns)"],
"Observable": observable,
"Resid N": observable_resid_n,
"Resname N": row["Resname N"],
"Resid CO": observable_resid_co,
"Resname CO": row["Resname CO"],
"Experiment": row["Experiment"],
"Experiment Uncertainty": experiment_uncertainty,
"Computed": computed_coupling,
}
)

# Compute contribution to chi^2
experimental_coupling = row["Experiment"] / unit.second
uncertainty = karplus_parameters["sigma"]
chi_sq = numpy.square((computed_coupling - experimental_coupling) / uncertainty)
else:
computed_coupling_mean = {
"Correlated Mean": computed_coupling.mean(),
"Correlated SEM": (
computed_coupling.std(ddof=1) / numpy.sqrt(computed_coupling.size)
),
}

# Write computed means of observable
computed_observables.append(
{
"Uncertainty": uncertainty.m_as(unit.second**-1),
"Computed": computed_coupling.m_as(unit.second**-1),
"Chi^2": chi_sq,
"Experiment Uncertainty": experiment_uncertainty,
**computed_coupling_mean,
}
)

if time_series_output_path is not None:
observable_timeseries_df = pandas.concat(
[pandas.DataFrame(df) for df in observable_timeseries]
).reset_index(drop=True)
observable_timeseries_df.to_csv(time_series_output_path)

scalar_coupling_df = pandas.concat(
[observable_df, pandas.DataFrame(computed_observables)], axis=1
)
Expand Down
Loading

0 comments on commit 70e55c8

Please sign in to comment.