diff --git a/devtools/conda-envs/proteinbenchmark-espaloma.yaml b/devtools/conda-envs/proteinbenchmark-espaloma.yaml new file mode 100644 index 0000000..f5b9cd7 --- /dev/null +++ b/devtools/conda-envs/proteinbenchmark-espaloma.yaml @@ -0,0 +1,35 @@ +name: openff-proteinbenchmark-espaloma +channels: + - conda-forge + - openeye + - defaults +dependencies: + - black + - click + - isort + - pip + - python + + # System setup + - pip: + - git+https://github.com/deGrootLab/pmx@develop + - git+https://github.com/Electrostatics/pdb2pqr@master#egg=pdb2pqr + + # MM calculations + - ambertools + - cudatoolkit + - espaloma>=0.3.2 + - openeye-toolkits + - openff-forcefields + - openff-toolkit>=0.13.0 + - openff-units + - openmm>=8.0.0 + - openmmforcefields>=0.12.0 + - openmmtools + + # Analysis + - loos + - pandas + - pymol-open-source + - seaborn + diff --git a/devtools/conda-envs/proteinbenchmark.yaml b/devtools/conda-envs/proteinbenchmark.yaml index 775e2ff..a419716 100644 --- a/devtools/conda-envs/proteinbenchmark.yaml +++ b/devtools/conda-envs/proteinbenchmark.yaml @@ -14,7 +14,6 @@ dependencies: - pip: - git+https://github.com/deGrootLab/pmx@develop - git+https://github.com/Electrostatics/pdb2pqr@master#egg=pdb2pqr - - git+https://github.com/openmm/openmmforcefields@main # MM calculations - ambertools @@ -24,6 +23,7 @@ dependencies: - openff-toolkit>=0.13.0 - openff-units - openmm>=8.0.0 + - openmmforcefields>=0.12.0 - openmmtools # Analysis diff --git a/proteinbenchmark/data/force-fields/espaloma-0.3.2.pt b/proteinbenchmark/data/force-fields/espaloma-0.3.2.pt new file mode 100644 index 0000000..5212a6c Binary files /dev/null and b/proteinbenchmark/data/force-fields/espaloma-0.3.2.pt differ diff --git a/proteinbenchmark/force_fields.py b/proteinbenchmark/force_fields.py index 72eff2f..66d71a5 100644 --- a/proteinbenchmark/force_fields.py +++ b/proteinbenchmark/force_fields.py @@ -8,6 +8,10 @@ # List of force fields with force field XML file, water model, and water model # XML file force_fields = { + "espaloma-0.3.2-tip3p": { + "force_field_file": Path(ff_directory, "espaloma-0.3.2.pt"), + "water_model": "tip3p", + }, "ff14sb-opc": { "force_field_file": Path( ff_directory, "nerenberg_ff14sb_c0ala_c0gly_c0val.xml" diff --git a/proteinbenchmark/protein_system.py b/proteinbenchmark/protein_system.py index 0366ad3..bb6bf09 100644 --- a/proteinbenchmark/protein_system.py +++ b/proteinbenchmark/protein_system.py @@ -15,6 +15,7 @@ from proteinbenchmark.openmm_simulation import OpenMMSimulation from proteinbenchmark.simulation_parameters import * from proteinbenchmark.system_setup import ( + assign_parameters, build_initial_coordinates, minimize_gmx, minimize_openmm, @@ -168,8 +169,8 @@ def setup(self): 'contain one of "aa_sequence" or "initial_pdb"' ) - # Solvate, add ions, and construct parametrized system - if not exists_and_not_empty(self.parametrized_system): + # Solvate and add ions using Amber ff14SB as a reference force field + if not exists_and_not_empty(solvated_pdb): print(f"Solvating system {self.system_name}") # Get parameters for solvation and constructing OpenMM system @@ -191,8 +192,29 @@ def setup(self): vdw_switch_width = VDW_SWITCH_WIDTH solvate( - simulation_platform=self.simulation_platform, ionic_strength=self.target_parameters["ionic_strength"], + protonated_pdb_file=self.protonated_pdb, + solvated_pdb_file=solvated_pdb, + water_model=self.water_model, + solvent_padding=solvent_padding, + ) + + # Construct parametrized system + if not exists_and_not_empty(self.parametrized_system): + print(f"Parametrizing system {self.system_name}") + + if "nonbonded_cutoff" in self.target_parameters: + nonbonded_cutoff = self.target_parameters["nonbonded_cutoff"] + else: + nonbonded_cutoff = NONBONDED_CUTOFF + + if "vdw_switch_width" in self.target_parameters: + vdw_switch_width = self.target_parameters["vdw_switch_width"] + else: + vdw_switch_width = VDW_SWITCH_WIDTH + + assign_parameters( + simulation_platform=self.simulation_platform, nonbonded_cutoff=nonbonded_cutoff, vdw_switch_width=vdw_switch_width, protonated_pdb_file=self.protonated_pdb, @@ -201,7 +223,6 @@ def setup(self): water_model=self.water_model, force_field_file=self.force_field_file, water_model_file=self.water_model_file, - solvent_padding=solvent_padding, setup_prefix=self.setup_prefix, ) diff --git a/proteinbenchmark/system_setup.py b/proteinbenchmark/system_setup.py index d3c2de2..5b5fb78 100644 --- a/proteinbenchmark/system_setup.py +++ b/proteinbenchmark/system_setup.py @@ -2,14 +2,13 @@ import numpy import openmm -from openff.toolkit import ForceField as OFFForceField -from openff.toolkit import Molecule as OFFMolecule -from openff.toolkit import Topology as OFFTopology +from openff.toolkit import ForceField, Molecule, Topology from openff.units import unit from openmm import app from openmm import unit as openmm_unit +from openmmforcefields.generators import EspalomaTemplateGenerator -from proteinbenchmark.force_fields import water_model_files +from proteinbenchmark.force_fields import force_fields from proteinbenchmark.utilities import ( read_xml, remove_model_lines, @@ -18,77 +17,6 @@ ) -class _OFFForceField(OFFForceField): - """ - Dummy class that defines a `createSystem()` method so that this force field - can be passed to the `addSolvent() and `_addIons()` methods of - `openmm.app.Modeller`. - """ - - def __init__( - self, unique_molecules, *args, remove_water_virtual_sites=False, **kwargs - ): - self._from_openmm_unique_molecules = unique_molecules - self._remove_water_virtual_sites = remove_water_virtual_sites - super().__init__(*args, **kwargs) - - def createSystem(self, openmm_topology: app.Topology): - """Return an OpenMM system from an OpenMM topology.""" - - return self.createInter(openmm_topology).to_openmm( - combine_nonbonded_forces=True - ) - - def createInter(self, openmm_topology: app.Topology): - """Return an OpenFF Interchange object from an OpenMM topology.""" - - if self._remove_water_virtual_sites: - # Create a new OpenMM topology without water virtual sites - no_vsite_topology = app.Topology() - no_vsite_topology.setPeriodicBoxVectors( - openmm_topology.getPeriodicBoxVectors() - ) - no_vsite_atoms = dict() - - for chain in openmm_topology.chains(): - no_vsite_chain = no_vsite_topology.addChain(chain.id) - - for residue in chain.residues(): - no_vsite_residue = no_vsite_topology.addResidue( - residue.name, - no_vsite_chain, - residue.id, - residue.insertionCode, - ) - - for atom in residue.atoms(): - if residue.name != "HOH" or atom.name in {"O", "H1", "H2"}: - no_vsite_atom = no_vsite_topology.addAtom( - atom.name, atom.element, no_vsite_residue - ) - no_vsite_atoms[atom] = no_vsite_atom - - # Include bonds only between non-virtual site atoms - for bond in openmm_topology.bonds(): - if bond[0] in no_vsite_atoms and bond[1] in no_vsite_atoms: - no_vsite_topology.addBond( - no_vsite_atoms[bond[0]], no_vsite_atoms[bond[1]] - ) - - openff_topology = OFFTopology.from_openmm( - no_vsite_topology, - unique_molecules=self._from_openmm_unique_molecules, - ) - - else: - openff_topology = OFFTopology.from_openmm( - openmm_topology, - unique_molecules=self._from_openmm_unique_molecules, - ) - - return self.create_interchange(openff_topology) - - def build_initial_coordinates( build_method: str, ph: float, @@ -282,51 +210,27 @@ def build_initial_coordinates( def solvate( - simulation_platform: str, ionic_strength: unit.Quantity, - nonbonded_cutoff: unit.Quantity, - vdw_switch_width: unit.Quantity, protonated_pdb_file: str, solvated_pdb_file: str, - parametrized_system: str, water_model: str, - force_field_file: str, - water_model_file: str = None, - hydrogen_mass: unit.Quantity = 3.0 * unit.dalton, solvent_padding: unit.Quantity = None, n_solvent: int = None, - setup_prefix: str = None, ): """ - Add water and salt ions and write OpenMM System to XML. Exactly one of - solvent_padding or n_solvent must be specified. + Add water and salt ions. Exactly one of solvent_padding or n_solvent must be + specified. Parameters ---------- - simulation_platform - Simulation platform for file exporting. ionic_strength The bulk ionic strength of the desired thermodynamic state. - nonbonded_cutoff - The cutoff for the Lennard-Jones potential and PME direct space - summation. - vdw_switch_width - The distance from the nonbonded cutoff at which to apply the - switching function. protonated_pdb_file The path to the protonated PDB with initial coordinates. solvated_pdb_file The path to write the solvated PDB with ions. - parametrized_system - The path to write the parametrized OpenMM system as a serialized XML. water_model The name of the water model used to parametrize the water. - force_field_file - The path to the force field to parametrize the system. - water_model_file - The path to the force field containing the water model. - hydrogen_mass - The mass of solute hydrogen atoms for hydrogen mass repartitioning. solvent_padding The padding distance used to setup the solvent box. n_solvent @@ -346,19 +250,6 @@ def solvate( if not ionic_strength.is_compatible_with(unit.molar): raise ValueError("ionic_strength does not have units of Amount Length^-3") - if not nonbonded_cutoff.is_compatible_with(unit.nanometer): - raise ValueError("nonbonded_cutoff does not have units of Length") - if not vdw_switch_width.is_compatible_with(unit.nanometer): - raise ValueError("vdw_switch_width does not have units of Length") - if not hydrogen_mass.is_compatible_with(unit.dalton): - raise ValueError("hydrogen_mass does not have units of Mass") - - if water_model in ["opc3", "tip3p", "tip3p-fb"]: - water_has_virtual_sites = False - modeller_water_model = "tip3p" - elif water_model in ["opc", "tip4p-fb"]: - water_has_virtual_sites = True - modeller_water_model = "tip4pew" if solvent_padding is not None: solvent_arg_str = ( @@ -373,76 +264,31 @@ def solvate( f"{solvent_arg_str}" "\n ionic_strength " f"{ionic_strength.m_as(unit.molar):.3f} M" - "\n nonbonded_cutoff " - f"{nonbonded_cutoff.m_as(unit.nanometer):.3f} nm" - "\n vdw_switch_width " - f"{vdw_switch_width.m_as(unit.nanometer):.3f} nm" ) - # Set up force field - smirnoff = Path(force_field_file).suffix == ".offxml" - - if smirnoff: - # SMIRNOFF force field - - # Get unique molecules (solute, water, sodium ion, chloride ion) needed - # for openff.toolkit.topology.Topology.from_openmm() - solute_off_topology = OFFTopology.from_pdb(protonated_pdb_file) - unique_molecules = [ - *solute_off_topology.unique_molecules, - OFFMolecule.from_smiles("O"), - OFFMolecule.from_smiles("[Na+1]"), - OFFMolecule.from_smiles("[Cl-1]"), - ] - - # Use the dummy class _OFFForceField so we can pass this to Modeller - if water_model_file is None: - force_field = _OFFForceField( - unique_molecules, - force_field_file, - remove_water_virtual_sites=water_has_virtual_sites, - ) - print(f"Force field read from\n {force_field_file}") - - else: - force_field = _OFFForceField( - unique_molecules, - force_field_file, - water_model_file, - remove_water_virtual_sites=water_has_virtual_sites, - ) - print( - f"Force field read from\n {force_field_file}" - f"\n and {water_model_file}" - ) - - # Set up solute topology and positions - solute_interchange = force_field.create_interchange(solute_off_topology) - solute_topology = solute_interchange.topology.to_openmm() - solute_positions = solute_interchange.positions.to_openmm() - + if water_model in {"opc3", "tip3p", "tip3p-fb"}: + water_has_virtual_sites = False + modeller_water_model = "tip3p" + elif water_model in {"opc", "tip4p-fb"}: + water_has_virtual_sites = True + modeller_water_model = "tip4pew" else: - if water_model_file is None: - # OpenMM force field with no separate water model - force_field = app.ForceField(force_field_file) - print(f"Force field read from\n {force_field_file}") + raise ValueError( + "water_model must be one of\n opc\n opc3\n tip3p" + "\n tip3p-fb\n tip4p-fb" + ) - else: - # OpenMM force field with separate water model - force_field = app.ForceField(force_field_file, water_model_file) - print( - f"Force field read from\n {force_field_file}" - f"\n and {water_model_file}" - ) + # Use Amber ff14SB as a reference for building solvent coordinates + force_field = app.ForceField( + force_fields["ff14sb-tip3p"]["force_field_file"], + force_fields["ff14sb-tip3p"]["water_model_file"], + ) - # Set up solute topology and positions - solute_pdb = app.PDBFile(protonated_pdb_file) - solute_topology = solute_pdb.topology - solute_positions = solute_pdb.positions + # Set up solute topology and positions + solute_pdb = app.PDBFile(protonated_pdb_file) + modeller = app.Modeller(solute_pdb.topology, solute_pdb.positions) # Add water in a rhombic dodecahedral box with no ions - modeller = app.Modeller(solute_topology, solute_positions) - if solvent_padding is not None: modeller.addSolvent( forcefield=force_field, @@ -481,9 +327,9 @@ def solvate( # Get total charge of system without ions system = force_field.createSystem(modeller.topology) - for i in range(system.getNumForces()): - if isinstance(system.getForce(i), openmm.NonbondedForce): - nonbonded_force = system.getForce(i) + for force in system.getForces(): + if isinstance(force, openmm.NonbondedForce): + nonbonded_force = force break else: @@ -606,10 +452,176 @@ def solvate( # Write solvated system to PDB file write_pdb(solvated_pdb_file, modeller.topology, modeller.positions) + +def assign_parameters( + simulation_platform: str, + nonbonded_cutoff: unit.Quantity, + vdw_switch_width: unit.Quantity, + protonated_pdb_file: str, + solvated_pdb_file: str, + parametrized_system: str, + water_model: str, + force_field_file: str, + water_model_file: str = None, + hydrogen_mass: unit.Quantity = 3.0 * unit.dalton, + setup_prefix: str = None, +): + """ + Assign parameters to the solvated topology. + + Parameters + ---------- + simulation_platform + Simulation platform for file exporting. + nonbonded_cutoff + The cutoff for the Lennard-Jones potential and PME direct space + summation. + vdw_switch_width + The distance from the nonbonded cutoff at which to apply the + switching function. + protonated_pdb_file + The path to the protonated PDB with initial coordinates. + solvated_pdb_file + The path to the solvated PDB with ions. + parametrized_system + The path to write the parametrized system. + water_model + The name of the water model used to parametrize the water. + force_field_file + The path to the force field to parametrize the system. + water_model_file + The path to the force field containing the water model. + hydrogen_mass + The mass of solute hydrogen atoms for hydrogen mass repartitioning. + """ + + # Check arguments + if not nonbonded_cutoff.is_compatible_with(unit.nanometer): + raise ValueError("nonbonded_cutoff does not have units of Length") + if not vdw_switch_width.is_compatible_with(unit.nanometer): + raise ValueError("vdw_switch_width does not have units of Length") + if not hydrogen_mass.is_compatible_with(unit.dalton): + raise ValueError("hydrogen_mass does not have units of Mass") + + solvated_pdb = app.PDBFile(solvated_pdb_file) + openmm_topology = solvated_pdb.topology + + # Check bond between OXT and HO for protonated C terminus + for chain in openmm_topology.chains(): + for residue in chain.residues(): + # Check for both OXT and HO atoms in the same residue + oxt = None + ho = None + for atom in residue.atoms(): + if atom.name == "OXT": + oxt = atom + elif atom.name == "HO": + ho = atom + if oxt is not None and ho is not None: + # Check that there is a bond between these atoms + for bond in openmm_topology.bonds(): + if (bond[0] == oxt and bond[1] == ho) or ( + bond[0] == ho and bond[1] == oxt + ): + break + else: + # Create a bond between these atoms + openmm_topology.addBond(oxt, ho) + + # Set up force field + ff_extension = Path(force_field_file).suffix + if ff_extension == ".offxml": + ff_type = "smirnoff" + ff_class = ForceField + + elif ff_extension == ".pt": + ff_type = "espaloma" + + # Make a force field with the water model, then add espaloma using a + # template generator from openmmforcefields + ff_class = app.ForceField + espaloma_model_file = force_field_file + force_field_file = water_model_file + water_model_file = None + + else: + ff_type = "openmm" + ff_class = app.ForceField + + if water_model_file is None: + force_field = ff_class(force_field_file) + print(f"Force field read from\n {force_field_file}") + + else: + force_field = ff_class(force_field_file, water_model_file) + print( + f"Force field read from\n {force_field_file}" + f"\n and {water_model_file}" + ) + # Create the parametrized system from the solvated topology - if smirnoff: - openmm_system = force_field.createSystem(modeller.topology) + if ff_type in {"smirnoff", "espaloma"}: + # Get list of OpenFF Molecules in solute OpenMM topology + openff_solute_topology = Topology.from_pdb(protonated_pdb_file) + openff_solute_molecules = openff_solute_topology.unique_molecules + + if ff_type == "smirnoff": + # Get unique molecules (solute, water, sodium ion, chloride ion) needed + # for openff.toolkit.topology.Topology.from_openmm() + unique_molecules = [ + *openff_solute_molecules, + Molecule.from_smiles("O"), + Molecule.from_smiles("[Na+1]"), + Molecule.from_smiles("[Cl-1]"), + ] + + if water_model in {"opc", "tip4p-fb"}: + # Create a new OpenMM topology without water virtual sites + no_vsite_topology = app.Topology() + no_vsite_topology.setPeriodicBoxVectors( + openmm_topology.getPeriodicBoxVectors() + ) + no_vsite_atoms = dict() + + for chain in openmm_topology.chains(): + no_vsite_chain = no_vsite_topology.addChain(chain.id) + + for residue in chain.residues(): + no_vsite_residue = no_vsite_topology.addResidue( + residue.name, + no_vsite_chain, + residue.id, + residue.insertionCode, + ) + for atom in residue.atoms(): + if residue.name != "HOH" or atom.name in {"O", "H1", "H2"}: + no_vsite_atom = no_vsite_topology.addAtom( + atom.name, atom.element, no_vsite_residue + ) + no_vsite_atoms[atom] = no_vsite_atom + + # Include bonds only between non-virtual site atoms + for bond in openmm_topology.bonds(): + if bond[0] in no_vsite_atoms and bond[1] in no_vsite_atoms: + no_vsite_topology.addBond( + no_vsite_atoms[bond[0]], no_vsite_atoms[bond[1]] + ) + + openff_topology = Topology.from_openmm( + no_vsite_topology, + unique_molecules=unique_molecules, + ) + + else: + openff_topology = Topology.from_openmm( + openmm_topology, + unique_molecules=unique_molecules, + ) + + openmm_system = force_field.create_openmm_system(openff_topology) + + # Repartition hydrogen mass to bonded heavy atom if simulation_platform == "openmm": openmm_hydrogen_mass = hydrogen_mass.to_openmm() # Manually change hydrogen masses in OpenMM system. Taken from @@ -634,10 +646,78 @@ def solvate( openmm_system.setParticleMass(atom1.index, heavy_mass) else: + if ff_type == "espaloma": + # Set up template generator with espaloma model + espaloma_generator = EspalomaTemplateGenerator( + molecules=openff_solute_molecules, + forcefield=espaloma_model_file, + ) + force_field.registerTemplateGenerator(espaloma_generator.generator) + + # Get OpenMM topology of solute with one residue per molecule + openmm_solute_topology = openff_solute_topology.to_openmm() + espaloma_topology = app.Topology() + espaloma_topology.setPeriodicBoxVectors( + openmm_topology.getPeriodicBoxVectors() + ) + solute_atoms = dict() + chain_index = 0 + + for chain in openmm_solute_topology.chains(): + new_chain = espaloma_topology.addChain(chain.id) + residue_name = f"XX{chain_index:01d}" + chain_index += 1 + resid = "1" + new_residue = espaloma_topology.addResidue( + residue_name, new_chain, resid + ) + + for residue in chain.residues(): + for atom in residue.atoms(): + new_atom = espaloma_topology.addAtom( + atom.name, atom.element, new_residue, atom.id + ) + solute_atoms[atom] = new_atom + + for bond in openmm_solute_topology.bonds(): + if bond[0] in solute_atoms and bond[1] in solute_atoms: + espaloma_topology.addBond( + solute_atoms[bond[0]], solute_atoms[bond[1]] + ) + + # Copy solvent topology from solvated system topology + solute_chain_indices = set( + chain.index for chain in espaloma_topology.chains() + ) + solvent_atoms = dict() + + for chain in openmm_topology.chains(): + if chain.index in solute_chain_indices: + continue + new_chain = espaloma_topology.addChain(chain.id) + for residue in chain.residues(): + new_residue = espaloma_topology.addResidue( + residue.name, new_chain, residue.id + ) + for atom in residue.atoms(): + new_atom = espaloma_topology.addAtom( + atom.name, atom.element, new_residue, atom.id + ) + solvent_atoms[atom] = new_atom + + for bond in openmm_topology.bonds(): + if bond[0] in solvent_atoms and bond[1] in solvent_atoms: + espaloma_topology.addBond( + solvent_atoms[bond[0]], solvent_atoms[bond[1]] + ) + + openmm_topology = espaloma_topology + + # Create parametrized system switch_distance = nonbonded_cutoff - vdw_switch_width if simulation_platform == "openmm": openmm_system = force_field.createSystem( - modeller.topology, + openmm_topology, nonbondedMethod=app.PME, nonbondedCutoff=nonbonded_cutoff.to_openmm(), constraints=app.HBonds, @@ -647,7 +727,7 @@ def solvate( ) elif simulation_platform == "gmx": openmm_system = force_field.createSystem( - modeller.topology, + openmm_topology, nonbondedMethod=app.PME, nonbondedCutoff=nonbonded_cutoff.to_openmm(), rigidWater=False, @@ -655,11 +735,13 @@ def solvate( switchDistance=switch_distance.to_openmm(), ) - if (smirnoff and simulation_platform == "openmm") or smirnoff == False: + if ( + ff_type == "smirnoff" and simulation_platform == "openmm" + ) or ff_type != "smirnoff": # Validate total charge of solvated system - for i in range(openmm_system.getNumForces()): - if isinstance(openmm_system.getForce(i), openmm.NonbondedForce): - nonbonded_force = openmm_system.getForce(i) + for force in openmm_system.getForces(): + if isinstance(force, openmm.NonbondedForce): + nonbonded_force = force break else: @@ -689,7 +771,7 @@ def solvate( # Write GROMACS files struct = pmd.openmm.load_topology( - modeller.topology, openmm_system, xyz=modeller.positions + openmm_topology, openmm_system, xyz=modeller.positions ) hmass = pmd.tools.HMassRepartition(struct, hydrogen_mass.m_as(unit.dalton)) hmass.execute()