Skip to content

Commit

Permalink
use rdkit2ase (#344)
Browse files Browse the repository at this point in the history
* use rdkit2ase

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* update rdkit2ase

* rename `Smiles2Atoms`

* refactor to use rdkit2ase

* update packmol

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
PythonFZ and pre-commit-ci[bot] authored Dec 3, 2024
1 parent d849313 commit c62086f
Show file tree
Hide file tree
Showing 9 changed files with 105 additions and 189 deletions.
12 changes: 6 additions & 6 deletions docs/source/_get_started/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,17 @@ The goal is to place water molecules inside a box at a specified density.
with ips.Project() as project:
# Generate a single water molecule
mol = ips.SmilesToAtoms(smiles="O")
mol = ips.Smiles2Atoms(smiles="O")
# Duplicate water molecules
packmol = ips.Packmol(
data=[mol.atoms], count=[10], density=876
)
project.build()
The *SmilesToAtoms* node takes the `SMILES <https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system>`_ string for water ("O"),
The *Smiles2Atoms* node takes the `SMILES <https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system>`_ string for water ("O"),
and generates an an `ASE atoms object <https://wiki.fysik.dtu.dk/ase/ase/atoms.html>`_ from it.
This atom from the SmilesToAtoms instance can be accessd by calling `mol.atoms`.
This atom from the Smiles2Atoms instance can be accessd by calling `mol.atoms`.
This instance attribute is then handed over to the Packmol node in the `data` attribute.
It is possible to hand over multiple different types of ASE atoms,
so `mol.atoms` as to be given as a list. The next attribute we need to set is the amount of molecules we want to place in the box.
Expand All @@ -80,7 +80,7 @@ used with `Mermaid <https://mermaid.js.org/>`_ to better visualise the workflows

The parameters files, namely `params.yaml` can be viewed and parameters can be changed without rerunning the python script.

By calling `dvc repro` the workflow consisting of the nodes is executed, which results in the generation of firstly a single water molecule by the SmilesToAtoms node and then the placement
By calling `dvc repro` the workflow consisting of the nodes is executed, which results in the generation of firstly a single water molecule by the Smiles2Atoms node and then the placement
of multiple water molecules in a box by the Packmol node.
After running `dvc repro` a new folder `nodes` is created which each node having their own subfolder. These folders
contain the results from the Nodes.
Expand All @@ -90,9 +90,9 @@ we can visualise the H5MD files generated by the nodes.

.. code-block:: bash
zndraw nodes/SmilesToAtoms/structures.h5
zndraw nodes/Smiles2Atoms/structures.h5
will visualise the single water molecule generated by SmilesToAtoms. There will also be a H5MD file in the Packmol folder
will visualise the single water molecule generated by Smiles2Atoms. There will also be a H5MD file in the Packmol folder

.. figure:: ../images/water_packmol.png
:alt: ZnDraw Screenshot
Expand Down
4 changes: 2 additions & 2 deletions docs/source/examples/06_Bootstrapping_Datasets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Running DVC command: 'stage add --name SmilesToAtoms --force ...'\n",
"Running DVC command: 'stage add --name Smiles2Atoms --force ...'\n",
"Running DVC command: 'stage add --name Packmol --force ...'\n",
"Running DVC command: 'stage add --name opt_calc --force ...'\n",
"Running DVC command: 'stage add --name ASEGeoOpt --force ...'\n",
Expand Down Expand Up @@ -371,7 +371,7 @@
"mapping = ips.geometry.BarycenterMapping()\n",
"\n",
"with ips.Project() as project:\n",
" water = ips.SmilesToAtoms(smiles=\"O\")\n",
" water = ips.Smiles2Atoms(smiles=\"O\")\n",
" packmol = ips.Packmol(\n",
" data=[water.atoms], count=[10], density=997\n",
" )\n",
Expand Down
8 changes: 4 additions & 4 deletions ipsuite/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ from .calculators import (
from .configuration_generation import (
MultiPackmol,
Packmol,
Smiles2Atoms,
Smiles2Conformers,
Smiles2Gromacs,
SmilesToAtoms,
SmilesToConformers,
)

# Configuration Selection
Expand Down Expand Up @@ -127,8 +127,8 @@ __all__ = [
# Configuration Generation
"Packmol",
"MultiPackmol",
"SmilesToAtoms",
"SmilesToConformers",
"Smiles2Atoms",
"Smiles2Conformers",
"Smiles2Gromacs",
# Data
"AddData",
Expand Down
6 changes: 3 additions & 3 deletions ipsuite/configuration_generation/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from .gmx import Smiles2Gromacs
from .packmol import MultiPackmol, Packmol
from .smiles_to_atoms import SmilesToAtoms, SmilesToConformers
from .smiles_to_atoms import Smiles2Atoms, Smiles2Conformers

__all__ = [
"SmilesToAtoms",
"Smiles2Atoms",
"Packmol",
"SmilesToConformers",
"Smiles2Conformers",
"MultiPackmol",
"Smiles2Gromacs",
]
136 changes: 35 additions & 101 deletions ipsuite/configuration_generation/packmol.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
"""Use packmole to create a periodic box"""

import logging
import pathlib
import subprocess
import random

import ase
import ase.units
import numpy as np
import rdkit2ase
import zntrack
from ase.visualize import view

from ipsuite import base, fields
from ipsuite.utils.ase_sim import get_box_from_density

log = logging.getLogger(__name__)

Expand All @@ -31,8 +29,6 @@ class Packmol(base.IPSNode):
Number of molecules to add for each entry in data.
tolerance : float
Tolerance for the distance of atoms in angstrom.
box : list[float]
Box size in angstrom. Either density or box is required.
density : float
Density of the system in kg/m^3. Either density or box is required.
pbc : bool
Expand All @@ -45,73 +41,38 @@ class Packmol(base.IPSNode):
data_ids: list[int] = zntrack.params(None)
count: list = zntrack.params()
tolerance: float = zntrack.params(2.0)
box: list = zntrack.params(None)
density: float = zntrack.params(None)
structures: pathlib.Path = zntrack.outs_path(zntrack.nwd / "packmol")
density: float = zntrack.params()
frames: list[ase.Atoms] = fields.Atoms()
pbc: bool = zntrack.params(True)

def __post_init__(self):
if self.box is None and self.density is None:
raise ValueError("Either box or density must be set.")
if len(self.data) != len(self.count):
raise ValueError("The number of data and count must be the same.")
if self.data_ids is not None and len(self.data) != len(self.data_ids):
raise ValueError("The number of data and data_ids must be the same.")
if self.box is not None and isinstance(self.box, (int, float)):
self.box = [self.box, self.box, self.box]

def run(self):
self.structures.mkdir(exist_ok=True, parents=True)
for idx, atoms in enumerate(self.data):
atoms = atoms[-1] if self.data_ids is None else atoms[self.data_ids[idx]]
ase.io.write(self.structures / f"{idx}.xyz", atoms)

if self.density is not None:
self._get_box_from_molar_volume()

if self.pbc:
scaled_box = [x - self.tolerance for x in self.box]
data = []
if self.data_ids is not None:
for idx, frames in zip(self.data_ids, self.data):
data.append([frames[idx]])
else:
scaled_box = self.box

file = f"""
tolerance {self.tolerance}
filetype xyz
output mixture.xyz
"""
for idx, count in enumerate(self.count):
file += f"""
structure {idx}.xyz
number {count}
inside box 0 0 0 {" ".join([f"{x:.4f}" for x in scaled_box])}
end structure
"""
with pathlib.Path(self.structures / "packmole.inp").open("w") as f:
f.write(file)

subprocess.check_call("packmol < packmole.inp", shell=True, cwd=self.structures)

atoms = ase.io.read(self.structures / "mixture.xyz")
if self.pbc:
atoms.cell = self.box
atoms.pbc = True
self.frames = [atoms]

def _get_box_from_molar_volume(self):
"""Get the box size from the molar volume"""
self.box = get_box_from_density(self.data, self.count, self.density)
log.info(f"estimated box size: {self.box}")

def view(self) -> view:
return view(self.frames, viewer="x3d")
data = self.data

self.frames = [
rdkit2ase.pack(
data=data,
counts=self.count,
tolerance=self.tolerance,
density=self.density,
pbc=self.pbc,
)
]


class MultiPackmol(Packmol):
"""Create multiple configurations with packmol.
This Node generates multiple configurations with packmol.
This is best used in conjunction with SmilesToConformers:
This is best used in conjunction with Smiles2Conformers:
Example
-------
Expand All @@ -120,7 +81,7 @@ class MultiPackmol(Packmol):
>>> import ipsuite as ips
>>> with ips.Project() as project:
... water = ips.SmilesToConformers(
... water = ips.Smiles2Conformers(
... smiles='O', numConfs=100
... )
... boxes = ips.MultiPackmol(
Expand All @@ -141,50 +102,23 @@ class MultiPackmol(Packmol):

n_configurations: int = zntrack.params()
seed: int = zntrack.params(42)
data_ids = None

def run(self):
np.random.seed(self.seed)
self.frames = []

if self.density is not None:
self._get_box_from_molar_volume()

if self.pbc:
scaled_box = [x - self.tolerance for x in self.box]
else:
scaled_box = self.box

self.structures.mkdir(exist_ok=True, parents=True)
for idx, atoms_list in enumerate(self.data):
for jdx, atoms in enumerate(atoms_list):
ase.io.write(self.structures / f"{idx}_{jdx}.xyz", atoms)

for idx in range(self.n_configurations):
file = f"""
tolerance {self.tolerance}
filetype xyz
output mixture_{idx}.xyz
"""
for jdx, count in enumerate(self.count):
choices = np.random.choice(len(self.data[jdx]), count)
for kdx in choices:
file += f"""
structure {jdx}_{kdx}.xyz
number 1
inside box 0 0 0 {" ".join([f"{x:.4f}" for x in scaled_box])}
end structure
"""
with pathlib.Path(self.structures / f"packmole_{idx}.inp").open("w") as f:
f.write(file)

subprocess.check_call(
f"packmol < packmole_{idx}.inp", shell=True, cwd=self.structures
for _ in range(self.n_configurations):
# shuffle each data entry
data = []
for frames in self.data:
random.shuffle(frames)
data.append(frames)

self.frames.append(
rdkit2ase.pack(
data=data,
counts=self.count,
tolerance=self.tolerance,
density=self.density,
pbc=self.pbc,
)
)

atoms = ase.io.read(self.structures / f"mixture_{idx}.xyz")
if self.pbc:
atoms.cell = self.box
atoms.pbc = True

self.frames.append(atoms)
55 changes: 12 additions & 43 deletions ipsuite/configuration_generation/smiles_to_atoms.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,29 @@
import pathlib

import ase
import rdkit2ase
import zntrack
from ase.visualize import view
from rdkit import Chem
from rdkit.Chem import AllChem, Draw

from ipsuite import base, fields


class SmilesToAtoms(base.IPSNode):
class Smiles2Atoms(base.IPSNode):
atoms: list[ase.Atoms] = fields.Atoms()

smiles: str = zntrack.params()
cell: float = zntrack.params(None)
seed: int = zntrack.params(1234)
optimizer: str = zntrack.params("UFF")
image: pathlib.Path = zntrack.outs_path(zntrack.nwd / "molecule.png")

def run(self):
mol = Chem.MolFromSmiles(self.smiles)
Draw.MolToFile(mol, self.image)

mol = Chem.AddHs(mol)
AllChem.EmbedMolecule(mol, randomSeed=self.seed)

if self.optimizer == "UFF":
AllChem.UFFOptimizeMolecule(mol)
elif self.optimizer == "MMFF":
AllChem.MMFFOptimizeMolecule(mol)

atoms = ase.Atoms(
positions=mol.GetConformer().GetPositions(),
numbers=[atom.GetAtomicNum() for atom in mol.GetAtoms()],
atoms = rdkit2ase.smiles2atoms(
smiles=self.smiles,
seed=self.seed,
)
atoms.positions -= atoms.get_center_of_mass()
if self.cell is not None:
if self.cell:
atoms.set_cell([self.cell, self.cell, self.cell])
atoms.center()
self.atoms = [atoms]

def view(self) -> view:
return view(self.atoms[0], viewer="x3d")


class SmilesToConformers(base.IPSNode):
class Smiles2Conformers(base.IPSNode):
frames: list[ase.Atoms] = fields.Atoms()

smiles: str = zntrack.params()
Expand All @@ -54,24 +33,14 @@ class SmilesToConformers(base.IPSNode):
cell: float = zntrack.params(100)

def run(self):
mol = Chem.MolFromSmiles(self.smiles)
mol = Chem.AddHs(mol)
AllChem.EmbedMultipleConfs(
mol,
frames = rdkit2ase.smiles2conformers(
smiles=self.smiles,
numConfs=self.numConfs,
randomSeed=self.seed,
maxAttempts=self.maxAttempts,
)
self.frames = []
for conf in mol.GetConformers():
atoms = ase.Atoms(
positions=conf.GetPositions(),
numbers=[atom.GetAtomicNum() for atom in mol.GetAtoms()],
)
atoms.positions -= atoms.get_center_of_mass()

if self.cell is not None:
if self.cell:
for atoms in frames:
atoms.set_cell([self.cell, self.cell, self.cell])
atoms.center()

self.frames.append(atoms)
self.frames = frames
Loading

0 comments on commit c62086f

Please sign in to comment.