Skip to content

Commit

Permalink
Merge pull request #91 from molssi-seamm/dev
Browse files: browse the repository at this point in the history
Bugfix: more issues with property handling.

* The types of properties were not kept when using Open Babel or RDKit, so when properties were reread from an SDF file the JSON properties were converted into strings, causing various errors. This is fixed.
  • Loading branch information
seamm authored Dec 14, 2024
2 parents f3abde8 + cea1b5a commit 9b5fc21
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 85 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
=======
History
=======
2024.12.14 -- Bugfix: more issues with property handling.
* The types of properties were not kept when using Open Babel or RDKit, so when
properties were reread from an SDF file the JSON properties were converted into
strings, causing various errors. This is fixed.

2024.12.11 -- Bugfix: Properties in SDF files
* Transferring properties to the Open Babel and RDKit molecules was incorrect after
recent changes to the handling of properties. This fixes the problem, and now SDF
Expand Down
2 changes: 1 addition & 1 deletion molsystem/data/standard_properties.csv
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Property,Type,Units,Description,URLHOMO energy#experiment,float,eV,"The {model} energy of the highest occupied molecular orbital (HOMO)",https://en.wikipedia.org/wiki/HOMO_and_LUMOLUMO energy#experiment,float,eV,"The {model} energy of the lowest unoccupied molecular orbital (LUMO)",https://en.wikipedia.org/wiki/HOMO_and_LUMOband gap#experiment,float,eV,"The band gap in a solid.",https://en.wikipedia.org/wiki/Band_gapchemical formula,string,,"The chemical formula of the molecule or conventional unit cell.",constant pressure heat capacity#experiment,float,J/mol/K,"The constant-pressure heat capacity, Cp.",https://en.wikipedia.org/wiki/Heat_capacityconstant volume heat capacity#experiment,float,J/mol/K,"The constant-volume heat capacity, Cv.",https://en.wikipedia.org/wiki/Heat_capacitydensity,float,g/mL,https://en.wikipedia.org/wiki/Densitydensity#experiment,float,g/mL,https://en.wikipedia.org/wiki/Densitydipole moment#experiment,float,D,"The electric dipole moment, which is the magnitude of the dipole vector.",https://en.wikipedia.org/wiki/Electric_dipole_momentempirical formula,string,,"The empirical formula of the molecule or conventional unit cell.",enthalpy of formation#experiment,float,kJ/mol,"The enthalpy of formation, DHf.",https://en.wikipedia.org/wiki/Standard_enthalpy_of_formationentropy#experiment,float,J/mol/K,"The standard entropy.",https://en.wikipedia.org/wiki/Entropyionization energy#experiment,float,eV,"The ionization energy or potential.",https://en.wikipedia.org/wiki/Ionization_energymolecular weight,float,Da,"The molecular weight in Daltons.",nuclear repulsion energy,float,eV,"The repulsion energy between the nuclei, based on atomic numbers and distances",unit cell volume,float,Å^3,"The volume of the unit cell.",
Property,Type,Units,Description,URLHOMO energy#experiment,float,eV,"The {model} energy of the highest occupied molecular orbital (HOMO)",https://en.wikipedia.org/wiki/HOMO_and_LUMOLUMO energy#experiment,float,eV,"The {model} energy of the lowest unoccupied molecular orbital (LUMO)",https://en.wikipedia.org/wiki/HOMO_and_LUMOband gap#experiment,float,eV,"The band gap in a solid.",https://en.wikipedia.org/wiki/Band_gapchemical formula,str,,"The chemical formula of the molecule or conventional unit cell.",constant pressure heat capacity#experiment,float,J/mol/K,"The constant-pressure heat capacity, Cp.",https://en.wikipedia.org/wiki/Heat_capacityconstant volume heat capacity#experiment,float,J/mol/K,"The constant-volume heat capacity, Cv.",https://en.wikipedia.org/wiki/Heat_capacitydensity,float,g/mL,https://en.wikipedia.org/wiki/Densitydensity#experiment,float,g/mL,https://en.wikipedia.org/wiki/Densitydipole moment#experiment,float,D,"The electric dipole moment, which is the magnitude of the dipole vector.",https://en.wikipedia.org/wiki/Electric_dipole_momentempirical formula,str,,"The empirical formula of the molecule or conventional unit cell.",enthalpy of formation#experiment,float,kJ/mol,"The enthalpy of formation, DHf.",https://en.wikipedia.org/wiki/Standard_enthalpy_of_formationentropy#experiment,float,J/mol/K,"The standard entropy.",https://en.wikipedia.org/wiki/Entropyionization energy#experiment,float,eV,"The ionization energy or potential.",https://en.wikipedia.org/wiki/Ionization_energymolecular weight,float,Da,"The molecular weight in Daltons.",nuclear repulsion energy,float,eV,"The repulsion energy between the nuclei, based on atomic numbers and distances",unit cell volume,float,Å^3,"The volume of the unit cell.",
Expand Down
84 changes: 46 additions & 38 deletions molsystem/openbabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""Interface to openbabel."""

import logging
import json
from pathlib import Path

try:
Expand Down Expand Up @@ -70,32 +71,30 @@ def to_OBMol(self, properties=None):
pair = ob.OBPairData()

if self.__class__.__name__ == "_Configuration":
pair.SetAttribute("net charge")
pair.SetAttribute("SEAMM|net charge|int|")
pair.SetValue(str(self.charge))
ob_mol.CloneData(pair)

pair.SetAttribute("spin multiplicity")
pair.SetAttribute("SEAMM|spin multiplicity|int|")
pair.SetValue(str(self.spin_multiplicity))
ob_mol.CloneData(pair)

if properties is not None:
data = self.properties.get(properties, include_system_properties=True)
for key, value in data.items():
for _property, value in data.items():
_type = self.properties.type(_property)
units = self.properties.units(_property)
value = value["value"]
key = f"SEAMM|{_property}|{_type}|"
if units is not None and units != "":
key += units

if _type == "json":
value = json.dumps(value)

pair.SetAttribute(key)
pair.SetValue(str(value))
ob_mol.CloneData(pair)

# Units, if any
units = self.properties.units(key)
if units is not None and units != "":
tmp = key.split("#", maxsplit=1)
if len(tmp) > 1:
pair.SetAttribute(tmp[0] + ",units" + "#" + tmp[1])
else:
pair.SetAttribute(key + ",units")
pair.SetValue(units)
ob_mol.CloneData(pair)
return ob_mol

def from_OBMol(
Expand Down Expand Up @@ -159,23 +158,27 @@ def from_OBMol(
data = {}
for item in ob_mol.GetData():
value = item.GetValue()
try:
value = int(value)
except Exception:
key = item.GetAttribute()
if key.startswith("SEAMM|"):
data[key] = value
else:
try:
value = float(value)
value = int(value)
except Exception:
pass
data[item.GetAttribute()] = value
try:
value = float(value)
except Exception:
pass
data[key] = value

# Check for property items for charge and multiplicity
if self.__class__.__name__ == "_Configuration":
self.charge = ob_mol.GetTotalCharge()
self.spin_multiplicity = ob_mol.GetTotalSpinMultiplicity()
if "net charge" in data:
self.charge = int(data["net charge"])
if "spin multiplicity" in data:
self.spin_multiplicity = int(data["spin multiplicity"])
if "SEAMM|net charge|int|" in data:
self.charge = int(data["SEAMM|net charge|int|"])
if "SEAMM|spin multiplicity|int|" in data:
self.spin_multiplicity = int(data["SEAMM|spin multiplicity|int|"])

if atoms:
if any([i != 0.0 for i in qs]):
Expand All @@ -199,21 +202,26 @@ def from_OBMol(
# Record any properties in the database if desired
if properties == "all":
for key, value in data.items():
if ",units" not in key and key not in [
"net charge",
"spin multiplicity",
]:
if not self.properties.exists(key):
tmp = key.split("#", maxsplit=1)
if len(tmp) > 1:
units_key = tmp[0] + ",units" + "#" + tmp[1]
else:
units_key = key + ",units"
_type = value.__class__.__name__
if units_key in data:
units = data[units_key]
self.properties.add(key, _type, units=units)
if key not in (
"SEAMM|net charge|int|",
"SEAMM|spin multiplicity|int|",
):
if key.startswith("SEAMM|"):
_, _property, _type, units = key.split("|", 4)
units = None if units.strip() == "" else units
if not self.properties.exists(_property):
self.properties.add(_property, _type=_type, units=units)
if _type == "int":
value = int(value)
elif _type == "float":
value = float(value)
elif _type == "json":
value = json.dumps(value)
else:
pass
else:
if not self.properties.exists(key):
_type = value.__class__.__name__
self.properties.add(key, _type)
self.properties.put(key, value)
return self
Expand Down
71 changes: 37 additions & 34 deletions molsystem/rdkit_.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""Interface to RDKit."""

import logging
import json
import pprint


Expand Down Expand Up @@ -97,29 +98,28 @@ def to_RDKMol(self, properties=None):

# Add the net charge and spin multiplicity as properties for configurations
if self.__class__.__name__ == "_Configuration":
rdk_mol.SetIntProp("net charge", self.charge)
rdk_mol.SetIntProp("spin multiplicity", self.spin_multiplicity)
rdk_mol.SetIntProp("SEAMM|net charge|int|", self.charge)
rdk_mol.SetIntProp("SEAMM|spin multiplicity|int|", self.spin_multiplicity)

if properties is not None:
data = self.properties.get(properties, include_system_properties=True)
for key, value in data.items():
for _property, value in data.items():
_type = self.properties.type(_property)
units = self.properties.units(_property)
value = value["value"]
if isinstance(value, int):
key = f"SEAMM|{_property}|{_type}|"
if units is not None and units != "":
key += units

if _type == "int":
rdk_mol.SetIntProp(key, value)
elif isinstance(value, float):
elif _type == "float":
rdk_mol.SetDoubleProp(key, value)
elif _type == "json":
rdk_mol.SetProp(key, json.dumps(value))
else:
rdk_mol.SetProp(key, str(value))

# Units, if any
units = self.properties.units(key)
if units is not None and units != "":
tmp = key.split("#", maxsplit=1)
if len(tmp) > 1:
rdk_mol.SetProp(tmp[0] + ",units" + "#" + tmp[1], units)
else:
rdk_mol.SetProp(key + ",units", units)

return rdk_mol

def from_RDKMol(
Expand Down Expand Up @@ -210,31 +210,34 @@ def from_RDKMol(
self.spin_multiplicity = n_electrons + 1

# Check for property items for charge and multiplicity
if "net charge" in data:
self.charge = int(data["net charge"])
if "spin multiplicity" in data:
self.spin_multiplicity = int(data["spin multiplicity"])
if "SEAMM|net charge|int|" in data:
self.charge = int(data["SEAMM|net charge|int|"])
if "SEAMM|spin multiplicity" in data:
self.spin_multiplicity = int(data["SEAMM|spin multiplicity|int|"])

# Record any properties in the database if desired
if properties == "all":
for key, value in data.items():
if ",units" not in key and key not in [
"net charge",
"spin multiplicity",
]:
if not self.properties.exists(key):
tmp = key.split("#", maxsplit=1)
if len(tmp) > 1:
units_key = tmp[0] + ",units" + "#" + tmp[1]
else:
units_key = key + ",units"
_type = value.__class__.__name__
if units_key in data:
units = data[units_key]
self.properties.add(key, _type, units=units)
else:
if ",units" not in key and key not in (
"SEAMM|net charge|int|",
"SEAMM|spin multiplicity|int|",
):
if key.startswith("SEAMM|"):
_, _property, _type, units = key.split("|", 4)
units = None if units.strip() == "" else units
if not self.properties.exists(_property):
self.properties.add(_property, _type=_type, units=units)

if _type == "json":
value = json.dumps(value)

self.properties.put(key, value)
else:
if not self.properties.exists(key):
_type = value.__class__.__name__
self.properties.add(key, _type)
self.properties.put(key, value)

self.properties.put(key, value)
return self

def debug_print(self):
Expand Down
11 changes: 5 additions & 6 deletions tests/test_openbabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,11 @@ def test_all_residue_search(configuration):
def test_to_OBMol(Acetate):
"""Test creating an OBMol object from a structure."""
correct = {
"float property": 3.14,
"float property,units": "kcal/mol",
"int property": 2,
"net charge": -1,
"spin multiplicity": 1,
"str property": "Hi!",
"SEAMM|float property|float|kcal/mol": 3.14,
"SEAMM|int property|int|": 2,
"SEAMM|net charge|int|": -1,
"SEAMM|spin multiplicity|int|": 1,
"SEAMM|str property|str|": "Hi!",
}

mol = Acetate.to_OBMol(properties="*")
Expand Down
11 changes: 5 additions & 6 deletions tests/test_rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ def test_version():
def test_to_RDKMol(Acetate):
"""Test creating a RDKMol object from a structure."""
correct = {
"float property": 3.14,
"float property,units": "kcal/mol",
"int property": 2,
"net charge": -1,
"spin multiplicity": 1,
"str property": "Hi!",
"SEAMM|float property|float|kcal/mol": 3.14,
"SEAMM|int property|int|": 2,
"SEAMM|net charge|int|": -1,
"SEAMM|spin multiplicity|int|": 1,
"SEAMM|str property|str|": "Hi!",
}

mol = Acetate.to_RDKMol(properties="*")
Expand Down

0 comments on commit 9b5fc21

Please sign in to comment.