Skip to content

Commit

Permalink
Csse layout opt/td (#461)
Browse files Browse the repository at this point in the history
* review comments

* opt working

* torsiondrive converted to v2

* misc

* one bug

* hopefully fix bug

* forgive optking again
  • Loading branch information
loriab authored Dec 16, 2024
1 parent c861fe6 commit 457299c
Show file tree
Hide file tree
Showing 26 changed files with 527 additions and 293 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ jobs:
#if: false
run: |
conda remove qcelemental --force
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536c' --no-deps
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536d' --no-deps
# note: conda remove --force, not mamba remove --force b/c https://github.com/mamba-org/mamba/issues/412
# alt. is micromamba but not yet ready for setup-miniconda https://github.com/conda-incubator/setup-miniconda/issues/75
Expand Down Expand Up @@ -247,7 +247,7 @@ jobs:
#if: false
run: |
conda remove qcelemental --force
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536c' --no-deps
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536d' --no-deps
- name: Environment Information
run: |
Expand Down
4 changes: 4 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ Misc.

MUST (Unmerged)
+++++++++++++++
torsiondrive rewritten in v2.
berny harness rewritten in v2. optking and geometric natively speak v1, so adapted as well as can be.
allow nwchemdriver w/o driver=energy. provenance now nwchemdriver not nwchemrelax
Optking now fills in ``v2.OptimizationResult.stdout``. Through v2, once can alter gradient protocols in an optimization.
- (:pr:`460`) integrate ``AtomicInput.specification`` into harnesses and show what new inputs look like in tests
- (:pr:`459`) gcp, mp2d several got properties.return_energy, retunr_gradient
- (:pr:`460`) If you're missing something from AtomicResult.extras, check AtomicResult.input_data.extras in case it was passed in on input
Expand Down
8 changes: 2 additions & 6 deletions qcengine/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,14 @@ def compute(

# Build the model and validate
# * calls model_wrapper with the (Atomic|Optimization|etc)Input for which the harness was designed
# * upon return, input_data is a model of the type (e.g., Atomic) and version (e.g., 1 or 2) the harness prefers. for now, v1.
# * upon return, input_data is a model of the type (e.g., Atomic) and version (e.g., 1 or 2) the harness prefers: all v2.
input_data, input_schema_version = executor.build_input_model(input_data, return_input_schema_version=True)
return_version = input_schema_version if return_version == -1 else return_version

# Build out task_config
if task_config is None:
task_config = {}
# TODO generalize when procedures in layout in place
if isinstance(input_data, qcelemental.models.v2.AtomicInput):
input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {})
else:
input_engine_options = input_data.extras.pop("_qcengine_local_config", {})
input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {})
task_config = {**task_config, **input_engine_options}
config = get_config(task_config=task_config)

Expand Down
46 changes: 25 additions & 21 deletions qcengine/procedures/berny.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from typing import Any, ClassVar, Dict, Union

import numpy as np
from qcelemental.models.v1 import OptimizationResult
from qcelemental.models.v2 import FailedOperation, OptimizationInput
from qcelemental.models.v2 import FailedOperation, OptimizationInput, OptimizationResult
from qcelemental.util import which_import

import qcengine
Expand All @@ -33,7 +32,7 @@ def build_input_model(
return self._build_model(data, "OptimizationInput", return_input_schema_version=return_input_schema_version)

def compute(
self, input_data: "OptimizationInput", config: "TaskConfig"
self, input_model: "OptimizationInput", config: "TaskConfig"
) -> Union["OptimizationResult", "FailedOperation"]:
try:
import berny
Expand All @@ -56,19 +55,21 @@ def compute(
log = logging.getLogger(f"{__name__}.{id(self)}")
log.addHandler(logging.StreamHandler(log_stream))
log.setLevel("INFO")
input_data = input_model.model_dump()

input_data = input_data.dict()
geom_qcng = input_data["initial_molecule"]
comput = {**input_data["input_specification"], "molecule": geom_qcng}
program = input_data["keywords"].pop("program")
comput = {"specification": input_data["specification"]["specification"], "molecule": geom_qcng}
program = input_data["specification"]["specification"][
"program"
] # TODO don't need to collect when compute can work w/o 2nd arg
task_config = config.dict()
trajectory = []
output_data = input_data.copy()
try:
# Pyberny uses angstroms for the Cartesian geometry, but atomic
# units for everything else, including the gradients (hartree/bohr).
geom_berny = berny.Geometry(geom_qcng["symbols"], geom_qcng["geometry"] / berny.angstrom)
opt = berny.Berny(geom_berny, logger=log, **input_data["keywords"])
opt = berny.Berny(geom_berny, logger=log, **input_data["specification"]["keywords"])
for geom_berny in opt:
geom_qcng["geometry"] = np.stack(geom_berny.coords * berny.angstrom)
ret = qcengine.compute(comput, program, task_config=task_config)
Expand All @@ -84,17 +85,20 @@ def compute(
except Exception:
error = {"error_type": "unknown", "error_message": f"Berny error:\n{traceback.format_exc()}"}
else:
output_data["success"] = True
output_data.update(
{
"schema_name": "qcschema_optimization_output",
"final_molecule": trajectory[-1]["molecule"],
"energies": [r["properties"]["return_energy"] for r in trajectory],
"trajectory": trajectory,
"provenance": {"creator": "Berny", "routine": "berny.Berny", "version": berny_version},
"stdout": log_stream.getvalue(), # collect logged messages
}
)
output_data = OptimizationResult(**output_data)
return output_data.convert_v(2)
return FailedOperation(input_data=input_data, error=error)
output = {
"input_data": input_model,
"final_molecule": trajectory[-1]["molecule"],
"properties": {
"return_energy": trajectory[-1]["properties"]["return_energy"],
"optimization_iterations": len(trajectory),
},
"trajectory_results": trajectory,
"trajectory_properties": [r["properties"] for r in trajectory],
"provenance": {"creator": "Berny", "routine": "berny.Berny", "version": berny_version},
"stdout": log_stream.getvalue(), # collect logged messages
"success": True,
}

return OptimizationResult(**output)

return FailedOperation(input_data=input_model, error=error)
24 changes: 13 additions & 11 deletions qcengine/procedures/geometric.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,31 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op
except ModuleNotFoundError:
raise ModuleNotFoundError("Could not find geomeTRIC in the Python path.")

input_data = input_model.dict()
input_data_v1 = input_model.convert_v(1).dict()

# Temporary patch for geomeTRIC
input_data["initial_molecule"]["symbols"] = list(input_data["initial_molecule"]["symbols"])
input_data_v1["initial_molecule"]["symbols"] = list(input_data_v1["initial_molecule"]["symbols"])

# Set retries to two if zero while respecting local_config
local_config = config.dict()
local_config["retries"] = local_config.get("retries", 2) or 2
input_data["input_specification"]["extras"]["_qcengine_local_config"] = local_config
input_data_v1["input_specification"]["extras"]["_qcengine_local_config"] = local_config

# Run the program
output_data = geometric.run_json.geometric_run_json(input_data)
output_v1 = geometric.run_json.geometric_run_json(input_data_v1)

output_data["provenance"] = {
output_v1["provenance"] = {
"creator": "geomeTRIC",
"routine": "geometric.run_json.geometric_run_json",
"version": geometric.__version__,
}

output_data["schema_name"] = "qcschema_optimization_output"
output_data["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_data["success"]:
output_data = OptimizationResult(**output_data)
output_data = output_data.convert_v(2)
output_v1["schema_name"] = "qcschema_optimization_output" # overwrites OptIn value
output_v1["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_v1["success"]:
output_v1 = OptimizationResult(**output_v1)
output = output_v1.convert_v(2, external_input_data=input_model)
else:
output = output_v1 # TODO almost certainly wrong, needs v2 conv

return output_data
return output
8 changes: 2 additions & 6 deletions qcengine/procedures/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,11 @@ def _build_model(
elif isinstance(data, dict):
# remember these are user-provided dictionaries, so they'll have the mandatory fields,
# like driver, not the helpful discriminator fields like schema_version.
# so long as versions distinguishable by a *required* field, id by dict is reliable.

schver = data.get("schema_version")
if schver == 1:
mdl = model_wrapper(data, v1_model)
elif schver == 2:
if data.get("specification", False) or data.get("schema_version") == 2:
mdl = model_wrapper(data, v2_model)
else:
# for now, the two dictionaries look the same, so cast to the one we want
# note that this prevents correctly identifying the user schema version when dict passed in, so either as_v1/None or as_v2 will fail
mdl = model_wrapper(data, v1_model)

input_schema_version = mdl.schema_version
Expand Down
56 changes: 31 additions & 25 deletions qcengine/procedures/nwchem_opt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,42 +27,37 @@ def build_input_model(
) -> "OptimizationInput":
return self._build_model(data, "OptimizationInput", return_input_schema_version=return_input_schema_version)

def compute(self, input_data: OptimizationInput, config: TaskConfig) -> "BaseModel":
def compute(self, input_model: OptimizationInput, config: TaskConfig) -> "BaseModel":
nwc_harness = NWChemHarness()
self.found(raise_error=True)

# Unify the keywords from the OptimizationInput and QCInputSpecification
# Optimization input will override, but don't tell users this as it seems unnecessary
keywords = input_data.keywords.copy()
keywords.update(input_data.input_specification.keywords)
if keywords.get("program", "nwchem").lower() != "nwchem":
raise InputError("NWChemDriver procedure only works with NWChem")
keywords = input_model.specification.keywords.copy()
keywords.update(input_model.specification.specification.keywords)
if (uprog := input_model.specification.specification.program) and (uprog.lower() != "nwchem"):
raise InputError(f"NWChemDriver procedure only works with NWChem, not {uprog}")

# Add a flag to the atomic input that tells the NWChemHarness we are calling it from driver
# This is needed for the NWCHarness to make some changes to the input file
input_data.input_specification.extras["is_driver"] = True
input_model.specification.specification.extras["is_driver"] = True

# Make an atomic input
atomic_input = AtomicInput(
molecule=input_data.initial_molecule,
specification=AtomicSpecification(
driver="energy",
keywords=keywords,
**input_data.input_specification.model_dump(
exclude={"driver", "keywords", "schema_name", "schema_version"}
), # TODO allow schema_ if added back
),
molecule=input_model.initial_molecule,
specification=input_model.specification.specification,
)

# Build the inputs for the job
job_inputs = nwc_harness.build_input(atomic_input, config)

# As of v2, now use "is_driver" to signal task ... optimize in the first place. This allows driver=gradient to work even though task ... gradient not last in file
# Replace the last line with a "task {} optimize"
input_file: str = job_inputs["infiles"]["nwchem.nw"].strip()
beginning, last_line = input_file.rsplit("\n", 1)
assert last_line.startswith("task")
last_line = f"task {last_line.split(' ')[1]} optimize"
job_inputs["infiles"]["nwchem.nw"] = f"{beginning}\n{last_line}"
# input_file: str = job_inputs["infiles"]["nwchem.nw"].strip()
# beginning, last_line = input_file.rsplit("\n", 1)
# assert last_line.startswith("task")
# last_line = f"task {last_line.split(' ')[1]} optimize"
# job_inputs["infiles"]["nwchem.nw"] = f"{beginning}\n{last_line}"

# Run it!
success, dexe = nwc_harness.execute(job_inputs)
Expand All @@ -78,11 +73,12 @@ def compute(self, input_data: OptimizationInput, config: TaskConfig) -> "BaseMod
if success:
dexe["outfiles"]["stdout"] = dexe["stdout"]
dexe["outfiles"]["stderr"] = dexe["stderr"]
return self.parse_output(dexe["outfiles"], input_data)
return self.parse_output(dexe["outfiles"], input_model)
else:
raise UnknownError(dexe["stdout"])

def parse_output(self, outfiles: Dict[str, str], input_model: OptimizationInput) -> OptimizationResult:
optsubproperties = ["calcinfo_natom", "return_energy", "return_gradient"]

# Get the stdout from the calculation (required)
stdout = outfiles.pop("stdout")
Expand All @@ -94,14 +90,24 @@ def parse_output(self, outfiles: Dict[str, str], input_model: OptimizationInput)
# Isolate the converged result
final_step = atomic_results[-1]

# input_data = input_model.model_dump()
# input_data["specification"]["specification"]["program"] = "nwchem"

return OptimizationResult(
initial_molecule=input_model.initial_molecule,
input_specification=input_model.input_specification,
input_data=input_model,
final_molecule=final_step.molecule,
trajectory=atomic_results,
energies=[float(r.extras["qcvars"]["CURRENT ENERGY"]) for r in atomic_results],
trajectory_results=atomic_results,
trajectory_properties=[
{k: v for k, v in grad.properties if k in optsubproperties} for grad in atomic_results
],
properties={
"return_energy": atomic_results[-1].properties.return_energy,
"optimization_iterations": len(atomic_results),
},
stdout=stdout,
stderr=stderr,
success=True,
provenance=Provenance(creator="NWChemRelax", version=self.get_version(), routine="nwchem_opt"),
provenance=Provenance(
creator="NWChemDriver", version=self.get_version(), routine="nwchem_opt"
), # was NWChemRelax which frankly is a better name
)
4 changes: 2 additions & 2 deletions qcengine/procedures/nwchem_opt/harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List
results = []
for qcvars, nwgrad, out_mol in zip(out_psivars, out_grads, out_mols):
if nwgrad is not None:
qcvars[f"{input_model.input_specification.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
qcvars[f"{input_model.specification.specification.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
qcvars["CURRENT GRADIENT"] = nwgrad

# Get the formatted properties
Expand All @@ -77,7 +77,7 @@ def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List
# Format them inout an output
input_data = {
"molecule": out_mol,
"specification": input_model.input_specification.model_dump(),
"specification": input_model.specification.specification.model_dump(),
}
input_data["specification"]["driver"] = "gradient"
input_data["specification"].pop("schema_name", None)
Expand Down
30 changes: 20 additions & 10 deletions qcengine/procedures/optking.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
import sys
from io import StringIO
from typing import Any, ClassVar, Dict, Union

from qcelemental.models.v1 import OptimizationResult
Expand Down Expand Up @@ -41,21 +44,28 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op
if self.found(raise_error=True):
import optking

input_data = input_model.convert_v(1)
input_data = input_model.dict()
log_stream = StringIO()
logname = "psi4.optking" if "psi4" in sys.modules else "optking"
log = logging.getLogger(logname)
log.addHandler(logging.StreamHandler(log_stream))
log.setLevel("INFO")

input_data_v1 = input_model.convert_v(1).dict()

# Set retries to two if zero while respecting local_config
local_config = config.dict()
local_config["retries"] = local_config.get("retries", 2) or 2
input_data["input_specification"]["extras"]["_qcengine_local_config"] = local_config
input_data_v1["input_specification"]["extras"]["_qcengine_local_config"] = local_config

# Run the program
output_data = optking.optwrapper.optimize_qcengine(input_data)
output_v1 = optking.optwrapper.optimize_qcengine(input_data_v1)
output_v1["stdout"] = log_stream.getvalue()

output_data["schema_name"] = "qcschema_optimization_output"
output_data["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_data["success"]:
output_data = OptimizationResult(**output_data)
output_data = output_data.convert_v(2)
output_v1["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_v1["success"]:
output_v1 = OptimizationResult(**output_v1)
output = output_v1.convert_v(2, external_input_data=input_model)
else:
output = output_v1 # TODO almost certainly wrong -- need v2 conversion?

return output_data
return output
Loading

0 comments on commit 457299c

Please sign in to comment.