Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Csse layout opt/td #461

Merged
merged 7 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ jobs:
#if: false
run: |
conda remove qcelemental --force
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536c' --no-deps
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536d' --no-deps

# note: conda remove --force, not mamba remove --force b/c https://github.com/mamba-org/mamba/issues/412
# alt. is micromamba but not yet ready for setup-miniconda https://github.com/conda-incubator/setup-miniconda/issues/75
Expand Down Expand Up @@ -247,7 +247,7 @@ jobs:
#if: false
run: |
conda remove qcelemental --force
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536c' --no-deps
python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536d' --no-deps

- name: Environment Information
run: |
Expand Down
4 changes: 4 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ Misc.

MUST (Unmerged)
+++++++++++++++
torsiondrive rewritten in v2.
berny harness rewritten in v2. optking and geometric natively speak v1, so adapted as well as can be.
allow nwchemdriver w/o driver=energy. provenance now nwchemdriver not nwchemrelax
Optking now fills in ``v2.OptimizationResult.stdout``. Through v2, once can alter gradient protocols in an optimization.
- (:pr:`460`) integrate ``AtomicInput.specification`` into harnesses and show what new inputs look like in tests
- (:pr:`459`) gcp, mp2d several got properties.return_energy, retunr_gradient
- (:pr:`460`) If you're missing something from AtomicResult.extras, check AtomicResult.input_data.extras in case it was passed in on input
Expand Down
8 changes: 2 additions & 6 deletions qcengine/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,14 @@ def compute(

# Build the model and validate
# * calls model_wrapper with the (Atomic|Optimization|etc)Input for which the harness was designed
# * upon return, input_data is a model of the type (e.g., Atomic) and version (e.g., 1 or 2) the harness prefers. for now, v1.
# * upon return, input_data is a model of the type (e.g., Atomic) and version (e.g., 1 or 2) the harness prefers: all v2.
input_data, input_schema_version = executor.build_input_model(input_data, return_input_schema_version=True)
return_version = input_schema_version if return_version == -1 else return_version

# Build out task_config
if task_config is None:
task_config = {}
# TODO generalize when procedures in layout in place
if isinstance(input_data, qcelemental.models.v2.AtomicInput):
input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {})
else:
input_engine_options = input_data.extras.pop("_qcengine_local_config", {})
input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {})
task_config = {**task_config, **input_engine_options}
config = get_config(task_config=task_config)

Expand Down
46 changes: 25 additions & 21 deletions qcengine/procedures/berny.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from typing import Any, ClassVar, Dict, Union

import numpy as np
from qcelemental.models.v1 import OptimizationResult
from qcelemental.models.v2 import FailedOperation, OptimizationInput
from qcelemental.models.v2 import FailedOperation, OptimizationInput, OptimizationResult
from qcelemental.util import which_import

import qcengine
Expand All @@ -33,7 +32,7 @@ def build_input_model(
return self._build_model(data, "OptimizationInput", return_input_schema_version=return_input_schema_version)

def compute(
self, input_data: "OptimizationInput", config: "TaskConfig"
self, input_model: "OptimizationInput", config: "TaskConfig"
) -> Union["OptimizationResult", "FailedOperation"]:
try:
import berny
Expand All @@ -56,19 +55,21 @@ def compute(
log = logging.getLogger(f"{__name__}.{id(self)}")
log.addHandler(logging.StreamHandler(log_stream))
log.setLevel("INFO")
input_data = input_model.model_dump()

input_data = input_data.dict()
geom_qcng = input_data["initial_molecule"]
comput = {**input_data["input_specification"], "molecule": geom_qcng}
program = input_data["keywords"].pop("program")
comput = {"specification": input_data["specification"]["specification"], "molecule": geom_qcng}
program = input_data["specification"]["specification"][
"program"
] # TODO don't need to collect when compute can work w/o 2nd arg
task_config = config.dict()
trajectory = []
output_data = input_data.copy()
try:
# Pyberny uses angstroms for the Cartesian geometry, but atomic
# units for everything else, including the gradients (hartree/bohr).
geom_berny = berny.Geometry(geom_qcng["symbols"], geom_qcng["geometry"] / berny.angstrom)
opt = berny.Berny(geom_berny, logger=log, **input_data["keywords"])
opt = berny.Berny(geom_berny, logger=log, **input_data["specification"]["keywords"])
for geom_berny in opt:
geom_qcng["geometry"] = np.stack(geom_berny.coords * berny.angstrom)
ret = qcengine.compute(comput, program, task_config=task_config)
Expand All @@ -84,17 +85,20 @@ def compute(
except Exception:
error = {"error_type": "unknown", "error_message": f"Berny error:\n{traceback.format_exc()}"}
else:
output_data["success"] = True
output_data.update(
{
"schema_name": "qcschema_optimization_output",
"final_molecule": trajectory[-1]["molecule"],
"energies": [r["properties"]["return_energy"] for r in trajectory],
"trajectory": trajectory,
"provenance": {"creator": "Berny", "routine": "berny.Berny", "version": berny_version},
"stdout": log_stream.getvalue(), # collect logged messages
}
)
output_data = OptimizationResult(**output_data)
return output_data.convert_v(2)
return FailedOperation(input_data=input_data, error=error)
output = {
"input_data": input_model,
"final_molecule": trajectory[-1]["molecule"],
"properties": {
"return_energy": trajectory[-1]["properties"]["return_energy"],
"optimization_iterations": len(trajectory),
},
"trajectory_results": trajectory,
"trajectory_properties": [r["properties"] for r in trajectory],
"provenance": {"creator": "Berny", "routine": "berny.Berny", "version": berny_version},
"stdout": log_stream.getvalue(), # collect logged messages
"success": True,
}

return OptimizationResult(**output)

return FailedOperation(input_data=input_model, error=error)
24 changes: 13 additions & 11 deletions qcengine/procedures/geometric.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,31 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op
except ModuleNotFoundError:
raise ModuleNotFoundError("Could not find geomeTRIC in the Python path.")

input_data = input_model.dict()
input_data_v1 = input_model.convert_v(1).dict()

# Temporary patch for geomeTRIC
input_data["initial_molecule"]["symbols"] = list(input_data["initial_molecule"]["symbols"])
input_data_v1["initial_molecule"]["symbols"] = list(input_data_v1["initial_molecule"]["symbols"])

# Set retries to two if zero while respecting local_config
local_config = config.dict()
local_config["retries"] = local_config.get("retries", 2) or 2
input_data["input_specification"]["extras"]["_qcengine_local_config"] = local_config
input_data_v1["input_specification"]["extras"]["_qcengine_local_config"] = local_config

# Run the program
output_data = geometric.run_json.geometric_run_json(input_data)
output_v1 = geometric.run_json.geometric_run_json(input_data_v1)

output_data["provenance"] = {
output_v1["provenance"] = {
"creator": "geomeTRIC",
"routine": "geometric.run_json.geometric_run_json",
"version": geometric.__version__,
}

output_data["schema_name"] = "qcschema_optimization_output"
output_data["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_data["success"]:
output_data = OptimizationResult(**output_data)
output_data = output_data.convert_v(2)
output_v1["schema_name"] = "qcschema_optimization_output" # overwrites OptIn value
output_v1["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_v1["success"]:
output_v1 = OptimizationResult(**output_v1)
output = output_v1.convert_v(2, external_input_data=input_model)
else:
output = output_v1 # TODO almost certainly wrong, needs v2 conv

return output_data
return output
8 changes: 2 additions & 6 deletions qcengine/procedures/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,11 @@ def _build_model(
elif isinstance(data, dict):
# remember these are user-provided dictionaries, so they'll have the mandatory fields,
# like driver, not the helpful discriminator fields like schema_version.
# so long as versions distinguishable by a *required* field, id by dict is reliable.

schver = data.get("schema_version")
if schver == 1:
mdl = model_wrapper(data, v1_model)
elif schver == 2:
if data.get("specification", False) or data.get("schema_version") == 2:
mdl = model_wrapper(data, v2_model)
else:
# for now, the two dictionaries look the same, so cast to the one we want
# note that this prevents correctly identifying the user schema version when dict passed in, so either as_v1/None or as_v2 will fail
mdl = model_wrapper(data, v1_model)

input_schema_version = mdl.schema_version
Expand Down
56 changes: 31 additions & 25 deletions qcengine/procedures/nwchem_opt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,42 +27,37 @@ def build_input_model(
) -> "OptimizationInput":
return self._build_model(data, "OptimizationInput", return_input_schema_version=return_input_schema_version)

def compute(self, input_data: OptimizationInput, config: TaskConfig) -> "BaseModel":
def compute(self, input_model: OptimizationInput, config: TaskConfig) -> "BaseModel":
nwc_harness = NWChemHarness()
self.found(raise_error=True)

# Unify the keywords from the OptimizationInput and QCInputSpecification
# Optimization input will override, but don't tell users this as it seems unnecessary
keywords = input_data.keywords.copy()
keywords.update(input_data.input_specification.keywords)
if keywords.get("program", "nwchem").lower() != "nwchem":
raise InputError("NWChemDriver procedure only works with NWChem")
keywords = input_model.specification.keywords.copy()
keywords.update(input_model.specification.specification.keywords)
if (uprog := input_model.specification.specification.program) and (uprog.lower() != "nwchem"):
raise InputError(f"NWChemDriver procedure only works with NWChem, not {uprog}")

# Add a flag to the atomic input that tells the NWChemHarness we are calling it from driver
# This is needed for the NWCHarness to make some changes to the input file
input_data.input_specification.extras["is_driver"] = True
input_model.specification.specification.extras["is_driver"] = True

# Make an atomic input
atomic_input = AtomicInput(
molecule=input_data.initial_molecule,
specification=AtomicSpecification(
driver="energy",
keywords=keywords,
**input_data.input_specification.model_dump(
exclude={"driver", "keywords", "schema_name", "schema_version"}
), # TODO allow schema_ if added back
),
molecule=input_model.initial_molecule,
specification=input_model.specification.specification,
)

# Build the inputs for the job
job_inputs = nwc_harness.build_input(atomic_input, config)

# As of v2, now use "is_driver" to signal task ... optimize in the first place. This allows driver=gradient to work even though task ... gradient not last in file
# Replace the last line with a "task {} optimize"
input_file: str = job_inputs["infiles"]["nwchem.nw"].strip()
beginning, last_line = input_file.rsplit("\n", 1)
assert last_line.startswith("task")
last_line = f"task {last_line.split(' ')[1]} optimize"
job_inputs["infiles"]["nwchem.nw"] = f"{beginning}\n{last_line}"
# input_file: str = job_inputs["infiles"]["nwchem.nw"].strip()
# beginning, last_line = input_file.rsplit("\n", 1)
# assert last_line.startswith("task")
# last_line = f"task {last_line.split(' ')[1]} optimize"
# job_inputs["infiles"]["nwchem.nw"] = f"{beginning}\n{last_line}"

# Run it!
success, dexe = nwc_harness.execute(job_inputs)
Expand All @@ -78,11 +73,12 @@ def compute(self, input_data: OptimizationInput, config: TaskConfig) -> "BaseMod
if success:
dexe["outfiles"]["stdout"] = dexe["stdout"]
dexe["outfiles"]["stderr"] = dexe["stderr"]
return self.parse_output(dexe["outfiles"], input_data)
return self.parse_output(dexe["outfiles"], input_model)
else:
raise UnknownError(dexe["stdout"])

def parse_output(self, outfiles: Dict[str, str], input_model: OptimizationInput) -> OptimizationResult:
optsubproperties = ["calcinfo_natom", "return_energy", "return_gradient"]

# Get the stdout from the calculation (required)
stdout = outfiles.pop("stdout")
Expand All @@ -94,14 +90,24 @@ def parse_output(self, outfiles: Dict[str, str], input_model: OptimizationInput)
# Isolate the converged result
final_step = atomic_results[-1]

# input_data = input_model.model_dump()
# input_data["specification"]["specification"]["program"] = "nwchem"

return OptimizationResult(
initial_molecule=input_model.initial_molecule,
input_specification=input_model.input_specification,
input_data=input_model,
final_molecule=final_step.molecule,
trajectory=atomic_results,
energies=[float(r.extras["qcvars"]["CURRENT ENERGY"]) for r in atomic_results],
trajectory_results=atomic_results,
trajectory_properties=[
{k: v for k, v in grad.properties if k in optsubproperties} for grad in atomic_results
],
properties={
"return_energy": atomic_results[-1].properties.return_energy,
"optimization_iterations": len(atomic_results),
},
stdout=stdout,
stderr=stderr,
success=True,
provenance=Provenance(creator="NWChemRelax", version=self.get_version(), routine="nwchem_opt"),
provenance=Provenance(
creator="NWChemDriver", version=self.get_version(), routine="nwchem_opt"
), # was NWChemRelax which frankly is a better name
)
4 changes: 2 additions & 2 deletions qcengine/procedures/nwchem_opt/harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List
results = []
for qcvars, nwgrad, out_mol in zip(out_psivars, out_grads, out_mols):
if nwgrad is not None:
qcvars[f"{input_model.input_specification.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
qcvars[f"{input_model.specification.specification.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
qcvars["CURRENT GRADIENT"] = nwgrad

# Get the formatted properties
Expand All @@ -77,7 +77,7 @@ def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List
# Format them inout an output
input_data = {
"molecule": out_mol,
"specification": input_model.input_specification.model_dump(),
"specification": input_model.specification.specification.model_dump(),
}
input_data["specification"]["driver"] = "gradient"
input_data["specification"].pop("schema_name", None)
Expand Down
30 changes: 20 additions & 10 deletions qcengine/procedures/optking.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import logging
import sys
from io import StringIO
from typing import Any, ClassVar, Dict, Union

from qcelemental.models.v1 import OptimizationResult
Expand Down Expand Up @@ -41,21 +44,28 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op
if self.found(raise_error=True):
import optking

input_data = input_model.convert_v(1)
input_data = input_model.dict()
log_stream = StringIO()
logname = "psi4.optking" if "psi4" in sys.modules else "optking"
log = logging.getLogger(logname)
log.addHandler(logging.StreamHandler(log_stream))
log.setLevel("INFO")

input_data_v1 = input_model.convert_v(1).dict()

# Set retries to two if zero while respecting local_config
local_config = config.dict()
local_config["retries"] = local_config.get("retries", 2) or 2
input_data["input_specification"]["extras"]["_qcengine_local_config"] = local_config
input_data_v1["input_specification"]["extras"]["_qcengine_local_config"] = local_config

# Run the program
output_data = optking.optwrapper.optimize_qcengine(input_data)
output_v1 = optking.optwrapper.optimize_qcengine(input_data_v1)
output_v1["stdout"] = log_stream.getvalue()

output_data["schema_name"] = "qcschema_optimization_output"
output_data["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_data["success"]:
output_data = OptimizationResult(**output_data)
output_data = output_data.convert_v(2)
output_v1["input_specification"]["extras"].pop("_qcengine_local_config", None)
if output_v1["success"]:
output_v1 = OptimizationResult(**output_v1)
output = output_v1.convert_v(2, external_input_data=input_model)
else:
output = output_v1 # TODO almost certainly wrong -- need v2 conversion?

return output_data
return output
Loading
Loading