From 6093a4588da8f3408ede5f080bf6b951fa2f1176 Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Mon, 2 Dec 2024 15:39:12 -0500 Subject: [PATCH 1/7] review comments --- .github/workflows/CI.yml | 4 ++-- qcengine/programs/cfour/runner.py | 4 ++-- qcengine/programs/tests/test_dftd4.py | 33 +++++++++++---------------- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c6d77563..694a6b35 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -165,7 +165,7 @@ jobs: #if: false run: | conda remove qcelemental --force - python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536c' --no-deps + python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536d' --no-deps # note: conda remove --force, not mamba remove --force b/c https://github.com/mamba-org/mamba/issues/412 # alt. is micromamba but not yet ready for setup-miniconda https://github.com/conda-incubator/setup-miniconda/issues/75 @@ -247,7 +247,7 @@ jobs: #if: false run: | conda remove qcelemental --force - python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536c' --no-deps + python -m pip install 'git+https://github.com/loriab/QCElemental.git@csse_layout_536d' --no-deps - name: Environment Information run: | diff --git a/qcengine/programs/cfour/runner.py b/qcengine/programs/cfour/runner.py index 97fb1493..b2751d21 100644 --- a/qcengine/programs/cfour/runner.py +++ b/qcengine/programs/cfour/runner.py @@ -183,10 +183,10 @@ def parse_output( qcvars[f"{method.upper()} TOTAL HESSIAN"] = c4hess qcvars["CURRENT HESSIAN"] = c4hess - if input_model.specification.driver.upper() == "PROPERTIES": + if (udriver := input_model.specification.driver.upper()) == "PROPERTIES": retres = qcvars[f"CURRENT ENERGY"] else: - retres = qcvars[f"CURRENT {input_model.specification.driver.upper()}"] + retres = qcvars[f"CURRENT {udriver}"] except KeyError: raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) diff --git a/qcengine/programs/tests/test_dftd4.py b/qcengine/programs/tests/test_dftd4.py index 1dcc703b..068f90c0 100644 --- a/qcengine/programs/tests/test_dftd4.py +++ b/qcengine/programs/tests/test_dftd4.py @@ -208,18 +208,20 @@ def test_dftd4_task_unknown_method(schema_versions, request): def test_dftd4_task_cold_fusion(schema_versions, request): models, retver, models_out = schema_versions + li4 = { + "symbols": ["Li", "Li", "Li", "Li"], + "geometry": [ + [-1.58746019997201, +1.58746019997201, +1.58746019997201], + [-1.58746019997201, +1.58746019997201, +1.58746019997201], + [-1.58746019997201, -1.58746019997201, -1.58746019997201], + [+1.58746019997201, +1.58746019997201, -1.58746019997201], + ], + "validated": True, # Force a nuclear fusion input, to make dftd4 fail + } + if from_v2(request.node.name): atomic_input = models.AtomicInput( - molecule={ - "symbols": ["Li", "Li", "Li", "Li"], - "geometry": [ - [-1.58746019997201, +1.58746019997201, +1.58746019997201], - [-1.58746019997201, +1.58746019997201, +1.58746019997201], - [-1.58746019997201, -1.58746019997201, -1.58746019997201], - [+1.58746019997201, +1.58746019997201, -1.58746019997201], - ], - "validated": True, # Force a nuclear fusion input, to make dftd4 fail - }, + molecule=li4, specification={ "keywords": {"level_hint": "D4"}, "model": {"method": "pbe"}, @@ -228,16 +230,7 @@ def test_dftd4_task_cold_fusion(schema_versions, request): ) else: atomic_input = models.AtomicInput( - molecule={ - "symbols": ["Li", "Li", "Li", "Li"], - "geometry": [ - [-1.58746019997201, +1.58746019997201, +1.58746019997201], - [-1.58746019997201, +1.58746019997201, +1.58746019997201], - [-1.58746019997201, -1.58746019997201, -1.58746019997201], - [+1.58746019997201, +1.58746019997201, -1.58746019997201], - ], - "validated": True, # Force a nuclear fusion input, to make dftd4 fail - }, + molecule=li4, keywords={"level_hint": "D4"}, model={"method": "pbe"}, driver="energy", From 80ec4ae464d6bee6ed433f85408e8322086d430c Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Mon, 9 Dec 2024 18:41:30 -0500 Subject: [PATCH 2/7] opt working --- docs/source/changelog.rst | 4 + qcengine/compute.py | 2 +- qcengine/procedures/berny.py | 46 +-- qcengine/procedures/geometric.py | 24 +- qcengine/procedures/model.py | 8 +- qcengine/procedures/nwchem_opt/__init__.py | 56 +-- qcengine/procedures/nwchem_opt/harvester.py | 4 +- qcengine/procedures/optking.py | 29 +- qcengine/programs/mace.py | 2 +- qcengine/programs/molpro.py | 2 +- qcengine/programs/mrchem.py | 2 +- qcengine/programs/nwchem/germinate.py | 8 +- qcengine/programs/nwchem/runner.py | 13 +- qcengine/programs/psi4.py | 2 +- qcengine/programs/qcore.py | 2 +- qcengine/programs/rdkit.py | 1 - qcengine/programs/terachem.py | 2 +- qcengine/programs/torchani.py | 2 +- qcengine/tests/test_procedures.py | 392 ++++++++++++++------ 19 files changed, 406 insertions(+), 195 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index a1a8a668..2e4e17f5 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -70,6 +70,10 @@ Misc. MUST (Unmerged) +++++++++++++++ + +berny harness rewritten in v2. optking and geometric natively speak v1, so adapted as well as can be. +allow nwchemdriver w/o driver=energy. provenance now nwchemdriver not nwchemrelax + Optking now fills in ``v2.OptimizationResult.stdout``. Through v2, once can alter gradient protocols in an optimization. - (:pr:`460`) integrate ``AtomicInput.specification`` into harnesses and show what new inputs look like in tests - (:pr:`459`) gcp, mp2d several got properties.return_energy, retunr_gradient - (:pr:`460`) If you're missing something from AtomicResult.extras, check AtomicResult.input_data.extras in case it was passed in on input diff --git a/qcengine/compute.py b/qcengine/compute.py index e2638e36..6a163750 100644 --- a/qcengine/compute.py +++ b/qcengine/compute.py @@ -123,7 +123,7 @@ def compute( if task_config is None: task_config = {} # TODO generalize when procedures in layout in place - if isinstance(input_data, qcelemental.models.v2.AtomicInput): + if isinstance(input_data, (qcelemental.models.v2.AtomicInput, qcelemental.models.v2.OptimizationInput)): input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {}) else: input_engine_options = input_data.extras.pop("_qcengine_local_config", {}) diff --git a/qcengine/procedures/berny.py b/qcengine/procedures/berny.py index 2c29a2dd..eb1ca4a9 100644 --- a/qcengine/procedures/berny.py +++ b/qcengine/procedures/berny.py @@ -5,8 +5,7 @@ from typing import Any, ClassVar, Dict, Union import numpy as np -from qcelemental.models.v1 import OptimizationResult -from qcelemental.models.v2 import FailedOperation, OptimizationInput +from qcelemental.models.v2 import FailedOperation, OptimizationInput, OptimizationResult from qcelemental.util import which_import import qcengine @@ -33,7 +32,7 @@ def build_input_model( return self._build_model(data, "OptimizationInput", return_input_schema_version=return_input_schema_version) def compute( - self, input_data: "OptimizationInput", config: "TaskConfig" + self, input_model: "OptimizationInput", config: "TaskConfig" ) -> Union["OptimizationResult", "FailedOperation"]: try: import berny @@ -56,11 +55,13 @@ def compute( log = logging.getLogger(f"{__name__}.{id(self)}") log.addHandler(logging.StreamHandler(log_stream)) log.setLevel("INFO") + input_data = input_model.model_dump() - input_data = input_data.dict() geom_qcng = input_data["initial_molecule"] - comput = {**input_data["input_specification"], "molecule": geom_qcng} - program = input_data["keywords"].pop("program") + comput = {"specification": input_data["specification"]["specification"], "molecule": geom_qcng} + program = input_data["specification"]["specification"][ + "program" + ] # TODO don't need to collect when compute can work w/o 2nd arg task_config = config.dict() trajectory = [] output_data = input_data.copy() @@ -68,7 +69,7 @@ def compute( # Pyberny uses angstroms for the Cartesian geometry, but atomic # units for everything else, including the gradients (hartree/bohr). geom_berny = berny.Geometry(geom_qcng["symbols"], geom_qcng["geometry"] / berny.angstrom) - opt = berny.Berny(geom_berny, logger=log, **input_data["keywords"]) + opt = berny.Berny(geom_berny, logger=log, **input_data["specification"]["keywords"]) for geom_berny in opt: geom_qcng["geometry"] = np.stack(geom_berny.coords * berny.angstrom) ret = qcengine.compute(comput, program, task_config=task_config) @@ -84,17 +85,20 @@ def compute( except Exception: error = {"error_type": "unknown", "error_message": f"Berny error:\n{traceback.format_exc()}"} else: - output_data["success"] = True - output_data.update( - { - "schema_name": "qcschema_optimization_output", - "final_molecule": trajectory[-1]["molecule"], - "energies": [r["properties"]["return_energy"] for r in trajectory], - "trajectory": trajectory, - "provenance": {"creator": "Berny", "routine": "berny.Berny", "version": berny_version}, - "stdout": log_stream.getvalue(), # collect logged messages - } - ) - output_data = OptimizationResult(**output_data) - return output_data.convert_v(2) - return FailedOperation(input_data=input_data, error=error) + output = { + "input_data": input_model, + "final_molecule": trajectory[-1]["molecule"], + "properties": { + "return_energy": trajectory[-1]["properties"]["return_energy"], + "optimization_iterations": len(trajectory), + }, + "trajectory_results": trajectory, + "trajectory_properties": [r["properties"] for r in trajectory], + "provenance": {"creator": "Berny", "routine": "berny.Berny", "version": berny_version}, + "stdout": log_stream.getvalue(), # collect logged messages + "success": True, + } + + return OptimizationResult(**output) + + return FailedOperation(input_data=input_model, error=error) diff --git a/qcengine/procedures/geometric.py b/qcengine/procedures/geometric.py index e5f62d5a..d687031c 100644 --- a/qcengine/procedures/geometric.py +++ b/qcengine/procedures/geometric.py @@ -43,29 +43,31 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op except ModuleNotFoundError: raise ModuleNotFoundError("Could not find geomeTRIC in the Python path.") - input_data = input_model.dict() + input_data_v1 = input_model.convert_v(1).dict() # Temporary patch for geomeTRIC - input_data["initial_molecule"]["symbols"] = list(input_data["initial_molecule"]["symbols"]) + input_data_v1["initial_molecule"]["symbols"] = list(input_data_v1["initial_molecule"]["symbols"]) # Set retries to two if zero while respecting local_config local_config = config.dict() local_config["retries"] = local_config.get("retries", 2) or 2 - input_data["input_specification"]["extras"]["_qcengine_local_config"] = local_config + input_data_v1["input_specification"]["extras"]["_qcengine_local_config"] = local_config # Run the program - output_data = geometric.run_json.geometric_run_json(input_data) + output_v1 = geometric.run_json.geometric_run_json(input_data_v1) - output_data["provenance"] = { + output_v1["provenance"] = { "creator": "geomeTRIC", "routine": "geometric.run_json.geometric_run_json", "version": geometric.__version__, } - output_data["schema_name"] = "qcschema_optimization_output" - output_data["input_specification"]["extras"].pop("_qcengine_local_config", None) - if output_data["success"]: - output_data = OptimizationResult(**output_data) - output_data = output_data.convert_v(2) + output_v1["schema_name"] = "qcschema_optimization_output" # overwrites OptIn value + output_v1["input_specification"]["extras"].pop("_qcengine_local_config", None) + if output_v1["success"]: + output_v1 = OptimizationResult(**output_v1) + output = output_v1.convert_v(2, external_input_data=input_model) + else: + output = output_v1 # TODO almost certainly wrong, needs v2 conv - return output_data + return output diff --git a/qcengine/procedures/model.py b/qcengine/procedures/model.py index e4117b58..0e0b17c9 100644 --- a/qcengine/procedures/model.py +++ b/qcengine/procedures/model.py @@ -70,15 +70,11 @@ def _build_model( elif isinstance(data, dict): # remember these are user-provided dictionaries, so they'll have the mandatory fields, # like driver, not the helpful discriminator fields like schema_version. + # so long as versions distinguishable by a *required* field, id by dict is reliable. - schver = data.get("schema_version") - if schver == 1: - mdl = model_wrapper(data, v1_model) - elif schver == 2: + if data.get("specification", False) or data.get("schema_version") == 2: mdl = model_wrapper(data, v2_model) else: - # for now, the two dictionaries look the same, so cast to the one we want - # note that this prevents correctly identifying the user schema version when dict passed in, so either as_v1/None or as_v2 will fail mdl = model_wrapper(data, v1_model) input_schema_version = mdl.schema_version diff --git a/qcengine/procedures/nwchem_opt/__init__.py b/qcengine/procedures/nwchem_opt/__init__.py index 5adf78e4..bc4d22c3 100644 --- a/qcengine/procedures/nwchem_opt/__init__.py +++ b/qcengine/procedures/nwchem_opt/__init__.py @@ -27,42 +27,37 @@ def build_input_model( ) -> "OptimizationInput": return self._build_model(data, "OptimizationInput", return_input_schema_version=return_input_schema_version) - def compute(self, input_data: OptimizationInput, config: TaskConfig) -> "BaseModel": + def compute(self, input_model: OptimizationInput, config: TaskConfig) -> "BaseModel": nwc_harness = NWChemHarness() self.found(raise_error=True) # Unify the keywords from the OptimizationInput and QCInputSpecification # Optimization input will override, but don't tell users this as it seems unnecessary - keywords = input_data.keywords.copy() - keywords.update(input_data.input_specification.keywords) - if keywords.get("program", "nwchem").lower() != "nwchem": - raise InputError("NWChemDriver procedure only works with NWChem") + keywords = input_model.specification.keywords.copy() + keywords.update(input_model.specification.specification.keywords) + if (uprog := input_model.specification.specification.program) and (uprog.lower() != "nwchem"): + raise InputError(f"NWChemDriver procedure only works with NWChem, not {uprog}") # Add a flag to the atomic input that tells the NWChemHarness we are calling it from driver # This is needed for the NWCHarness to make some changes to the input file - input_data.input_specification.extras["is_driver"] = True + input_model.specification.specification.extras["is_driver"] = True # Make an atomic input atomic_input = AtomicInput( - molecule=input_data.initial_molecule, - specification=AtomicSpecification( - driver="energy", - keywords=keywords, - **input_data.input_specification.model_dump( - exclude={"driver", "keywords", "schema_name", "schema_version"} - ), # TODO allow schema_ if added back - ), + molecule=input_model.initial_molecule, + specification=input_model.specification.specification, ) # Build the inputs for the job job_inputs = nwc_harness.build_input(atomic_input, config) + # As of v2, now use "is_driver" to signal task ... optimize in the first place. This allows driver=gradient to work even though task ... gradient not last in file # Replace the last line with a "task {} optimize" - input_file: str = job_inputs["infiles"]["nwchem.nw"].strip() - beginning, last_line = input_file.rsplit("\n", 1) - assert last_line.startswith("task") - last_line = f"task {last_line.split(' ')[1]} optimize" - job_inputs["infiles"]["nwchem.nw"] = f"{beginning}\n{last_line}" + # input_file: str = job_inputs["infiles"]["nwchem.nw"].strip() + # beginning, last_line = input_file.rsplit("\n", 1) + # assert last_line.startswith("task") + # last_line = f"task {last_line.split(' ')[1]} optimize" + # job_inputs["infiles"]["nwchem.nw"] = f"{beginning}\n{last_line}" # Run it! success, dexe = nwc_harness.execute(job_inputs) @@ -78,11 +73,12 @@ def compute(self, input_data: OptimizationInput, config: TaskConfig) -> "BaseMod if success: dexe["outfiles"]["stdout"] = dexe["stdout"] dexe["outfiles"]["stderr"] = dexe["stderr"] - return self.parse_output(dexe["outfiles"], input_data) + return self.parse_output(dexe["outfiles"], input_model) else: raise UnknownError(dexe["stdout"]) def parse_output(self, outfiles: Dict[str, str], input_model: OptimizationInput) -> OptimizationResult: + optsubproperties = ["calcinfo_natom", "return_energy", "return_gradient"] # Get the stdout from the calculation (required) stdout = outfiles.pop("stdout") @@ -94,14 +90,24 @@ def parse_output(self, outfiles: Dict[str, str], input_model: OptimizationInput) # Isolate the converged result final_step = atomic_results[-1] + # input_data = input_model.model_dump() + # input_data["specification"]["specification"]["program"] = "nwchem" + return OptimizationResult( - initial_molecule=input_model.initial_molecule, - input_specification=input_model.input_specification, + input_data=input_model, final_molecule=final_step.molecule, - trajectory=atomic_results, - energies=[float(r.extras["qcvars"]["CURRENT ENERGY"]) for r in atomic_results], + trajectory_results=atomic_results, + trajectory_properties=[ + {k: v for k, v in grad.properties if k in optsubproperties} for grad in atomic_results + ], + properties={ + "return_energy": atomic_results[-1].properties.return_energy, + "optimization_iterations": len(atomic_results), + }, stdout=stdout, stderr=stderr, success=True, - provenance=Provenance(creator="NWChemRelax", version=self.get_version(), routine="nwchem_opt"), + provenance=Provenance( + creator="NWChemDriver", version=self.get_version(), routine="nwchem_opt" + ), # was NWChemRelax which frankly is a better name ) diff --git a/qcengine/procedures/nwchem_opt/harvester.py b/qcengine/procedures/nwchem_opt/harvester.py index 4ce11c7c..82ea31b1 100644 --- a/qcengine/procedures/nwchem_opt/harvester.py +++ b/qcengine/procedures/nwchem_opt/harvester.py @@ -63,7 +63,7 @@ def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List results = [] for qcvars, nwgrad, out_mol in zip(out_psivars, out_grads, out_mols): if nwgrad is not None: - qcvars[f"{input_model.input_specification.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad + qcvars[f"{input_model.specification.specification.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad qcvars["CURRENT GRADIENT"] = nwgrad # Get the formatted properties @@ -77,7 +77,7 @@ def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List # Format them inout an output input_data = { "molecule": out_mol, - "specification": input_model.input_specification.model_dump(), + "specification": input_model.specification.specification.model_dump(), } input_data["specification"]["driver"] = "gradient" input_data["specification"].pop("schema_name", None) diff --git a/qcengine/procedures/optking.py b/qcengine/procedures/optking.py index 9ea1ac0f..36a4275c 100644 --- a/qcengine/procedures/optking.py +++ b/qcengine/procedures/optking.py @@ -1,3 +1,5 @@ +import logging +from io import StringIO from typing import Any, ClassVar, Dict, Union from qcelemental.models.v1 import OptimizationResult @@ -41,21 +43,28 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op if self.found(raise_error=True): import optking - input_data = input_model.convert_v(1) - input_data = input_model.dict() + log_stream = StringIO() + log = logging.getLogger(f"{__name__}.{id(self)}") + log = logging.getLogger("optking") + log.addHandler(logging.StreamHandler(log_stream)) + log.setLevel("INFO") + + input_data_v1 = input_model.convert_v(1).dict() # Set retries to two if zero while respecting local_config local_config = config.dict() local_config["retries"] = local_config.get("retries", 2) or 2 - input_data["input_specification"]["extras"]["_qcengine_local_config"] = local_config + input_data_v1["input_specification"]["extras"]["_qcengine_local_config"] = local_config # Run the program - output_data = optking.optwrapper.optimize_qcengine(input_data) + output_v1 = optking.optwrapper.optimize_qcengine(input_data_v1) + output_v1["stdout"] = log_stream.getvalue() - output_data["schema_name"] = "qcschema_optimization_output" - output_data["input_specification"]["extras"].pop("_qcengine_local_config", None) - if output_data["success"]: - output_data = OptimizationResult(**output_data) - output_data = output_data.convert_v(2) + output_v1["input_specification"]["extras"].pop("_qcengine_local_config", None) + if output_v1["success"]: + output_v1 = OptimizationResult(**output_v1) + output = output_v1.convert_v(2, external_input_data=input_model) + else: + output = output_v1 # TODO almost certainly wrong -- need v2 conversion? - return output_data + return output diff --git a/qcengine/programs/mace.py b/qcengine/programs/mace.py index 67cd23ac..5454e05d 100644 --- a/qcengine/programs/mace.py +++ b/qcengine/programs/mace.py @@ -136,7 +136,7 @@ def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> Union["Ato ret_data["input_data"] = input_data ret_data["molecule"] = input_data.molecule ret_data["provenance"] = Provenance(creator="mace", version=mace.__version__, routine="mace") - ret_data["schema_name"] = "qcschema_output" + ret_data["schema_name"] = "qcschema_atomic_output" ret_data["success"] = True # Form up a dict first, then sent to BaseModel to avoid repeat kwargs which don't override each other diff --git a/qcengine/programs/molpro.py b/qcengine/programs/molpro.py index aa293f4a..f0d9b9e2 100644 --- a/qcengine/programs/molpro.py +++ b/qcengine/programs/molpro.py @@ -443,7 +443,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> # Final output_data assignments needed for the AtomicResult object output_data["properties"] = properties output_data["extras"] = extras - output_data["schema_name"] = "qcschema_output" + output_data["schema_name"] = "qcschema_atomic_output" output_data["stdout"] = outfiles["dispatch.out"] output_data["success"] = True output_data["provenance"] = input_model.provenance # TODO better stamp? diff --git a/qcengine/programs/mrchem.py b/qcengine/programs/mrchem.py index 1951aa00..8ee66524 100644 --- a/qcengine/programs/mrchem.py +++ b/qcengine/programs/mrchem.py @@ -101,7 +101,7 @@ def compute(self, input_model: "AtomicInput", config: "TaskConfig") -> "AtomicRe input_data = copy.deepcopy(job_input["mrchem_json"]) output_data = { - "schema_name": "qcschema_output", + "schema_name": "qcschema_atomic_output", "schema_version": 2, "input_data": input_model, "molecule": input_model.molecule, diff --git a/qcengine/programs/nwchem/germinate.py b/qcengine/programs/nwchem/germinate.py index 2ccc62b4..e2dde0a4 100644 --- a/qcengine/programs/nwchem/germinate.py +++ b/qcengine/programs/nwchem/germinate.py @@ -105,7 +105,13 @@ def muster_modelchem(method: str, derint: int, use_tce: bool) -> Tuple[str, Dict opts = {} # Map the run type to - runtyp = {"energy": "energy", "gradient": "gradient", "hessian": "hessian", "properties": "property"}[derint] + runtyp = { + "energy": "energy", + "gradient": "gradient", + "hessian": "hessian", + "properties": "property", + "optimize": "optimize", + }[derint] # Write out the theory directive if method == "nwchem": diff --git a/qcengine/programs/nwchem/runner.py b/qcengine/programs/nwchem/runner.py index e789e05e..89d8a834 100644 --- a/qcengine/programs/nwchem/runner.py +++ b/qcengine/programs/nwchem/runner.py @@ -182,9 +182,11 @@ def build_input( molcmd = re.sub(r"geometry ([^\n]*)", rf"geometry \1 autosym {val}", molcmd) # Handle calc type and quantum chemical method - mdccmd, mdcopts = muster_modelchem( - input_model.specification.model.method, input_model.specification.driver, opts.pop("qc_module", False) - ) + if input_model.specification.extras.get("is_driver", False): + runtyp = "optimize" + else: + runtyp = input_model.specification.driver + mdccmd, mdcopts = muster_modelchem(input_model.specification.model.method, runtyp, opts.pop("qc_module", False)) opts.update(mdcopts) # Handle basis set @@ -236,7 +238,10 @@ def build_input( # Note: The Hessian is already stored in high precision in a file named "*.hess" if input_model.specification.driver == "gradient": # Get the name of the theory used for computing the gradients - theory = re.search(r"^task\s+(.+)\s+grad", mdccmd, re.MULTILINE).group(1) + try: + theory = re.search(r"^task\s+(.+)\s+opt", mdccmd, re.MULTILINE).group(1) + except AttributeError: + theory = re.search(r"^task\s+(.+)\s+grad", mdccmd, re.MULTILINE).group(1) if theory == "ccsd(t)": theory = "ccsd" logger.debug(f"Adding a Python task to retrieve gradients. Theory: {theory}") diff --git a/qcengine/programs/psi4.py b/qcengine/programs/psi4.py index 8b639af7..0804f268 100644 --- a/qcengine/programs/psi4.py +++ b/qcengine/programs/psi4.py @@ -208,7 +208,7 @@ def compute(self, input_model: "AtomicInput", config: "TaskConfig") -> "AtomicRe error_type = "execution_error" # Reset the schema if required - output_data["schema_name"] = "qcschema_output" + output_data["schema_name"] = "qcschema_atomic_output" output_data.pop("memory", None) output_data.pop("nthreads", None) output_data["stdout"] = output_data.pop("raw_output", None) diff --git a/qcengine/programs/qcore.py b/qcengine/programs/qcore.py index 4809ebb0..4303c248 100644 --- a/qcengine/programs/qcore.py +++ b/qcengine/programs/qcore.py @@ -242,7 +242,7 @@ def parse_output(self, output: Dict[str, Any], input_model: "AtomicInput") -> "A output_data["properties"] = properties - output_data["schema_name"] = "qcschema_output" + output_data["schema_name"] = "qcschema_atomic_output" output_data["success"] = True return AtomicResult(**output_data) diff --git a/qcengine/programs/rdkit.py b/qcengine/programs/rdkit.py index 97aaf92c..423d5a19 100644 --- a/qcengine/programs/rdkit.py +++ b/qcengine/programs/rdkit.py @@ -140,7 +140,6 @@ def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicRes creator="rdkit", version=rdkit.__version__, routine="rdkit.Chem.AllChem.UFFGetMoleculeForceField" ) - ret_data["schema_name"] = "qcschema_output" ret_data["success"] = True ret_data["input_data"] = input_data ret_data["molecule"] = jmol diff --git a/qcengine/programs/terachem.py b/qcengine/programs/terachem.py index 4bacca54..3ca0d12e 100644 --- a/qcengine/programs/terachem.py +++ b/qcengine/programs/terachem.py @@ -182,7 +182,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> output_data["properties"] = properties - output_data["schema_name"] = "qcschema_output" + output_data["schema_name"] = "qcschema_atomic_output" output_data["stdout"] = outfiles["tc.out"] # TODO Should only return True if TeraChem calculation terminated properly output_data["success"] = True diff --git a/qcengine/programs/torchani.py b/qcengine/programs/torchani.py index 7e0b7018..1e32f731 100644 --- a/qcengine/programs/torchani.py +++ b/qcengine/programs/torchani.py @@ -181,7 +181,7 @@ def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicRes creator="torchani", version="unknown", routine="torchani.builtin.aev_computer" ) - ret_data["schema_name"] = "qcschema_output" + ret_data["schema_name"] = "qcschema_atomic_output" ret_data["success"] = True # Form up a dict first, then sent to BaseModel to avoid repeat kwargs which don't override each other diff --git a/qcengine/tests/test_procedures.py b/qcengine/tests/test_procedures.py index 8091a7f1..a95f171c 100644 --- a/qcengine/tests/test_procedures.py +++ b/qcengine/tests/test_procedures.py @@ -8,16 +8,25 @@ import qcelemental as qcel import qcengine as qcng -from qcengine.testing import checkver_and_convert, failure_engine, schema_versions, using +from qcengine.testing import checkver_and_convert, failure_engine, from_v2, schema_versions, using @pytest.fixture(scope="function") -def input_data(): - return { - "keywords": {"coordsys": "tric", "maxiter": 100, "program": None}, - "input_specification": {"driver": "gradient", "model": None, "keywords": {}}, - "initial_molecule": None, - } +def input_data(request): + if from_v2(request.node.name): + return { + "specification": { + "keywords": {"coordsys": "tric", "maxiter": 100}, + "specification": {"driver": "gradient", "model": None, "keywords": {}, "program": None}, + }, + "initial_molecule": None, + } + else: + return { + "keywords": {"coordsys": "tric", "maxiter": 100, "program": None}, + "input_specification": {"driver": "gradient", "model": None, "keywords": {}}, + "initial_molecule": None, + } @using("psi4") @@ -28,15 +37,33 @@ def input_data(): pytest.param("geometric", marks=using("geometric")), pytest.param("optking", marks=using("optking")), pytest.param("berny", marks=using("berny")), + pytest.param("nwchemdriver", marks=using("nwchem")), ], ) def test_geometric_psi4(input_data, optimizer, ncores, schema_versions, request): models, retver, _ = schema_versions input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} - input_data["input_specification"]["keywords"] = {"scf_properties": ["wiberg_lowdin_indices"]} - input_data["keywords"]["program"] = "psi4" + if optimizer == "nwchemdriver": + grad_program = "nwchem" + grad_kw = {} + else: + grad_program = "psi4" + grad_kw = {"scf_properties": ["wiberg_lowdin_indices"]} + + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = {"method": "HF", "basis": "sto-3g"} + input_data["specification"]["specification"]["keywords"] = grad_kw + input_data["specification"]["specification"]["program"] = grad_program + input_data["specification"]["specification"]["extras"] = {"myqctag": "hello psi4"} + input_data["specification"]["extras"] = {"myopttag": "hello qcengine"} + + else: + input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} + input_data["input_specification"]["keywords"] = grad_kw + input_data["keywords"]["program"] = grad_program + input_data["input_specification"]["extras"] = {"myqctag": "hello psi4"} + input_data["extras"] = {"myopttag": "hello qcengine"} input_data = models.OptimizationInput(**input_data) @@ -51,87 +78,143 @@ def test_geometric_psi4(input_data, optimizer, ncores, schema_versions, request) input_data, optimizer, raise_error=True, task_config=task_config, return_version=retver ) ret = checkver_and_convert(ret, request.node.name, "post") + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory - assert 10 > len(ret.trajectory) > 1 + assert 10 > len(trajs_tgt) > 1 assert pytest.approx(ret.final_molecule.measure([0, 1]), 1.0e-4) == 1.3459150737 assert ret.provenance.creator.lower() == optimizer - assert ret.trajectory[0].provenance.creator.lower() == "psi4" - - if optimizer == "optking": - pytest.xfail("not passing threads to psi4") + assert trajs_tgt[0].provenance.creator.lower() == grad_program + + if optimizer == "optking" and ncores != 1: + with pytest.raises(AssertionError): + # Optking not passing threads to psi4 + assert trajs_tgt[0].provenance.nthreads == ncores + elif optimizer == "nwchemdriver": + pass + # threading not yet implemented in NWChemDriver else: - assert ret.trajectory[0].provenance.nthreads == ncores + assert trajs_tgt[0].provenance.nthreads == ncores # Check keywords passing - for single in ret.trajectory: - if "v2" in request.node.name: - assert "scf_properties" in single.input_data.specification.keywords - else: - assert "scf_properties" in single.keywords - assert "WIBERG_LOWDIN_INDICES" in single.extras["qcvars"] or "WIBERG LOWDIN INDICES" in single.extras["qcvars"] - # TODO: old WIBERG qcvar used underscore; new one uses space. covering bases here but remove someday + if optimizer != "nwchemdriver": + for single in trajs_tgt: + if "v2" in request.node.name: + assert "scf_properties" in single.input_data.specification.keywords + else: + assert "scf_properties" in single.keywords + assert ( + "WIBERG_LOWDIN_INDICES" in single.extras["qcvars"] or "WIBERG LOWDIN INDICES" in single.extras["qcvars"] + ) + # TODO: old WIBERG qcvar used underscore; new one uses space. covering bases here but remove someday + + # Check extras passing + if "v2" in request.node.name: + assert ( + "myqctag" in ret.input_data.specification.specification.extras + ), ret.input_data.specification.specification.extras.keys() + if optimizer != "geometric": + # geometric: below can't happen until optimizers call gradients with v2 + assert "myqctag" not in ret.trajectory_results[0].extras, "input extras wrongly present in sub-result" + assert "myopttag" in ret.input_data.specification.extras, ret.input_data.specification.extras.keys() + assert "myopttag" not in ret.extras, "input extras wrongly present in top-result" + else: + if optimizer != "optking": + assert "myqctag" in ret.trajectory[0].extras, ret.trajectory[0].extras.keys() + assert "myopttag" in ret.extras, ret.extras.keys() @using("psi4") -@using("geometric") -def test_geometric_local_options(input_data, schema_versions, request): +@pytest.mark.parametrize( + "optimizer", + [ + pytest.param("geometric", marks=using("geometric")), + pytest.param("optking", marks=using("optking")), + pytest.param("berny", marks=using("berny")), + ], +) +def test_geometric_local_options(input_data, schema_versions, request, optimizer): models, retver, _ = schema_versions input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} - input_data["keywords"]["program"] = "psi4" + + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = {"method": "HF", "basis": "sto-3g"} + input_data["specification"]["specification"]["program"] = "psi4" + else: + input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} + input_data["keywords"]["program"] = "psi4" input_data = models.OptimizationInput(**input_data) # Set some extremely large number to test input_data = checkver_and_convert(input_data, request.node.name, "pre") - ret = qcng.compute(input_data, "geometric", raise_error=True, task_config={"memory": "5000"}, return_version=retver) + ret = qcng.compute(input_data, optimizer, raise_error=True, task_config={"memory": "5000"}, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") - assert pytest.approx(ret.trajectory[0].provenance.memory, 1) == 4900 + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + atin_tgt = ret.input_data.specification.specification if "v2" in request.node.name else ret.input_specification + + if optimizer == "optking": + with pytest.raises(AssertionError): + # Optking not passing memory to psi4 + assert pytest.approx(trajs_tgt[0].provenance.memory, 1) == 4900 + else: + assert pytest.approx(trajs_tgt[0].provenance.memory, 1) == 4900 # Make sure we cleaned up - assert "_qcengine_local_config" not in ret.input_specification - assert "_qcengine_local_config" not in ret.trajectory[0].extras + assert "_qcengine_local_config" not in atin_tgt.extras + assert "_qcengine_local_config" not in trajs_tgt[0].extras + if "v2" in request.node.name: + assert "_qcengine_local_config" not in trajs_tgt[0].input_data.specification.extras -@using("rdkit") -@using("geometric") -def test_geometric_stdout(input_data, schema_versions, request): +@pytest.mark.parametrize( + "optimizer,converged", + [ + pytest.param("geometric", "Converged!", marks=using("geometric")), + pytest.param("optking", "Convergence check returned True", marks=using("optking")), + pytest.param("berny", "All criteria matched", marks=using("berny")), + ], +) +@pytest.mark.parametrize( + "gradprog,gradmodel", + [ + pytest.param("rdkit", {"method": "UFF", "basis": ""}, marks=using("rdkit")), + pytest.param("psi4", {"method": "HF", "basis": "sto-3g"}, marks=using("psi4")), + ], +) +def test_optimizer_stdout(optimizer, gradprog, gradmodel, converged, input_data, schema_versions, request): models, retver, _ = schema_versions input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("water", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "UFF", "basis": ""} - input_data["keywords"]["program"] = "rdkit" - - input_data = models.OptimizationInput(**input_data) - - input_data = checkver_and_convert(input_data, request.node.name, "pre") - ret = qcng.compute(input_data, "geometric", raise_error=True, return_version=retver) - ret = checkver_and_convert(ret, request.node.name, "post") - - assert ret.success is True - assert "Converged!" in ret.stdout - - -@using("psi4") -@using("berny") -def test_berny_stdout(input_data, schema_versions, request): - models, retver, _ = schema_versions - input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("water", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} - input_data["keywords"]["program"] = "psi4" + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = gradmodel + input_data["specification"]["specification"]["program"] = gradprog + input_data["specification"]["specification"]["protocols"] = {"stdout": False} + else: + input_data["input_specification"]["model"] = gradmodel + input_data["keywords"]["program"] = gradprog + # no way to turn off gradient stdout in v1 input_data = models.OptimizationInput(**input_data) input_data = checkver_and_convert(input_data, request.node.name, "pre") - ret = qcng.compute(input_data, "berny", raise_error=True, return_version=retver) + ret = qcng.compute(input_data, optimizer, raise_error=True, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") assert ret.success is True - assert "All criteria matched" in ret.stdout + assert converged in ret.stdout + + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + assert ret.provenance.creator.lower() == optimizer + assert trajs_tgt[0].provenance.creator.lower() == gradprog + # rdkit does not have stdout, while psi4 does and is suppressed by protocol in v2 but can't be thru v1 + if from_v2(request.node.name) or gradprog == "rdkit": + assert trajs_tgt[0].stdout is None + else: + assert trajs_tgt[0].stdout is not None @using("psi4") @@ -140,9 +223,14 @@ def test_berny_failed_gradient_computation(input_data, schema_versions, request) models, retver, _ = schema_versions input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("water", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} - input_data["input_specification"]["keywords"] = {"badpsi4key": "badpsi4value"} - input_data["keywords"]["program"] = "psi4" + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = {"method": "HF", "basis": "sto-3g"} + input_data["specification"]["specification"]["keywords"] = {"badpsi4key": "badpsi4value"} + input_data["specification"]["specification"]["program"] = "psi4" + else: + input_data["input_specification"]["model"] = {"method": "HF", "basis": "sto-3g"} + input_data["input_specification"]["keywords"] = {"badpsi4key": "badpsi4value"} + input_data["keywords"]["program"] = "psi4" input_data = models.OptimizationInput(**input_data) @@ -163,8 +251,12 @@ def test_geometric_rdkit_error(input_data, schema_versions, request): input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("water", return_dict=True)).copy( exclude={"connectivity_"} ) - input_data["input_specification"]["model"] = {"method": "UFF", "basis": ""} - input_data["keywords"]["program"] = "rdkit" + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = {"method": "UFF", "basis": ""} + input_data["specification"]["specification"]["program"] = "rdkit" + else: + input_data["input_specification"]["model"] = {"method": "UFF", "basis": ""} + input_data["keywords"]["program"] = "rdkit" input_data = models.OptimizationInput(**input_data) @@ -177,25 +269,59 @@ def test_geometric_rdkit_error(input_data, schema_versions, request): @using("rdkit") -@using("geometric") -def test_optimization_protocols(input_data, schema_versions, request): +@pytest.mark.parametrize( + "optimizer", + [ + pytest.param("geometric", marks=using("geometric")), + pytest.param("optking", marks=using("optking")), + pytest.param("berny", marks=using("berny")), + pytest.param("nwchemdriver", marks=using("nwchem")), + ], +) +def test_optimization_protocols(optimizer, input_data, schema_versions, request): models, retver, _ = schema_versions + if optimizer == "nwchemdriver": + grad_program = "nwchem" + grad_model = {"method": "HF", "basis": "sto-3g"} + else: + grad_program = "rdkit" + grad_model = {"method": "UFF"} + input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("water", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "UFF"} - input_data["keywords"]["program"] = "rdkit" - input_data["protocols"] = {"trajectory": "initial_and_final"} + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = grad_model + input_data["specification"]["specification"]["program"] = grad_program + input_data["specification"]["protocols"] = {"trajectory": "initial_and_final"} + else: + input_data["input_specification"]["model"] = grad_model + input_data["keywords"]["program"] = grad_program + input_data["protocols"] = {"trajectory": "initial_and_final"} input_data = models.OptimizationInput(**input_data) input_data = checkver_and_convert(input_data, request.node.name, "pre") - ret = qcng.compute(input_data, "geometric", raise_error=True, return_version=retver) + ret = qcng.compute(input_data, optimizer, raise_error=True, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + initmol_tgt = ret.input_data.initial_molecule if "v2" in request.node.name else ret.initial_molecule + assert ret.success, ret.error.error_message - assert len(ret.trajectory) == 2 - assert ret.initial_molecule.get_hash() == ret.trajectory[0].molecule.get_hash() - assert ret.final_molecule.get_hash() == ret.trajectory[1].molecule.get_hash() + assert len(trajs_tgt) == 2 + assert ( + pytest.approx(initmol_tgt.nuclear_repulsion_energy(), 1.0e-4) + == trajs_tgt[0].molecule.nuclear_repulsion_energy() + ) + if optimizer != "nwchemdriver": + # in internal-opt mode, fix_/orient is not called, so the hash will be different + assert initmol_tgt.get_hash() == trajs_tgt[0].molecule.get_hash() + assert ( + pytest.approx(ret.final_molecule.nuclear_repulsion_energy(), 1.0e-4) + == trajs_tgt[1].molecule.nuclear_repulsion_energy() + ) + if optimizer != "nwchemdriver": + assert ret.final_molecule.get_hash() == trajs_tgt[1].molecule.get_hash() @using("geometric") @@ -209,9 +335,17 @@ def test_geometric_retries(failure_engine, input_data, schema_versions, request) "symbols": ["He", "He"], "geometry": [0, 0, 0, 0, 0, failure_engine.start_distance], } - input_data["input_specification"]["model"] = {"method": "something"} - input_data["keywords"]["program"] = failure_engine.name - input_data["keywords"]["coordsys"] = "cart" # needed by geometric v1.0 to play nicely with failure_engine + + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = {"method": "something"} + input_data["specification"]["specification"]["program"] = failure_engine.name + input_data["specification"]["keywords"][ + "coordsys" + ] = "cart" # needed by geometric v1.0 to play nicely with failure_engine + else: + input_data["input_specification"]["model"] = {"method": "something"} + input_data["keywords"]["program"] = failure_engine.name + input_data["keywords"]["coordsys"] = "cart" # needed by geometric v1.0 to play nicely with failure_engine input_data = models.OptimizationInput(**input_data) @@ -219,12 +353,14 @@ def test_geometric_retries(failure_engine, input_data, schema_versions, request) ret = qcng.compute(input_data, "geometric", task_config={"ncores": 13}, raise_error=True, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + assert ret.success is True - assert ret.trajectory[0].provenance.retries == 1 - assert ret.trajectory[0].provenance.ncores == 13 - assert ret.trajectory[1].provenance.retries == 2 - assert ret.trajectory[1].provenance.ncores == 13 - assert "retries" not in ret.trajectory[2].provenance.dict() + assert trajs_tgt[0].provenance.retries == 1 + assert trajs_tgt[0].provenance.ncores == 13 + assert trajs_tgt[1].provenance.retries == 2 + assert trajs_tgt[1].provenance.ncores == 13 + assert "retries" not in trajs_tgt[2].provenance.dict() # Ensure we still fail failure_engine.iter_modes = ["random_error", "pass", "random_error", "random_error", "pass"] # Iter 1 # Iter 2 @@ -298,16 +434,25 @@ def test_geometric_generic(input_data, program, model, bench, schema_versions, r models, retver, _ = schema_versions input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("water", return_dict=True)) - input_data["input_specification"]["model"] = model - input_data["keywords"]["program"] = program - input_data["input_specification"]["extras"] = { - "_secret_tags": {"mysecret_tag": "data1"} # pragma: allowlist secret - } + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = model + input_data["specification"]["specification"]["program"] = program + input_data["specification"]["specification"]["extras"] = { + "_secret_tags": {"mysecret_tag": "data1"} # pragma: allowlist secret + } + else: + input_data["input_specification"]["model"] = model + input_data["keywords"]["program"] = program + input_data["input_specification"]["extras"] = { + "_secret_tags": {"mysecret_tag": "data1"} # pragma: allowlist secret + } input_data = checkver_and_convert(input_data, request.node.name, "pre", cast_dict_as="OptimizationInput") ret = qcng.compute(input_data, "geometric", raise_error=True, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + assert ret.success is True assert "Converged!" in ret.stdout @@ -318,8 +463,9 @@ def test_geometric_generic(input_data, program, model, bench, schema_versions, r assert pytest.approx(r12, 1.0e-4) == bench[1] assert pytest.approx(a102, 1.0e-4) == bench[2] - assert "_secret_tags" in ret.trajectory[0].extras - assert "data1" == ret.trajectory[0].extras["_secret_tags"]["mysecret_tag"] + assert "_secret_tags" in trajs_tgt[0].extras + assert "data1" == trajs_tgt[0].extras["_secret_tags"]["mysecret_tag"], trajs_tgt[0].extras["_secret_tags"] + # the _secret_tags shows up in Res.extras, not in Res.input_data.extras b/c geometric running v1 internally @using("nwchem") @@ -328,13 +474,25 @@ def test_nwchem_relax(linopt, schema_versions, request): models, retver, _ = schema_versions # Make the input file - input_data = { - "input_specification": { - "model": {"method": "HF", "basis": "sto-3g"}, - "keywords": {"set__driver:linopt": linopt}, - }, - "initial_molecule": models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)), - } + if from_v2(request.node.name): + input_data = { + "specification": { + "specification": { + "model": {"method": "HF", "basis": "sto-3g"}, + "keywords": {"set__driver:linopt": linopt}, + "driver": "gradient", + }, + }, + "initial_molecule": models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)), + } + else: + input_data = { + "input_specification": { + "model": {"method": "HF", "basis": "sto-3g"}, + "keywords": {"set__driver:linopt": linopt}, + }, + "initial_molecule": models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)), + } input_data = models.OptimizationInput(**input_data) # Run the relaxation @@ -342,7 +500,9 @@ def test_nwchem_relax(linopt, schema_versions, request): ret = qcng.compute(input_data, "nwchemdriver", raise_error=True, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") - assert 10 > len(ret.trajectory) > 1 + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + + assert 10 > len(trajs_tgt) > 1 assert pytest.approx(ret.final_molecule.measure([0, 1]), 1.0e-4) == 1.3459150737 @@ -352,14 +512,27 @@ def test_nwchem_restart(tmpdir, schema_versions, request): models, retver, _ = schema_versions # Make the input file - input_data = { - "input_specification": { - "model": {"method": "HF", "basis": "sto-3g"}, - "keywords": {"driver__maxiter": 2, "set__driver:linopt": 0}, - "extras": {"allow_restarts": True}, - }, - "initial_molecule": models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)), - } + if from_v2(request.node.name): + input_data = { + "specification": { + "specification": { + "model": {"method": "HF", "basis": "sto-3g"}, + "keywords": {"driver__maxiter": 2, "set__driver:linopt": 0}, + "driver": "gradient", # newly req'd by model in v2 + "extras": {"allow_restarts": True}, + }, + }, + "initial_molecule": models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)), + } + else: + input_data = { + "input_specification": { + "model": {"method": "HF", "basis": "sto-3g"}, + "keywords": {"driver__maxiter": 2, "set__driver:linopt": 0}, + "extras": {"allow_restarts": True}, + }, + "initial_molecule": models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)), + } input_data = models.OptimizationInput(**input_data) # Run an initial step, which should not converge @@ -444,9 +617,14 @@ def test_optimization_mrchem(input_data, optimizer, schema_versions, request): models, retver, _ = schema_versions input_data["initial_molecule"] = models.Molecule(**qcng.get_molecule("hydrogen", return_dict=True)) - input_data["input_specification"]["model"] = {"method": "HF"} - input_data["input_specification"]["keywords"] = {"world_prec": 1.0e-4} - input_data["keywords"]["program"] = "mrchem" + if from_v2(request.node.name): + input_data["specification"]["specification"]["model"] = {"method": "HF"} + input_data["specification"]["specification"]["keywords"] = {"world_prec": 1.0e-4} + input_data["specification"]["specification"]["program"] = "mrchem" + else: + input_data["input_specification"]["model"] = {"method": "HF"} + input_data["input_specification"]["keywords"] = {"world_prec": 1.0e-4} + input_data["keywords"]["program"] = "mrchem" input_data = models.OptimizationInput(**input_data) @@ -454,7 +632,9 @@ def test_optimization_mrchem(input_data, optimizer, schema_versions, request): ret = qcng.compute(input_data, optimizer, raise_error=True, return_version=retver) ret = checkver_and_convert(ret, request.node.name, "post") - assert 10 > len(ret.trajectory) > 1 + trajs_tgt = ret.trajectory_results if "v2" in request.node.name else ret.trajectory + + assert 10 > len(trajs_tgt) > 1 assert pytest.approx(ret.final_molecule.measure([0, 1]), 1.0e-3) == 1.3860734486984705 assert ret.provenance.creator.lower() == optimizer - assert ret.trajectory[0].provenance.creator.lower() == "mrchem" + assert trajs_tgt[0].provenance.creator.lower() == "mrchem" From 8ee2be326b062b8e6bcbe6208e17d44f76b444b0 Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Wed, 11 Dec 2024 03:02:42 -0500 Subject: [PATCH 3/7] torsiondrive converted to v2 --- docs/source/changelog.rst | 2 +- qcengine/compute.py | 8 +-- qcengine/procedures/torsiondrive.py | 70 +++++++++++----------- qcengine/programs/tests/test_dftd3_mp2d.py | 8 +-- qcengine/tests/test_procedures.py | 63 ++++++++++++------- 5 files changed, 81 insertions(+), 70 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 2e4e17f5..3fe2d5e3 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -70,7 +70,7 @@ Misc. MUST (Unmerged) +++++++++++++++ - +torsiondrive rewritten in v2. berny harness rewritten in v2. optking and geometric natively speak v1, so adapted as well as can be. allow nwchemdriver w/o driver=energy. provenance now nwchemdriver not nwchemrelax Optking now fills in ``v2.OptimizationResult.stdout``. Through v2, once can alter gradient protocols in an optimization. diff --git a/qcengine/compute.py b/qcengine/compute.py index 6a163750..80088424 100644 --- a/qcengine/compute.py +++ b/qcengine/compute.py @@ -115,18 +115,14 @@ def compute( # Build the model and validate # * calls model_wrapper with the (Atomic|Optimization|etc)Input for which the harness was designed - # * upon return, input_data is a model of the type (e.g., Atomic) and version (e.g., 1 or 2) the harness prefers. for now, v1. + # * upon return, input_data is a model of the type (e.g., Atomic) and version (e.g., 1 or 2) the harness prefers: all v2. input_data, input_schema_version = executor.build_input_model(input_data, return_input_schema_version=True) return_version = input_schema_version if return_version == -1 else return_version # Build out task_config if task_config is None: task_config = {} - # TODO generalize when procedures in layout in place - if isinstance(input_data, (qcelemental.models.v2.AtomicInput, qcelemental.models.v2.OptimizationInput)): - input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {}) - else: - input_engine_options = input_data.extras.pop("_qcengine_local_config", {}) + input_engine_options = input_data.specification.extras.pop("_qcengine_local_config", {}) task_config = {**task_config, **input_engine_options} config = get_config(task_config=task_config) diff --git a/qcengine/procedures/torsiondrive.py b/qcengine/procedures/torsiondrive.py index 1b7d7bae..fe3dd93b 100644 --- a/qcengine/procedures/torsiondrive.py +++ b/qcengine/procedures/torsiondrive.py @@ -4,8 +4,9 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Tuple, Union import numpy as np -from qcelemental.models.v2 import FailedOperation, Molecule -from qcelemental.models.v2.procedures import ( +from qcelemental.models.v2 import ( + FailedOperation, + Molecule, OptimizationInput, OptimizationResult, TorsionDriveInput, @@ -42,19 +43,19 @@ def _compute(self, input_model: "TorsionDriveInput", config: "TaskConfig"): import torsiondrive.td_api - dihedrals = input_model.keywords.dihedrals - grid_spacing = input_model.keywords.grid_spacing + dihedrals = input_model.specification.keywords.dihedrals + grid_spacing = input_model.specification.keywords.grid_spacing - dihedral_ranges = input_model.keywords.dihedral_ranges + dihedral_ranges = input_model.specification.keywords.dihedral_ranges - energy_decrease_thresh = input_model.keywords.energy_decrease_thresh - energy_upper_limit = input_model.keywords.energy_upper_limit + energy_decrease_thresh = input_model.specification.keywords.energy_decrease_thresh + energy_upper_limit = input_model.specification.keywords.energy_upper_limit state = torsiondrive.td_api.create_initial_state( dihedrals=dihedrals, grid_spacing=grid_spacing, - elements=input_model.initial_molecule[0].symbols, - init_coords=[molecule.geometry.flatten().tolist() for molecule in input_model.initial_molecule], + elements=input_model.initial_molecules[0].symbols, + init_coords=[molecule.geometry.flatten().tolist() for molecule in input_model.initial_molecules], dihedral_ranges=dihedral_ranges, energy_upper_limit=energy_upper_limit, energy_decrease_thresh=energy_decrease_thresh, @@ -94,9 +95,9 @@ def _compute(self, input_model: "TorsionDriveInput", config: "TaskConfig"): task_results = { grid_point: [ ( - result.initial_molecule.geometry.flatten().tolist(), + result.input_data.initial_molecule.geometry.flatten().tolist(), result.final_molecule.geometry.flatten().tolist(), - result.energies[-1], + result.properties.return_energy, ) for result in results ] @@ -105,13 +106,17 @@ def _compute(self, input_model: "TorsionDriveInput", config: "TaskConfig"): torsiondrive.td_api.update_state(state, {**task_results}) - output_data = input_model.dict() - output_data["provenance"] = { + provenance = { "creator": "TorsionDrive", "routine": "torsiondrive.td_api.next_jobs_from_state", "version": torsiondrive.__version__, } - output_data["success"] = error is None + + output_data = { + "input_data": input_model, + "provenance": provenance, + "success": error is None, # this will fail if ever False in v2 + } # even if we hit an error during the torsiondrive, we output what we can output_data["final_energies"], output_data["final_molecules"] = {}, {} @@ -180,36 +185,29 @@ def _spawn_optimization( from qcengine import compute - input_molecule = input_model.initial_molecule[0].copy(deep=True).dict() + input_molecule = input_model.initial_molecules[0].copy(deep=True).dict() input_molecule["geometry"] = np.array(job).reshape(len(input_molecule["symbols"]), 3) input_molecule = Molecule.from_data(input_molecule) - dihedrals = input_model.keywords.dihedrals + dihedrals = input_model.specification.keywords.dihedrals angles = grid_point.split() - keywords = { - **input_model.optimization_spec.keywords, - "constraints": { - "set": [ - { - "type": "dihedral", - "indices": dihedral, - "value": int(angle), - } - for dihedral, angle in zip(dihedrals, angles) - ] - }, + optspec_v2 = input_model.specification.specification.model_dump() + optspec_v2["keywords"]["constraints"] = { + "set": [ + { + "type": "dihedral", + "indices": dihedral, + "value": int(angle), + } + for dihedral, angle in zip(dihedrals, angles) + ] } - input_data = OptimizationInput( - keywords=keywords, - extras={}, - protocols=input_model.optimization_spec.protocols, - input_specification=input_model.input_specification, initial_molecule=input_molecule, + specification=optspec_v2, ) - - return compute(input_data, program=input_model.optimization_spec.procedure, task_config=config.dict()) + return compute(input_data, program=input_model.specification.specification.program, task_config=config.dict()) @staticmethod def _find_final_results( @@ -218,7 +216,7 @@ def _find_final_results( """Returns the energy and final molecule of the lowest energy optimization in a set.""" - final_energies = np.array([result.energies[-1] for result in optimization_results]) + final_energies = np.array([result.properties.return_energy for result in optimization_results]) lowest_energy_idx = final_energies.argmin() return float(final_energies[lowest_energy_idx]), optimization_results[lowest_energy_idx].final_molecule diff --git a/qcengine/programs/tests/test_dftd3_mp2d.py b/qcengine/programs/tests/test_dftd3_mp2d.py index 3403fd6a..48f73eef 100644 --- a/qcengine/programs/tests/test_dftd3_mp2d.py +++ b/qcengine/programs/tests/test_dftd3_mp2d.py @@ -1562,7 +1562,7 @@ def test_3(schema_versions, request): if from_v2(request.node.name): resinp = { - "schema_name": "qcschema_input", + "schema_name": "qcschema_atomic_input", "schema_version": 2, "molecule": qcel.molparse.to_schema(sys, dtype=2), "specification": { @@ -1706,7 +1706,7 @@ def test_mp2d__run_mp2d__2body(inp, subjects, schema_versions, request): if from_v2(request.node.name): resinp = { - "schema_name": "qcschema_input", + "schema_name": "qcschema_atomic_input", "schema_version": 2, "molecule": mol, "specification": {"driver": "gradient", "model": {"method": inp["name"]}, "keywords": {}}, @@ -1942,7 +1942,7 @@ def test_dftd3__run_dftd3__3body(inp, subjects, schema_versions, request): if from_v2(request.node.name): resinp = { - "schema_name": "qcschema_input", + "schema_name": "qcschema_atomic_input", "schema_version": 2, "molecule": mol, "specification": {"driver": "gradient", "model": {"method": inp["name"]}, "keywords": {}}, @@ -2092,7 +2092,7 @@ def test_gcp(inp, subjects, program, schema_versions, request): if from_v2(request.node.name): resinp = { - "schema_name": "qcschema_input", + "schema_name": "qcschema_atomic_input", "schema_version": 2, "molecule": mol, "specification": { diff --git a/qcengine/tests/test_procedures.py b/qcengine/tests/test_procedures.py index a95f171c..b2bc3eef 100644 --- a/qcengine/tests/test_procedures.py +++ b/qcengine/tests/test_procedures.py @@ -553,27 +553,40 @@ def test_nwchem_restart(tmpdir, schema_versions, request): @using("torsiondrive") def test_torsiondrive_generic(schema_versions, request): models, retver, _ = schema_versions - TorsionDriveInput = models.TorsionDriveInput - TDKeywords = models.procedures.TDKeywords - OptimizationSpecification = models.procedures.OptimizationSpecification - QCInputSpecification = models.procedures.QCInputSpecification - DriverEnum = models.DriverEnum - Model = models.Model - Molecule = models.Molecule - - input_data = TorsionDriveInput( - keywords=TDKeywords(dihedrals=[(2, 0, 1, 5)], grid_spacing=[180]), - input_specification=QCInputSpecification(driver=DriverEnum.gradient, model=Model(method="UFF", basis=None)), - initial_molecule=[Molecule(**qcng.get_molecule("ethane", return_dict=True))] * 2, - optimization_spec=OptimizationSpecification( - procedure="geomeTRIC", - keywords={ - "coordsys": "hdlc", - "maxiter": 500, - "program": "rdkit", - }, - ), - ) + + if from_v2(request.node.name): + input_data = models.TorsionDriveInput( + initial_molecules=[models.Molecule(**qcng.get_molecule("ethane", return_dict=True))] * 2, + specification=models.TorsionDriveSpecification( + keywords=models.TDKeywords(dihedrals=[(2, 0, 1, 5)], grid_spacing=[180]), + specification=models.OptimizationSpecification( + program="geomeTRIC", + keywords={ + "coordsys": "hdlc", + "maxiter": 500, + }, + specification=models.AtomicSpecification( + program="rdkit", driver=models.DriverEnum.gradient, model=models.Model(method="UFF", basis=None) + ), + ), + ), + ) + else: + input_data = models.TorsionDriveInput( + keywords=models.TDKeywords(dihedrals=[(2, 0, 1, 5)], grid_spacing=[180]), + input_specification=models.QCInputSpecification( + driver=models.DriverEnum.gradient, model=models.Model(method="UFF", basis=None) + ), + initial_molecule=[models.Molecule(**qcng.get_molecule("ethane", return_dict=True))] * 2, + optimization_spec=models.OptimizationSpecification( + procedure="geomeTRIC", + keywords={ + "coordsys": "hdlc", + "maxiter": 500, + "program": "rdkit", + }, + ), + ) input_data = checkver_and_convert(input_data, request.node.name, "pre") ret = qcng.compute(input_data, "torsiondrive", raise_error=True, return_version=retver) @@ -598,8 +611,12 @@ def test_torsiondrive_generic(schema_versions, request): assert ret.provenance.creator.lower() == "torsiondrive" assert ret.optimization_history["180"][0].provenance.creator.lower() == "geometric" - assert ret.optimization_history["180"][0].trajectory[0].provenance.creator.lower() == "rdkit" - assert ret.optimization_history["180"][0].trajectory[0].schema_version == 2 if ("_v2" in request.node.name) else 1 + if "v2" in request.node.name: + assert ret.optimization_history["180"][0].trajectory_results[0].provenance.creator.lower() == "rdkit" + assert ret.optimization_history["180"][0].trajectory_results[0].schema_version == 2 + else: + assert ret.optimization_history["180"][0].trajectory[0].provenance.creator.lower() == "rdkit" + assert ret.optimization_history["180"][0].trajectory[0].schema_version == 1 assert ret.stdout == "All optimizations converged at lowest energy. Job Finished!\n" From 21f9d85ad3efe7b70468e15bb0a7c1c7d97e933c Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Thu, 12 Dec 2024 00:59:59 -0500 Subject: [PATCH 4/7] misc --- qcengine/programs/tests/test_programs.py | 4 +++- qcengine/tests/test_cli.py | 24 +++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/qcengine/programs/tests/test_programs.py b/qcengine/programs/tests/test_programs.py index 8ceee09b..7e7e1a5c 100644 --- a/qcengine/programs/tests/test_programs.py +++ b/qcengine/programs/tests/test_programs.py @@ -816,6 +816,7 @@ def test_psi4_properties_driver(schema_versions, request): "keywords": {"scf_type": "df", "mp2_type": "df", "e_convergence": 9}, } if from_v2(request.node.name): + json_data["schema_name"] = "qcschema_atomic_input" json_data["specification"] = { "driver": json_data.pop("driver"), "model": json_data.pop("model"), @@ -876,7 +877,8 @@ def test_psi4_properties_driver(schema_versions, request): json_ret = checkver_and_convert(json_ret, request.node.name, "post") assert json_ret.success - assert "qcschema_output" == json_ret.schema_name + xptd_schema_name = "qcschema_atomic_output" if "v2" in request.node.name else "qcschema_output" + assert json_ret.schema_name == xptd_schema_name for k in expected_return_result.keys(): assert compare_values(expected_return_result[k], json_ret.return_result[k], atol=1.0e-5) for k in expected_properties.keys(): diff --git a/qcengine/tests/test_cli.py b/qcengine/tests/test_cli.py index 84c562a7..01e1a4c3 100644 --- a/qcengine/tests/test_cli.py +++ b/qcengine/tests/test_cli.py @@ -110,11 +110,25 @@ def check_result(stdout): assert output["provenance"]["creator"].lower() == "geometric" assert output["success"] is True - inp = { - "keywords": {"coordsys": "tric", "maxiter": 100, "program": "psi4"}, - "input_specification": {"driver": "gradient", "model": {"method": "HF", "basis": "sto-3g"}, "keywords": {}}, - "initial_molecule": models.Molecule(**get_molecule("hydrogen", return_dict=True)), - } + if from_v2(request.node.name): + inp = { + "specification": { + "keywords": {"coordsys": "tric", "maxiter": 100}, + "specification": { + "driver": "gradient", + "model": {"method": "HF", "basis": "sto-3g"}, + "keywords": {}, + "program": "psi4", + }, + }, + "initial_molecule": models.Molecule(**get_molecule("hydrogen", return_dict=True)), + } + else: + inp = { + "keywords": {"coordsys": "tric", "maxiter": 100, "program": "psi4"}, + "input_specification": {"driver": "gradient", "model": {"method": "HF", "basis": "sto-3g"}, "keywords": {}}, + "initial_molecule": models.Molecule(**get_molecule("hydrogen", return_dict=True)), + } inp = models.OptimizationInput(**inp) inp = checkver_and_convert(inp, request.node.name, "pre") From 342cf612af9df292c46f44cb321161ff4c2da392 Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Thu, 12 Dec 2024 02:46:43 -0500 Subject: [PATCH 5/7] one bug --- qcengine/tests/test_procedures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qcengine/tests/test_procedures.py b/qcengine/tests/test_procedures.py index b2bc3eef..8e3709fe 100644 --- a/qcengine/tests/test_procedures.py +++ b/qcengine/tests/test_procedures.py @@ -320,7 +320,8 @@ def test_optimization_protocols(optimizer, input_data, schema_versions, request) pytest.approx(ret.final_molecule.nuclear_repulsion_energy(), 1.0e-4) == trajs_tgt[1].molecule.nuclear_repulsion_energy() ) - if optimizer != "nwchemdriver": + if optimizer not in ["nwchemdriver", "optking"]: + # optking would pass if: optking/optimize.py: #computer.update_geometry(o_molsys.geom) assert ret.final_molecule.get_hash() == trajs_tgt[1].molecule.get_hash() From 05d4e58a2063f9a90683e5074e5d0b0475e334b0 Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Thu, 12 Dec 2024 12:50:54 -0500 Subject: [PATCH 6/7] hopefully fix bug --- qcengine/procedures/optking.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/qcengine/procedures/optking.py b/qcengine/procedures/optking.py index 36a4275c..f312307f 100644 --- a/qcengine/procedures/optking.py +++ b/qcengine/procedures/optking.py @@ -1,4 +1,5 @@ import logging +import sys from io import StringIO from typing import Any, ClassVar, Dict, Union @@ -44,8 +45,8 @@ def compute(self, input_model: "OptimizationInput", config: "TaskConfig") -> "Op import optking log_stream = StringIO() - log = logging.getLogger(f"{__name__}.{id(self)}") - log = logging.getLogger("optking") + logname = "psi4.optking" if "psi4" in sys.modules else "optking" + log = logging.getLogger(logname) log.addHandler(logging.StreamHandler(log_stream)) log.setLevel("INFO") From eef6a462d3f3a28deb24ad7b9771fad71d239221 Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Thu, 12 Dec 2024 19:22:18 -0500 Subject: [PATCH 7/7] forgive optking again --- qcengine/tests/test_procedures.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/qcengine/tests/test_procedures.py b/qcengine/tests/test_procedures.py index 8e3709fe..1edb353c 100644 --- a/qcengine/tests/test_procedures.py +++ b/qcengine/tests/test_procedures.py @@ -86,11 +86,12 @@ def test_geometric_psi4(input_data, optimizer, ncores, schema_versions, request) assert ret.provenance.creator.lower() == optimizer assert trajs_tgt[0].provenance.creator.lower() == grad_program - if optimizer == "optking" and ncores != 1: - with pytest.raises(AssertionError): - # Optking not passing threads to psi4 - assert trajs_tgt[0].provenance.nthreads == ncores - elif optimizer == "nwchemdriver": + # Note: thread passing is semi-implemented in optking and subject to other env factors + # if optimizer == "optking" and ncores != 1: + # with pytest.raises(AssertionError): + # # Optking not passing threads to psi4 + # assert trajs_tgt[0].provenance.nthreads == ncores + if optimizer in ["optking", "nwchemdriver"]: pass # threading not yet implemented in NWChemDriver else: