Merge pull request #37 from openpathsampling/release-0.2.0

Release 0.2.0
openpathsampling · Jan 4, 2021 · 30bb0d9 · 30bb0d9
2 parents 5ff2ae7 + 3fb2f49
commit 30bb0d9
Show file tree

Hide file tree

Showing 21 changed files with 731 additions and 40 deletions.
diff --git a/.autorelease/test-testpypi.sh b/.autorelease/test-testpypi.sh
@@ -0,0 +1,2 @@
+python -m pip install sqlalchemy dill pytest
+py.test --pyargs paths_cli
diff --git a/.github/workflows/autorelease-default-env.sh b/.github/workflows/autorelease-default-env.sh
@@ -1,4 +1,4 @@
-INSTALL_AUTORELEASE="python -m pip install autorelease==0.2.3"
+INSTALL_AUTORELEASE="python -m pip install autorelease==0.2.6"
 if [ -f autorelease-env.sh ]; then
     source autorelease-env.sh
 fi

diff --git a/.github/workflows/autorelease-deploy.yml b/.github/workflows/autorelease-deploy.yml
@@ -14,7 +14,9 @@ jobs:
           python-version: "3.x"
       - run: |  # TODO: move this to an action
           source ./.github/workflows/autorelease-default-env.sh
-          cat autorelease-env.sh >> $GITHUB_ENV
+          if [ -f "autorelease-env.sh" ]; then
+            cat autorelease-env.sh >> $GITHUB_ENV
+          fi
           eval $INSTALL_AUTORELEASE
         name: "Install autorelease"
       - run: |
@@ -27,5 +29,5 @@ jobs:
       - uses: pypa/gh-action-pypi-publish@master
         with:
           password: ${{ secrets.pypi_password }}
-        name: "Deploy to testpypi"
+        name: "Deploy to pypi"
 
diff --git a/.github/workflows/autorelease-gh-rel.yml b/.github/workflows/autorelease-gh-rel.yml
@@ -15,7 +15,9 @@ jobs:
           python-version: "3.7"
       - run: |  # TODO: move this to an action
           source ./.github/workflows/autorelease-default-env.sh
-          cat autorelease-env.sh >> $GITHUB_ENV
+          if [ -f "autorelease-env.sh" ]; then
+            cat autorelease-env.sh >> $GITHUB_ENV
+          fi
           eval $INSTALL_AUTORELEASE
         name: "Install autorelease"
       - run: |

diff --git a/.github/workflows/autorelease-prep.yml b/.github/workflows/autorelease-prep.yml
@@ -19,7 +19,9 @@ jobs:
           python-version: "3.x"
       - run: |  # TODO: move this to an action
           source ./.github/workflows/autorelease-default-env.sh
-          cat autorelease-env.sh >> $GITHUB_ENV
+          if [ -f "autorelease-env.sh" ]; then
+            cat autorelease-env.sh >> $GITHUB_ENV
+          fi
           eval $INSTALL_AUTORELEASE
         name: "Install autorelease"
       - run: |
@@ -49,7 +51,9 @@ jobs:
           python-version: "3.x"
       - run: |  # TODO: move this to an action
           source ./.github/workflows/autorelease-default-env.sh
-          cat autorelease-env.sh >> $GITHUB_ENV
+          if [ -f "autorelease-env.sh" ]; then
+            cat autorelease-env.sh >> $GITHUB_ENV
+          fi
           eval $INSTALL_AUTORELEASE
         name: "Install autorelease"
       - run: test-testpypi

diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@ miscellaneous operations on OPS output files.
 **Simulation Commands:**
 
 * `visit-all`:     Run MD to generate initial trajectories
+* `md`:            Run MD for fixed time or until a given ensemble is satisfied
 * `equilibrate`:   Run equilibration for path sampling
 * `pathsampling`:  Run any path sampling simulation, including TIS variants
 

diff --git a/autorelease-env.sh b/autorelease-env.sh
@@ -1,2 +1,2 @@
-INSTALL_AUTORELEASE="python -m pip install autorelease==0.2.3 nose"
+INSTALL_AUTORELEASE="python -m pip install autorelease==0.2.3 nose sqlalchemy dill"
 PACKAGE_IMPORT_NAME=paths_cli
diff --git a/devtools/tests_require.txt b/devtools/tests_require.txt
@@ -3,3 +3,6 @@ nose
 pytest
 pytest-cov
 coveralls
+# following are for SimStore integration
+dill
+sqlalchemy
diff --git a/paths_cli/commands/contents.py b/paths_cli/commands/contents.py
@@ -1,19 +1,64 @@
 import click
 from paths_cli.parameters import INPUT_FILE
 
+UNNAMED_SECTIONS = ['steps', 'movechanges', 'samplesets', 'trajectories',
+                    'snapshots']
+
+NAME_TO_ATTR = {
+    'CVs': 'cvs',
+    'Volumes': 'volumes',
+    'Engines': 'engines',
+    'Networks': 'networks',
+    'Move Schemes': 'schemes',
+    'Simulations': 'pathsimulators',
+    'Tags': 'tags',
+    'Steps': 'steps',
+    'Move Changes': 'movechanges',
+    'SampleSets': 'samplesets',
+    'Trajectories': 'trajectories',
+    'Snapshots': 'snapshots'
+}
+
 @click.command(
     'contents',
     short_help="list named objects from an OPS .nc file",
 )
 @INPUT_FILE.clicked(required=True)
-def contents(input_file):
+@click.option('--table', type=str, required=False,
+              help="table to show results from")
+def contents(input_file, table):
     """List the names of named objects in an OPS .nc file.
 
     This is particularly useful when getting ready to use one of the simulation
     scripts (i.e., to identify exactly how a state or engine is named).
     """
     storage = INPUT_FILE.get(input_file)
     print(storage)
+    if table is None:
+        report_all_tables(storage)
+    else:
+        table_attr = table.lower()
+        try:
+            store = getattr(storage, table_attr)
+        except AttributeError:
+            raise click.UsageError("Unknown table: '" + table_attr + "'")
+        else:
+            print(get_section_string(table_attr, store))
+
+
+def get_section_string(label, store):
+    attr = NAME_TO_ATTR.get(label, label.lower())
+    if attr in UNNAMED_SECTIONS:
+        string = get_unnamed_section_string(label, store)
+    elif attr in ['tag', 'tags']:
+        string = get_section_string_nameable(label, store, _get_named_tags)
+    else:
+        string = get_section_string_nameable(label, store,
+                                             _get_named_namedobj)
+    return string
+
+
+def report_all_tables(storage):
     store_section_mapping = {
         'CVs': storage.cvs, 'Volumes': storage.volumes,
         'Engines': storage.engines, 'Networks': storage.networks,
@@ -26,12 +71,16 @@ def contents(input_file):
     print(get_section_string_nameable('Tags', storage.tags, _get_named_tags))
 
     print("\nData Objects:")
-    unnamed_sections = {
-        'Steps': storage.steps, 'Move Changes': storage.movechanges,
-        'SampleSets': storage.samplesets,
-        'Trajectories': storage.trajectories, 'Snapshots': storage.snapshots
+    data_object_mapping = {
+        'Steps': lambda storage: storage.steps,
+        'Move Changes': lambda storage: storage.movechanges,
+        'SampleSets': lambda storage: storage.samplesets,
+        'Trajectories': lambda storage: storage.trajectories,
+        'Snapshots': lambda storage: storage.snapshots
     }
-    for section, store in unnamed_sections.items():
+
+    for section, store_func in data_object_mapping.items():
+        store = store_func(storage)
         print(get_unnamed_section_string(section, store))
 
 def _item_or_items(count):

diff --git a/paths_cli/commands/equilibrate.py b/paths_cli/commands/equilibrate.py
@@ -43,6 +43,7 @@ def equilibrate_main(output_storage, scheme, init_conds, multiplier,
                      extra_steps):
     import openpathsampling as paths
     init_conds = scheme.initial_conditions_from_trajectories(init_conds)
+    scheme.assert_initial_conditions(init_conds)
     simulation = paths.PathSampling(
         storage=output_storage,
         move_scheme=scheme,

diff --git a/paths_cli/commands/md.py b/paths_cli/commands/md.py
@@ -0,0 +1,203 @@
+import click
+
+import paths_cli.utils
+from paths_cli.parameters import (INPUT_FILE, OUTPUT_FILE, ENGINE,
+                                  MULTI_ENSEMBLE, INIT_SNAP)
+
+import logging
+logger = logging.getLogger(__name__)
+
+@click.command(
+    "md",
+    short_help=("Run MD for fixed time or until a given ensemble is "
+                "satisfied"),
+)
+@INPUT_FILE.clicked(required=True)
+@OUTPUT_FILE.clicked(required=True)
+@ENGINE.clicked(required=False)
+@MULTI_ENSEMBLE.clicked(required=False)
+@click.option('-n', '--nsteps', type=int,
+              help="number of MD steps to run")
+@INIT_SNAP.clicked(required=False)
+def md(input_file, output_file, engine, ensemble, nsteps, init_frame):
+    """Run MD for a fixed number of steps or until ensembles are satisfied.
+
+    This can either take a --nsteps or --ensemble, but not both. If the
+    --ensemble option is specified more than once, then this will attempt to
+    run until all ensembles are satisfied by a subtrajectory.
+
+    This still respects the maximum number of frames as set in the engine,
+    and will terminate if the trajectory gets longer than that.
+    """
+    storage = INPUT_FILE.get(input_file)
+    md_main(
+        output_storage=OUTPUT_FILE.get(output_file),
+        engine=ENGINE.get(storage, engine),
+        ensembles=MULTI_ENSEMBLE.get(storage, ensemble),
+        nsteps=nsteps,
+        initial_frame=INIT_SNAP.get(storage, init_frame)
+    )
+
+class ProgressReporter(object):
+    """Generic class for a callable that reports progress.
+
+    Base class for ends-with-ensemble and fixed-length tricks.
+
+    Parameters
+    ----------
+    timestep : Any
+        timestep, optionally with units
+    update_freq : int
+        how often to report updates
+    """
+    def __init__(self, timestep, update_freq):
+        self.timestep = timestep
+        self.update_freq = update_freq
+
+    def steps_progress_string(self, n_steps):
+        """Return string for number of frames run and time elapsed
+
+        Not newline-terminated.
+        """
+        report_str = "Ran {n_steps} frames"
+        if self.timestep is not None:
+            report_str += " [{}]".format(str(n_steps * self.timestep))
+        report_str += '.'
+        return report_str.format(n_steps=n_steps)
+
+    def progress_string(self, n_steps):
+        """Return the progress string. Subclasses may override.
+        """
+        report_str = self.steps_progress_string(n_steps) + "\n"
+        return report_str.format(n_steps=n_steps)
+
+    def report_progress(self, n_steps, force=False):
+        """Report the progress to the terminal.
+        """
+        import openpathsampling as paths
+        if (n_steps % self.update_freq == 0) or force:
+            string = self.progress_string(n_steps)
+            paths.tools.refresh_output(string)
+
+    def __call__(self, trajectory, trusted=False):
+        raise NotImplementedError()
+
+
+class EnsembleSatisfiedContinueConditions(ProgressReporter):
+    """Continuation condition for including subtrajs for each ensemble.
+
+    This object creates a continuation condition (a callable) analogous to
+    the ensemble ``can_append`` method. This will tell the trajectory to
+    keep running until, for each of the given ensembles, a subtrajectory has
+    been found that will satisfy the ensemble.
+
+    Parameters
+    ----------
+    ensembles: List[:class:`openpathsampling.Ensemble`]
+        the ensembles to satisfy
+    timestep : Any
+        timestep, optionally with units
+    update_freq : int
+        how often to report updates
+    """
+    def __init__(self, ensembles, timestep=None, update_freq=10):
+        super().__init__(timestep, update_freq)
+        self.satisfied = {ens: False for ens in ensembles}
+
+    def progress_string(self, n_steps):
+        report_str = self.steps_progress_string(n_steps)
+        report_str += (" Found ensembles [{found}]. "
+                       "Looking for [{missing}].\n")
+        found = [ens.name for ens, done in self.satisfied.items() if done]
+        missing = [ens.name for ens, done in self.satisfied.items()
+                   if not done]
+        found_str = ",".join(found)
+        missing_str = ",".join(missing)
+        return report_str.format(n_steps=n_steps,
+                                 found=found_str,
+                                 missing=missing_str)
+
+
+    def _check_previous_frame(self, trajectory, start, unsatisfied):
+        if -start > len(trajectory):
+            # we've done the whole traj; don't keep going
+            return False
+        subtraj = trajectory[start:]
+        logger.debug(str(subtraj) + "/" + str(trajectory))
+        for ens in unsatisfied:
+            if not ens.strict_can_prepend(subtraj, trusted=True):
+                # test if we can't prepend because we satisfy
+                self.satisfied[ens] = ens(subtraj) or ens(subtraj[1:])
+                unsatisfied.remove(ens)
+        return bool(unsatisfied)
+
+    def _call_untrusted(self, trajectory):
+        self.satisfied = {ens: False for ens in self.satisfied}
+        for i in range(1, len(trajectory)):
+            keep_going = self(trajectory[:i], trusted=True)
+            if not keep_going:
+                return False
+        return self(trajectory, trusted=True)
+
+    def __call__(self, trajectory, trusted=False):
+        if not trusted:
+            return self._call_untrusted(trajectory)
+
+        # below here, trusted is True
+        self.report_progress(len(trajectory) - 1)
+
+        unsatisfied = [ens for ens, done in self.satisfied.items()
+                       if not done]
+        # TODO: update on how many ensembles left, what frame number we are
+
+        start = -1
+        while self._check_previous_frame(trajectory, start, unsatisfied):
+            start -= 1
+
+        return not all(self.satisfied.values())
+
+
+class FixedLengthContinueCondition(ProgressReporter):
+    """Continuation condition for fixed-length runs.
+
+    Parameters
+    ----------
+    length : int
+        final length of the trajectory in frames
+    timestep : Any
+        timestep, optionally with units
+    update_freq : int
+        how often to report updates
+    """
+    def __init__(self, length, timestep=None, update_freq=10):
+        super().__init__(timestep, update_freq)
+        self.length = length
+
+    def __call__(self, trajectory, trusted=False):
+        len_traj = len(trajectory)
+        self.report_progress(len_traj - 1)
+        return len_traj < self.length
+
+
+
+def md_main(output_storage, engine, ensembles, nsteps, initial_frame):
+    import openpathsampling as paths
+    if nsteps is not None and ensembles:
+        raise RuntimeError("Options --ensemble and --nsteps cannot both be"
+                           " used at once.")
+
+    if ensembles:
+        continue_cond = EnsembleSatisfiedContinueConditions(ensembles)
+    else:
+        continue_cond = FixedLengthContinueCondition(nsteps)
+
+    trajectory = engine.generate(initial_frame, running=continue_cond)
+    continue_cond.report_progress(len(trajectory) - 1, force=True)
+    paths_cli.utils.tag_final_result(trajectory, output_storage,
+                                     'final_conditions')
+    return trajectory, None
+
+CLI = md
+SECTION = "Simulation"
+REQUIRES_OPS = (1, 0)
+
diff --git a/paths_cli/commands/visit_all.py b/paths_cli/commands/visit_all.py
@@ -1,5 +1,6 @@
 import click
 
+import paths_cli.utils
 from paths_cli.parameters import (INPUT_FILE, OUTPUT_FILE, ENGINE, STATES,
                                   INIT_SNAP)
 
@@ -34,9 +35,8 @@ def visit_all_main(output_storage, states, engine, initial_frame):
     timestep = getattr(engine, 'timestep', None)
     visit_all_ens = paths.VisitAllStatesEnsemble(states, timestep=timestep)
     trajectory = engine.generate(initial_frame, [visit_all_ens.can_append])
-    if output_storage is not None:
-        output_storage.save(trajectory)
-        output_storage.tags['final_conditions'] = trajectory
+    paths_cli.utils.tag_final_result(trajectory, output_storage,
+                                     'final_conditions')
 
     return trajectory, None  # no simulation object to return here
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		python -m pip install sqlalchemy dill pytest
		py.test --pyargs paths_cli