From 3c41f33836a29d4bd90b7602c8add713c5be9e9c Mon Sep 17 00:00:00 2001
From: Paul Price <price@astro.princeton.edu>
Date: Fri, 13 Oct 2017 13:59:46 -0400
Subject: [PATCH 1/2] add ScienceSourceSelectorTask,ReferenceSourceSelectorTask

These will select sources for photoCal. That job is currently done by some
long function in lsst.pipe.tasks.PhotoCalTask. Moving it into source
selectors allows for code re-use.

The selections done here are very modular, using Config to configure the
limits and then giving them an 'apply' method to do the selection. This
makes it very easy to expand the selections or to write new combinations
of selections.
---
 python/lsst/meas/algorithms/sourceSelector.py | 318 +++++++++++++++++-
 tests/test_sourceSelector.py                  | 232 +++++++++++++
 2 files changed, 549 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_sourceSelector.py

diff --git a/python/lsst/meas/algorithms/sourceSelector.py b/python/lsst/meas/algorithms/sourceSelector.py
index d1995ebe4..4f41fe127 100644
--- a/python/lsst/meas/algorithms/sourceSelector.py
+++ b/python/lsst/meas/algorithms/sourceSelector.py
@@ -22,13 +22,19 @@
 #
 from __future__ import absolute_import, division, print_function
 
-__all__ = ["BaseSourceSelectorConfig", "BaseSourceSelectorTask", "sourceSelectorRegistry"]
+__all__ = ["BaseSourceSelectorConfig", "BaseSourceSelectorTask", "sourceSelectorRegistry",
+           "ColorLimit", "MagnitudeLimit", "RequireFlags", "RequireUnresolved",
+           "ScienceSourceSelectorConfig", "ScienceSourceSelectorTask",
+           "ReferenceSourceSelectorConfig", "ReferenceSourceSelectorTask",
+           ]
 
 import abc
+import numpy as np
 
 import lsst.afw.table as afwTable
 import lsst.pex.config as pexConfig
 import lsst.pipe.base as pipeBase
+import lsst.afw.image
 from future.utils import with_metaclass
 
 
@@ -97,3 +103,313 @@ def _isBad(self, source):
 sourceSelectorRegistry = pexConfig.makeRegistry(
     doc="A registry of source selectors (subclasses of BaseSourceSelectorTask)",
 )
+
+
+class ColorLimit(pexConfig.Config):
+    """Select sources using a color limit
+
+    This object can be used as a `lsst.pex.config.Config` for configuring
+    the limit, and then the `apply` method can be used to identify sources
+    in the catalog that match the configured limit.
+
+    We refer to 'primary' and 'secondary' flux measurements; these are the
+    two components of the color, which is:
+
+        instFluxToMag(cat[primary]) - instFluxToMag(cat[secondary])
+    """
+    primary = pexConfig.Field(dtype=str, doc="Name of column with primary flux measurement")
+    secondary = pexConfig.Field(dtype=str, doc="Name of column with secondary flux measurement")
+    minimum = pexConfig.Field(dtype=float, optional=True, doc="Select objects with color greater than this")
+    maximum = pexConfig.Field(dtype=float, optional=True, doc="Select objects with color less than this")
+
+    def apply(self, catalog):
+        """Apply the color limit to a catalog
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Source to which to apply limits.
+
+        Returns
+        -------
+        selected : `numpy.ndarray`
+            Boolean array indicating whether each source is selected
+            (True means selected).
+        """
+        primary = lsst.afw.image.abMagFromFlux(catalog[self.primary])
+        secondary = lsst.afw.image.abMagFromFlux(catalog[self.secondary])
+        color = primary - secondary
+        selected = np.ones(len(catalog), dtype=bool)
+        if self.minimum is not None:
+            selected &= color > self.minimum
+        if self.maximum is not None:
+            selected &= color < self.maximum
+        return selected
+
+
+class FluxLimit(pexConfig.Config):
+    """Select sources using a flux limit
+
+    This object can be used as a `lsst.pex.config.Config` for configuring
+    the limit, and then the `apply` method can be used to identify sources
+    in the catalog that match the configured limit.
+    """
+    fluxField = pexConfig.Field(dtype=str, default="slot_CalibFlux_flux",
+                                doc="Name of the source flux field to use.")
+    minimum = pexConfig.Field(dtype=float, optional=True, doc="Select objects fainter than this flux")
+    maximum = pexConfig.Field(dtype=float, optional=True, doc="Select objects brighter than this flux")
+
+    def apply(self, catalog):
+        """Apply the magnitude limit to a catalog
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Source to which to apply limits.
+
+        Returns
+        -------
+        selected : `numpy.ndarray`
+            Boolean array indicating whether each source is selected
+            (True means selected).
+        """
+        flagField = self.fluxField + "_flag"
+        if flagField in catalog.schema:
+            selected = np.logical_not(catalog[flagField])
+        else:
+            selected = np.ones(len(catalog), dtype=bool)
+
+        flux = catalog[self.fluxField]
+        if self.minimum is not None:
+            selected &= flux > self.minimum
+        if self.maximum is not None:
+            selected &= flux < self.maximum
+        return selected
+
+
+class MagnitudeLimit(pexConfig.Config):
+    """Select sources using a magnitude limit
+
+    Note that this assumes that a zero-point has already been applied and
+    the fluxes are in AB fluxes in Jansky. It is therefore principally
+    intended for reference catalogs rather than catalogs extracted from
+    science images.
+
+    This object can be used as a `lsst.pex.config.Config` for configuring
+    the limit, and then the `apply` method can be used to identify sources
+    in the catalog that match the configured limit.
+    """
+    fluxField = pexConfig.Field(dtype=str, default="flux",
+                                doc="Name of the source flux field to use.")
+    minimum = pexConfig.Field(dtype=float, optional=True, doc="Select objects fainter than this magnitude")
+    maximum = pexConfig.Field(dtype=float, optional=True, doc="Select objects brighter than this magnitude")
+
+    def apply(self, catalog):
+        """Apply the magnitude limit to a catalog
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Source to which to apply limits.
+
+        Returns
+        -------
+        selected : `numpy.ndarray`
+            Boolean array indicating whether each source is selected
+            (True means selected).
+        """
+        flagField = self.fluxField + "_flag"
+        if flagField in catalog.schema:
+            selected = np.logical_not(catalog[flagField])
+        else:
+            selected = np.ones(len(catalog), dtype=bool)
+
+        magnitude = lsst.afw.image.abMagFromFlux(catalog[self.fluxField])
+        if self.minimum is not None:
+            selected &= magnitude > self.minimum
+        if self.maximum is not None:
+            selected &= magnitude < self.maximum
+        return selected
+
+
+class RequireFlags(pexConfig.Config):
+    """Select sources using flags
+
+    This object can be used as a `lsst.pex.config.Config` for configuring
+    the limit, and then the `apply` method can be used to identify sources
+    in the catalog that match the configured limit.
+    """
+    good = pexConfig.ListField(dtype=str, default=[],
+                               doc="List of source flag fields that must be set for a source to be used.")
+    bad = pexConfig.ListField(dtype=str, default=[],
+                              doc="List of source flag fields that must NOT be set for a source to be used.")
+
+    def apply(self, catalog):
+        """Apply the flag requirements to a catalog
+
+        Returns whether the source is selected.
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Source to which to apply limits.
+
+        Returns
+        -------
+        selected : `numpy.ndarray`
+            Boolean array indicating whether each source is selected
+            (True means selected).
+        """
+        selected = np.ones(len(catalog), dtype=bool)
+        for flag in self.good:
+            selected &= catalog[flag]
+        for flag in self.bad:
+            selected &= ~catalog[flag]
+        return selected
+
+
+class RequireUnresolved(pexConfig.Config):
+    """Select sources using star/galaxy separation
+
+    This object can be used as a `lsst.pex.config.Config` for configuring
+    the limit, and then the `apply` method can be used to identify sources
+    in the catalog that match the configured limit.
+    """
+    name = pexConfig.Field(dtype=str, default="base_ClassificationExtendedness_value",
+                           doc="Name of column for star/galaxy separation")
+    minimum = pexConfig.Field(dtype=float, optional=True, doc="Select objects with value greater than this.")
+    maximum = pexConfig.Field(dtype=float, optional=True, default=0.5,
+                              doc="Select objects with value less than this.")
+
+    def apply(self, catalog):
+        """Apply the flag requirements to a catalog
+
+        Returns whether the source is selected.
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Source to which to apply limits.
+
+        Returns
+        -------
+        selected : `numpy.ndarray`
+            Boolean array indicating whether each source is selected
+            (True means selected).
+        """
+        selected = np.ones(len(catalog), dtype=bool)
+        value = catalog[self.name]
+        if self.minimum is not None:
+            selected &= value > self.minimum
+        if self.maximum is not None:
+            selected &= value < self.maximum
+        return selected
+
+
+class ScienceSourceSelectorConfig(pexConfig.Config):
+    """Configuration for selecting science sources"""
+    doFluxLimit = pexConfig.Field(dtype=bool, default=False, doc="Apply flux limit?")
+    doFlags = pexConfig.Field(dtype=bool, default=False, doc="Apply flag limitation?")
+    doUnresolved = pexConfig.Field(dtype=bool, default=False, doc="Apply unresolved limitation?")
+    fluxLimit = pexConfig.ConfigField(dtype=FluxLimit, doc="Flux limit to apply")
+    flags = pexConfig.ConfigField(dtype=RequireFlags, doc="Flags to require")
+    unresolved = pexConfig.ConfigField(dtype=RequireUnresolved, doc="Star/galaxy separation to apply")
+
+    def setDefaults(self):
+        pexConfig.Config.setDefaults(self)
+        self.flags.bad = ["base_PixelFlags_flag_edge", "base_PixelFlags_flag_interpolated",
+                          "base_PixelFlags_flag_saturated"]
+
+
+class ScienceSourceSelectorTask(BaseSourceSelectorTask):
+    """Science source selector
+
+    By "science" sources, we mean sources that are on images that we
+    are processing, as opposed to sources from reference catalogs.
+
+    This selects (science) sources by (optionally) applying each of a
+    magnitude limit, flag requirements and star/galaxy separation.
+    """
+    ConfigClass = ScienceSourceSelectorConfig
+
+    def selectSources(self, catalog, matches=None):
+        """Return a catalog of selected sources
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Catalog of sources to select.
+        matches : `lsst.afw.table.ReferenceMatchVector`, optional
+            List of matches; ignored.
+
+        Return struct
+        -------------
+        sourceCat : `lsst.afw.table.SourceCatalog`
+            Catalog of selected sources, non-contiguous.
+        """
+        selected = np.ones(len(catalog), dtype=bool)
+        if self.config.doFluxLimit:
+            selected &= self.config.fluxLimit.apply(catalog)
+        if self.config.doFlags:
+            selected &= self.config.flags.apply(catalog)
+        if self.config.doUnresolved:
+            selected &= self.config.unresolved.apply(catalog)
+
+        self.log.info("Selected %d/%d sources", selected.sum(), len(catalog))
+
+        result = type(catalog)(catalog.table)  # Empty catalog based on the original
+        for source in catalog[selected]:
+            result.append(source)
+        return pipeBase.Struct(sourceCat=result, selection=selected)
+
+
+class ReferenceSourceSelectorConfig(pexConfig.Config):
+    doMagLimit = pexConfig.Field(dtype=bool, default=False, doc="Apply magnitude limit?")
+    doFlags = pexConfig.Field(dtype=bool, default=False, doc="Apply flag limitation?")
+    magLimit = pexConfig.ConfigField(dtype=MagnitudeLimit, doc="Magnitude limit to apply")
+    flags = pexConfig.ConfigField(dtype=RequireFlags, doc="Flags to require")
+    colorLimits = pexConfig.ConfigDictField(keytype=str, itemtype=ColorLimit, default={},
+                                            doc="Color limits to apply; key is used as a label only")
+
+
+class ReferenceSourceSelectorTask(BaseSourceSelectorTask):
+    """Reference source selector
+
+    This selects reference sources by (optionally) applying each of a
+    magnitude limit, flag requirements and color limits.
+    """
+    ConfigClass = ReferenceSourceSelectorConfig
+
+    def selectSources(self, catalog, matches=None):
+        """Return a catalog of selected reference sources
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Catalog of sources to select.
+        matches : `lsst.afw.table.ReferenceMatchVector`, optional
+            List of matches; ignored.
+
+        Return struct
+        -------------
+        sourceCat : `lsst.afw.table.SourceCatalog`
+            Catalog of selected sources, non-contiguous.
+        """
+        selected = np.ones(len(catalog), dtype=bool)
+        if self.config.doMagLimit:
+            selected &= self.config.magLimit.apply(catalog)
+        if self.config.doFlags:
+            selected &= self.config.flags.apply(catalog)
+        for limit in self.config.colorLimits.values():
+            selected &= limit.apply(catalog)
+
+        self.log.info("Selected %d/%d references", selected.sum(), len(catalog))
+
+        result = type(catalog)(catalog.table)  # Empty catalog based on the original
+        for source in catalog[selected]:
+            result.append(source)
+        return pipeBase.Struct(sourceCat=result, selection=selected)
+
+
+sourceSelectorRegistry.register("science", ScienceSourceSelectorTask)
+sourceSelectorRegistry.register("references", ReferenceSourceSelectorTask)
diff --git a/tests/test_sourceSelector.py b/tests/test_sourceSelector.py
new file mode 100644
index 000000000..30de0b468
--- /dev/null
+++ b/tests/test_sourceSelector.py
@@ -0,0 +1,232 @@
+#
+# LSST Data Management System
+#
+# Copyright 2008-2017  AURA/LSST.
+#
+# This product includes software developed by the
+# LSST Project (http://www.lsst.org/).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the LSST License Statement and
+# the GNU General Public License along with this program.  If not,
+# see <https://www.lsstcorp.org/LegalNotices/>.
+#
+from __future__ import absolute_import, division, print_function
+
+import unittest
+import numpy as np
+
+import lsst.afw.table
+import lsst.meas.algorithms
+import lsst.utils.tests
+
+from lsst.meas.algorithms import ScienceSourceSelectorTask, ReferenceSourceSelectorTask, ColorLimit
+
+
+class SourceSelectorTester(object):
+    """Mixin for testing
+
+    This provides a base class for doing tests common to the
+    ScienceSourceSelectorTask and ReferenceSourceSelectorTask.
+    """
+    Task = None
+
+    def setUp(self):
+        schema = lsst.afw.table.SourceTable.makeMinimalSchema()
+        schema.addField("flux", float, "Flux value")
+        schema.addField("flux_flag", "Flag", "Bad flux?")
+        schema.addField("other_flux", float, "Flux value 2")
+        schema.addField("other_flux_flag", "Flag", "Bad flux 2?")
+        schema.addField("goodFlag", "Flag", "Flagged if good")
+        schema.addField("badFlag", "Flag", "Flagged if bad")
+        schema.addField("starGalaxy", float, "0=star, 1=galaxy")
+        self.catalog = lsst.afw.table.SourceCatalog(schema)
+        self.catalog.reserve(10)
+        self.config = self.Task.ConfigClass()
+
+    def tearDown(self):
+        del self.catalog
+
+    def check(self, expected):
+        task = self.Task(config=self.config)
+        results = task.selectSources(self.catalog)
+        self.assertListEqual(results.selection.tolist(), expected)
+        self.assertListEqual([src.getId() for src in results.sourceCat],
+                             [src.getId() for src, ok in zip(self.catalog, expected) if ok])
+
+    def testFlags(self):
+        bad1 = self.catalog.addNew()
+        bad1.set("goodFlag", False)
+        bad1.set("badFlag", False)
+        bad2 = self.catalog.addNew()
+        bad2.set("goodFlag", True)
+        bad2.set("badFlag", True)
+        bad3 = self.catalog.addNew()
+        bad3.set("goodFlag", False)
+        bad3.set("badFlag", True)
+        good = self.catalog.addNew()
+        good.set("goodFlag", True)
+        good.set("badFlag", False)
+        self.catalog["flux"] = 1.0
+        self.catalog["other_flux"] = 1.0
+        self.config.flags.good = ["goodFlag"]
+        self.config.flags.bad = ["badFlag"]
+        self.check([False, False, False, True])
+
+
+class ScienceSourceSelectorTaskTest(SourceSelectorTester, lsst.utils.tests.TestCase):
+    Task = ScienceSourceSelectorTask
+
+    def setUp(self):
+        SourceSelectorTester.setUp(self)
+        self.config.fluxLimit.fluxField = "flux"
+        self.config.flags.bad = []
+        self.config.doFluxLimit = True
+        self.config.doFlags = True
+        self.config.doUnresolved = False
+
+    def testFluxLimit(self):
+        tooBright = self.catalog.addNew()
+        tooBright.set("flux", 1.0e10)
+        tooBright.set("flux_flag", False)
+        good = self.catalog.addNew()
+        good.set("flux", 1000.0)
+        good.set("flux_flag", False)
+        bad = self.catalog.addNew()
+        bad.set("flux", good.get("flux"))
+        bad.set("flux_flag", True)
+        tooFaint = self.catalog.addNew()
+        tooFaint.set("flux", 1.0)
+        tooFaint.set("flux_flag", False)
+        self.config.fluxLimit.minimum = 10.0
+        self.config.fluxLimit.maximum = 1.0e6
+        self.config.fluxLimit.fluxField = "flux"
+        self.check([False, True, False, False])
+
+        # Works with no maximum set?
+        self.config.fluxLimit.maximum = None
+        self.check([True, True, False, False])
+
+        # Works with no minimum set?
+        self.config.fluxLimit.minimum = None
+        self.check([True, True, False, True])
+
+    def testUnresolved(self):
+        num = 5
+        for _ in range(num):
+            self.catalog.addNew()
+        self.catalog["flux"] = 1.0
+        starGalaxy = np.linspace(0.0, 1.0, num, False)
+        self.catalog["starGalaxy"] = starGalaxy
+        self.config.doUnresolved = True
+        self.config.unresolved.name = "starGalaxy"
+        minimum, maximum = 0.3, 0.7
+        self.config.unresolved.minimum = minimum
+        self.config.unresolved.maximum = maximum
+        self.check(((starGalaxy > minimum) & (starGalaxy < maximum)).tolist())
+
+        # Works with no minimum set?
+        self.config.unresolved.minimum = None
+        self.check((starGalaxy < maximum).tolist())
+
+        # Works with no maximum set?
+        self.config.unresolved.minimum = minimum
+        self.config.unresolved.maximum = None
+        self.check((starGalaxy > minimum).tolist())
+
+
+class ReferenceSourceSelectorTaskTest(SourceSelectorTester, lsst.utils.tests.TestCase):
+    Task = ReferenceSourceSelectorTask
+
+    def setUp(self):
+        SourceSelectorTester.setUp(self)
+        self.config.magLimit.fluxField = "flux"
+        self.config.doMagLimit = True
+        self.config.doFlags = True
+
+    def testMagnitudeLimit(self):
+        tooBright = self.catalog.addNew()
+        tooBright.set("flux", 1.0e10)
+        tooBright.set("flux_flag", False)
+        good = self.catalog.addNew()
+        good.set("flux", 1000.0)
+        good.set("flux_flag", False)
+        bad = self.catalog.addNew()
+        bad.set("flux", good.get("flux"))
+        bad.set("flux_flag", True)
+        tooFaint = self.catalog.addNew()
+        tooFaint.set("flux", 1.0)
+        tooFaint.set("flux_flag", False)
+        # Note: magnitudes are backwards, so the minimum flux is the maximum magnitude
+        self.config.magLimit.minimum = lsst.afw.image.abMagFromFlux(1.0e6)
+        self.config.magLimit.maximum = lsst.afw.image.abMagFromFlux(10.0)
+        self.config.magLimit.fluxField = "flux"
+        self.check([False, True, False, False])
+
+        # Works with no minimum set?
+        self.config.magLimit.minimum = None
+        self.check([True, True, False, False])
+
+        # Works with no maximum set?
+        self.config.magLimit.maximum = None
+        self.check([True, True, False, True])
+
+    def testColorLimits(self):
+        num = 10
+        for _ in range(num):
+            self.catalog.addNew()
+        color = np.linspace(-0.5, 0.5, num, True)
+        flux = 1000.0
+        # Definition: color = mag(flux) - mag(otherFlux)
+        otherFlux = lsst.afw.image.fluxFromABMag(lsst.afw.image.abMagFromFlux(flux) - color)
+        self.catalog["flux"] = flux
+        self.catalog["other_flux"] = otherFlux
+        minimum, maximum = -0.1, 0.2
+        self.config.colorLimits = {"test": ColorLimit(primary="flux", secondary="other_flux",
+                                                      minimum=minimum, maximum=maximum)}
+        self.check(((color > minimum) & (color < maximum)).tolist())
+
+        # Works with no minimum set?
+        self.config.colorLimits["test"].minimum = None
+        self.check((color < maximum).tolist())
+
+        # Works with no maximum set?
+        self.config.colorLimits["test"].maximum = None
+        self.config.colorLimits["test"].minimum = minimum
+        self.check((color > minimum).tolist())
+
+        # Multiple limits
+        self.config.colorLimits = {"test": ColorLimit(primary="flux", secondary="other_flux",
+                                                      minimum=minimum),
+                                   "other": ColorLimit(primary="flux", secondary="other_flux",
+                                                       maximum=maximum)}
+        assert maximum > minimum  # To be non-mutually-exclusive
+        self.check(((color > minimum) & (color < maximum)).tolist())
+
+        # Multiple mutually-exclusive limits
+        self.config.colorLimits["test"] = ColorLimit(primary="flux", secondary="other_flux", maximum=-0.1)
+        self.config.colorLimits["other"] = ColorLimit(primary="flux", secondary="other_flux", minimum=0.1)
+        self.check([False]*num)
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass
+
+
+def setup_module(module):
+    lsst.utils.tests.init()
+
+
+if __name__ == "__main__":
+    import sys
+    setup_module(sys.modules[__name__])
+    unittest.main()

From 97cef175234c172f8a48f2f400523f5b8d429322 Mon Sep 17 00:00:00 2001
From: Paul Price <price@astro.princeton.edu>
Date: Wed, 18 Oct 2017 13:57:45 -0400
Subject: [PATCH 2/2] add ReserveSourcesTask

This is a Task version of operations implemented separately in
MeasurePsfTask and PhotoCalTask. This consolidates code and makes it
easily available for use in other contexts.
---
 python/lsst/meas/algorithms/__init__.py       |   1 +
 .../meas/algorithms/reserveSourcesTask.py     | 183 ++++++++++++++++++
 tests/test_reserveSourcesTask.py              | 160 +++++++++++++++
 3 files changed, 344 insertions(+)
 create mode 100644 python/lsst/meas/algorithms/reserveSourcesTask.py
 create mode 100644 tests/test_reserveSourcesTask.py

diff --git a/python/lsst/meas/algorithms/__init__.py b/python/lsst/meas/algorithms/__init__.py
index b0b3e6692..cbf9a59a1 100644
--- a/python/lsst/meas/algorithms/__init__.py
+++ b/python/lsst/meas/algorithms/__init__.py
@@ -62,6 +62,7 @@
 from .ingestIndexReferenceTask import *
 from .loadIndexedReferenceObjects import *
 from .indexerRegistry import *
+from .reserveSourcesTask import *
 
 from .version import *
 
diff --git a/python/lsst/meas/algorithms/reserveSourcesTask.py b/python/lsst/meas/algorithms/reserveSourcesTask.py
new file mode 100644
index 000000000..c2cff254e
--- /dev/null
+++ b/python/lsst/meas/algorithms/reserveSourcesTask.py
@@ -0,0 +1,183 @@
+#
+# LSST Data Management System
+#
+# Copyright 2008-2017  AURA/LSST.
+#
+# This product includes software developed by the
+# LSST Project (http://www.lsst.org/).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the LSST License Statement and
+# the GNU General Public License along with this program.  If not,
+# see <https://www.lsstcorp.org/LegalNotices/>.
+#
+from __future__ import absolute_import, division, print_function
+
+from builtins import zip
+
+import numpy as np
+
+from lsst.pex.config import Config, Field
+from lsst.pipe.base import Task, Struct
+
+import lsst.afw.table
+
+__all__ = ["ReserveSourcesConfig", "ReserveSourcesTask"]
+
+
+class ReserveSourcesConfig(Config):
+    """Configuration for reserving sources"""
+    fraction = Field(dtype=float, default=0.0,
+                     doc="Fraction of candidates to reserve from fitting; none if <= 0")
+    seed = Field(dtype=int, default=1,
+                 doc=("This number will be added to the exposure ID to set the random seed for "
+                      "reserving candidates"))
+
+
+class ReserveSourcesTask(Task):
+    """Reserve sources from analysis
+
+    We randomly select a fraction of sources that will be reserved
+    from analysis. This allows evaluation of the quality of model fits
+    using sources that were not involved in the fitting process.
+
+    Constructor parameters
+    ----------------------
+    columnName : `str`, required
+        Name of flag column to add; we will suffix this with "_reserved".
+    schema : `lsst.afw.table.Schema`, required
+        Catalog schema.
+    doc : `str`
+        Documentation for column to add.
+    config : `ReserveSourcesConfig`
+        Configuration.
+    """
+    ConfigClass = ReserveSourcesConfig
+    _DefaultName = "reserveSources"
+
+    def __init__(self, columnName=None, schema=None, doc=None, **kwargs):
+        Task.__init__(self, **kwargs)
+        assert columnName is not None, "columnName not provided"
+        assert schema is not None, "schema not provided"
+        self.columnName = columnName
+        self.key = schema.addField(self.columnName + "_reserved", type="Flag", doc=doc)
+
+    def run(self, sources, prior=None, expId=0):
+        """Select sources to be reserved
+
+        Reserved sources will be flagged in the catalog, and we will return
+        boolean arrays that identify the sources to be reserved from and
+        used in the analysis. Typically you'll want to use the sources
+        from the `use` array in your fitting, and use the sources from the
+        `reserved` array as an independent test of your fitting.
+
+        Parameters
+        ----------
+        sources : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
+            Sources from which to select some to be reserved.
+        prior : `numpy.ndarray` of type `bool`, optional
+            Prior selection of sources. Should have the same length as
+            `sources`. If set, we will only consider for reservation sources
+            that are flagged `True` in this array.
+        expId : `int`
+            Exposure identifier; used for seeding the random number generator.
+
+        Return struct contents
+        ----------------------
+        reserved : `numpy.ndarray` of type `bool`
+            Sources to be reserved are flagged `True` in this array.
+        use : `numpy.ndarray` of type `bool`
+            Sources the user should use in analysis are flagged `True`.
+        """
+        if prior is not None:
+            assert len(prior) == len(sources), "Length mismatch: %s vs %s" % (len(prior), len(sources))
+            numSources = prior.sum()
+        else:
+            numSources = len(sources)
+        selection = self.select(numSources, expId)
+        if prior is not None:
+            selection = self.applySelectionPrior(prior, selection)
+        self.markSources(sources, selection)
+        self.log.info("Reserved %d/%d sources", selection.sum(), len(selection))
+        return Struct(reserved=selection,
+                      use=prior & ~selection if prior is not None else np.logical_not(selection))
+
+    def select(self, numSources, expId=0):
+        """Randomly select some sources
+
+        We return a boolean array with a random selection. The fraction
+        of sources selected is specified by the config parameter `fraction`.
+
+        Parameters
+        ----------
+        numSources : `int`
+            Number of sources in catalog from which to select.
+        expId : `int`
+            Exposure identifier; used for seeding the random number generator.
+
+        Returns
+        -------
+        selection : `numpy.ndarray` of type `bool`
+            Selected sources are flagged `True` in this array.
+        """
+        selection = np.zeros(numSources, dtype=bool)
+        if self.config.fraction <= 0:
+            return selection
+        reserve = int(np.round(numSources*self.config.fraction))
+        selection[:reserve] = True
+        rng = np.random.RandomState(self.config.seed + expId)
+        rng.shuffle(selection)
+        return selection
+
+    def applySelectionPrior(self, prior, selection):
+        """Apply selection to full catalog
+
+        The `select` method makes a random selection of sources. If those
+        sources don't represent the full population (because a sub-selection
+        has already been made), then we need to generate a selection covering
+        the entire population.
+
+        Parameters
+        ----------
+        prior : `numpy.ndarray` of type `bool`
+            Prior selection of sources, identifying the subset from which the
+            random selection has been made.
+        selection : `numpy.ndarray` of type `bool`
+            Selection of sources in subset identified by `prior`.
+
+        Returns
+        -------
+        full : `numpy.ndarray` of type `bool`
+            Selection applied to full population.
+        """
+        full = np.zeros(len(prior), dtype=bool)
+        full[prior] = selection
+        return full
+
+    def markSources(self, sources, selection):
+        """Mark sources in a list or catalog
+
+        This requires iterating through the list and setting the flag in
+        each source individually. Even if the `sources` is a `Catalog`
+        with contiguous records, it's not currently possible to set a boolean
+        column (DM-6981) so we need to iterate.
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
+            Catalog in which to flag selected sources.
+        selection : `numpy.ndarray` of type `bool`
+            Selection of sources to mark.
+        """
+        for src, select in zip(sources, selection):
+            if select:
+                src.set(self.key, True)
diff --git a/tests/test_reserveSourcesTask.py b/tests/test_reserveSourcesTask.py
new file mode 100644
index 000000000..66ff1d725
--- /dev/null
+++ b/tests/test_reserveSourcesTask.py
@@ -0,0 +1,160 @@
+#
+# LSST Data Management System
+#
+# Copyright 2008-2017  AURA/LSST.
+#
+# This product includes software developed by the
+# LSST Project (http://www.lsst.org/).
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the LSST License Statement and
+# the GNU General Public License along with this program.  If not,
+# see <https://www.lsstcorp.org/LegalNotices/>.
+#
+from __future__ import absolute_import, division, print_function
+
+import unittest
+import numpy as np
+
+import lsst.afw.table
+import lsst.meas.algorithms
+import lsst.utils.tests
+
+from lsst.pipe.base import Struct
+
+
+class ReserveSourcesTaskTest(lsst.utils.tests.TestCase):
+    """TestCase for the ReserveSourcesTask"""
+    def setUp(self):
+        self.num = 100  # Number of sources
+        self.longMessage = True
+
+    def construct(self, name, fraction):
+        """Construct the test environment
+
+        This isn't called 'setUp' because we want to vary the `fraction`.
+
+        Parameters
+        ----------
+        name : `str`
+            Name of column for flagging reserved sources (without "_reserved").
+        fraction : `float`
+            Fraction of sources to reserve.
+
+        Return struct elements
+        ----------------------
+        catalog : `lsst.afw.table.SourceCatalog`
+            Catalog of sources.
+        task : `lsst.meas.algorithms.ReserveSourcesTask`
+            Task to do the reservations.
+        key : `lsst.afw.table.Key`
+            Key to the flag column.
+        """
+        schema = lsst.afw.table.SourceTable.makeMinimalSchema()
+        config = lsst.meas.algorithms.ReserveSourcesConfig()
+        config.fraction = fraction
+        task = lsst.meas.algorithms.ReserveSourcesTask(columnName=name, schema=schema,
+                                                       doc="Documentation is good", config=config)
+        key = schema[name + "_reserved"].asKey()
+        catalog = lsst.afw.table.SourceCatalog(schema)
+        catalog.reserve(self.num)
+        for _ in range(self.num):
+            catalog.addNew()
+        return Struct(catalog=catalog, task=task, key=key)
+
+    def check(self, sources, task, key, fraction):
+        """Check that source reservation is working
+
+        We test that the source reservation works, that it depends
+        on the RNG seed and that things behave as we expect when there's
+        a prior selection.
+
+        Parameters
+        ----------
+        catalog : `lsst.afw.table.Catalog` or `list` of `lsst.afw.table.Record`
+            List of sources.
+        task : `lsst.meas.algorithms.ReserveSourcesTask`
+            Task to do the reservations.
+        key : `lsst.afw.table.Key`
+            Key to the flag column.
+        fraction : `float`
+            Fraction of sources to reserve.
+        """
+        message = "fraction=%s, key=%s" % (fraction,key)
+        numExpect = int(fraction*len(sources))
+
+        # No prior
+        results1 = task.run(sources, expId=1)
+        flagged1 = np.array([ss.get(key) for ss in sources])
+        self.assertEqual(flagged1.sum(), numExpect, message)
+        np.testing.assert_array_equal(results1.reserved, flagged1, message)
+        np.testing.assert_array_equal(results1.use, np.logical_not(flagged1), message)
+
+        # Second run with different seed; clear the flag first
+        for ss in sources:
+            ss.set(key, False)
+        results2 = task.run(sources, expId=2)
+        flagged2 = np.array([ss.get(key) for ss in sources])
+        self.assertEqual(flagged1.sum(), numExpect, message)
+        np.testing.assert_array_equal(results2.reserved, flagged2, message)
+        np.testing.assert_array_equal(results2.use, np.logical_not(flagged2), message)
+        if numExpect > 0:
+            self.assertFalse(np.all(flagged1 == flagged2),
+                             "Pretty unlikely since different seeds\n" + message)
+
+        # Run with prior selection; clear the flag first
+        for ss in sources:
+            ss.set(key, False)
+        prior = np.arange(0, self.num, 1, dtype=int) % 2 == 0
+        results3 = task.run(sources, prior, expId=1)
+        flagged3 = np.array([ss.get(key) for ss in sources])
+        self.assertEqual(flagged3.sum(), fraction*prior.sum(), message)
+        if numExpect > 0:
+            self.assertFalse(np.all(flagged1 == flagged3),
+                             "Flags should change, despite same see\n" + message)
+        np.testing.assert_array_equal(results3.reserved, flagged3, message)
+        self.assertEqual((results3.use & flagged3).sum(), 0,
+                         "No sources should be both reserved and used\n" + message)  #
+        self.assertEqual(results3.use.sum(), int((1.0 - fraction)*prior.sum()), message)
+        self.assertEqual(results3.reserved.sum(), int(fraction*prior.sum()), message)
+        np.testing.assert_array_equal(results3.use, prior & ~flagged3,
+                                      "Actual definition of 'use'\n" + message)
+
+    def testCatalog(self):
+        """Test source reservation with a Catalog
+
+        We test multiple reservation fractions.
+        """
+        for fraction in (0.0, 0.1, 0.2, 0.5):
+            data = self.construct("this_table_is", fraction)
+            self.check(data.catalog, data.task, data.key, fraction)
+
+    def testSources(self):
+        """Test source reservation with a list of sources"""
+        fraction = 0.2
+        data = self.construct("this_table_is", fraction)
+        sources = [ss for ss in data.catalog]
+        self.check(sources, data.task, data.key, fraction)
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass
+
+
+def setup_module(module):
+    lsst.utils.tests.init()
+
+
+if __name__ == "__main__":
+    import sys
+    setup_module(sys.modules[__name__])
+    unittest.main()