diff --git a/data/Fig1-designProcess/blend1Subset.pickle b/data/Fig1-designProcess/blend1Subset.pickle
index 7fdb1ba..6071621 100644
Binary files a/data/Fig1-designProcess/blend1Subset.pickle and b/data/Fig1-designProcess/blend1Subset.pickle differ
diff --git a/data/Fig1-designProcess/blend2Subset.pickle b/data/Fig1-designProcess/blend2Subset.pickle
index 2e13f5f..ec2c85b 100644
Binary files a/data/Fig1-designProcess/blend2Subset.pickle and b/data/Fig1-designProcess/blend2Subset.pickle differ
diff --git a/data/Fig1-designProcess/blend3Subset.pickle b/data/Fig1-designProcess/blend3Subset.pickle
index 35d29ce..817160e 100644
Binary files a/data/Fig1-designProcess/blend3Subset.pickle and b/data/Fig1-designProcess/blend3Subset.pickle differ
diff --git a/data/Fig1-designProcess/distinctSubset.pickle b/data/Fig1-designProcess/distinctSubset.pickle
index 963bfb7..cdc928d 100644
Binary files a/data/Fig1-designProcess/distinctSubset.pickle and b/data/Fig1-designProcess/distinctSubset.pickle differ
diff --git a/data/Fig1-designProcess/fullData.pickle b/data/Fig1-designProcess/fullData.pickle
index 28cc77c..e0a9fce 100644
Binary files a/data/Fig1-designProcess/fullData.pickle and b/data/Fig1-designProcess/fullData.pickle differ
diff --git a/data/Fig1-designProcess/hullSubset.pickle b/data/Fig1-designProcess/hullSubset.pickle
index 43fbee8..20ef179 100644
Binary files a/data/Fig1-designProcess/hullSubset.pickle and b/data/Fig1-designProcess/hullSubset.pickle differ
diff --git a/data/Fig1-designProcess/outliersSubset.pickle b/data/Fig1-designProcess/outliersSubset.pickle
index dc2902c..596d588 100644
Binary files a/data/Fig1-designProcess/outliersSubset.pickle and b/data/Fig1-designProcess/outliersSubset.pickle differ
diff --git a/data/Fig2&3-objectives/clusterBest.pickle b/data/Fig2&3-objectives/clusterBest.pickle
index 405aa41..34b8f26 100644
Binary files a/data/Fig2&3-objectives/clusterBest.pickle and b/data/Fig2&3-objectives/clusterBest.pickle differ
diff --git a/data/Fig2&3-objectives/clusterGreedy.pickle b/data/Fig2&3-objectives/clusterGreedy.pickle
index 5147b12..3df848b 100644
Binary files a/data/Fig2&3-objectives/clusterGreedy.pickle and b/data/Fig2&3-objectives/clusterGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/clusterWorst.pickle b/data/Fig2&3-objectives/clusterWorst.pickle
index a58005f..a48177c 100644
Binary files a/data/Fig2&3-objectives/clusterWorst.pickle and b/data/Fig2&3-objectives/clusterWorst.pickle differ
diff --git a/data/Fig2&3-objectives/coverageBest.pickle b/data/Fig2&3-objectives/coverageBest.pickle
index 66ffaf6..0e8e5da 100644
Binary files a/data/Fig2&3-objectives/coverageBest.pickle and b/data/Fig2&3-objectives/coverageBest.pickle differ
diff --git a/data/Fig2&3-objectives/coverageGreedy.pickle b/data/Fig2&3-objectives/coverageGreedy.pickle
index b7200d1..187bfd5 100644
Binary files a/data/Fig2&3-objectives/coverageGreedy.pickle and b/data/Fig2&3-objectives/coverageGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/coverageWorst.pickle b/data/Fig2&3-objectives/coverageWorst.pickle
index fda4b41..80c2aee 100644
Binary files a/data/Fig2&3-objectives/coverageWorst.pickle and b/data/Fig2&3-objectives/coverageWorst.pickle differ
diff --git a/data/Fig2&3-objectives/crossingsBest.pickle b/data/Fig2&3-objectives/crossingsBest.pickle
index 0002926..ff0c5d1 100644
Binary files a/data/Fig2&3-objectives/crossingsBest.pickle and b/data/Fig2&3-objectives/crossingsBest.pickle differ
diff --git a/data/Fig2&3-objectives/crossingsGreedy.pickle b/data/Fig2&3-objectives/crossingsGreedy.pickle
index 5c6c53f..42c30af 100644
Binary files a/data/Fig2&3-objectives/crossingsGreedy.pickle and b/data/Fig2&3-objectives/crossingsGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/crossingsWorst.pickle b/data/Fig2&3-objectives/crossingsWorst.pickle
index c60de22..d3b7f0b 100644
Binary files a/data/Fig2&3-objectives/crossingsWorst.pickle and b/data/Fig2&3-objectives/crossingsWorst.pickle differ
diff --git a/data/Fig2&3-objectives/distinctnessBest.pickle b/data/Fig2&3-objectives/distinctnessBest.pickle
index 7ad945e..c27ba7f 100644
Binary files a/data/Fig2&3-objectives/distinctnessBest.pickle and b/data/Fig2&3-objectives/distinctnessBest.pickle differ
diff --git a/data/Fig2&3-objectives/distinctnessGreedy.pickle b/data/Fig2&3-objectives/distinctnessGreedy.pickle
index bb9b92d..7d2af9e 100644
Binary files a/data/Fig2&3-objectives/distinctnessGreedy.pickle and b/data/Fig2&3-objectives/distinctnessGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/distinctnessWorst.pickle b/data/Fig2&3-objectives/distinctnessWorst.pickle
index a790799..3eb1b85 100644
Binary files a/data/Fig2&3-objectives/distinctnessWorst.pickle and b/data/Fig2&3-objectives/distinctnessWorst.pickle differ
diff --git a/data/Fig2&3-objectives/distributionBest.pickle b/data/Fig2&3-objectives/distributionBest.pickle
index 22b5d27..aa7090e 100644
Binary files a/data/Fig2&3-objectives/distributionBest.pickle and b/data/Fig2&3-objectives/distributionBest.pickle differ
diff --git a/data/Fig2&3-objectives/distributionGreedy.pickle b/data/Fig2&3-objectives/distributionGreedy.pickle
index 8abfa5a..d072e86 100644
Binary files a/data/Fig2&3-objectives/distributionGreedy.pickle and b/data/Fig2&3-objectives/distributionGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/distributionWorst.pickle b/data/Fig2&3-objectives/distributionWorst.pickle
index 223ebd3..cc87fc5 100644
Binary files a/data/Fig2&3-objectives/distributionWorst.pickle and b/data/Fig2&3-objectives/distributionWorst.pickle differ
diff --git a/data/Fig2&3-objectives/firstSetFull.pickle b/data/Fig2&3-objectives/firstSetFull.pickle
index 9fb825f..510e329 100644
Binary files a/data/Fig2&3-objectives/firstSetFull.pickle and b/data/Fig2&3-objectives/firstSetFull.pickle differ
diff --git a/data/Fig2&3-objectives/meanBest.pickle b/data/Fig2&3-objectives/meanBest.pickle
index 602c561..d26812d 100644
Binary files a/data/Fig2&3-objectives/meanBest.pickle and b/data/Fig2&3-objectives/meanBest.pickle differ
diff --git a/data/Fig2&3-objectives/meanGreedy.pickle b/data/Fig2&3-objectives/meanGreedy.pickle
index 79e413c..50c023f 100644
Binary files a/data/Fig2&3-objectives/meanGreedy.pickle and b/data/Fig2&3-objectives/meanGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/rangeBest.pickle b/data/Fig2&3-objectives/rangeBest.pickle
index 4085efc..9eee209 100644
Binary files a/data/Fig2&3-objectives/rangeBest.pickle and b/data/Fig2&3-objectives/rangeBest.pickle differ
diff --git a/data/Fig2&3-objectives/rangeGreedy.pickle b/data/Fig2&3-objectives/rangeGreedy.pickle
index 20d4242..87aa2df 100644
Binary files a/data/Fig2&3-objectives/rangeGreedy.pickle and b/data/Fig2&3-objectives/rangeGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/rangeWorst.pickle b/data/Fig2&3-objectives/rangeWorst.pickle
index 4c15f91..930ac4f 100644
Binary files a/data/Fig2&3-objectives/rangeWorst.pickle and b/data/Fig2&3-objectives/rangeWorst.pickle differ
diff --git a/data/Fig2&3-objectives/secondSetFull.pickle b/data/Fig2&3-objectives/secondSetFull.pickle
index 9c3b3de..a16db3e 100644
Binary files a/data/Fig2&3-objectives/secondSetFull.pickle and b/data/Fig2&3-objectives/secondSetFull.pickle differ
diff --git a/data/Fig2&3-objectives/spreadBest.pickle b/data/Fig2&3-objectives/spreadBest.pickle
index 48ee6cb..3da1ace 100644
Binary files a/data/Fig2&3-objectives/spreadBest.pickle and b/data/Fig2&3-objectives/spreadBest.pickle differ
diff --git a/data/Fig2&3-objectives/spreadGreedy.pickle b/data/Fig2&3-objectives/spreadGreedy.pickle
index 1d7f0df..7e156ac 100644
Binary files a/data/Fig2&3-objectives/spreadGreedy.pickle and b/data/Fig2&3-objectives/spreadGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/spreadWorst.pickle b/data/Fig2&3-objectives/spreadWorst.pickle
index a68d5ba..55d96d2 100644
Binary files a/data/Fig2&3-objectives/spreadWorst.pickle and b/data/Fig2&3-objectives/spreadWorst.pickle differ
diff --git a/data/Fig2&3-objectives/varianceBest.pickle b/data/Fig2&3-objectives/varianceBest.pickle
index b376a80..1b12415 100644
Binary files a/data/Fig2&3-objectives/varianceBest.pickle and b/data/Fig2&3-objectives/varianceBest.pickle differ
diff --git a/data/Fig2&3-objectives/varianceGreedy.pickle b/data/Fig2&3-objectives/varianceGreedy.pickle
index d8385cf..8e1b998 100644
Binary files a/data/Fig2&3-objectives/varianceGreedy.pickle and b/data/Fig2&3-objectives/varianceGreedy.pickle differ
diff --git a/data/Fig2&3-objectives/varianceWorst.pickle b/data/Fig2&3-objectives/varianceWorst.pickle
index 219520b..7c27049 100644
Binary files a/data/Fig2&3-objectives/varianceWorst.pickle and b/data/Fig2&3-objectives/varianceWorst.pickle differ
diff --git a/data/solverData.csv b/data/solverData.csv
index 2f7d803..f2cfe7d 100644
--- a/data/solverData.csv
+++ b/data/solverData.csv
@@ -14,3 +14,59 @@ Loss Function,Algorithm,Dataset Length,Dataset Width,Subset Length,Computation T
 "Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,8,0.09449754096567631,0.0
 "Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.03406029101461172,-82.66069589660857
 "Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.16028858395293355,-84.45664299596237
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08147954102605581,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,13,0.11059033405035734,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.0863709170371294,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.0740786250680685,0.0
+"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.033992249984294176,-84.54589628703302
+"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.1607582913711667,-81.86395834115001
+"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,8.82104833330959,-25.188865690667818
+"Multi-criterion: 10*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.348917667288333,-78.67270140126473
+"Multi-criterion: 1*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,11.100845790933818,-87.84137713354008
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08417670801281929,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08312133280560374,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,11,0.09199658269062638,0.0
+"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.03439829172566533,-76.52548733430072
+"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.16334479115903378,-71.10396031803283
+"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,8.926938584074378,-34.18685064183635
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08443129109218717,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,12,0.10422499990090728,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,10,0.08931229170411825,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,7,0.07279970869421959,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,10,0.09106883406639099,0.0
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.07939420826733112,0.0
+"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.035825416911393404,-77.29872080465749
+"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.15776562504470348,-73.12135522562879
+"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,8,0.08814891707152128,0.0
+"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.0881077079102397,-99.75139918380013
+"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.2870826250873506,-76.88675037532666
+"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.175601665861905,-40.11565830101138
+"Multi-criterion: 10*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.356814333703369,-76.65185552813224
+"Multi-criterion: 1*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,11.165674250107259,-81.05143913364613
+"Uni-criterion: preserveMetric, mean",worstOfRandom,1000,10,10,0.0292369588278234,9.198305574729122
+"Uni-criterion: preserveMetric, mean",bestOfRandom,1000,10,10,0.028541583102196455,1.3894928584717083
+"Uni-criterion: preserveMetric, mean",greedySwap,1000,10,10,0.13707087468355894,0.5377753781976218
+"Uni-criterion: preserveMetric, range",worstOfRandom,1000,10,10,0.026246124878525734,109.42792301666869
+"Uni-criterion: preserveMetric, range",bestOfRandom,1000,10,10,0.025422124657779932,67.9094572323412
+"Uni-criterion: preserveMetric, range",greedySwap,1000,10,10,0.11163204209879041,23.139741839423703
+"Uni-criterion: preserveMetric, variance",worstOfRandom,1000,10,10,0.040997583884745836,33.163418779054865
+"Uni-criterion: preserveMetric, variance",bestOfRandom,1000,10,10,0.03368045808747411,5.122966903670596
+"Uni-criterion: preserveMetric, variance",greedySwap,1000,10,10,0.18226074986159801,1.7610126668908592
+Uni-criterion: pcpLineCrossings,worstOfRandom,1000,10,10,0.15455975010991096,251.0
+Uni-criterion: pcpLineCrossings,bestOfRandom,1000,10,10,0.1509659173898399,151.0
+Uni-criterion: pcpLineCrossings,greedySwap,1000,10,10,1.319750000257045,84.0
+Uni-criterion: discreteCoverage,worstOfRandom,1000,10,10,0.03174870880320668,-25.0
+Uni-criterion: discreteCoverage,bestOfRandom,1000,10,10,0.026619874872267246,-40.0
+Uni-criterion: discreteCoverage,greedySwap,1000,10,10,0.10966670885682106,-53.0
+"Uni-criterion: preserveMetric, discreteDistribution",worstOfRandom,1000,10,10,0.029847667086869478,6.097999999999999
+"Uni-criterion: preserveMetric, discreteDistribution",bestOfRandom,1000,10,10,0.029892582911998034,2.588
+"Uni-criterion: preserveMetric, discreteDistribution",greedySwap,1000,10,10,0.13888537511229515,1.638
+"Uni-criterion: distinctness, distances",worstOfRandom,1000,2,10,0.03159516677260399,-3.325124956241212
+"Uni-criterion: distinctness, distances",bestOfRandom,1000,2,10,0.03259162465110421,-27.10271348458167
distances",bestOfRandom,1000,2,10,0.03259162465110421,-27.10271348458167 +"Uni-criterion: distinctness, distances",greedySwap,1000,2,10,0.1527112922631204,-55.81886893905185 +"Uni-criterion: spread, distances",worstOfRandom,1000,2,10,0.03820183267816901,-144.2366774800783 +"Uni-criterion: spread, distances",bestOfRandom,1000,2,10,0.030908292159438133,-602.2538652359489 +"Uni-criterion: spread, distances",greedySwap,1000,2,10,0.14518300024792552,-1035.7090474217432 +Uni-criterion: clusterCenters,worstOfRandom,1000,2,10,0.029493208974599838,11.847265520187392 +Uni-criterion: clusterCenters,bestOfRandom,1000,2,10,0.02857504179701209,2.789513312747127 +Uni-criterion: clusterCenters,greedySwap,1000,2,10,0.13878262508660555,0.7724079671652213 diff --git a/figures/Fig1-designProcess/express.pdf b/figures/Fig1-designProcess/express.pdf index a3c4fa6..dab5267 100644 Binary files a/figures/Fig1-designProcess/express.pdf and b/figures/Fig1-designProcess/express.pdf differ diff --git a/figures/Fig2&3-objectives/objectives-1.pdf b/figures/Fig2&3-objectives/objectives-1.pdf index 58619cb..3337d4d 100644 Binary files a/figures/Fig2&3-objectives/objectives-1.pdf and b/figures/Fig2&3-objectives/objectives-1.pdf differ diff --git a/figures/Fig2&3-objectives/objectives-2.pdf b/figures/Fig2&3-objectives/objectives-2.pdf index abfb0e5..7580bbc 100644 Binary files a/figures/Fig2&3-objectives/objectives-2.pdf and b/figures/Fig2&3-objectives/objectives-2.pdf differ diff --git a/flexibleSubsetSelection/__init__.py b/flexibleSubsetSelection/__init__.py index 7f30d3a..5c8d117 100644 --- a/flexibleSubsetSelection/__init__.py +++ b/flexibleSubsetSelection/__init__.py @@ -16,5 +16,6 @@ plot, # Plotting functions for datasets and subsets algorithm, # Algorithms for subset selection objective, # Objective functions for defining criteria - metric # Data metric functions + metric, # Data metric functions + logger # Logging information to console or files ) \ No newline at end of file diff --git a/flexibleSubsetSelection/algorithm.py b/flexibleSubsetSelection/algorithm.py index a806cc7..442096d 100644 --- a/flexibleSubsetSelection/algorithm.py +++ b/flexibleSubsetSelection/algorithm.py @@ -1,14 +1,17 @@ # --- Imports ------------------------------------------------------------------ -# Standard library -import time - # Third party import cvxpy as cp import gurobipy as gp import numpy as np import ot +# Local files +from . import logger + +# Setup logger +log = logger.setup(__name__) + # --- Utility ------------------------------------------------------------------ @@ -47,7 +50,7 @@ def createEnvironment(outputFlag: int = 0): return environment def optimize(objective, constraints, environment, solver, - log_file='gurobi_log.txt', verbose=False): + log_file='gurobi_log.txt'): """ Sets up a cvxpy problem with given objective and constraints and solves it using the specified solver. @@ -60,8 +63,6 @@ def optimize(objective, constraints, environment, solver, particularly when using external solvers like Gurobi. solver: Optional. Solver to be used for solving the optimization problem. - verbose: Optional. Boolean flag indicating whether to print solver - output messages during optimization. Defaults to False. log_file: Optional. File path for Gurobi log. Defaults to 'gurobi_log.txt'. 
diff --git a/figures/Fig1-designProcess/express.pdf b/figures/Fig1-designProcess/express.pdf
index a3c4fa6..dab5267 100644
Binary files a/figures/Fig1-designProcess/express.pdf and b/figures/Fig1-designProcess/express.pdf differ
diff --git a/figures/Fig2&3-objectives/objectives-1.pdf b/figures/Fig2&3-objectives/objectives-1.pdf
index 58619cb..3337d4d 100644
Binary files a/figures/Fig2&3-objectives/objectives-1.pdf and b/figures/Fig2&3-objectives/objectives-1.pdf differ
diff --git a/figures/Fig2&3-objectives/objectives-2.pdf b/figures/Fig2&3-objectives/objectives-2.pdf
index abfb0e5..7580bbc 100644
Binary files a/figures/Fig2&3-objectives/objectives-2.pdf and b/figures/Fig2&3-objectives/objectives-2.pdf differ
diff --git a/flexibleSubsetSelection/__init__.py b/flexibleSubsetSelection/__init__.py
index 7f30d3a..5c8d117 100644
--- a/flexibleSubsetSelection/__init__.py
+++ b/flexibleSubsetSelection/__init__.py
@@ -16,5 +16,6 @@
     plot,      # Plotting functions for datasets and subsets
     algorithm, # Algorithms for subset selection
     objective, # Objective functions for defining criteria
-    metric     # Data metric functions
+    metric,    # Data metric functions
+    logger     # Logging information to console or files
 )
\ No newline at end of file
diff --git a/flexibleSubsetSelection/algorithm.py b/flexibleSubsetSelection/algorithm.py
index a806cc7..442096d 100644
--- a/flexibleSubsetSelection/algorithm.py
+++ b/flexibleSubsetSelection/algorithm.py
@@ -1,14 +1,17 @@
 # --- Imports ------------------------------------------------------------------
 
-# Standard library
-import time
-
 # Third party
 import cvxpy as cp
 import gurobipy as gp
 import numpy as np
 import ot
 
+# Local files
+from . import logger
+
+# Setup logger
+log = logger.setup(__name__)
+
 
 # --- Utility ------------------------------------------------------------------
@@ -47,7 +50,7 @@ def createEnvironment(outputFlag: int = 0):
     return environment
 
 def optimize(objective, constraints, environment, solver,
-             log_file='gurobi_log.txt', verbose=False):
+             log_file='gurobi_log.txt'):
     """
     Sets up a cvxpy problem with given objective and constraints and solves
     it using the specified solver.
@@ -60,8 +63,6 @@
             particularly when using external solvers like Gurobi.
         solver: Optional. Solver to be used for solving the optimization
            problem.
-        verbose: Optional. Boolean flag indicating whether to print solver
-            output messages during optimization. Defaults to False.
        log_file: Optional. File path for Gurobi log. Defaults to
            'gurobi_log.txt'.
 
    Returns:
        problem: The cvxpy Problem object after solving, which contains
@@ -70,16 +71,16 @@
     problem = cp.Problem(objective, constraints)
 
     if solver == cp.GUROBI:
-        problem.solve(solver=solver, verbose=verbose, env=environment, logfile=log_file)
+        problem.solve(solver=solver, env=environment, logfile=log_file)
     else:
-        problem.solve(solver=solver, verbose=verbose)
+        problem.solve(solver=solver)
 
     return problem
 
 
 # --- Algorithms ---------------------------------------------------------------
 
 def bestOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
-                 maxIterations=None, seed=None, verbose=False, selectBy="row"):
+                 maxIterations=None, seed=None, selectBy="row"):
 
     if maxIterations is None:
         maxIterations = dataset.size[0]
@@ -88,8 +89,7 @@
         minLoss = lossFunction.calculate(dataset, z)
 
     for i in range(maxIterations):
-        if verbose:
-            print(f"{i}: {minLoss}")
+        log.debug("%s: %s", i, minLoss)
         curZ = randomSample(dataset.size, subsetSize, seed)[0]
         curLoss = lossFunction.calculate(dataset, curZ)
         if curLoss < minLoss:
@@ -100,8 +100,7 @@
 
 def averageOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
-                    maxIterations=None, seed=None, verbose=False,
-                    selectBy="row"):
+                    maxIterations=None, seed=None, selectBy="row"):
 
     if maxIterations is None:
         maxIterations = dataset.size[0]
@@ -119,7 +118,7 @@
 
 def worstOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
-                  maxIterations=None, seed=None, verbose=False, selectBy="row"):
+                  maxIterations=None, seed=None, selectBy="row"):
     """
     maximize representativeness of a subset of size s of dataset of size n by
     m according to metric function f using the p-norm
@@ -140,8 +139,8 @@
     return z, maxLoss
 
-def greedySwap(dataset, lossFunction, subsetSize, minLoss=0,
-               maxIterations=None, seed=None, verbose=False, selectBy="row"):
+def greedySwap(dataset, lossFunction, subsetSize, minLoss=0, maxIterations=None,
+               seed=None):
     """
     A greedy algorithm with a greedy swap heuristic for subset selection.
 
@@ -154,14 +153,12 @@
         maxIterations (int, optional): Maximum number of iterations
         seed (int, rng, optional): The random seed or NumPy rng for random
             generation and reproducibility
-        verbose (bool, optional): Toggle for verbose logging
 
     Returns:
         z (array): Indicator vector of included items in the subset
         loss (float): The loss value of the final subset
     """
-    if verbose:
-        print(f"Solving for a subset of size {subsetSize}.")
+    log.debug("Solving for a subset of size %s.", subsetSize)
     iterations = 0
 
     # select random starting subset
@@ -172,8 +169,7 @@
         maxIterations = dataset.size[0]
 
     for i in range(maxIterations):
-        if verbose:
-            print(f"Iteration {i}/{maxIterations}: Loss {loss}")
+        log.debug("Iteration %s/%s: Loss %s.", i, maxIterations, loss)
         if i not in indices:
             zSwapBest = np.copy(z)
             lossSwapBest = loss
@@ -202,9 +198,8 @@
     return z, loss  # return indicator and final loss
 
-def greedyMinSubset(dataset, lossFunction, epsilon,
-                    minError=0, maxIterations=None, seed=None,
-                    verbose=False, initialSize=1):
+def greedyMinSubset(dataset, lossFunction, epsilon, minError=0,
+                    maxIterations=None, seed=None, initialSize=1):
     """
     A greedy algorithm for subset selection to minimize the size of the subset
     such that lossFunction(subset) <= epsilon.
@@ -218,7 +213,6 @@
         maxIterations (int, optional): Maximum number of iterations
         seed (int, rng, optional): The random seed or NumPy rng for random
             generation and reproducibility
-        verbose (bool, optional): Toggle for verbose logging
         initialSize (int, optional): Initial size of the subset
 
     Returns:
@@ -230,8 +224,7 @@
     # Extract dataset size
     datasetLength = dataset.size[0]
 
-    if verbose:
-        print(f"Solving for a subset such that {lossFunction.objectives.__name__}(subset) <= {epsilon}")
+    log.debug("Solving for a subset such that loss(subset) <= %s.", epsilon)
     iterations = 0
     consecutive_stable_iterations = 0
     prev_subset_size = initialSize
@@ -258,8 +251,7 @@
         maxIterations = datasetLength
 
     while iterations < maxIterations:
-        if verbose:
-            print(f"Iteration {iterations}: Loss {current_loss}, Error {error}, Subset Size {np.sum(z)}")
+        log.debug("Iteration: %s, Loss: %s, Error: %s, Subset Size: %s.", iterations, current_loss, error, np.sum(z))
 
         # Check if error is less than or equal to epsilon
         if error <= epsilon:
@@ -322,9 +314,10 @@
     return z, error
 
 def greedyMixed(dataset, lossFunction, weight=1.0, minError=0,
-                maxIterations=None, seed=None, verbose=False, initialSize=1):
+                maxIterations=None, seed=None, initialSize=1):
     """
-    A greedy algorithm to minimize the total loss = weight * subsetSize + lossFunction.calculate().
+    A greedy algorithm to minimize the total
+    loss = weight * subsetSize + lossFunction.calculate().
 
     Args:
         dataset (object): The Dataset class object
@@ -334,7 +327,6 @@
         maxIterations (int, optional): Maximum number of iterations
         seed (int, rng, optional): The random seed or NumPy rng for random
             generation and reproducibility
-        verbose (bool, optional): Toggle for verbose logging
         initialSize (int, optional): Initial size of the subset
 
     Returns:
@@ -344,8 +336,7 @@
     # Extract dataset size
     datasetLength = dataset.size[0]
 
-    if verbose:
-        print(f"Solving to minimize total loss = {weight} * subsetSize + lossFunction.calculate()")
+    log.debug("Solving to minimize total loss = %s * subsetSize + lossFunction.calculate()", weight)
     iterations = 0
 
     # Set the random seed
@@ -371,8 +362,7 @@
         maxIterations = datasetLength
 
     while iterations < maxIterations:
-        if verbose:
-            print(f"Iteration {iterations}: Total Loss {total_loss}, Subset Size {np.sum(z)}")
+        log.debug("Iteration %s: Total Loss %s, Subset Size %s", iterations, total_loss, np.sum(z))
 
         # Check if error is less than or equal to minError
         if error <= minError:
@@ -407,8 +397,7 @@
     return z, total_loss  # return indicator vector, and total loss
 
-def optimizeCoverage(dataset, lossFunction, environment, subsetSize,
-                     verbose=False):
+def optimizeCoverage(dataset, lossFunction, environment, subsetSize):
     """
     Optimize subset selection for coverage while minimizing L1 norm.
@@ -438,26 +427,25 @@
     objective = cp.Minimize(cp.sum(t))  # objective is maximizing the sum of t
 
     problem = optimize(objective=objective,
                        constraints=constraints,
-                       environment=environment,
-                       verbose=verbose)
+                       environment=environment)
 
     return z.value.astype(int), problem.value
 
-def optimizeSum(dataset, lossFunction, environment, w, solver, verbose=False):
+def optimizeSum(dataset, lossFunction, environment, w, solver):
 
     datasetLength = len(dataset.dataArray)
     z = cp.Variable(datasetLength, boolean=True)  # subset decision vector
 
     constraints = []
     objective = cp.Maximize(-w[0]*cp.sum(z) + w[1]*cp.sum(z@dataset.dataArray))
-    problem = optimize(objective, constraints, environment, solver, verbose)
+    problem = optimize(objective, constraints, environment, solver)
 
     return z.value.astype(int), problem.value
 
 def optimizeEMD(dataset, lossFunction, environment, subsetSize,
-                solver=cp.GUROBI, verbose=False):
+                solver=cp.GUROBI):
 
     datasetLength = len(dataset.dataArray)
     z = cp.Variable(datasetLength, boolean=True)  # subset decision vector
@@ -465,12 +453,11 @@
     subset = np.array(z@dataset.dataArray)
     objective = cp.Minimize(ot.emd2([], [], ot.dist(subset, dataset.dataArray)))
-    problem = optimize(objective, constraints, environment, solver, verbose)
+    problem = optimize(objective, constraints, environment, solver)
 
     return z.value.astype(int), problem.value
 
-def optimizeDistribution(dataset, lossFunction, environment, subsetSize,
-                         verbose=False):
+def optimizeDistribution(dataset, lossFunction, environment, subsetSize):
 
     datasetLength, oneHotWidth = dataset.dataArray.shape
     z = cp.Variable(datasetLength, boolean=True)  # subset decision vector
@@ -488,19 +475,20 @@
     problem = optimize(objective,
                        constraints,
                        environment,
-                       solver=cp.GUROBI,
-                       verbose=verbose)
+                       solver=cp.GUROBI)
 
     return z.value.astype(int), problem.value
 
-def sinkhorn(dataset, lossFunction, distanceMatrix, subsetSize, environment, lambdaReg=0.1, verbose=False):
+def sinkhorn(dataset, lossFunction, distanceMatrix, subsetSize, environment,
+             lambdaReg=0.1):
+
     datasetLength = dataset.size[0]
 
     # Decision variables
     z = cp.Variable(datasetLength, boolean=True)  # Subset selection vector
-    gamma = cp.Variable((datasetLength, datasetLength), nonneg=True)  # Transport plan
+    gamma = cp.Variable((datasetLength, datasetLength), nonneg=True)
 
-    # Define the objective: Minimize the Sinkhorn distance using the precomputed distance matrix
+    # Minimize the Sinkhorn distance using the precomputed distance matrix
     objective = cp.Minimize(cp.sum(cp.multiply(gamma, distanceMatrix)))
 
     # Constraints
@@ -515,7 +503,6 @@
     problem = optimize(objective,
                        constraints,
                        environment,
-                       solver=cp.GUROBI,
-                       verbose=verbose)
+                       solver=cp.GUROBI)
 
     return z.value.astype(int), problem.value
\ No newline at end of file
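With the verbose flags removed, optimize() is now the single choke point for solver output: Gurobi writes to log_file and everything else stays quiet. A hedged sketch of driving it directly; the toy objective is a stand-in for the package's loss functions and assumes a working Gurobi license:

    # Toy boolean-selection problem routed through algorithm.optimize();
    # illustrative only, not one of the package's built-in objectives.
    import cvxpy as cp
    import numpy as np

    from flexibleSubsetSelection import algorithm

    values = np.array([3.0, 1.0, 4.0, 1.0, 5.0])
    z = cp.Variable(len(values), boolean=True)      # subset decision vector
    objective = cp.Maximize(values @ z)
    constraints = [cp.sum(z) == 2]                  # choose exactly two items

    environment = algorithm.createEnvironment()     # quiet Gurobi environment
    problem = algorithm.optimize(objective, constraints, environment,
                                 solver=cp.GUROBI)  # log lands in gurobi_log.txt
    print(z.value, problem.value)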
diff --git a/flexibleSubsetSelection/logger.py b/flexibleSubsetSelection/logger.py
new file mode 100644
index 0000000..efbac8e
--- /dev/null
+++ b/flexibleSubsetSelection/logger.py
@@ -0,0 +1,25 @@
+# --- Imports ------------------------------------------------------------------
+
+# Standard library
+import logging
+import sys
+
+
+# --- Logger -------------------------------------------------------------------
+
+def setup(name: str = "flexibleSubsetSelection", level: int = logging.NOTSET):
+    """
+    Sets up the logger for the package.
+    """
+    log = logging.getLogger(name)
+    if not log.hasHandlers():
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setLevel(level)
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s: %(message)s'
+        )
+        handler.setFormatter(formatter)
+        log.addHandler(handler)
+    log.setLevel(level)
+    log.propagate = False
+    return log
\ No newline at end of file
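Because setup() only attaches a stream handler when the logger has none, repeated calls are idempotent, and because module loggers are created at the default NOTSET level, their effective level is inherited from the package logger. One call therefore controls verbosity for the whole package:

    # Intended use of the new logger module (mirrors the notebook cells below)
    import logging
    import flexibleSubsetSelection as fss

    fss.logger.setup(level=logging.DEBUG)    # show per-iteration solver output
    # ... run solvers ...
    fss.logger.setup(level=logging.WARNING)  # back to warnings and errors only

Note that propagate = False keeps records away from the root logger's handlers, so each message is printed exactly once to stdout.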
diff --git a/flexibleSubsetSelection/loss.py b/flexibleSubsetSelection/loss.py
index 70d755d..298425a 100644
--- a/flexibleSubsetSelection/loss.py
+++ b/flexibleSubsetSelection/loss.py
@@ -11,9 +11,10 @@
 
 # Local files
 from .sets import Dataset, Subset
+from . import logger
 
 # Setup logger
-logger = logging.getLogger(__name__)
+log = logger.setup(__name__)
 
 
 # --- Loss Function ------------------------------------------------------------
@@ -55,7 +56,7 @@ def __init__(self, objectives: List[Callable],
 
         # Generate the combined objective function
         self.calculate = partial(self._loss)
-        logger.debug("Initialized a multi-criterion loss function with "
+        log.debug("Initialized a multi-criterion loss function with "
                      "objectives: %s, parameters: %s, and weights: %s",
                      objectives, parameters, weights)
@@ -139,7 +140,7 @@ def __init__(self, objective: Callable, solveArray: str = "dataArray",
         self.selectBy = selectBy
         self.parameters = parameters
 
-        logger.info("Initialized a uni-criterion loss function with "
+        log.info("Initialized a uni-criterion loss function with "
                     "objective: %s, solve array: %s, selection method: %s, "
                     "and parameters: %s", objective.__name__, solveArray,
                     selectBy, parameters)
diff --git a/flexibleSubsetSelection/objective.py b/flexibleSubsetSelection/objective.py
index 899a532..f209339 100644
--- a/flexibleSubsetSelection/objective.py
+++ b/flexibleSubsetSelection/objective.py
@@ -162,4 +162,37 @@ def entropy(array: np.ndarray) -> float:
 def sinkhorn(subset, fullData, solveFunction):
     geometry = pointcloud.PointCloud(fullData, subset)
     sinkhornOutput = solveFunction(geometry)
-    return sinkhornOutput.reg_ot_cost
\ No newline at end of file
+    return sinkhornOutput.reg_ot_cost
+
+
+import numpy as np
+import ot
+from scipy.spatial import distance
+
+def compute_cost_matrix_chunked(dataset_array, subset_array, chunk_size=1000):
+    num_dataset = dataset_array.shape[0]
+    num_subset = subset_array.shape[0]
+    C = np.zeros((num_dataset, num_subset), dtype=np.float32)
+
+    # Compute distance in chunks
+    for i in range(0, num_dataset, chunk_size):
+        end_i = min(i + chunk_size, num_dataset)
+        C[i:end_i, :] = distance.cdist(dataset_array[i:end_i], subset_array, metric='euclidean')
+
+    return C
+
+# Sinkhorn distance via POT, using the chunked cost matrix above
+
+def potSinkhorn(subset, fullData):
+    C = compute_cost_matrix_chunked(fullData, subset, chunk_size=1000)
+    C = C / np.max(C)
+
+    reg = 0.01  # Regularization parameter
+    a = np.ones(len(fullData)) / len(fullData)
+    b = np.ones(len(subset)) / len(subset)
+
+    transport_plan = ot.bregman.sinkhorn(a, b, C, reg,
+                                         method="greenkhorn",
+                                         stopThr=1e-3,
+                                         verbose=True)
+    return np.sum(transport_plan * C)
\ No newline at end of file
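compute_cost_matrix_chunked trades one large temporary for many small ones: cdist returns float64, so computing all n x m distances in a single call would allocate an n x m float64 block on top of the float32 result, while the chunked loop never holds more than chunk_size x m float64 values at once. Rough arithmetic, with purely illustrative sizes:

    # Back-of-the-envelope memory for the chunked cost matrix (sizes illustrative)
    n, m, chunk = 1_000_000, 100, 1000

    result_bytes = n * m * 4      # float32 cost matrix C: ~400 MB either way
    direct_temp = n * m * 8       # one-shot cdist temporary: ~800 MB extra
    chunked_temp = chunk * m * 8  # per-chunk temporary: ~0.8 MB

    print(result_bytes, direct_temp, chunked_temp)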
diff --git a/flexibleSubsetSelection/plot.py b/flexibleSubsetSelection/plot.py
index 5833409..a545fd4 100644
--- a/flexibleSubsetSelection/plot.py
+++ b/flexibleSubsetSelection/plot.py
@@ -229,10 +229,10 @@ def scatter(ax: Axes, color: Color, dataset: (Dataset | None) = None,
             colors.extend([color["darkGreen"]] * len(subset.data))
 
         data = np.concatenate(data, axis=0)
-        ax.scatter(data[:, features[0]],
-                   data[:, features[1]],
-                   data[:, features[2]],
-                   c=colors,
+        ax.scatter(data[:, 0],
+                   data[:, 1],
+                   data[:, 2],
+                   c=colors,
                    **parameters)
     else:
         if dataset is not None:
@@ -318,7 +318,7 @@ def histogram(ax: Axes, color: Color, dataset: (Dataset | None) = None,
         numFeatures = len(features)
 
         # Get the positions of each bar group
-        barPositions = range(numBins * numFeatures, step=numBins)
+        barPositions = range(0, numBins * numFeatures, numBins)
 
         for i, feature in enumerate(features):
             # Plot the dataset histogram
@@ -335,7 +335,7 @@
         numFeatures = len(features)
 
         # Get the positions of each bar group
-        barPositions = range(numBins * numFeatures, step=numBins)
+        barPositions = range(0, numBins * numFeatures, numBins)
 
         for i, feature in enumerate(features):
             # Calculate histogram of subset normalized by subset size
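The histogram change is a genuine bug fix, not a style edit: range() accepts only positional arguments, so the old call raises TypeError at runtime. A two-line demonstration:

    numBins, numFeatures = 5, 3
    # range(numBins * numFeatures, step=numBins)  # TypeError: range() takes no keyword arguments
    barPositions = range(0, numBins * numFeatures, numBins)
    print(list(barPositions))                     # [0, 5, 10]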
diff --git a/flexibleSubsetSelection/sets.py b/flexibleSubsetSelection/sets.py
index 0fce74e..2cf3ae3 100644
--- a/flexibleSubsetSelection/sets.py
+++ b/flexibleSubsetSelection/sets.py
@@ -15,10 +15,10 @@
 
 # Local files
 from . import generate
+from . import logger
 
 # Setup logger
-logger = logging.getLogger(__name__)
-
+log = logger.setup(name=__name__)
 
 # --- Dataset and Subset Classes -----------------------------------------------
@@ -49,14 +49,14 @@ def save(self, name: str, fileType: str = "pickle",
             if fileType == "pickle":
                 with open(filePath, "wb") as f:
                     pickle.dump(self.data, f)
-                logger.info(f"Data successfully saved at '%s'.", filePath)
+                log.info("Data successfully saved at '%s'.", filePath)
             elif fileType == "csv":
                 self.data.to_csv(filePath, index=index)
-                logger.info(f"Data successfully saved at '%s'.", filePath)
+                log.info("Data successfully saved at '%s'.", filePath)
             else:
                 raise ValueError(f"Unsupported file type: {fileType}.")
         except Exception as e:
-            logger.exception("Error saving file", e)
+            log.exception("Error saving file")
 
     def load(self, name: str, fileType: str = "pickle",
              directory: (str | Path) = "../data") -> None:
@@ -78,14 +78,14 @@
             if fileType == "pickle":
                 with open(filePath, "rb") as f:
                     self.data = pickle.load(f)
-                logger.info(f"Data successfully loaded from '%s'.", filePath)
+                log.info("Data successfully loaded from '%s'.", filePath)
             elif fileType == "csv":
                 self.data = pd.read_csv(filePath)
-                logger.info(f"Data successfully loaded from '%s'.", filePath)
+                log.info("Data successfully loaded from '%s'.", filePath)
             else:
                 raise ValueError(f"Unsupported file type: {fileType}.")
         except Exception as e:
-            logger.exception("Error loading file", e)
+            log.exception("Error loading file")
 
 class Dataset(Set):
@@ -153,7 +153,7 @@ def __init__(self, data: (pd.DataFrame | np.ndarray | None) = None,
         self.dataArray = self.data[self.features].to_numpy()
         self.indices = {feature: i for i, feature in enumerate(self.features)}
         self.interval = interval
-        logger.info("%s created.", self)
+        log.info("%s created.", self)
 
     def preprocess(self, **parameters) -> None:
         """
@@ -174,9 +174,9 @@
                     setattr(self, name, func(self.dataArray, **params))
                 else:
                     setattr(self, name, preprocessor(self.dataArray))
-                logger.info(f"Data preprocessed with function '%s'.", name)
+                log.info("Data preprocessed with function '%s'.", name)
             except Exception as e:
-                logger.exception("Error applying function '%s'.", name)
+                log.exception("Error applying function '%s'.", name)
 
     def scale(self, interval: (tuple | None) = None) -> None:
         """
@@ -199,7 +199,7 @@
             self.dataArray = (self.dataArray - minVals) / rangeVals
             self.dataArray = self.dataArray * (interval[1] - interval[0])
             self.dataArray += interval[0]
-        logger.info("Data scaled to %s.", interval)
+        log.info("Data scaled to %s.", interval)
 
     def discretize(self, bins: (int | ArrayLike),
                    features: (list | None) = None,
@@ -226,7 +226,7 @@
         try:
             indices = [self.indices[feature] for feature in features]
         except KeyError as e:
-            logger.exception("Feature not found in indices.")
+            log.exception("Feature not found in indices.")
 
         selected = self.dataArray[:, indices]
         discretizer = KBinsDiscretizer(n_bins = bins,
@@ -235,7 +235,7 @@ def discretize(self, bins: (int | ArrayLike),
         setattr(self, array, discretizer.fit_transform(selected))
         self.bins = bins
-        logger.info("%s discretized by %s with %s bins.", array, strategy, bins)
+        log.info("%s discretized by %s with %s bins.", array, strategy, bins)
 
     def encode(self, features: (list | None) = None, dimensions: int = 1,
                array: (str | None) = None) -> None:
@@ -270,7 +270,7 @@
         mask = np.ones(self.dataArray.shape[1], dtype=bool)
         mask[indices] = False
         setattr(self, array, np.hstack((self.dataArray[:, mask], encoded)))
-        logger.info("Data one-hot encoded in '%s'", array)
+        log.info("Data one-hot encoded in '%s'", array)
 
     def __repr__(self) -> str:
         """
@@ -320,7 +320,7 @@ def __init__(self, dataset: Dataset, z: ArrayLike,
         self.data = dataset.data[z == 1].copy()  # subset of the full data
         self.solveTime = solveTime
         self.loss = loss
-        logger.info("Created %s.", self)
+        log.info("Created %s.", self)
 
     def __repr__(self) -> str:
         """
diff --git a/flexibleSubsetSelection/solver.py b/flexibleSubsetSelection/solver.py
index b4c55b4..981e3cb 100644
--- a/flexibleSubsetSelection/solver.py
+++ b/flexibleSubsetSelection/solver.py
@@ -12,9 +12,10 @@
 from .loss import UniCriterion, MultiCriterion
 from .sets import Dataset, Subset
 from .timer import Timer
+from . import logger
 
 # Setup logger
-logger = logging.getLogger(__name__)
+log = logger.setup(__name__)
 
 
 # --- Solver -------------------------------------------------------------------
@@ -36,7 +37,7 @@ def __init__(self, algorithm: Callable,
             loss: The loss function class object.
             savePath: The path to the solver save file.
         """
-        logger.debug("Initializing Solver with algorithm: %s, lossFunction: %s, savePath: %s",
+        log.debug("Initializing Solver with algorithm: %s, lossFunction: %s, savePath: %s",
                      algorithm.__name__, lossFunction, savePath)
 
         self.algorithm = algorithm
@@ -51,9 +52,9 @@
                                      "Dataset Width", "Subset Length",
                                      "Computation Time", "Loss"])
             except FileExistsError:
-                logger.debug("Log file already exists at %s", self.savePath)
+                log.debug("Log file already exists at %s", self.savePath)
 
-        logger.info("Initialized a '%s' solver.", algorithm.__name__)
+        log.info("Initialized a '%s' solver.", algorithm.__name__)
 
     def solve(self, dataset: Dataset, **parameters) -> Subset:
         """
@@ -69,7 +70,7 @@
         with Timer() as timer:
             z, loss = self.algorithm(dataset, self.lossFunction, **parameters)
 
-        logger.info(f"Selected subset with '%s' and '%s' in %ss with %s loss.",
+        log.info("Selected subset with '%s' and '%s' in %ss with %s loss.",
                     self.algorithm.__name__,
                     self.lossFunction,
                     np.round(timer.elapsedTime, 2),
@@ -93,4 +94,4 @@ def save(self, datasetSize: tuple, subsetSize: tuple,
                              datasetSize[1], subsetSize[0],
                              computationTime, loss])
-        logger.info(f"Saved solver performance data to %s.", self.savePath)
\ No newline at end of file
+        log.info("Saved solver performance data to %s.", self.savePath)
\ No newline at end of file
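Solver.solve() times the algorithm, wraps the result in a Subset, and reports through the module logger instead of print. A hedged end-to-end sketch assembled from the diffs above; the exact UniCriterion and Solver constructor arguments are inferred and may differ slightly in the package:

    # End-to-end sketch (constructor details inferred from the diffs above)
    import flexibleSubsetSelection as fss

    dataset = fss.Dataset(randTypes="blobs", size=(200, 2), seed=123456789)

    lossFunction = fss.loss.UniCriterion(objective=fss.objective.distinctness,
                                         solveArray="distances",
                                         selectBy="matrix")
    solver = fss.Solver(algorithm=fss.algorithm.greedySwap,
                        lossFunction=lossFunction)

    subset = solver.solve(dataset=dataset, subsetSize=60)  # logs time and loss
    subset.save("Fig1-designProcess/distinctSubset")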
"seed = 123456789 # random generation seed for replicability\n", + "directory = \"Fig1-designProcess\" # data directory for this notebook\n", + "seed = 123456789 # random seed for replicability\n", + "fss.logger.setup(level=logging.WARNING) # set logging level for the package\n", "\n", "# Create a random blobs dataset to use as our example dataset\n", "dataset = fss.Dataset(randTypes=\"blobs\", size=(200, 2), seed=seed)\n", @@ -97,7 +95,7 @@ " lossFunction = lossFunction)\n", "\n", "# Solve for a convex hull subset\n", - "subsetHull = solver.solve(dataset, epsilon=0, verbose=verbose, initialSize=3)\n", + "subsetHull = solver.solve(dataset, epsilon=0, initialSize=3)\n", "subsetHull.save(f\"{directory}/hullSubset\")" ] }, @@ -130,7 +128,7 @@ "solver.algorithm = fss.algorithm.greedySwap\n", "\n", "# Solve for an outlier subset\n", - "subsetOutliers = solver.solve(dataset, subsetSize=40, verbose=verbose)\n", + "subsetOutliers = solver.solve(dataset, subsetSize=40)\n", "subsetOutliers.save(f\"{directory}/outliersSubset\")" ] }, @@ -161,7 +159,7 @@ " selectBy = \"matrix\")\n", "\n", "# Solve for distinctness subset\n", - "subsetDistinct = solver.solve(dataset=dataset, subsetSize=60, verbose=verbose)\n", + "subsetDistinct = solver.solve(dataset=dataset, subsetSize=60)\n", "subsetDistinct.save(f\"{directory}/distinctSubset\")" ] }, @@ -244,7 +242,7 @@ " weights=[100, 1])\n", "\n", "# Solve for the blended distribution and distinctness subset\n", - "subsetBlend1 = solver.solve(dataset, subsetSize=subsetSize, verbose=verbose)\n", + "subsetBlend1 = solver.solve(dataset, subsetSize=subsetSize)\n", "subsetBlend1.save(f\"{directory}/blend1Subset\")" ] }, @@ -260,7 +258,7 @@ " weights=[10, 1])\n", "\n", "# Solve for the blended distribution and distinctness subset\n", - "subsetBlend2 = solver.solve(dataset, subsetSize=subsetSize, verbose=verbose)\n", + "subsetBlend2 = solver.solve(dataset, subsetSize=subsetSize)\n", "subsetBlend2.save(f\"{directory}/blend2Subset\")" ] }, @@ -276,7 +274,7 @@ " weights=[1, 1])\n", "\n", "# Solve for the blended distribution and distinctness subset\n", - "subsetBlend3 = solver.solve(dataset, subsetSize=subsetSize, verbose=verbose)\n", + "subsetBlend3 = solver.solve(dataset, subsetSize=subsetSize)\n", "subsetBlend3.save(f\"{directory}/blend3Subset\")" ] }, @@ -342,22 +340,13 @@ "solver.algorithm = fss.algorithm.greedyMixed\n", "\n", "# Solve for subsets with 3 different subset sizes\n", - "subsetDistinct1 = solver.solve(dataset = dataset, \n", - " weight = 0.5, \n", - " initialSize = 3, \n", - " verbose = verbose)\n", + "subsetDistinct1 = solver.solve(dataset=dataset, weight=0.5, initialSize=3)\n", "subsetDistinct1.save(f\"{directory}/distinct1Subset\")\n", "\n", - "subsetDistinct2 = solver.solve(dataset = dataset, \n", - " weight = 0.25, \n", - " initialSize = 3, \n", - " verbose = verbose)\n", + "subsetDistinct2 = solver.solve(dataset=dataset, weight=0.25, initialSize=3)\n", "subsetDistinct2.save(f\"{directory}/distinct2Subset\")\n", "\n", - "subsetDistinct3 = solver.solve(dataset = dataset, \n", - " weight = 0.05, \n", - " initialSize = 3, \n", - " verbose = verbose)\n", + "subsetDistinct3 = solver.solve(dataset=dataset, weight=0.05, initialSize=3)\n", "subsetDistinct3.save(f\"{directory}/distinct3Subset\")" ] }, diff --git a/jupyter/Fig2&3-objectives.ipynb b/jupyter/Fig2&3-objectives.ipynb index a026c92..69a7098 100644 --- a/jupyter/Fig2&3-objectives.ipynb +++ b/jupyter/Fig2&3-objectives.ipynb @@ -17,6 +17,10 @@ "metadata": {}, "outputs": [], "source": [ + "# Standard 
library\n", + "import logging\n", + "from pathlib import Path\n", + "\n", "# Third party\n", "import matplotlib.pyplot as plt\n", "import matplotlib_inline\n", @@ -52,10 +56,10 @@ "metadata": {}, "outputs": [], "source": [ - "directory = \"Fig2&3-objectives\" # data directory for this notebook\n", - "subsetSize = 10 # size of subset selected\n", - "verbose = False # verbosity of solvers\n", - "seed = 123 # random generation seed for replicability\n", + "directory = \"Fig2&3-objectives\" # data directory for this notebook\n", + "seed = 123456789 # random seed for replicability\n", + "fss.logger.setup(level=logging.WARNING) # set logging level for the package\n", + "subsetSize = 10 # size of subset selected\n", "\n", "firstDataset = fss.Dataset(randTypes=\"multimodal\", size=(1000, 10), seed=seed)\n", "firstDataset.save(f\"{directory}/firstSetFull\")" @@ -91,19 +95,13 @@ " lossFunction = meanLoss)\n", "\n", "# Solve for mean preserved subsets with a set size\n", - "subsetMeanWorst = solveWorst.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + "subsetMeanWorst = solveWorst.solve(dataset=firstDataset, subsetSize=subsetSize)\n", "subsetMeanWorst.save(f\"{directory}/meanBest\")\n", "\n", - "subsetMeanBest = solveBest.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + "subsetMeanBest = solveBest.solve(dataset=firstDataset, subsetSize=subsetSize)\n", "subsetMeanBest.save(f\"{directory}/meanBest\")\n", "\n", - "subsetMeanGreedy = solveGreedy.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + "subsetMeanGreedy = solveGreedy.solve(dataset=firstDataset, subsetSize=subsetSize)\n", "subsetMeanGreedy.save(f\"{directory}/meanGreedy\")" ] }, @@ -133,18 +131,15 @@ "\n", "# Solve for range preserved subsets with a set size\n", "subsetRangeWorst = solveWorst.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetRangeWorst.save(f\"{directory}/rangeWorst\")\n", "\n", "subsetRangeBest = solveBest.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetRangeBest.save(f\"{directory}/rangeBest\")\n", "\n", "subsetRangeGreedy = solveGreedy.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetRangeGreedy.save(f\"{directory}/rangeGreedy\")" ] }, @@ -174,18 +169,15 @@ "\n", "# Solve for variance preserved subsets with a set size\n", "subsetVarianceWorst = solveWorst.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetVarianceWorst.save(f\"{directory}/varianceWorst\")\n", "\n", "subsetVarianceBest = solveBest.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetVarianceBest.save(f\"{directory}/varianceBest\")\n", "\n", "subsetVarianceGreedy = solveGreedy.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetVarianceGreedy.save(f\"{directory}/varianceGreedy\")" ] }, @@ -210,18 +202,15 @@ "\n", "# Solve for coverage subsets with a set size\n", "subsetCrossingsWorst = solveWorst.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = 
subsetSize)\n", "subsetCrossingsWorst.save(f\"{directory}/crossingsWorst\")\n", "\n", "subsetCrossingsBest = solveBest.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetCrossingsBest.save(f\"{directory}/crossingsBest\")\n", "\n", "subsetCrossingsGreedy = solveGreedy.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetCrossingsGreedy.save(f\"{directory}/crossingsGreedy\")" ] }, @@ -250,18 +239,15 @@ "\n", "# Solve for coverage subsets with a set size\n", "subsetCoverageWorst = solveWorst.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetCoverageWorst.save(f\"{directory}/coverageWorst\")\n", "\n", "subsetCoverageBest = solveBest.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetCoverageBest.save(f\"{directory}/coverageBest\")\n", "\n", "subsetCoverageGreedy = solveGreedy.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetCoverageGreedy.save(f\"{directory}/coverageGreedy\")\n", "subsetCoverageWorst.loss += 60\n", "subsetCoverageBest.loss += 60\n", @@ -293,19 +279,16 @@ "solveGreedy.lossFunction = distributionLoss\n", "\n", "# Solve for coverage subsets with a set size\n", - "subsetDistributionWorst = solveWorst.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + "subsetDistributionWorst = solveWorst.solve(dataset = firstDataset, \n", + " subsetSize = subsetSize)\n", "subsetDistributionWorst.save(f\"{directory}/distributionWorst\")\n", "\n", - "subsetDistributionBest = solveBest.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + "subsetDistributionBest = solveBest.solve(dataset = firstDataset, \n", + " subsetSize = subsetSize)\n", "subsetDistributionBest.save(f\"{directory}/distributionBest\")\n", "\n", "subsetDistributionGreedy = solveGreedy.solve(dataset = firstDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetDistributionGreedy.save(f\"{directory}/distributionGreedy\")" ] }, @@ -470,7 +453,7 @@ " elif metric == \"distribution\":\n", " continue\n", " else:\n", - " fss.plot.errorBar(ax, \n", + " fss.plot.errorBars(ax, \n", " range(len(firstDataset.data.columns)),\n", " metric(firstDataset.data), \n", " metric(subsets[i][j].data), \n", @@ -502,9 +485,7 @@ "metadata": {}, "outputs": [], "source": [ - "subsetSize = 10 # size of subset selected\n", - "verbose = False # verbosity of solvers\n", - "seed = 123 # random generation seed for replicability\n", + "subsetSize = 10 # size of subset selected\n", "\n", "secondDataset = fss.Dataset(randTypes=\"blobs\", size=(1000, 2), seed=seed)\n", "secondDataset.save(f\"{directory}/secondSetFull\")" @@ -538,19 +519,16 @@ " lossFunction = distinctLoss)\n", "\n", "# Solve for coverage subsets with a set size\n", - "subsetDistinctnessWorst = solveWorst.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + "subsetDistinctnessWorst = solveWorst.solve(dataset = secondDataset, \n", + " subsetSize = subsetSize)\n", "subsetDistinctnessWorst.save(f\"{directory}/distinctnessWorst\")\n", "\n", "subsetDistinctnessBest = solveBest.solve(dataset = 
secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetDistinctnessBest.save(f\"{directory}/distinctnessBest\")\n", "\n", "subsetDistinctnessGreedy = solveGreedy.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetDistinctnessGreedy.save(f\"{directory}/distinctnessGreedy\")\n", "\n", "subsetDistinctnessWorst.loss += 60\n", @@ -581,18 +559,15 @@ "\n", "# Solve for coverage subsets with a set size\n", "subsetSpreadWorst = solveWorst.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetSpreadWorst.save(f\"{directory}/spreadWorst\")\n", "\n", "subsetSpreadBest = solveBest.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetSpreadBest.save(f\"{directory}/spreadBest\")\n", "\n", "subsetSpreadGreedy = solveGreedy.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetSpreadGreedy.save(f\"{directory}/spreadGreedy\")\n", "\n", "subsetSpreadWorst.loss += 1000\n", @@ -625,18 +600,15 @@ "\n", "# Solve for coverage subsets with a set size\n", "subsetClusterWorst = solveWorst.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetClusterWorst.save(f\"{directory}/clusterWorst\")\n", "\n", "subsetClusterBest = solveBest.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetClusterBest.save(f\"{directory}/clusterBest\")\n", "\n", "subsetClusterGreedy = solveGreedy.solve(dataset = secondDataset, \n", - " subsetSize = subsetSize, \n", - " verbose = verbose)\n", + " subsetSize = subsetSize)\n", "subsetClusterGreedy.save(f\"{directory}/clusterGreedy\")" ] },