Skip to content

Commit

Permalink
logging.
Browse files Browse the repository at this point in the history
  • Loading branch information
cbbcbail committed Aug 9, 2024
1 parent f54a0c5 commit 25a7734
Show file tree
Hide file tree
Showing 49 changed files with 241 additions and 177 deletions.
Binary file modified data/Fig1-designProcess/blend1Subset.pickle
Binary file not shown.
Binary file modified data/Fig1-designProcess/blend2Subset.pickle
Binary file not shown.
Binary file modified data/Fig1-designProcess/blend3Subset.pickle
Binary file not shown.
Binary file modified data/Fig1-designProcess/distinctSubset.pickle
Binary file not shown.
Binary file modified data/Fig1-designProcess/fullData.pickle
Binary file not shown.
Binary file modified data/Fig1-designProcess/hullSubset.pickle
Binary file not shown.
Binary file modified data/Fig1-designProcess/outliersSubset.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/clusterBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/clusterGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/clusterWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/coverageBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/coverageGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/coverageWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/crossingsBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/crossingsGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/crossingsWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/distinctnessBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/distinctnessGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/distinctnessWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/distributionBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/distributionGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/distributionWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/firstSetFull.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/meanBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/meanGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/rangeBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/rangeGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/rangeWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/secondSetFull.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/spreadBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/spreadGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/spreadWorst.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/varianceBest.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/varianceGreedy.pickle
Binary file not shown.
Binary file modified data/Fig2&3-objectives/varianceWorst.pickle
Binary file not shown.
56 changes: 56 additions & 0 deletions data/solverData.csv
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,59 @@ Loss Function,Algorithm,Dataset Length,Dataset Width,Subset Length,Computation T
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,8,0.09449754096567631,0.0
"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.03406029101461172,-82.66069589660857
"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.16028858395293355,-84.45664299596237
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08147954102605581,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,13,0.11059033405035734,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.0863709170371294,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.0740786250680685,0.0
"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.033992249984294176,-84.54589628703302
"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.1607582913711667,-81.86395834115001
"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,8.82104833330959,-25.188865690667818
"Multi-criterion: 10*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.348917667288333,-78.67270140126473
"Multi-criterion: 1*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,11.100845790933818,-87.84137713354008
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08417670801281929,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08312133280560374,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,11,0.09199658269062638,0.0
"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.03439829172566533,-76.52548733430072
"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.16334479115903378,-71.10396031803283
"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,8.926938584074378,-34.18685064183635
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.08443129109218717,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,12,0.10422499990090728,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,10,0.08931229170411825,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,7,0.07279970869421959,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,10,0.09106883406639099,0.0
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,9,0.07939420826733112,0.0
"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.035825416911393404,-77.29872080465749
"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.15776562504470348,-73.12135522562879
"Uni-criterion: preserveMetric, hull",greedyMinSubset,200,2,8,0.08814891707152128,0.0
"Uni-criterion: sum, outlierness",greedySwap,200,2,40,0.0881077079102397,-99.75139918380013
"Uni-criterion: distinctness, distances",greedySwap,200,2,60,0.2870826250873506,-76.88675037532666
"Multi-criterion: 100*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.175601665861905,-40.11565830101138
"Multi-criterion: 10*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,9.356814333703369,-76.65185552813224
"Multi-criterion: 1*(earthMoversDistance) + 1*(distinctness, distances)",greedySwap,200,2,80,11.165674250107259,-81.05143913364613
"Uni-criterion: preserveMetric, mean",worstOfRandom,1000,10,10,0.0292369588278234,9.198305574729122
"Uni-criterion: preserveMetric, mean",bestOfRandom,1000,10,10,0.028541583102196455,1.3894928584717083
"Uni-criterion: preserveMetric, mean",greedySwap,1000,10,10,0.13707087468355894,0.5377753781976218
"Uni-criterion: preserveMetric, range",worstOfRandom,1000,10,10,0.026246124878525734,109.42792301666869
"Uni-criterion: preserveMetric, range",bestOfRandom,1000,10,10,0.025422124657779932,67.9094572323412
"Uni-criterion: preserveMetric, range",greedySwap,1000,10,10,0.11163204209879041,23.139741839423703
"Uni-criterion: preserveMetric, variance",worstOfRandom,1000,10,10,0.040997583884745836,33.163418779054865
"Uni-criterion: preserveMetric, variance",bestOfRandom,1000,10,10,0.03368045808747411,5.122966903670596
"Uni-criterion: preserveMetric, variance",greedySwap,1000,10,10,0.18226074986159801,1.7610126668908592
Uni-criterion: pcpLineCrossings,worstOfRandom,1000,10,10,0.15455975010991096,251.0
Uni-criterion: pcpLineCrossings,bestOfRandom,1000,10,10,0.1509659173898399,151.0
Uni-criterion: pcpLineCrossings,greedySwap,1000,10,10,1.319750000257045,84.0
Uni-criterion: discreteCoverage,worstOfRandom,1000,10,10,0.03174870880320668,-25.0
Uni-criterion: discreteCoverage,bestOfRandom,1000,10,10,0.026619874872267246,-40.0
Uni-criterion: discreteCoverage,greedySwap,1000,10,10,0.10966670885682106,-53.0
"Uni-criterion: preserveMetric, discreteDistribution",worstOfRandom,1000,10,10,0.029847667086869478,6.097999999999999
"Uni-criterion: preserveMetric, discreteDistribution",bestOfRandom,1000,10,10,0.029892582911998034,2.588
"Uni-criterion: preserveMetric, discreteDistribution",greedySwap,1000,10,10,0.13888537511229515,1.638
"Uni-criterion: distinctness, distances",worstOfRandom,1000,2,10,0.03159516677260399,-3.325124956241212
"Uni-criterion: distinctness, distances",bestOfRandom,1000,2,10,0.03259162465110421,-27.10271348458167
"Uni-criterion: distinctness, distances",greedySwap,1000,2,10,0.1527112922631204,-55.81886893905185
"Uni-criterion: spread, distances",worstOfRandom,1000,2,10,0.03820183267816901,-144.2366774800783
"Uni-criterion: spread, distances",bestOfRandom,1000,2,10,0.030908292159438133,-602.2538652359489
"Uni-criterion: spread, distances",greedySwap,1000,2,10,0.14518300024792552,-1035.7090474217432
Uni-criterion: clusterCenters,worstOfRandom,1000,2,10,0.029493208974599838,11.847265520187392
Uni-criterion: clusterCenters,bestOfRandom,1000,2,10,0.02857504179701209,2.789513312747127
Uni-criterion: clusterCenters,greedySwap,1000,2,10,0.13878262508660555,0.7724079671652213
Binary file modified figures/Fig1-designProcess/express.pdf
Binary file not shown.
Binary file modified figures/Fig2&3-objectives/objectives-1.pdf
Binary file not shown.
Binary file modified figures/Fig2&3-objectives/objectives-2.pdf
Binary file not shown.
3 changes: 2 additions & 1 deletion flexibleSubsetSelection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@
plot, # Plotting functions for datasets and subsets
algorithm, # Algorithms for subset selection
objective, # Objective functions for defining criteria
metric # Data metric functions
metric, # Data metric functions
logger # Logging information to console or files
)
93 changes: 40 additions & 53 deletions flexibleSubsetSelection/algorithm.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
# --- Imports ------------------------------------------------------------------

# Standard library
import time

# Third party
import cvxpy as cp
import gurobipy as gp
import numpy as np
import ot

# Local files
from . import logger

# Setup logger
log = logger.setup(__name__)


# --- Utility ------------------------------------------------------------------

Expand Down Expand Up @@ -47,7 +50,7 @@ def createEnvironment(outputFlag: int = 0):
return environment

def optimize(objective, constraints, environment, solver,
log_file='gurobi_log.txt', verbose=False):
log_file='gurobi_log.txt'):
"""
Sets up a cvxpy problem with given objective and constraints and solves it
using the specified solver.
Expand All @@ -60,8 +63,6 @@ def optimize(objective, constraints, environment, solver,
particularly when using external solvers like Gurobi.
solver: Optional. Solver to be used for solving the optimization
problem.
verbose: Optional. Boolean flag indicating whether to print solver
output messages during optimization. Defaults to False.
log_file: Optional. File path for Gurobi log. Defaults to 'gurobi_log.txt'.
Returns: problem: The cvxpy Problem object after solving, which contains
Expand All @@ -70,16 +71,16 @@ def optimize(objective, constraints, environment, solver,
problem = cp.Problem(objective, constraints)

if solver == cp.GUROBI:
problem.solve(solver=solver, verbose=verbose, env=environment, logfile=log_file)
problem.solve(solver=solver, env=environment, logfile=log_file)
else:
problem.solve(solver=solver, verbose=verbose)
problem.solve(solver=solver)

return problem

# --- Algorithms ---------------------------------------------------------------

def bestOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
maxIterations=None, seed=None, verbose=False, selectBy="row"):
maxIterations=None, seed=None, selectBy="row"):

if maxIterations is None:
maxIterations = dataset.size[0]
Expand All @@ -88,8 +89,7 @@ def bestOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
minLoss = lossFunction.calculate(dataset, z)

for i in range(maxIterations):
if verbose:
print(f"{i}: {minLoss}")
log.debug("%s: %s", i, minLoss)
curZ = randomSample(dataset.size, subsetSize, seed)[0]
curLoss = lossFunction.calculate(dataset, curZ)
if curLoss < minLoss:
Expand All @@ -100,8 +100,7 @@ def bestOfRandom(dataset, lossFunction, subsetSize, minLoss=0,


def averageOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
maxIterations=None, seed=None, verbose=False,
selectBy="row"):
maxIterations=None, seed=None, selectBy="row"):

if maxIterations is None:
maxIterations = dataset.size[0]
Expand All @@ -119,7 +118,7 @@ def averageOfRandom(dataset, lossFunction, subsetSize, minLoss=0,


def worstOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
maxIterations=None, seed=None, verbose=False, selectBy="row"):
maxIterations=None, seed=None, selectBy="row"):
"""
maximize representativeness of a subset of size s of dataset of size n by m
according to metric function f using the p-norm
Expand All @@ -140,8 +139,8 @@ def worstOfRandom(dataset, lossFunction, subsetSize, minLoss=0,
return z, maxLoss


def greedySwap(dataset, lossFunction, subsetSize, minLoss=0,
maxIterations=None, seed=None, verbose=False, selectBy="row"):
def greedySwap(dataset, lossFunction, subsetSize, minLoss=0, maxIterations=None,
seed=None):
"""
A greedy algorithm with a greedy swap heuristic for subset selection.
Expand All @@ -154,14 +153,12 @@ def greedySwap(dataset, lossFunction, subsetSize, minLoss=0,
maxIterations (int, optional): Maximum number of iterations
seed (int, rng, optional): The random seed or NumPy rng for random
generation and reproducibility
verbose (bool, optional): Toggle for verbose logging
Returns:
z (array): Indicator vector of included items in the subset
loss (float): The loss value of the final subset
"""
if verbose:
print(f"Solving for a subset of size {subsetSize}.")
log.debug("Solving for a subset of size %s.", subsetSize)
iterations = 0

# select random starting subset
Expand All @@ -172,8 +169,7 @@ def greedySwap(dataset, lossFunction, subsetSize, minLoss=0,
maxIterations = dataset.size[0]

for i in range(maxIterations):
if verbose:
print(f"Iteration {i}/{maxIterations}: Loss {loss}")
log.debug("Iteration %s/%s: Loss %s.", i, maxIterations, loss)
if i not in indices:
zSwapBest = np.copy(z)
lossSwapBest = loss
Expand Down Expand Up @@ -202,9 +198,8 @@ def greedySwap(dataset, lossFunction, subsetSize, minLoss=0,

return z, loss # return indicator and final loss

def greedyMinSubset(dataset, lossFunction, epsilon,
minError=0, maxIterations=None, seed=None,
verbose=False, initialSize=1):
def greedyMinSubset(dataset, lossFunction, epsilon, minError=0,
maxIterations=None, seed=None, initialSize=1):
"""
A greedy algorithm for subset selection to minimize the size of the subset
such that lossFunction(subset) <= epsilon.
Expand All @@ -218,7 +213,6 @@ def greedyMinSubset(dataset, lossFunction, epsilon,
maxIterations (int, optional): Maximum number of iterations
seed (int, rng, optional): The random seed or NumPy rng for random
generation and reproducibility
verbose (bool, optional): Toggle for verbose logging
initialSize (int, optional): Initial size of the subset
Returns:
Expand All @@ -230,8 +224,7 @@ def greedyMinSubset(dataset, lossFunction, epsilon,
# Extract dataset size
datasetLength = dataset.size[0]

if verbose:
print(f"Solving for a subset such that {lossFunction.objectives.__name__}(subset) <= {epsilon}")
log.debug("Solving for a subset such that loss(subset) <= %s.", epsilon)
iterations = 0
consecutive_stable_iterations = 0
prev_subset_size = initialSize
Expand All @@ -258,8 +251,7 @@ def greedyMinSubset(dataset, lossFunction, epsilon,
maxIterations = datasetLength

while iterations < maxIterations:
if verbose:
print(f"Iteration {iterations}: Loss {current_loss}, Error {error}, Subset Size {np.sum(z)}")
log.debug("Iteration: %s, Loss: %s, Error: %s, Subset Size: %s.", iterations, current_loss, error, np.sum(z))

# Check if error is less than or equal to epsilon
if error <= epsilon:
Expand Down Expand Up @@ -322,9 +314,10 @@ def greedyMinSubset(dataset, lossFunction, epsilon,
return z, error

def greedyMixed(dataset, lossFunction, weight=1.0, minError=0,
maxIterations=None, seed=None, verbose=False, initialSize=1):
maxIterations=None, seed=None, initialSize=1):
"""
A greedy algorithm to minimize the total loss = weight * subsetSize + lossFunction.calculate().
A greedy algorithm to minimize the total
loss = weight * subsetSize + lossFunction.calculate().
Args:
dataset (object): The Dataset class object
Expand All @@ -334,7 +327,6 @@ def greedyMixed(dataset, lossFunction, weight=1.0, minError=0,
maxIterations (int, optional): Maximum number of iterations
seed (int, rng, optional): The random seed or NumPy rng for random
generation and reproducibility
verbose (bool, optional): Toggle for verbose logging
initialSize (int, optional): Initial size of the subset
Returns:
Expand All @@ -344,8 +336,7 @@ def greedyMixed(dataset, lossFunction, weight=1.0, minError=0,
# Extract dataset size
datasetLength = dataset.size[0]

if verbose:
print(f"Solving to minimize total loss = {weight} * subsetSize + lossFunction.calculate()")
log.debug("Solving to minimize total loss = %s * subsetSize + lossFunction.calculate()", weight)
iterations = 0

# Set the random seed
Expand All @@ -371,8 +362,7 @@ def greedyMixed(dataset, lossFunction, weight=1.0, minError=0,
maxIterations = datasetLength

while iterations < maxIterations:
if verbose:
print(f"Iteration {iterations}: Total Loss {total_loss}, Subset Size {np.sum(z)}")
log.debug("Iteration %s: Total Loss %s, Subset Size %s", iterations, total_loss, np.sum(z))

# Check if error is less than or equal to minError
if error <= minError:
Expand Down Expand Up @@ -407,8 +397,7 @@ def greedyMixed(dataset, lossFunction, weight=1.0, minError=0,
return z, total_loss # return indicator vector, and total loss


def optimizeCoverage(dataset, lossFunction, environment, subsetSize,
verbose=False):
def optimizeCoverage(dataset, lossFunction, environment, subsetSize):
"""
Optimize subset selection for coverage while minimizing L1 norm.
Expand Down Expand Up @@ -438,39 +427,37 @@ def optimizeCoverage(dataset, lossFunction, environment, subsetSize,
objective = cp.Minimize(cp.sum(t)) # objective is maximizing the sum of t
problem = optimize(objective=objective,
constraints=constraints,
environment=environment,
verbose=verbose)
environment=environment)

return z.value.astype(int), problem.value


def optimizeSum(dataset, lossFunction, environment, w, solver, verbose=False):
def optimizeSum(dataset, lossFunction, environment, w, solver):

datasetLength = len(dataset.dataArray)
z = cp.Variable(datasetLength, boolean=True) # subset decision vector
constraints = []

objective = cp.Maximize(-w[0]*cp.sum(z) + w[1]*cp.sum(z@dataset.dataArray))
problem = optimize(objective, constraints, environment, solver, verbose)
problem = optimize(objective, constraints, environment, solver)

return z.value.astype(int), problem.value


def optimizeEMD(dataset, lossFunction, environment, subsetSize,
solver=cp.GUROBI, verbose=False):
solver=cp.GUROBI):

datasetLength = len(dataset.dataArray)
z = cp.Variable(datasetLength, boolean=True) # subset decision vector
constraints = [cp.sum(z) == subsetSize]
subset = np.array(z@dataset.dataArray)

objective = cp.Minimize(ot.emd2([], [], ot.dist(subset, dataset.dataArray)))
problem = optimize(objective, constraints, environment, solver, verbose)
problem = optimize(objective, constraints, environment, solver)

return z.value.astype(int), problem.value

def optimizeDistribution(dataset, lossFunction, environment, subsetSize,
verbose=False):
def optimizeDistribution(dataset, lossFunction, environment, subsetSize):

datasetLength, oneHotWidth = dataset.dataArray.shape
z = cp.Variable(datasetLength, boolean=True) # subset decision vector
Expand All @@ -488,19 +475,20 @@ def optimizeDistribution(dataset, lossFunction, environment, subsetSize,
problem = optimize(objective,
constraints,
environment,
solver=cp.GUROBI,
verbose=verbose)
solver=cp.GUROBI)

return z.value.astype(int), problem.value

def sinkhorn(dataset, lossFunction, distanceMatrix, subsetSize, environment, lambdaReg=0.1, verbose=False):
def sinkhorn(dataset, lossFunction, distanceMatrix, subsetSize, environment,
lambdaReg=0.1):

datasetLength = dataset.size[0]

# Decision variables
z = cp.Variable(datasetLength, boolean=True) # Subset selection vector
gamma = cp.Variable((datasetLength, datasetLength), nonneg=True) # Transport plan
gamma = cp.Variable((datasetLength, datasetLength), nonneg=True)

# Define the objective: Minimize the Sinkhorn distance using the precomputed distance matrix
# Minimize the Sinkhorn distance using the precomputed distance matrix
objective = cp.Minimize(cp.sum(cp.multiply(gamma, distanceMatrix)))

# Constraints
Expand All @@ -515,7 +503,6 @@ def sinkhorn(dataset, lossFunction, distanceMatrix, subsetSize, environment, lam
problem = optimize(objective,
constraints,
environment,
solver=cp.GUROBI,
verbose=verbose)
solver=cp.GUROBI)

return z.value.astype(int), problem.value
25 changes: 25 additions & 0 deletions flexibleSubsetSelection/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# --- Imports ------------------------------------------------------------------

# Standard library
import logging
import sys


# --- Logger -------------------------------------------------------------------

def setup(name: str = "flexibleSubsetSelection",
          level: int = logging.NOTSET) -> logging.Logger:
    """
    Sets up and returns a named logger that writes formatted records to
    standard output.

    A stream handler is attached only when the logger has no handler of its
    own, so calling this function repeatedly with the same name does not
    produce duplicate output.

    Args:
        name (str, optional): Name of the logger to configure. Defaults to
            the package name.
        level (int, optional): Logging level applied to the logger and to a
            newly created handler. Defaults to logging.NOTSET.

    Returns:
        log (logging.Logger): The configured logger.
    """
    log = logging.getLogger(name)

    # Inspect only this logger's own handlers. Logger.hasHandlers() also
    # reports handlers on ancestor loggers (e.g. after logging.basicConfig()),
    # which would skip attaching a handler here while propagation is disabled
    # below, leaving the logger without any usable output.
    if not log.handlers:
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(level)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s: %(message)s'
        )
        handler.setFormatter(formatter)
        log.addHandler(handler)

    log.setLevel(level)
    # Records are emitted by this logger's own handler; do not pass them on
    # to ancestor handlers as well.
    log.propagate = False
    return log
Loading

0 comments on commit 25a7734

Please sign in to comment.