Implementing credible intervals (#42)
* creating credible intervals output

* updating credible intervals in experiments

* update example notebooks

* update readme and output order

* readme update

* readme update

* readme update

* poetry update and version increase

* docstring updates
Matt52 authored Sep 28, 2024
1 parent 0d83395 commit 15ec6da
Showing 25 changed files with 1,286 additions and 616 deletions.
251 changes: 143 additions & 108 deletions README.md

Large diffs are not rendered by default.
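The README changes document the new credible interval output. As a hedged sketch of how the updated interface is meant to be used (BinaryDataTest, its add_variant_data method, and the exact result keys come from the library's existing API rather than being fully shown in this diff, so treat them as assumptions):

    from bayesian_testing.experiments import BinaryDataTest

    test = BinaryDataTest()
    test.add_variant_data("A", [1, 0, 1, 1, 0, 1, 0, 0])
    test.add_variant_data("B", [0, 0, 1, 0, 1, 0, 0, 1])

    # evaluate() now also reports a quantile-based credible interval per variant,
    # controlled by the new interval_alpha parameter (default 0.95).
    for row in test.evaluate(sim_count=20000, seed=42, interval_alpha=0.95):
        print(row["variant"], row["posterior_mean"], row["credible_interval"])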

51 changes: 45 additions & 6 deletions bayesian_testing/experiments/base.py
@@ -18,15 +18,23 @@ def variant_names(self):
return [k for k in self.data]

def eval_simulation(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
) -> Tuple[dict, dict]:
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> Tuple[dict, dict, dict]:
"""
Should be implemented in each individual experiment.
"""
raise NotImplementedError

def probabs_of_being_best(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> dict:
"""
Calculate probabilities of being best for a current class state.
@@ -36,17 +44,22 @@ def probabs_of_being_best(
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
pbbs : Dictionary with probabilities of being best for all variants in experiment.
"""
pbbs, loss = self.eval_simulation(sim_count, seed, min_is_best)
pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha)

return pbbs

def expected_loss(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> dict:
"""
Calculate expected loss for a current class state.
@@ -56,15 +69,41 @@ def expected_loss(
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
loss : Dictionary with expected loss for all variants in experiment.
"""
pbbs, loss = self.eval_simulation(sim_count, seed, min_is_best)
pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha)

return loss

def credible_intervals(
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> dict:
"""
Calculate quantile-based credible intervals for a current class state.
Parameters
----------
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
intervals : Dictionary with quantile-based credible intervals for all variants.
"""
pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha)

return intervals

def delete_variant(self, name: str) -> None:
"""
Delete variant and all its data from experiment.
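Besides the widened eval_simulation signature, the base class gains a public credible_intervals method next to probabs_of_being_best and expected_loss; each of the three runs one simulation and returns a single dictionary. A minimal usage sketch, assuming a concrete test instance such as the BinaryDataTest example above:

    # Each call runs its own simulation; pass a seed for reproducible numbers.
    pbbs = test.probabs_of_being_best(sim_count=20000, seed=42)
    loss = test.expected_loss(sim_count=20000, seed=42)
    intervals = test.credible_intervals(sim_count=20000, seed=42, interval_alpha=0.9)

    # intervals maps each variant name to its quantile-based credible interval,
    # e.g. {"A": [lower, upper], "B": [lower, upper]}; the exact container type
    # is not visible in this diff.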
41 changes: 33 additions & 8 deletions bayesian_testing/experiments/binary.py
@@ -39,32 +39,51 @@ def b_priors(self):
return [self.data[k]["b_prior"] for k in self.data]

def eval_simulation(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
) -> Tuple[dict, dict]:
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> Tuple[dict, dict, dict]:
"""
Calculate probabilities of being best and expected loss for a current class state.
Calculate probabilities of being best, expected loss and credible intervals for a current
class state.
Parameters
----------
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
res_loss : Dictionary with expected loss for all variants in experiment.
res_intervals : Dictionary with quantile-based credible intervals for all variants.
"""
pbbs, loss = eval_bernoulli_agg(
self.totals, self.positives, self.a_priors, self.b_priors, sim_count, seed, min_is_best
pbbs, loss, intervals = eval_bernoulli_agg(
self.totals,
self.positives,
self.a_priors,
self.b_priors,
sim_count,
seed,
min_is_best,
interval_alpha,
)
res_pbbs = dict(zip(self.variant_names, pbbs))
res_loss = dict(zip(self.variant_names, loss))
res_intervals = dict(zip(self.variant_names, intervals))

return res_pbbs, res_loss
return res_pbbs, res_loss, res_intervals

def evaluate(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> List[dict]:
"""
Evaluation of experiment.
@@ -74,6 +93,7 @@ def evaluate(
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
@@ -85,6 +105,7 @@ def evaluate(
"positives",
"positive_rate",
"posterior_mean",
"credible_interval",
"prob_being_best",
"expected_loss",
]
@@ -93,15 +114,19 @@ def evaluate(
round((i[2] + i[0]) / (i[2] + i[3] + i[1]), 5)
for i in zip(self.positives, self.totals, self.a_priors, self.b_priors)
]
eval_pbbs, eval_loss = self.eval_simulation(sim_count, seed, min_is_best)
eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
sim_count, seed, min_is_best, interval_alpha
)
pbbs = list(eval_pbbs.values())
loss = list(eval_loss.values())
intervals = list(eval_intervals.values())
data = [
self.variant_names,
self.totals,
self.positives,
positive_rate,
posterior_mean,
intervals,
pbbs,
loss,
]
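eval_bernoulli_agg itself is not part of the rendered diff, but a quantile-based credible interval is conventionally taken from the same posterior samples already used for the probability-of-being-best and expected-loss estimates. The sketch below only illustrates that idea; the function name, the Beta(0.5, 0.5) default priors, and the rounding are invented for the example and are not the library's actual implementation:

    import numpy as np

    def beta_credible_interval(positives, totals, a_prior=0.5, b_prior=0.5,
                               interval_alpha=0.95, sim_count=20000, seed=None):
        # Draw posterior samples for a Beta-Bernoulli model and take symmetric quantiles.
        rng = np.random.default_rng(seed)
        samples = rng.beta(a_prior + positives, b_prior + totals - positives, size=sim_count)
        lower_q = (1 - interval_alpha) / 2
        return list(np.round(np.quantile(samples, [lower_q, 1 - lower_q]), 7))

    # e.g. beta_credible_interval(positives=120, totals=1000) -> roughly [0.10, 0.14]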
33 changes: 26 additions & 7 deletions bayesian_testing/experiments/delta_lognormal.py
@@ -73,23 +73,30 @@ def w_priors(self):
return [self.data[k]["w_prior"] for k in self.data]

def eval_simulation(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
) -> Tuple[dict, dict]:
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> Tuple[dict, dict, dict]:
"""
Calculate probabilities of being best and expected loss for a current class state.
Calculate probabilities of being best, expected loss and credible intervals for a current
class state.
Parameters
----------
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
res_loss : Dictionary with expected loss for all variants in experiment.
res_intervals : Dictionary with quantile-based credible intervals for all variants.
"""
pbbs, loss = eval_delta_lognormal_agg(
pbbs, loss, intervals = eval_delta_lognormal_agg(
self.totals,
self.positives,
self.sum_logs,
@@ -103,14 +110,20 @@ def eval_simulation(
w_priors=self.w_priors,
seed=seed,
min_is_best=min_is_best,
interval_alpha=interval_alpha,
)
res_pbbs = dict(zip(self.variant_names, pbbs))
res_loss = dict(zip(self.variant_names, loss))
res_intervals = dict(zip(self.variant_names, intervals))

return res_pbbs, res_loss
return res_pbbs, res_loss, res_intervals

def evaluate(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> List[dict]:
"""
Evaluation of experiment.
@@ -120,6 +133,7 @@ def evaluate(
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
@@ -133,6 +147,7 @@ def evaluate(
"avg_values",
"avg_positive_values",
"posterior_mean",
"credible_interval",
"prob_being_best",
"expected_loss",
]
@@ -174,9 +189,12 @@ def evaluate(
b_posterior_ig,
)
]
eval_pbbs, eval_loss = self.eval_simulation(sim_count, seed, min_is_best)
eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
sim_count, seed, min_is_best, interval_alpha
)
pbbs = list(eval_pbbs.values())
loss = list(eval_loss.values())
intervals = list(eval_intervals.values())
data = [
self.variant_names,
self.totals,
@@ -185,6 +203,7 @@ def evaluate(
avg_values,
avg_pos_values,
posterior_mean,
intervals,
pbbs,
loss,
]
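For the delta-lognormal experiment, the posterior_mean reported next to the new credible_interval column combines a conversion part and a value part. The snippet below shows the standard delta-lognormal mean under that decomposition; whether it matches the partially collapsed list comprehension above is an assumption, since the full expression is hidden in this view (a_post and b_post are Beta posterior parameters for the zero/non-zero split, mu_post and sig2_post summarize the posterior of the log of the positive values):

    import math

    # Illustrative only: mean value per session for a delta-lognormal posterior.
    def delta_lognormal_mean(a_post, b_post, mu_post, sig2_post):
        return (a_post / (a_post + b_post)) * math.exp(mu_post + sig2_post / 2)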
33 changes: 26 additions & 7 deletions bayesian_testing/experiments/delta_normal.py
@@ -67,23 +67,30 @@ def w_priors(self):
return [self.data[k]["w_prior"] for k in self.data]

def eval_simulation(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
) -> Tuple[dict, dict]:
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> Tuple[dict, dict, dict]:
"""
Calculate probabilities of being best and expected loss for a current class state.
Calculate probabilities of being best, expected loss and credible intervals for a current
class state.
Parameters
----------
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
res_loss : Dictionary with expected loss for all variants in experiment.
res_intervals : Dictionary with quantile-based credible intervals for all variants.
"""
pbbs, loss = eval_delta_normal_agg(
pbbs, loss, intervals = eval_delta_normal_agg(
self.totals,
self.non_zeros,
self.sum_values,
@@ -97,14 +104,20 @@ def eval_simulation(
w_priors=self.w_priors,
seed=seed,
min_is_best=min_is_best,
interval_alpha=interval_alpha,
)
res_pbbs = dict(zip(self.variant_names, pbbs))
res_loss = dict(zip(self.variant_names, loss))
res_intervals = dict(zip(self.variant_names, intervals))

return res_pbbs, res_loss
return res_pbbs, res_loss, res_intervals

def evaluate(
self, sim_count: int = 20000, seed: int = None, min_is_best: bool = False
self,
sim_count: int = 20000,
seed: int = None,
min_is_best: bool = False,
interval_alpha: float = 0.95,
) -> List[dict]:
"""
Evaluation of experiment.
@@ -114,6 +127,7 @@ def evaluate(
sim_count : Number of simulations to be used for probability estimation.
seed : Random seed.
min_is_best : Option to change "being best" to a minimum. Default is maximum.
interval_alpha : Credible interval probability (value between 0 and 1).
Returns
-------
@@ -127,6 +141,7 @@ def evaluate(
"avg_values",
"avg_non_zero_values",
"posterior_mean",
"credible_interval",
"prob_being_best",
"expected_loss",
]
@@ -144,9 +159,12 @@ def evaluate(
self.b_priors_beta,
)
]
eval_pbbs, eval_loss = self.eval_simulation(sim_count, seed, min_is_best)
eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
sim_count, seed, min_is_best, interval_alpha
)
pbbs = list(eval_pbbs.values())
loss = list(eval_loss.values())
intervals = list(eval_intervals.values())
data = [
self.variant_names,
self.totals,
@@ -155,6 +173,7 @@ def evaluate(
avg_values,
avg_pos_values,
posterior_mean,
intervals,
pbbs,
loss,
]
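Both delta experiments (delta_lognormal.py above and delta_normal.py here) get the identical treatment: interval_alpha is threaded through eval_simulation into the aggregation function, and evaluate() gains a credible_interval column next to posterior_mean. A hedged usage sketch for the lognormal variant, again assuming the class and method names of the library's existing API (DeltaLognormalDataTest, add_variant_data) rather than anything shown in this diff:

    from bayesian_testing.experiments import DeltaLognormalDataTest

    # Revenue-per-session style data: mostly zeros, right-skewed positive values.
    rev_test = DeltaLognormalDataTest()
    rev_test.add_variant_data("A", [0, 0, 10.2, 0, 3.5, 0, 0, 8.1])
    rev_test.add_variant_data("B", [0, 4.0, 0, 0, 12.3, 0, 2.2, 0])

    # Request a 90% interval instead of the default 95%.
    print(rev_test.credible_intervals(seed=42, interval_alpha=0.9))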