Skip to content

Commit

Permalink
add counting hereditary constraints for BAS
Browse files Browse the repository at this point in the history
related to fixing use of SETLENGTH in lm_sampleworep.c and Issue #82
Check Code coverage to see if changes
now bypass SETLENGTH (covered previously in tests)
  • Loading branch information
merliseclyde committed Nov 19, 2024
1 parent b946f81 commit f230f24
Show file tree
Hide file tree
Showing 9 changed files with 755 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ src/copyright.txt
^CRAN-SUBMISSION$
^SECURITY\.md$
^revdep$

R/counting_models_hereditary_constraint.R
33 changes: 25 additions & 8 deletions R/bas_lm.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ normalize.modelprior <- function(modelprior, p) {
normalize.n.models <- function(n.models, p, initprobs, method, bigmem) {

if (is.null(n.models)) {
p = max(30, p)
p = max(25, p)
n.models <- 2^(p - 1)
}
if (n.models > 2^(p - 1)) n.models <- 2^(p - 1)
Expand Down Expand Up @@ -277,8 +277,16 @@ normalize.n.models <- function(n.models, p, initprobs, method, bigmem) {
#' @param force.heredity Logical variable to force all levels of a factor to be
#' included together and to include higher order interactions only if lower
#' order terms are included. Currently supported with `method='MCMC'`
#' and experimentally with `method='BAS'` on non-Solaris platforms.
#' Default is FALSE.
#' and experimentally with `method='BAS'` on non-Solaris platforms. This is not
#' compatible currently for enforcing hierachical constraints with orthogonal
#' polynomials, poly(x, degree = 3). Without hereditary constraints the number of
#' possible models with all possible interactions is 2^2^k where k is the number
#' of factors with more than 2 levels. With hereditary constraints the number of
#' models is much less, but computing this for k can be quite expensive
#' for large k. For the model y ~ x1*x2*x3*x4*x5*x6) there are 7828353 models,
#' which is more than 2^22. With n.models given, this will limit the number of
#' models to the min(n.models, # models under the heredity constraints)
#' Default is FALSE currently.
#' @param pivot Logical variable to allow pivoting of columns when obtaining the
#' OLS estimates of a model so that models that are not full rank can be fit.
#' Defaults to TRUE.
Expand All @@ -287,9 +295,8 @@ normalize.n.models <- function(n.models, p, initprobs, method, bigmem) {
#' @param tol 1e-7 as
#' @param bigmem Logical variable to indicate that there is access to
#' large amounts of memory (physical or virtual) for enumeration
#' with large model spaces, e.g. > 2^25. default; used in determining rank of X^TX in cholesky
#' decomposition
#' with pivoting.
#' with large model spaces, e.g. > 2^25. default; used in determining rank of
#' X^TX in cholesky decomposition with pivoting.
#'
#' @return \code{bas} returns an object of class \code{bas}
#'
Expand Down Expand Up @@ -608,14 +615,18 @@ bas.lm <- function(formula,
if (modelprior$family == "Uniform" ||
modelprior$family == "Bernoulli") {
warning(
"Uniform prior (Bernoulli) distribution on the Model Space are not recommended for p > n; please consider using tr.beta.binomial or power.prior instead"
"Uniform prior (Bernoulli) distribution on the Model Space are not
recommended for p > n; please consider using tr.beta.binomial or power.prior instead"
)
}
if (!is.null(n.models)) n.models = min(2^n, n.models)
}
if (!is.numeric(initprobs)) {
if (n <= p && initprobs == "eplogp") {
stop(
"Full model is not full rank so cannot use the eplogp bound to create starting sampling probabilities, perhpas use 'marg-eplogp' for fiting marginal models\n"
"Full model is not full rank so cannot use the eplogp bound to create
starting sampling probabilities, use 'marg-eplogp'
for fitting marginal models or specify directly\n"
)
}
initprobs <- switch(
Expand Down Expand Up @@ -724,6 +735,12 @@ bas.lm <- function(formula,
force.heredity <- FALSE
}
}

if (force.heredity) {
# limit the number of models to be enumerated based on Dedekind numbers
n.models = count.heredity.models(mf, n.models)
}


prob <- normalize.initprobs.lm(initprobs, p)

Expand Down
315 changes: 315 additions & 0 deletions R/counting_models_hereditary_constraint.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,315 @@
# for checking correctness, generates all models and then omits those that
# fail the hereditary check
no_models_f <- function(f) {

t <- terms(f)
all_factors <- attr(t, "factors")[-1, , drop = FALSE]
o <- attr(t, "order")

# adjacency matrix such that adj[i, j] == 1 implies that i is a parent of j.
adj_mat <- crossprod(all_factors != 0)
for (i in seq_len(nrow(adj_mat))) {
adj_mat[i, ] <- adj_mat[i, ] == o
}
tb_o <- tabulate(o) # how often there is an nth-order interaction

# list of parent indices for each term
required_lower_order <- lapply(seq_len(ncol(all_factors)), function(i) {
return(which(adj_mat[i, seq_len(i - 1)] == 1))
})

# generate all possible models
all_models <- as.matrix(expand.grid(rep(list(0:1), ncol(all_factors))))
colnames(all_models) <- colnames(all_factors)
colIndices <- which(lengths(required_lower_order) > 0)

# loop over all models and remove those that fail the hereditary check
keep <- rep(TRUE, nrow(all_models))
for (i in colIndices) {

n <- length(required_lower_order[[i]])
keep <- keep & (
(rowSums(all_models[ , required_lower_order[[i]], drop = FALSE]) == n) | (all_models[ , i] == 0)
)

}

all_models <- all_models[keep, , drop = FALSE]
all_models
}

brute_force <- function(f) {
nrow(no_models_f(f))
}

# analytic version of brute_force
analytic <- function(f) {

t <- terms(f)
all_factors <- attr(t, "factors")[-1, , drop = FALSE]
o <- attr(t, "order")

# base case 1: all main effects
if (all(o == 1))
return(2^length(o))
else {

adj_mat <- crossprod(all_factors != 0)
for (i in seq_len(nrow(adj_mat))) {
adj_mat[i, ] <- adj_mat[i, ] == o
}
tb_o <- tabulate(o)

# m contains the number of models for each order
m <- 0 * c(tb_o)
m[1] <- 2^tb_o[1] # no. models with only main effects

# loop upward over higher order interactions
for (i in 2:length(tb_o)) {

no_lower_order <- sum(tb_o[seq_len(i - 1)])

for (j in seq_len(tb_o[i])) {

# enumerate all combinations of interactions
combs <- utils::combn(tb_o[i], j)
for (k in seq_len(ncol(combs))) {

# the subset of the adjacency matrix that corresponds to the current combination
subset <- adj_mat[no_lower_order + combs[, k], 1:no_lower_order, drop = FALSE]

# all edges imply a parent which must be included (i.e., a constraint)
constraints <- .colSums(subset, m = j, n = no_lower_order) > 0

# if there are missing constraints, we need to check if they involve
# interactions of various levels. If they do we need to recurse
missing <- which(constraints == 0)
if (any(o[missing] > 1) && length(unique(o[missing])) > 1) {

# recursive case : construct the formula corresponding to the constraint and
# call analytic again
# this can probably be sped up by only passing (subsets) of the adjacency matrix around
cnms <- colnames(adj_mat)[1:no_lower_order]
new_f_str <- paste("y ~", paste(cnms[missing], collapse = " + "))
# print(paste(deparse(f), "->", new_f_str))
new_f <- as.formula(paste("y ~", paste(cnms[missing], collapse = " + ")))
no_models <- Recall(new_f)

} else {

# base case 2: all free variables are effects of the same order (e.g,. all main effects, all 2-way interactions, etc.)
no_constraints <- sum(constraints)
no_models <- 2^(no_lower_order - no_constraints)

}

m[i] <- m[i] + no_models

# print(subset)
# cat(sprintf(
# "order = %d, no_combs = %d, comb = %d, no_constraints = %d, no_models = %d\n\n", i, j, k, no_constraints, 2^(no_lower_order - no_constraints)
# ))
}
}
}
return(sum(m))
}
}

# see BayesFactor:::enumerateGeneralModels for a way to enumerate all possible models
# accounting for the restrictions.
# opts <- BayesFactor:::enumerateGeneralModels(y ~ x1 * x2 * x3 * x4, "withmain")
# for (f in opts)
# cat(deparse(f, 300), ",\n", sep = "")
fs <- list(
y ~ x1,
y ~ x2,
y ~ x1 + x2,
y ~ x1 + x2 + x1:x2,
y ~ x3,
y ~ x1 + x3,
y ~ x2 + x3,
y ~ x1 + x2 + x3,
y ~ x1 + x2 + x1:x2 + x3,
y ~ x1 + x3 + x1:x3,
y ~ x1 + x2 + x3 + x1:x3,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3,
y ~ x2 + x3 + x2:x3,
y ~ x1 + x2 + x3 + x2:x3,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3,
y ~ x4,
y ~ x1 + x4,
y ~ x2 + x4,
y ~ x1 + x2 + x4,
y ~ x1 + x2 + x1:x2 + x4,
y ~ x3 + x4,
y ~ x1 + x3 + x4,
y ~ x2 + x3 + x4,
y ~ x1 + x2 + x3 + x4,
y ~ x1 + x2 + x1:x2 + x3 + x4,
y ~ x1 + x3 + x1:x3 + x4,
y ~ x1 + x2 + x3 + x1:x3 + x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4,
y ~ x2 + x3 + x2:x3 + x4,
y ~ x1 + x2 + x3 + x2:x3 + x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4,
y ~ x1 + x4 + x1:x4,
y ~ x1 + x2 + x4 + x1:x4,
y ~ x1 + x2 + x1:x2 + x4 + x1:x4,
y ~ x1 + x3 + x4 + x1:x4,
y ~ x1 + x2 + x3 + x4 + x1:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x1:x4,
y ~ x1 + x3 + x1:x3 + x4 + x1:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x1:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x1:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4,
y ~ x2 + x4 + x2:x4,
y ~ x1 + x2 + x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x4 + x2:x4,
y ~ x2 + x3 + x4 + x2:x4,
y ~ x1 + x2 + x3 + x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x2:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x2:x4,
y ~ x2 + x3 + x2:x3 + x4 + x2:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x2:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x2:x4,
y ~ x1 + x2 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4,
y ~ x1 + x2 + x1:x2 + x4 + x1:x4 + x2:x4 + x1:x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4,
y ~ x3 + x4 + x3:x4,
y ~ x1 + x3 + x4 + x3:x4,
y ~ x2 + x3 + x4 + x3:x4,
y ~ x1 + x2 + x3 + x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x3:x4,
y ~ x1 + x3 + x1:x3 + x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x3:x4,
y ~ x2 + x3 + x2:x3 + x4 + x3:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x3:x4,
y ~ x1 + x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x3 + x1:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x3:x4,
y ~ x2 + x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x2:x4 + x3:x4,
y ~ x2 + x3 + x2:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4,
y ~ x1 + x3 + x1:x3 + x4 + x1:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x1:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x1:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x1:x3:x4,
y ~ x2 + x3 + x2:x3 + x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x3:x4 + x1:x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x1:x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x1:x3:x4 + x2:x3:x4,
y ~ x1 + x2 + x1:x2 + x3 + x1:x3 + x2:x3 + x1:x2:x3 + x4 + x1:x4 + x2:x4 + x1:x2:x4 + x3:x4 + x1:x3:x4 + x2:x3:x4 + x1:x2:x3:x4
)

# validate that analytic result matches brute force
result <- rbind(sapply(fs, brute_force), sapply(fs, analytic))
all(result[1, ] == result[2, ])

# still works for e.g., 30 predictors with 1 4-way interaction
f <- as.formula((paste("y ~", paste0("x", 1:4, collapse = " * "), " + ", paste0("x", 5:30, collapse = " + "))))
analytic(f) # == analytic(y~x1*x2*x3*x4) * 2^26

# Dedekind numbers, see https://oeis.org/A014466
no_x <- 6 # 6 already takes a minute...
full_models <- lapply(seq_len(no_x), function(x) {
as.formula(paste("y ~", paste0("x", seq_len(x), collapse = " * ")))
})
sapply(full_models, analytic) # takes a minute or so

# this also works for formulas that do not respect hereditary
# main effects + 1 4-way interaction
f <- y ~ x1 + x2 + x3 + x1:x2:x3:x4
# main effects are present for the 4-way interaction, but
# no 2-way or 3-way interactions are added
no_models_f(f)
analytic(f) # matches

Loading

0 comments on commit f230f24

Please sign in to comment.