Skip to content

Commit

Permalink
Merge pull request #377 from ecmerkle/master
Browse files Browse the repository at this point in the history
add option allow.empty.cell
  • Loading branch information
yrosseel authored Aug 5, 2024
2 parents 6606f50 + fa2e74a commit 56b8a11
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 15 deletions.
19 changes: 14 additions & 5 deletions R/lav_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ lavData <- function(data = NULL, # data.frame
level.label <- character(0L)
}

# allow empty categories of ordinal variable
allow.empty.cell <- lavoptions$allow.empty.cell

# block.labels
block.label <- character(0L)
if (length(group.label) > 0L && length(level.label) == 0L) {
Expand Down Expand Up @@ -147,7 +150,8 @@ lavData <- function(data = NULL, # data.frame
ov.names.l = ov.names.l,
std.ov = std.ov,
missing = missing,
allow.single.case = allow.single.case
allow.single.case = allow.single.case,
allow.empty.cell = allow.empty.cell
)
sample.cov <- NULL # not needed, but just in case
}
Expand Down Expand Up @@ -504,7 +508,8 @@ lav_data_full <- function(data = NULL, # data.frame
ov.names.l = list(), # var per level
std.ov = FALSE, # standardize ov's?
missing = "listwise", # remove missings?
allow.single.case = FALSE # allow single case?
allow.single.case = FALSE, # allow single case?
allow.empty.cell = FALSE
) {
# number of groups and group labels
if (!is.null(group) && length(group) > 0L) {
Expand Down Expand Up @@ -552,6 +557,9 @@ lav_data_full <- function(data = NULL, # data.frame
group <- character(0L)
}

# ensure allow.empty.cell is logical
if (is.null(allow.empty.cell)) allow.empty.cell <- FALSE

# sampling weights
if (!is.null(sampling.weights)) {
if (is.character(sampling.weights)) {
Expand Down Expand Up @@ -708,7 +716,7 @@ lav_data_full <- function(data = NULL, # data.frame
ov <- lav_dataframe_vartable(
frame = data, ov.names = ov.names,
ov.names.x = ov.names.x, ordered = ordered,
as.data.frame. = FALSE
as.data.frame. = FALSE, allow.empty.cell = allow.empty.cell
)

# do some checking
Expand Down Expand Up @@ -829,6 +837,7 @@ lav_data_full <- function(data = NULL, # data.frame
weights <- vector("list", length = ngroups)

# collect information per upper-level group
datam <- data.matrix(data)
for (g in 1:ngroups) {
# extract variables in correct order
if (nlevels > 1L) {
Expand Down Expand Up @@ -895,7 +904,7 @@ lav_data_full <- function(data = NULL, # data.frame
}

# extract data
X[[g]] <- data.matrix(data[case.idx[[g]], ov.idx, drop = FALSE])
X[[g]] <- datam[case.idx[[g]], ov.idx, drop = FALSE]
dimnames(X[[g]]) <- NULL ### copy?

# sampling weights (but no normalization yet)
Expand Down Expand Up @@ -924,7 +933,7 @@ lav_data_full <- function(data = NULL, # data.frame
if (length(user.ordered.idx) > 0L) {
for (i in user.ordered.idx) {
X[[g]][, i][is.na(X[[g]][, i])] <- NA # change NaN to NA
X[[g]][, i] <- as.numeric(as.factor(X[[g]][, i]))
if (!allow.empty.cell) X[[g]][, i] <- as.numeric(as.factor(X[[g]][, i]))
# possible alternative to the previous two lines:
# X[[g]][,i] <- as.numeric(factor(X[[g]][,i], exclude = c(NA, NaN)))
}
Expand Down
18 changes: 14 additions & 4 deletions R/lav_dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ lav_dataframe_vartable <- function(frame = NULL, ov.names = NULL,
ov.names.x = NULL,
ordered = NULL,
factor = NULL,
as.data.frame. = FALSE) {
as.data.frame. = FALSE,
allow.empty.cell = FALSE) {
if (missing(ov.names)) {
var.names <- names(frame)
} else {
Expand Down Expand Up @@ -58,9 +59,18 @@ lav_dataframe_vartable <- function(frame = NULL, ov.names = NULL,
# handle ordered/factor
if (!is.null(ordered) && var.names[i] %in% ordered) {
type.x <- "ordered"
lev <- sort(unique(x)) # we assume integers!
nlev[i] <- length(lev)
lnam[i] <- paste(lev, collapse = "|")
if (allow.empty.cell) {
nlev[i] <- max(as.numeric(x))
if (inherits(x, 'factor')) {
lnam[i] <- paste(levels(x), collapse = "|")
} else {
lnam[i] <- paste(1:nlev[i], collapse = "|")
}
} else {
lev <- sort(unique(x)) # we assume integers!
nlev[i] <- length(lev)
lnam[i] <- paste(lev, collapse = "|")
}
user[i] <- 1L
} else if (!is.null(factor) && var.names[i] %in% factor) {
type.x <- "factor"
Expand Down
3 changes: 2 additions & 1 deletion R/lav_muthen1984.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ muthen1984 <- function(Data = NULL,
zero.keep.margins = TRUE,
zero.cell.warn = FALSE,
zero.cell.tables = TRUE,
allow.empty.cell = TRUE,
group = 1L) { # group only for error messages

# just in case Data is a vector
Expand Down Expand Up @@ -58,7 +59,7 @@ muthen1984 <- function(Data = NULL,
step1 <- lav_samplestats_step1(
Y = Data, wt = wt, ov.names = ov.names,
ov.types = ov.types, ov.levels = ov.levels, ov.names.x = ov.names.x,
eXo = eXo, scores.flag = WLS.W, group = group
eXo = eXo, scores.flag = WLS.W, allow.empty.cell = allow.empty.cell, group = group
)

FIT <- step1$FIT
Expand Down
1 change: 1 addition & 0 deletions R/lav_options_default.R
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ lav_options_default <- function() {
nm = "[0, 1]", oklen = c(1L, -2L))
elm("zero.keep.margins", "default", chr = "default", bl = TRUE)
elm("zero.cell.warn", FALSE, bl = TRUE) # since 0.6-1
elm("allow.empty.cell", FALSE, bl = TRUE) # since 0.6-19
elm("cat.wls.w", TRUE, bl = TRUE) # since 0.6-18

# starting values (char values checked in lav_options_set())
Expand Down
7 changes: 5 additions & 2 deletions R/lav_samplestats.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ lav_samplestats_from_data <- function(lavdata = NULL,
zero.add <- lavoptions$zero.add
zero.keep.margins <- lavoptions$zero.keep.margins
zero.cell.warn <- lavoptions$zero.cell.warn
allow.empty.cell <- lavoptions$allow.empty.cell
dls.a <- lavoptions$estimator.args$dls.a
dls.GammaNT <- lavoptions$estimator.args$dls.GammaNT

Expand Down Expand Up @@ -299,7 +300,8 @@ lav_samplestats_from_data <- function(lavdata = NULL,
zero.add = zero.add,
zero.keep.margins = zero.keep.margins,
zero.cell.warn = FALSE,
zero.cell.tables = TRUE
zero.cell.tables = TRUE,
allow.empty.cell = allow.empty.cell
)
} else {
CAT <- muthen1984(
Expand All @@ -315,7 +317,8 @@ lav_samplestats_from_data <- function(lavdata = NULL,
zero.add = zero.add,
zero.keep.margins = zero.keep.margins,
zero.cell.warn = FALSE,
zero.cell.tables = TRUE
zero.cell.tables = TRUE,
allow.empty.cell = allow.empty.cell
)
}
lav_verbose(current.verbose)
Expand Down
35 changes: 32 additions & 3 deletions R/lav_samplestats_step1.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ lav_samplestats_step1 <- function(Y,
ov.names.x = character(0L),
eXo = NULL,
scores.flag = TRUE, # scores?
allow.empty.cell = TRUE, # allow empty categories?
group = 1L) { # for error message


Expand Down Expand Up @@ -73,13 +74,13 @@ lav_samplestats_step1 <- function(Y,
# check if we have enough categories in this group
# FIXME: should we more tolerant here???
y.freq <- tabulate(Y[, i], nbins = ov.levels[i])
if (length(y.freq) != ov.levels[i]) {
if (length(y.freq) != ov.levels[i] & !allow.empty.cell) {
lav_msg_stop(gettextf(
"variable %1$s has fewer categories (%2$s) than
expected (%3$s) in group %4$s", ov.names[i],
length(y.freq), ov.levels[i], group))
}
if (any(y.freq == 0L)) {
if (any(y.freq == 0L) & !allow.empty.cell) {
lav_msg_stop(gettextf(
"some categories of variable `%1$s' are empty in group %2$s;
frequencies are [%3$s]", ov.names[i], group,
Expand All @@ -93,7 +94,35 @@ lav_samplestats_step1 <- function(Y,
}
FIT[[i]] <- fit
TH[[i]] <- fit$theta[fit$th.idx]
TH.NOX[[i]] <- lav_uvord_th(y = Y[, i], wt = wt)
fit.nox <- lav_uvord_th(y = Y[, i], wt = wt)
if (allow.empty.cell) {
if (any(y.freq == 0L)) {
nzidx <- y.freq != 0L
exidx <- (nzidx[1:(ov.levels[i] - 1)] * nzidx[2:ov.levels[i]]) == 1
misidx <- (nzidx[1:(ov.levels[i] - 1)] * nzidx[2:ov.levels[i]]) == 0
TH[[i]] <- TH.NOX[[i]] <- rep(0, ov.levels[i] - 1)
TH[[i]][exidx] <- fit$theta[fit$th.idx]
TH.NOX[[i]][exidx] <- fit.nox[exidx]

for (k in which(misidx)) {
if (k == 1) {
TH[[i]][k] <- -4
TH.NOX[[i]][k] <- -4
} else if (k == (ov.levels[i] - 1)) {
TH[[i]][k] <- 4
TH.NOX[[i]][k] <- 4
} else {
TH[[i]][k] <- TH[[i]][(k - 1)] + .01
TH.NOX[[i]][k] <- TH.NOX[[i]][(k - 1)] + .01
}
}
} else if (length(y.freq) != ov.levels[i]) {
nz <- ov.levels[i] - length(y.freq)
TH[[i]] <- c(TH[[i]], TH[[i]][length(y.freq)] + (1:nz) * .01)
}
} else {
TH.NOX[[i]] <- fit.nox
}
if (scores.flag) {
scores <- lav_uvord_scores(y = Y[, i], X = eXo, wt = wt)
SC.TH[, th.idx] <- scores[, fit$th.idx, drop = FALSE]
Expand Down
3 changes: 3 additions & 0 deletions man/lavOptions.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,9 @@ Categorical estimation options:
\item{\code{zero.cell.warn}:}{Logical. Only used if some observed
endogenous variables are categorical. If \code{TRUE}, give a warning if
one or more cells of a bivariate frequency table are empty.}
\item{\code{allow.empty.cell}:}{Logical. If \code{TRUE}, ignore
situations where an ordinal variable has fewer categories than
expected, or where a category is empty in a specific group.}
}

Starting values options:
Expand Down

0 comments on commit 56b8a11

Please sign in to comment.