diff --git a/R/gt_univariate_crosstabs.R b/R/gt_univariate_crosstabs.R new file mode 100644 index 0000000..abb022a --- /dev/null +++ b/R/gt_univariate_crosstabs.R @@ -0,0 +1,405 @@ + +#' A {gtsummary} wrapper function that takes a gtsummary univariate regression +#' table and adds appropriate cross tabs by exposure and outcome +#' +#'@param x Object with class `tbl_uvregression` from the gtsummary +#'tbl_uvregression function or `tbl_cmh` from the epitabulate tbl_cmh function. +#' +#'@param wide TRUE/FALSE to specify whether would like to have the output in +#'wide format. Results in four columns rather than two, but in a single row. +#'This is only works for dichotomous variables (yes/no, TRUE/FALSE, +#'male/female), others will be dropped with a warning message. (Default is FALSE) +#' +#'@importFrom gtsummary modify_table_styling modify_table_body modify_header modify_fmt_fun style_number modify_footnote +#'@importFrom dplyr mutate relocate +#' +#'@references Inspired by Daniel Sjoberg, +#' see [gtsummary github repo](https://github.com/ddsjoberg/gtsummary) + +add_crosstabs <- function(x, wide = FALSE) { + + # checking that input is class tbl_summary + if (!(inherits(x, c("tbl_uvregression", "tbl_cmh")))) { + stop("`x` must be class 'tbl_uvregression' or 'tbl_cmh'") + } + + # grab the table + the_table <- x + + # grab the type of regression + regression_type <- x$table_body$coefficients_type[1L] + + if (!regression_type %in% c("logistic", "poisson")) { + stop("The regression must be type 'logistic' or 'poisson' (negative binomials appear as 'poisson')") + } + + # hide the original N variable + # (shows up if the original tbl_uvregression didn't specify hide_n = TRUE) + ## TODO: this is an ugly quick fix to allow this to work for tbl_cmh() + if(inherits(x, c("tbl_uvregression"))){ + if (!the_table$inputs$hide_n) { + the_table <- + gtsummary::modify_table_styling( + the_table, + columns = "stat_n", + hide = TRUE + ) + } + } + + + # ODDS Ratios ---------------------------------------------------------------- + if (regression_type == "logistic") { + + # edit the table body (contents of table) + the_table <- gtsummary::modify_table_body( + the_table, + # define a function to make two steps and avoid piping + fun = function(.x){ + # remove cases from total obs to get control counts + .x <- dplyr::mutate(.x, n_nonevent = n_obs - n_event) + # move case and control counts before estimate + .x <- dplyr::relocate(.x, c(n_event, n_nonevent), .before = estimate) + } + ) + + # rename columns appropriately + the_table <- gtsummary::modify_header( + the_table, + n_nonevent = "**Control (n)**", + n_event = "**Case (n)**") + + # set the columns to numeric + the_table <- gtsummary::modify_fmt_fun( + the_table, + c(n_event, n_nonevent) ~ gtsummary::style_number) + + + } + + # RISK Ratios ---------------------------------------------------------------- + # note both poisson (glm) and negbin (MASS) are "poisson" in gtsummary + if (regression_type == "poisson" & + is.null(the_table$inputs$method.args$offset)){ + + # edit the table body + the_table <- gtsummary::modify_table_body( + the_table, + # define a function to make two steps and avoid piping + fun = function(.x){ + # move case and control counts before estimate + .x <- dplyr::relocate(.x, c(n_event, n_obs), .before = estimate) + # change the estimate label from IRR to RR (doesn't change col header, could remove) + .x <- dplyr::mutate(.x, coefficients_label = "RR") + } + ) + + # rename columns appropriately + the_table <- gtsummary::modify_header( + the_table, + n_obs = "**Total exposed (N)**", + n_event = "**Cases exposed (n)**", + estimate = "**RR**" + ) + + # set the columns to numeric + the_table <- gtsummary::modify_fmt_fun( + the_table, + c(n_event, n_obs) ~ gtsummary::style_number) + + + # update the footnote to say risk ratio + the_table <- gtsummary::modify_footnote( + the_table, + estimate = "RR = Risk Ratio", + abbreviation = TRUE + ) + } + + # INCIDENCE RATE Ratios ------------------------------------------------------ + # note both poisson (glm) and negbin (MASS) are "poisson" in gtsummary + if (regression_type == "poisson" & + !is.null(the_table$inputs$method.args$offset)){ + + # rename columns appropriately + the_table <- gtsummary::modify_header( + the_table, + exposure = "**Total exposed (person-time)**", + n_event = "**Cases exposed (n)**") + + # set the columns to numeric + the_table <- gtsummary::modify_fmt_fun( + the_table, + c(n_event, exposure) ~ gtsummary::style_number) + + } + + # WIDE FORMAT DICHOTOMOUS ---------------------------------------------------- + if (wide) { + + # drop non dichotomous variables and chuck a warning + # find if there are any non dichotomous + check_categoricals <- filter(the_table$table_body, var_type != "dichotomous") + + if (nrow(check_categoricals) >= 1 ) { + # get the variable names which are not dichotomous + check_categoricals <- distinct(check_categoricals, variable) + check_categoricals <- pull(check_categoricals) + + # chuck a warning listing the names + warning(paste0("The following non-dichotomous variables were dropped:", + check_categoricals), call. = FALSE) + + # edit the table body (contents of table) + the_table <- gtsummary::modify_table_body( + the_table, + # define a function to avoid piping + fun = function(.x){ + # only keep the row with the estimates + .x <- filter(.x, var_type == "dichotomous") + } + ) + + } + + # chuck an error if they have used show_single_row + if (!is.null(the_table$inputs$show_single_row)) { + stop("Wide format is not possible when specifying 'show_single_row' in your tbl_uvregression. Please change this") + } + + ## TODO: do some fiddling to make tbl_cmh work in wide too + if (inherits(x, c("tbl_cmh"))) { + + the_table <- gtsummary::modify_table_body( + the_table, + # define a function to avoid piping + fun = function(.x){ + + # pull values down to have all in the right row + .x <- tidyr::fill(.x, + stratifier, + .direction = "down") + } + ) + } + + + # define moving variables and labels based on regression type + if (regression_type == "logistic") { + # define vars of interest + the_vars <- c("n_event", "n_nonevent") + # define column headers for new vars + new_names <- c("**Cases exposed (n)**", + "**Controls exposed (n)**", + "**Cases unexposed (n)**", + "**Controls unexposed (n)**") + } + if (regression_type == "poisson" & + is.null(the_table$inputs$method.args$offset)) { + # define vars of interest + the_vars <- c("n_obs", "n_event") + # define column headers for new vars + new_names <- c("**Total exposed (N)**", + "**Cases exposed (n)**", + "**Total unexposed (N)**", + "**Cases unexposed (n)**") + } + if (regression_type == "poisson" & + !is.null(the_table$inputs$method.args$offset)) { + # define vars of interest + the_vars <- c("exposure", "n_event") + # define column headers for new vars + new_names <- c("**Total exposed (person-time)**", + "**Cases exposed (n)**", + "**Total unexposed (person-time)**", + "**Cases unexposed (n)**") + } + + # store the reference_row for later use in filtering + # (gets dropped by pivot_wider) + identifier <- dplyr::select(the_table$table_body, reference_row) + + # combine the vars of interest with TRUE/FALSE + new_vars <- expand.grid(the_vars, c("_FALSE", "_TRUE")) + new_vars <- paste0(new_vars$Var1, new_vars$Var2) + + # put vars and labels into a list for renaming + relabel_vars <- as.list(new_names) + names(relabel_vars) <- new_vars + + + # edit the table body (contents of table) + the_table <- gtsummary::modify_table_body( + the_table, + # define a function to avoid piping + fun = function(.x){ + + # spread to wide format + .x <- tidyr::pivot_wider(.x, + names_from = reference_row, + values_from = all_of(the_vars)) + + # pull values down to have all in the right row + .x <- tidyr::fill(.x, + all_of(new_vars), + .direction = "down") + + # add the reference_row back in for filtering + .x <- cbind(.x, identifier) + + # edit row names + .x <- mutate(.x, + # make them show up as variables + row_type = "label", + # change to show variable label + label = var_label + ) + + # only keep the row with the estimates + .x <- filter(.x, !reference_row) + + # move case and control counts before estimate + .x <- dplyr::relocate(.x, all_of(new_vars), .before = estimate) + + } + ) + + # change column names + the_table <- gtsummary::modify_header(the_table, update = relabel_vars) + + } + + # return outputs + the_table +} + + + + +# #### NOTES +# +# +# +# ## ODDS Ratios +# blabla <- linelist_cleaned %>% +# select(DIED, gender_bin, age_group) %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = binomial), +# exponentiate = TRUE, +# hide_n = TRUE) +# +# ## adding number of non-events to table +# ## from https://www.danieldsjoberg.com/gtsummary/reference/modify_table_body.html +# blabla %>% gtsummary::modify_table_body( +# ~ .x %>% +# dplyr::mutate(n_nonevent = n_obs - n_event) %>% +# dplyr::relocate(c(n_event, n_nonevent), .before = estimate)) %>% +# ## assigning header labels +# gtsummary::modify_header(n_nonevent = "**Control n**", n_event = "**Case n**") %>% +# gtsummary::modify_fmt_fun(c(n_event, n_nonevent) ~ gtsummary::style_number) +# +# +# ## RISK ratios (poisson) +# blabla <- linelist_cleaned %>% +# select(DIED, gender_bin, age_group) %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = poisson), +# exponentiate = TRUE, +# hide_n = TRUE) +# +# blabla %>% +# ## assigning header labels +# gtsummary::modify_header(n_obs = "**Total exposed N**", n_event = "**Cases exposed n**") %>% +# gtsummary::modify_fmt_fun(c(n_event, n_obs) ~ gtsummary::style_number) +# +# +# ## RISK ratios (negbin) +# blabla <- linelist_cleaned %>% +# select(DIED, gender_bin, age_group) %>% +# gtsummary::tbl_uvregression(method = MASS::glm.nb, +# y = DIED, +# exponentiate = TRUE, +# hide_n = TRUE) +# +# blabla %>% +# ## assigning header labels +# gtsummary::modify_header(n_obs = "**Total exposed N**", n_event = "**Cases exposed n**") %>% +# gtsummary::modify_fmt_fun(c(n_event, n_obs) ~ gtsummary::style_number) +# +# +# ## INCIDENCE RATE ratios +# blabla <- linelist_cleaned %>% +# mutate(obstime = sample.int(30, nrow(linelist_cleaned), replace = TRUE)) %>% +# filter(!is.na(obstime)) %>% +# select(DIED, gender_bin, age_group, obstime) %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = poisson, +# offset = log(obstime)), +# exponentiate = TRUE, +# hide_n = TRUE) +# +# blabla %>% +# ## assigning header labels +# gtsummary::modify_header(exposure = "**Total exposed (person-time)**", n_event = "**Cases exposed n**") %>% +# gtsummary::modify_fmt_fun(c(n_event, exposure) ~ gtsummary::style_number) +# +# +# +# +# ## extract the obstime variable +# if (!is.null(blabla$inputs$method.args$offset)) { +# obstimevar <- as.character(blabla$inputs$method.args$offset)[2] +# } +# +# ## select exposure vars +# exposurevars <- c(blabla$inputs$include[!blabla$inputs$include %in% c(blabla$inputs$y, obstimevar)]) +# +# cross_tab <- purrr::map( +# exposurevars, +# function(i) { +# blabla$inputs$data %>% +# group_by(.data[[i]]) %>% +# summarise(outcome = sum(.data[[blabla$inputs$y]]), +# perstime = sum(.data[[obstimevar]])) %>% +# rename(group = all_of(i)) %>% +# mutate(group = as.character(group), +# var = i) +# }) %>% +# bind_rows() +# +# heh <- blabla$inputs$data %>% +# select(DIED, age_group) %>% +# gtsummary::tbl_summary(by = DIED) +# +# +# blabla$inputs$data %>% +# select(obstime, age_group) %>% +# gtsummary::tbl_summary(by = age_group, +# statistic = list(everything() ~ "{sum}")) +# +# +# +# ## WIDE FORMAT +# ### use reference_row to filter for relevant row +# blabla <- linelist_cleaned %>% +# select(DIED, gender_bin, gender, fever) %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = binomial), +# exponentiate = TRUE, +# hide_n = TRUE) +# +# blabla2 <- linelist_cleaned %>% +# select(DIED, gender_bin, gender, fever) %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = binomial), +# exponentiate = TRUE, +# hide_n = TRUE, +# show_single_row = everything()) +# +# +# diff --git a/R/gt_univariate_stratified.R b/R/gt_univariate_stratified.R new file mode 100644 index 0000000..fcc327a --- /dev/null +++ b/R/gt_univariate_stratified.R @@ -0,0 +1,213 @@ + +#' A {gtsummary} wrapper function for producing stratified univariate regression +#' and mantel-haenszel estimates +#' +#'@param data a data frame +#'@param y case variable +#'@param strata variable to stratify by +#'@param measure "OR", "RR", "IRR" +#'@param obstime time under observation for each individual to use as an offset + + +##### THIS WILL PURRR FOR EACH EXPOSURE OVER THE INTERNAL FUNC + ## which does one exposure at a time as below + +## overall table for each exposure (tab_univ) - modify table body identify as crude +## purrr over each strata for each exposure (tab_univ) - modify table body to identify strata +## merge tables (bind rows?) +## sort by? necessary at this stage or can do at end? +## calculate the MH estimate with generalisation of zhian fx +## can either add_crosstab here optionally + ## or they just add after + ## (req adding class of this output to incl tbl_mh and outupdating add_crosstab to allow tbl_mh) + + + + + + + + +# ## add a catch to make everything factors +# linelist_cleaned$gender_bin <- as.factor(linelist_cleaned$gender_bin) +# +# ### ?Need to filter so that all variables included have no missings +# ## as done in line 227 of tab_univariate.R (or with tidyr below) +# linelist_cleaned <- linelist_cleaned %>% tidyr::drop_na(DIED, +# fever_bin, +# gender_bin, +# obstime) +# +# +# +# +# potato <- linelist_cleaned %>% +# select(DIED, fever_bin, obstime) +# +# +# +# ### TODO: simplify these by defining the args with switch() to pass to tbl_uvreg +# if (measure == "OR") { +# crude <- potato %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = binomial), +# exponentiate = TRUE, +# hide_n = TRUE) %>% +# add_crosstabs(wide = FALSE) +# } +# +# if (measure == "RR") { +# crude <- potato %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = poisson), +# exponentiate = TRUE, +# hide_n = TRUE) %>% +# add_crosstabs(wide = FALSE) +# } +# +# if (measure == "IRR") { +# crude <- potato %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# include = fever_bin, +# method.args = list(family = poisson, +# offset = log(obstime)), +# exponentiate = TRUE, +# hide_n = TRUE) %>% +# add_crosstabs(wide = FALSE) +# } +# +# +# +# # edit the table body (contents of table) +# crude <- gtsummary::modify_table_body( +# crude, +# # define a function to make two steps and avoid piping +# fun = function(.x){ +# # remove cases from total obs to get control counts +# .x <- dplyr::mutate(.x, strata = "Crude") +# # move case and control counts before chara +# .x <- dplyr::relocate(.x, strata, .before = variable) +# } +# ) +# +# +# +# ## ISSUE: levels not always present unless a factor (but also - peoples own responsibility) +# +# stratz <- purrr::map(levels(linelist_cleaned$gender_bin), +# .f = function(i){ +# the_table <- linelist_cleaned %>% +# filter(gender_bin == i) %>% +# select(DIED, fever_bin, obstime) +# +# if (measure == "OR") { +# the_table <- the_table %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = binomial), +# exponentiate = TRUE, +# hide_n = TRUE) %>% +# add_crosstabs(wide = FALSE) +# } +# +# if (measure == "RR") { +# +# the_table <- the_table %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# method.args = list(family = poisson), +# exponentiate = TRUE, +# hide_n = TRUE) %>% +# add_crosstabs(wide = FALSE) +# +# } +# +# if (measure == "IRR") { +# +# the_table <- the_table %>% +# gtsummary::tbl_uvregression(method = glm, +# y = DIED, +# include = fever_bin, +# method.args = list(family = poisson, +# offset = log(obstime)), +# exponentiate = TRUE, +# hide_n = TRUE) %>% +# add_crosstabs(wide = FALSE) +# } +# +# +# # edit the table body (contents of table) +# the_table <- gtsummary::modify_table_body( +# the_table, +# # define a function to make two steps and avoid piping +# fun = function(.x){ +# # remove cases from total obs to get control counts +# .x <- dplyr::mutate(.x, strata = i) +# # move case and control counts before chara +# .x <- dplyr::relocate(.x, strata, .before = variable) +# } +# ) +# +# the_table +# +# } +# ) +# +# collaps_stratz <- gtsummary::tbl_stack(stratz) +# +# combine_tab <- gtsummary::tbl_stack(list(crude, collaps_stratz)) +# +# # change column names +# combine_tab <- gtsummary::modify_header(combine_tab, strata = "**Strata**") +# +# +# +# ##### prep data for mh estimates +# littler <- combine_tab$table_body +# +# littler <- littler %>% +# filter(strata != "Crude", !header_row) +# +# stratalength <- length(unique(littler$strata)) +# +# exposurelength <- littler$var_nlevels[1L] +# +# ##### p-value for the mental haenszel estimate +# woolf <- get_woolf_pval(littler, measure = measure) +# +# +# ##### mantel haenszel estimates +# +# mh <- get_mh(littler, measure, conf.level) +# +# ### put results in the table +# combine_tab <- gtsummary::modify_table_body( +# combine_tab, +# # define a function to make two steps and avoid piping +# fun = function(.x){ +# # remove cases from total obs to get control counts +# ## TODO: pull out the ifelse filter in to one obj +# ## TODO: wtf is this mutate in a mutate +# mutate( +# .x <- dplyr::mutate(.x, +# mh_estimate = ifelse(strata == "Crude" & reference_row, +# mh$ratio, NA), +# mh_conf.low = ifelse(strata == "Crude" & reference_row, +# mh$lower, NA), +# mh_conf.high = ifelse(strata == "Crude" & reference_row, +# mh$upper, NA), +# woolf_p.value = ifelse(strata == "Crude" & reference_row, +# woolf$p.value, NA) +# ) +# ) +# # move case and control counts before chara +# # .x <- dplyr::relocate(.x, strata, .before = variable) +# } +# ) +# + + + diff --git a/R/gt_univariate_stratified_internal.R b/R/gt_univariate_stratified_internal.R new file mode 100644 index 0000000..10bbf70 --- /dev/null +++ b/R/gt_univariate_stratified_internal.R @@ -0,0 +1,414 @@ + + +## linelist_cleaned from test-gt_wrappers.R up to line 201 (dont worry this will be deleted later) +# linelist_cleaned$gender_bin <- linelist_cleaned$gender == "Female" +# linelist_cleaned$fever_bin <- linelist_cleaned$fever == "Yes" +# linelist_cleaned <- linelist_cleaned %>% +# mutate(obstime = sample.int(30, nrow(linelist_cleaned), replace = TRUE)) + + + +tbl_cmh_internal <- function(data, case, exposure, strata, measure, obstime = NULL, conf.level = 0.95) { + + ## make variables available for use + case_var <- tidyselect::vars_select(colnames(data), {{ case }}) + exposure_var <- tidyselect::vars_select(colnames(data), {{ exposure }}) + strata_var <- tidyselect::vars_select(colnames(data), {{ strata }}) + obstime_var <- tidyselect::vars_select(colnames(data), {{ obstime }}) + + + ## if stratifying variable is not already a factor then make it one + if(!is.factor(data[[strata_var]])) { + data <- dplyr::mutate(data, {{strata}} := as.factor({{strata}})) + } + + ## add a temporary variable with the log of obstime + if (length(obstime_var) != 0) { + data[["log_obs"]] <- log(data[[obstime_var]]) + } + + + ## drop missings from all relevant variables + data <- tidyr::drop_na(data, + {{case}}, + {{exposure}}, + {{strata}}, + {{obstime}}) + + ##### CRUDE ESTIMATES -------------------------------------------------------- + + + + ## TODO: simplify these by defining the args with switch() to pass to tbl_uvreg + if (measure == "OR") { + crude <- gtsummary::tbl_uvregression(data = data, + method = glm, + y = {{case}}, + include = {{exposure}}, + method.args = list(family = binomial), + exponentiate = TRUE, + hide_n = TRUE) + } + + if (measure == "RR") { + crude <- gtsummary::tbl_uvregression(data = data, + method = glm, + y = {{case}}, + include = {{exposure}}, + method.args = list(family = poisson), + exponentiate = TRUE, + hide_n = TRUE) + } + + if (measure == "IRR") { + crude <- gtsummary::tbl_uvregression(data = data, + method = glm, + y = {{case}}, + include = {{exposure}}, + method.args = list(family = poisson, + offset = log_obs), + exponentiate = TRUE, + hide_n = TRUE) + } + + # edit the table body (contents of table) + crude <- gtsummary::modify_table_body( + crude, + # define a function to make two steps and avoid piping + fun = function(.x){ + # remove cases from total obs to get control counts + .x <- dplyr::mutate(.x, stratifier = "Crude") + # move case and control counts before chara + .x <- dplyr::relocate(.x, stratifier, .before = variable) + } + ) + + + ######## STRATIFIED ESTIMATES ------------------------------------------------ + + strata_levels <- levels(data[[strata_var]]) + + stratz <- purrr::map(strata_levels, + .f = function(i){ + + ## filter dataset to only keep strata of interest + strat_data <- filter(data, {{strata}} == i) + + ### TODO: simplify these by defining the args with switch() to pass to tbl_uvreg + if (measure == "OR") { + the_table <- gtsummary::tbl_uvregression(data = strat_data, + method = glm, + y = {{case}}, + include = {{exposure}}, + method.args = list(family = binomial), + exponentiate = TRUE, + hide_n = TRUE) + } + + if (measure == "RR") { + the_table <- gtsummary::tbl_uvregression(data = strat_data, + method = glm, + y = {{case}}, + include = {{exposure}}, + method.args = list(family = poisson), + exponentiate = TRUE, + hide_n = TRUE) + } + + if (measure == "IRR") { + the_table <- gtsummary::tbl_uvregression(data = strat_data, + method = glm, + y = {{case}}, + include = {{exposure}}, + method.args = list(family = poisson, + offset = log_obs), + exponentiate = TRUE, + hide_n = TRUE) + } + + + # edit the table body (contents of table) + the_table <- gtsummary::modify_table_body( + the_table, + # define a function to make two steps and avoid piping + fun = function(.x){ + # remove cases from total obs to get control counts + .x <- dplyr::mutate(.x, stratifier = i) + # move strata label before exposure + .x <- dplyr::relocate(.x, stratifier, .before = variable) + } + ) + + the_table + + } + ) + + + ######### MERGE TABLES ------------------------------------------------------- + + collaps_stratz <- gtsummary::tbl_stack(stratz) + + combine_tab <- gtsummary::tbl_stack(list(crude, collaps_stratz)) + + # change column names + combine_tab <- gtsummary::modify_header(combine_tab, stratifier = "**Strata**") + + + + ######## MANTEL HAENSZEL ESTIMATES ------------------------------------------- + + ##### prep data for mh estimates + mh_data <- combine_tab$table_body + + mh_data <- filter(mh_data, stratifier != "Crude", !header_row) + + stratalength <- length(strata_levels) + + exposurelength <- mh_data$var_nlevels[1L] + + ##### p-value for the mental haenszel estimate + woolf <- get_woolf_pval(mh_data, measure = measure, + stratalength = stratalength) + + + ##### mantel haenszel estimates + + mh <- get_mh(mh_data, measure, conf.level, exposurelength, stratalength) + + ####### CLEAN UP OUTPUT TABLE ------------------------------------------------ + + ### put results in the table + combine_tab <- gtsummary::modify_table_body( + combine_tab, + ## define a function to make two steps and avoid piping + fun = function(.x){ + ## remove cases from total obs to get control counts + ## TODO: pull out the ifelse filter in to one obj + .x <- dplyr::mutate(.x, + mh_estimate = ifelse(stratifier == "Crude" & !reference_row, + mh$ratio, NA), + mh_conf.low = ifelse(stratifier == "Crude" & !reference_row, + mh$lower, NA), + mh_conf.high = ifelse(stratifier == "Crude" & !reference_row, + mh$upper, NA), + woolf_p.value = ifelse(stratifier == "Crude" & !reference_row, + woolf$p.value, NA), + + ## TODO: simplify with across() + + mh_estimate = gtsummary::style_ratio(mh_estimate), + mh_conf.low = gtsummary::style_ratio(mh_conf.low), + mh_conf.high = gtsummary::style_ratio(mh_conf.high), + woolf_p.value = gtsummary::style_pvalue(woolf_p.value), + + mh_ci = ifelse(!is.na(mh_conf.low), + paste0(mh_conf.low, ", ", mh_conf.high), + NA), + + stratifier = ifelse(row_type != "label", NA, + stratifier) + ) + # move strata label before exposure + .x <- dplyr::relocate(.x, mh_ci, .before = woolf_p.value) + } + ) + + ## rename the columnns for mh estimates + combine_tab <- gtsummary::modify_header( + combine_tab, + mh_estimate = "**CMH estimate**", + mh_ci = "**95% CI**", + woolf_p.value = "**p-value**" + ) + + ## add tbl_cmh to the class (so can use as catch in other functions) + class(combine_tab) <- c("tbl_stack", "gtsummary", "tbl_cmh") + + ## return table + combine_tab + +} + + + + + + + + + + + +# Mantel-Haenszel tests (no p-values) + +get_mh <- function(arr, measure = "OR", conf = 0.95, + exposurelength = NULL, stratalength = NULL) { + switch(measure, + OR = mh_or(arr, conf, exposurelength, stratalength), + RR = mh_rr(arr, conf), + IRR = mh_irr(arr, conf) + ) +} + + +# The M-H statistic for odds ratios already exist in R, so it's just a matter of +# formatting data input and then pulling out the correct values +mh_or <- function(arr, conf = 0.95, + exposurelength = exposurelength, stratalength = stratalength) { + + ## need to flip because need non-reference row on top + arr <- arrange(arr, tbl_id2, reference_row) + arr <- mutate(arr, n_nonevent = n_obs - n_event) + arr <- select(arr, n_event, n_nonevent) + + wtf <- aperm( + array( + t(as.matrix(arr)), + c(2,exposurelength, stratalength)), + c(2,1,3)) + + MH <- stats::mantelhaen.test(wtf, conf.level = conf, exact = TRUE) + + data.frame(ratio = MH$estimate, lower = MH$conf.int[1], upper = MH$conf.int[2]) + +} + + +# These functions are adapted from epiR::epi.2by2 lines 1185--1204 +# +# Many of the changes involve abstracting repetative routines into functions, +# splitting nested calculations into separate variables, and modifying +# variable names. +mh_rr <- function(arr, conf.level = 0.95) { + z <- get_z(conf.level) + + ## Summary incidence risk ratio (Rothman 2002 p 148 and 152, equation 8-2): + A <- arr$n_event[!arr$reference_row] + C <- arr$n_event[arr$reference_row] + total_cases <- A + C + total_exposed <- arr$n_obs[!arr$reference_row] + total_unexposed <- arr$n_obs[arr$reference_row] + N <- total_exposed + total_unexposed + A_TU <- A * total_unexposed + C_TE <- C * total_exposed + + MH_risk_ratio <- sum(A_TU / N) / sum(C_TE / N) + + total_prod <- total_cases * total_exposed * total_unexposed + var_numerator <- sum((total_prod / N^2) - ((A * C) / N)) + var_denominator <- sum(A_TU / N) * sum(C_TE / N) + + MH_risk_ratio_var <- var_numerator / var_denominator + + se_limits <- get_ci_from_var(log(MH_risk_ratio), MH_risk_ratio_var, z) + + data.frame(ratio = MH_risk_ratio, lower = se_limits[["ll"]], upper = se_limits[["ul"]]) +} + + +mh_irr <- function(arr, conf.level = 0.95) { + + ## Summary incidence rate ratio (Rothman 2002 p 153, equation 8-5): + A <- arr$n_event[!arr$reference_row] + B <- arr$exposure[!arr$reference_row] + C <- arr$n_event[arr$reference_row] + D <- arr$exposure[arr$reference_row] + cases <- A + C # M1 + + person_time <- B + D # M0 + + numerator <- sum((A * D) / person_time) + denominator <- sum((C * B) / person_time) + MH_IRR <- numerator / denominator + + MH_IRR_var <- sum((cases * B * D) / (person_time^2)) / (numerator * denominator) + + se_limits <- get_ci_from_var(log(MH_IRR), MH_IRR_var, get_z(conf.level)) + + data.frame(ratio = MH_IRR, lower = se_limits[["ll"]], upper = se_limits[["ul"]]) +} + + +get_z <- function(conf.level) { + alpha <- 1 - ((1 - conf.level) / 2) + z <- stats::qnorm(alpha, mean = 0, sd = 1) + z +} + +get_ci_from_var <- function(log_ratio, log_ratio_var, z) { + log_ratio_se <- sqrt(log_ratio_var) + lower_limit <- exp(log_ratio - (z * log_ratio_se)) + upper_limit <- exp(log_ratio + (z * log_ratio_se)) + + data.frame(se = log_ratio_se, ll = lower_limit, ul = upper_limit) +} + + + + + +#### wolf pval + +## this needs to be fed littler the dataframe +get_woolf_pval <- function(arr, measure = "OR", stratalength = stratalength) { + + nstrata <- stratalength + + if (measure == "OR") { + + ## this line is duplicated in mh_or() + arr <- mutate(arr, n_nonevent = n_obs - n_event) + + ## define cases and controls for OR + A <- arr$n_event[!arr$reference_row] + B <- arr$n_nonevent[!arr$reference_row] + C <- arr$n_event[arr$reference_row] + D <- arr$n_nonevent[arr$reference_row] + + exposed <- A + B + unexposed <- C + D + } + else { + ## define cases and controls for RR + A <- arr$n_event[!arr$reference_row] + B <- arr$n_obs[!arr$reference_row] - arr$n_event[!arr$reference_row] + C <- arr$n_event[arr$reference_row] + D <- arr$n_obs[arr$reference_row] - arr$n_event[arr$reference_row] + + exposed <- arr$exposure[!arr$reference_row] + unexposed <- arr$exposure[arr$reference_row] + } + + + # This is taken from lines 1320--1340 of epiR::epi.2by2 + # + # Many of the changes involve abstracting repetative routines into functions, + # splitting nested calculations into separate variables, and modifying + # variable names. + if (measure == "RR") { + ## Test of homogeneity of risk ratios (Jewell 2004, page 154). First work + ## out the Woolf estimate of the adjusted risk ratio (labelled adj_log_est + ## here) based on Jewell (2004, page 134): + + log_est <- log((A / exposed) / (C / unexposed)) + log_est_var <- (B / (A * exposed)) + (D / (C * unexposed)) + + } else { + + ## Test of homogeneity of odds ratios (Jewell 2004, page 154). First work + ## out the Woolf estimate of the adjusted odds ratio (labelled adj_log_est + ## here) based on Jewell (2004, page 129): + log_est <- log(((A + 0.5) * (D + 0.5)) / ((B + 0.5) * (C + 0.5))) + log_est_var <- (1 / (A + 0.5)) + (1 / (B + 0.5)) + (1 / (C + 0.5)) + (1 / (D + 0.5)) + } + + inv_log_est_var <- 1 / log_est_var + adj_log_est <- sum(inv_log_est_var * log_est) / sum(inv_log_est_var) + + ## Equation 10.3 from Jewell (2004): + res <- sum(inv_log_est_var * (log_est - adj_log_est)^2) + p_value <- 1 - stats::pchisq(res, df = nstrata - 1) + + data.frame(statistic = res, df = nstrata - 1, p.value = p_value) +} diff --git a/R/gt_wrappers.R b/R/gt_wrappers.R index a342eed..0e3b811 100644 --- a/R/gt_wrappers.R +++ b/R/gt_wrappers.R @@ -3,26 +3,6 @@ - - -#' A gtsummary wrapper function that takes a gtsummary object and removes a -#' column from the table body by column name -#' -#' @param gts_object A data frame, passed by the gtsummary::add_stat function -#' -#' @param col_name the column name from the gtsummary object's table_body to remove - -#' @return a gtsummary object without the named column -#' -#' @rdname gtsummary_wrappers -#' @export -#' -gt_remove_stat <- function(gts_object, col_name = "stat_0") { - gts_object %>% gtsummary::modify_table_body( - ~ .x %>% - dplyr::select(-dplyr::all_of(col_name))) -} - #' A gtsummary wrapper function that takes a data frame and adds cross tabs #' by exposure and outcome #' @@ -48,160 +28,180 @@ gt_remove_stat <- function(gts_object, col_name = "stat_0") { #' #' @export -add_crosstabs <- function( - data, exposure, outcome, case_reference = "outcome", var_name = NULL, show_overall = TRUE, - exposure_label = NULL, outcome_label = NULL, var_label = NULL, - two_by_two = FALSE, gt_statistic = "{n}", show_N_header = FALSE) { - - # Create exposure and outcome variables and labels ---- - exposure_sym <- as.symbol(exposure) - qexposure <- rlang::enquo(exposure_sym) - - outcome_sym <- as.symbol(outcome) - qoutcome <- rlang::enquo(outcome_sym) - - if (is.null(exposure_label)) exposure_label <- exposure - if (is.null(outcome_label)) outcome_label <- outcome - - if (is.logical(data[[exposure]])) { - # message("add_crosstabs: Ordering exposure to logical factor TRUE FALSE") - data <- data %>% - mutate(!!qexposure := factor(!!qexposure, levels = c(TRUE, FALSE))) - } - - if (is.logical(data[[outcome]])) { - # message("add_crosstabs: Ordering outcome to logical factor TRUE FALSE") - data <- data %>% - mutate(!!qoutcome := factor(!!qoutcome, levels = c(TRUE, FALSE))) - } - - - if (two_by_two) { - gts <- data %>% - dplyr::select(!!qexposure, !!qoutcome) %>% - gtsummary::tbl_summary( - include = !!qexposure, - statistic = everything() ~ gt_statistic, - by = !!qoutcome, - type = exposure ~ "categorical", - label = exposure ~ exposure_label - ) %>% - gtsummary::modify_header(label ~ "") %>% - gtsummary::modify_spanning_header(c("stat_1", "stat_2") ~ outcome_label) - - if (show_overall) { - gts <- gts %>% gtsummary::add_overall(last = TRUE) - - if(!show_N_header) { - gts <- gts %>% - gtsummary::modify_header(c("stat_1", "stat_2") ~ "**{level}**") - } - } - } else { - if(is.null(var_name)) { - var_name <- "All" - data <- data %>% mutate(All = TRUE) - summary_type <- "dichotomous" - } else { - summary_type <- "categorical" - } - - var_sym <- as.symbol(var_name) - qvar <- rlang::enquo(var_sym) - exposure_levels <- levels(data[[exposure]]) - outcome_levels <- levels(data[[outcome]]) - - - footnote <- paste0( - paste0("Case defined as ", paste(outcome_label, "value of", outcome_levels[1])), - "; ", - paste0("Control defined as ", paste(outcome_label, "value of", outcome_levels[2])), - "; ", - paste0("Exposure variable is ", exposure_label)) - if (is.null(var_label)) var_label <- var_name - - df_strata <- - data %>% - dplyr::select(dplyr::all_of(c(var_name, outcome, exposure))) %>% - tidyr::nest(data = -dplyr::all_of(outcome)) %>% - dplyr::mutate( - tbl = purrr::map( - data, ~ gtsummary::tbl_summary( - .x, - include = !!qvar, - statistic = everything() ~ gt_statistic, - by = !!qexposure, - type = var_name ~ summary_type, - label = var_name ~ var_label, - missing = "ifany" - )) - ) %>% - mutate_at(vars(outcome), as.factor) %>% - mutate(!!qoutcome := fct_relevel(!! rlang::sym(outcome), outcome_levels)) %>% - arrange(!!qoutcome) - - # gts <- gtsummary::tbl_merge(df_strata$tbl) - if (show_overall) { - gt_overall <- data %>% - dplyr::select(dplyr::all_of(c(var_name, exposure))) %>% - gtsummary::tbl_summary( - include = var_name, - statistic = everything() ~ gt_statistic, - label = var_name ~ var_label) - - if(!show_N_header) { - gt_overall <- gt_overall %>% - gtsummary::modify_header(gtsummary::all_stat_cols() ~ "**N**", ) - } - - tbls <- df_strata$tbl - tbls[[3]] <- gt_overall - gts <- gtsummary::tbl_merge(tbls) %>% - gtsummary::modify_header(label ~ exposure_label) %>% - gtsummary::modify_spanning_header(list( - c("stat_1_1", "stat_2_1") ~ "**Cases**"), - c("stat_1_2", "stat_2_2") ~ "**Controls**", - "stat_0_3" ~ "**Overall**") %>% - gtsummary::modify_footnote(gtsummary::all_stat_cols() ~ footnote) +# add_crosstabs <- function( +# data, exposure, outcome, case_reference = "outcome", var_name = NULL, show_overall = TRUE, +# exposure_label = NULL, outcome_label = NULL, var_label = NULL, +# two_by_two = FALSE, gt_statistic = "{n}", show_N_header = FALSE) { +# +# # Create exposure and outcome variables and labels ---- +# exposure_sym <- as.symbol(exposure) +# qexposure <- rlang::enquo(exposure_sym) +# +# outcome_sym <- as.symbol(outcome) +# qoutcome <- rlang::enquo(outcome_sym) +# +# if (is.null(exposure_label)) exposure_label <- exposure +# if (is.null(outcome_label)) outcome_label <- outcome +# +# if (is.logical(data[[exposure]])) { +# # message("add_crosstabs: Ordering exposure to logical factor TRUE FALSE") +# data <- data %>% +# mutate(!!qexposure := factor(!!qexposure, levels = c(TRUE, FALSE))) +# } +# +# if (is.logical(data[[outcome]])) { +# # message("add_crosstabs: Ordering outcome to logical factor TRUE FALSE") +# data <- data %>% +# mutate(!!qoutcome := factor(!!qoutcome, levels = c(TRUE, FALSE))) +# } +# +# +# if (two_by_two) { +# gts <- data %>% +# dplyr::select(!!qexposure, !!qoutcome) %>% +# gtsummary::tbl_summary( +# include = !!qexposure, +# statistic = everything() ~ gt_statistic, +# by = !!qoutcome, +# type = exposure ~ "categorical", +# label = exposure ~ exposure_label +# ) %>% +# gtsummary::modify_header(label ~ "") %>% +# gtsummary::modify_spanning_header(c("stat_1", "stat_2") ~ outcome_label) +# +# if (show_overall) { +# gts <- gts %>% gtsummary::add_overall(last = TRUE) +# +# if(!show_N_header) { +# gts <- gts %>% +# gtsummary::modify_header(c("stat_1", "stat_2") ~ "**{level}**") +# } +# } +# } else { +# if(is.null(var_name)) { +# var_name <- "All" +# data <- data %>% mutate(All = TRUE) +# summary_type <- "dichotomous" +# } else { +# summary_type <- "categorical" +# } +# +# var_sym <- as.symbol(var_name) +# qvar <- rlang::enquo(var_sym) +# exposure_levels <- levels(data[[exposure]]) +# outcome_levels <- levels(data[[outcome]]) +# +# +# footnote <- paste0( +# paste0("Case defined as ", paste(outcome_label, "value of", outcome_levels[1])), +# "; ", +# paste0("Control defined as ", paste(outcome_label, "value of", outcome_levels[2])), +# "; ", +# paste0("Exposure variable is ", exposure_label)) +# if (is.null(var_label)) var_label <- var_name +# +# df_strata <- +# data %>% +# dplyr::select(dplyr::all_of(c(var_name, outcome, exposure))) %>% +# tidyr::nest(data = -dplyr::all_of(outcome)) %>% +# dplyr::mutate( +# tbl = purrr::map( +# data, ~ gtsummary::tbl_summary( +# .x, +# include = !!qvar, +# statistic = everything() ~ gt_statistic, +# by = !!qexposure, +# type = var_name ~ summary_type, +# label = var_name ~ var_label, +# missing = "ifany" +# )) +# ) %>% +# mutate_at(vars(outcome), as.factor) %>% +# mutate(!!qoutcome := fct_relevel(!! rlang::sym(outcome), outcome_levels)) %>% +# arrange(!!qoutcome) +# +# # gts <- gtsummary::tbl_merge(df_strata$tbl) +# if (show_overall) { +# gt_overall <- data %>% +# dplyr::select(dplyr::all_of(c(var_name, exposure))) %>% +# gtsummary::tbl_summary( +# include = var_name, +# statistic = everything() ~ gt_statistic, +# label = var_name ~ var_label) +# +# if(!show_N_header) { +# gt_overall <- gt_overall %>% +# gtsummary::modify_header(gtsummary::all_stat_cols() ~ "**N**", ) +# } +# +# tbls <- df_strata$tbl +# tbls[[3]] <- gt_overall +# gts <- gtsummary::tbl_merge(tbls) %>% +# gtsummary::modify_header(label ~ exposure_label) %>% +# gtsummary::modify_spanning_header(list( +# c("stat_1_1", "stat_2_1") ~ "**Cases**"), +# c("stat_1_2", "stat_2_2") ~ "**Controls**", +# "stat_0_3" ~ "**Overall**") %>% +# gtsummary::modify_footnote(gtsummary::all_stat_cols() ~ footnote) +# +# if (!show_N_header) { +# gts <- gts %>% +# gtsummary::modify_header( +# c("stat_1_1", "stat_2_1", "stat_1_2", "stat_2_2") ~ "**{level}**") +# } +# } else { +# gts <- gtsummary::tbl_merge(df_strata$tbl) %>% +# gtsummary::modify_header(label ~ exposure_label) %>% +# gtsummary::modify_spanning_header(list( +# c("stat_1_1", "stat_2_1") ~ "**Cases**"), +# c("stat_1_2", "stat_2_2") ~ "**Controls**") %>% +# gtsummary::modify_footnote(gtsummary::all_stat_cols() ~ footnote) +# if (!show_N_header) { +# gts <- gts %>% +# gtsummary::modify_header( +# c("stat_1_1", "stat_2_1", "stat_1_2", "stat_2_2") ~ "**{level}**") +# } +# } +# } +# +# +# data <- data %>% +# mutate(!!qoutcome := as.logical(!!qoutcome)) +# gts[["data"]] <- data +# gts[["meta_data"]] <- list( +# exposure = exposure, +# outcome = outcome, +# var_name = var_name, +# show_overall = show_overall, +# exposure_label = exposure_label, +# outcome_label = outcome_label, +# var_name = ifelse(!is.null(var_name), var_name, NA), +# var_label = ifelse(!is.null(var_label), var_label, NA), +# two_by_two = two_by_two, +# gt_statistic = gt_statistic +# ) +# +# return(gts) +# } - if (!show_N_header) { - gts <- gts %>% - gtsummary::modify_header( - c("stat_1_1", "stat_2_1", "stat_1_2", "stat_2_2") ~ "**{level}**") - } - } else { - gts <- gtsummary::tbl_merge(df_strata$tbl) %>% - gtsummary::modify_header(label ~ exposure_label) %>% - gtsummary::modify_spanning_header(list( - c("stat_1_1", "stat_2_1") ~ "**Cases**"), - c("stat_1_2", "stat_2_2") ~ "**Controls**") %>% - gtsummary::modify_footnote(gtsummary::all_stat_cols() ~ footnote) - if (!show_N_header) { - gts <- gts %>% - gtsummary::modify_header( - c("stat_1_1", "stat_2_1", "stat_1_2", "stat_2_2") ~ "**{level}**") - } - } - } - data <- data %>% - mutate(!!qoutcome := as.logical(!!qoutcome)) - gts[["data"]] <- data - gts[["meta_data"]] <- list( - exposure = exposure, - outcome = outcome, - var_name = var_name, - show_overall = show_overall, - exposure_label = exposure_label, - outcome_label = outcome_label, - var_name = ifelse(!is.null(var_name), var_name, NA), - var_label = ifelse(!is.null(var_label), var_label, NA), - two_by_two = two_by_two, - gt_statistic = gt_statistic - ) +#' A gtsummary wrapper function that takes a gtsummary object and removes a +#' column from the table body by column name +#' +#' @param gts_object A data frame, passed by the gtsummary::add_stat function +#' +#' @param col_name the column name from the gtsummary object's table_body to remove - return(gts) +#' @return a gtsummary object without the named column +#' +#' @rdname gtsummary_wrappers +#' @export +#' +gt_remove_stat <- function(gts_object, col_name = "stat_0") { + gts_object %>% gtsummary::modify_table_body( + ~ .x %>% + dplyr::select(-dplyr::all_of(col_name))) }