Skip to content

Commit

Permalink
Merge pull request #1 from darwin-eu/release_040
Browse files Browse the repository at this point in the history
Release 040
  • Loading branch information
catalamarti authored Nov 26, 2024
2 parents d7a8608 + 44d733f commit 0e60731
Show file tree
Hide file tree
Showing 98 changed files with 2,202 additions and 1,228 deletions.
6 changes: 3 additions & 3 deletions CRAN-SUBMISSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Version: 0.1.0
Date: 2024-04-16 09:31:59 UTC
SHA: 9f211b1219098ced0c1f760a82a2870b3caf7cfa
Version: 0.4.0
Date: 2024-11-26 21:37:09 UTC
SHA: d15456e2ef022d0d6a4af9338a56cb1c383d5bc6
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CohortCharacteristics
Type: Package
Title: Summarise and Visualise Characteristics of Patients in the OMOP CDM
Version: 0.3.0.900
Version: 0.4.0
Authors@R: c(
person("Marti", "Catala", , "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3308-9905")),
Expand All @@ -27,14 +27,14 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.2
Imports:
CDMConnector (>= 1.3.2),
CDMConnector (>= 1.6.0),
dplyr,
tidyr,
rlang,
cli,
stringr,
omopgenerics (>= 0.3.0),
visOmopResults (>= 0.4.0),
omopgenerics (>= 0.4.0),
visOmopResults (>= 0.5.0),
PatientProfiles (>= 1.2.0),
snakecase,
lifecycle,
Expand Down
15 changes: 12 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# Generated by roxygen2: do not edit by hand

export(additionalColumns)
export(availablePlotColumns)
export(availableTableColumns)
export(benchmarkCohortCharacteristics)
export(bind)
export(exportSummarisedResult)
export(groupColumns)
export(importSummarisedResult)
export(mockCohortCharacteristics)
export(mockDisconnect)
Expand All @@ -13,6 +18,8 @@ export(plotCohortTiming)
export(plotComparedLargeScaleCharacteristics)
export(plotLargeScaleCharacteristics)
export(settings)
export(settingsColumns)
export(strataColumns)
export(summariseCharacteristics)
export(summariseCohortAttrition)
export(summariseCohortCount)
Expand All @@ -27,18 +34,20 @@ export(tableCohortOverlap)
export(tableCohortTiming)
export(tableLargeScaleCharacteristics)
export(tidy)
export(tidyColumns)
importFrom(PatientProfiles,mockDisconnect)
importFrom(dplyr,"%>%")
importFrom(lifecycle,deprecated)
importFrom(omopgenerics,additionalColumns)
importFrom(omopgenerics,bind)
importFrom(omopgenerics,exportSummarisedResult)
importFrom(omopgenerics,groupColumns)
importFrom(omopgenerics,importSummarisedResult)
importFrom(omopgenerics,settings)
importFrom(omopgenerics,settingsColumns)
importFrom(omopgenerics,strataColumns)
importFrom(omopgenerics,suppress)
importFrom(omopgenerics,tidy)
importFrom(rlang,"%||%")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(visOmopResults,tidy)
importFrom(visOmopResults,tidyColumns)
21 changes: 21 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
# CohortCharacteristics 0.4.0

* Update links darwin-eu-dev -> darwin-eu @catalamarti
* Typo in plotCohortAttrition by @martaalcalde
* uniqueCombination paramters to work in a general way @catalamarti
* minimum 5 days in x axis for density plots @catalamarti
* improve documentation of minimumFrequency by @catalamarti
* add show argument to plotCohortAttrition by @catalamarti
* simplify code for overlap and fix edge case with 0 overlap by @catalamarti
* arrange ageGroups by order that they are provided in summariseCharacteristics by @catalamarti
* otherVariablesEstimates -> estimates in summariseCharacteristics by @catalamarti
* add overlapBy argument to summariseCohortOverlap by @catalamarti
* Compatibility with visOmopResults 0.5.0 and omopgenerics 0.4.0 by @catalamarti
* add message if different pkg versions by @catalamarti
* make sure settings are characters by @catalamarti
* use requireEunomia and CDMConnector 1.6.0 by @catalamarti
* add benchmark function by @catalamarti
* Consistent documentation by @catalamarti
* Use subjects only when overlapBy = "subject_id" by @catalamarti
* add cohortId to LSC by @catalamarti

# CohortCharacteristics 0.3.0

* **breaking change** Complete refactor of `table*` and `plot*` functions
Expand Down
264 changes: 264 additions & 0 deletions R/benchmarkCohortCharacteristics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@

#' Benchmark the main functions of CohortCharacteristics package.
#'
#' @param cohort A cohort_table from a cdm_reference.
#' @param analysis Set of analysis to perform, must be a subset of: "count",
#' "attrition", "characteristics", "overlap", "timing" and
#' "large scale characteristics".
#'
#' @return A summarised_result object.
#' @export
#'
#' @examples
#' \dontrun{
#' CDMConnector::requireEunomia()
#' con <- duckdb::dbConnect(duckdb::duckdb(), CDMConnector::eunomiaDir())
#' cdm <- CDMConnector::cdmFromCon(
#' con = con, cdmSchema = "main", writeSchema = "main"
#' )
#'
#' cdm <- CDMConnector::generateConceptCohortSet(
#' cdm = cdm,
#' conceptSet = list(sinusitis = 40481087, pharyngitis = 4112343),
#' name = "my_cohort"
#' )
#'
#' benchmarkCohortCharacteristics(cdm$my_cohort)
#'
#' }
benchmarkCohortCharacteristics <- function(cohort,
analysis = c("count", "attrition", "characteristics", "overlap", "timing", "large scale characteristics")) {
# initial checks
cohort <- omopgenerics::validateCohortArgument(cohort)
omopgenerics::assertChoice(analysis, choices = c(
"count", "attrition", "characteristics", "overlap", "timing",
"large scale characteristics"), unique = TRUE)

nPerson <- omopgenerics::cdmReference(cohort)$person |>
dplyr::ungroup() |>
dplyr::tally() |>
dplyr::pull() |>
as.character()

set <- dplyr::tibble(
result_id = 1L,
result_type = "benchmark_cohort_characteristics",
package_name = "CohortCharacteristics",
package_version = pkgVersion(),
cohort = dplyr::coalesce(omopgenerics::tableName(cohort), "temp"),
source_type = omopgenerics::sourceType(cohort),
person_n = nPerson,
!!!listCounts(cohort)
)

result <- dplyr::tibble(task = character(), time = numeric())

if ("count" %in% analysis) {
task <- "summariseCohortCount"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCohortCount() |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))
}

if ("attrition" %in% analysis) {
task <- "summariseCohortAttrition"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCohortAttrition() |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))
}

if ("characteristics" %in% analysis) {
task <- "summariseCharacteristics demographics"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCharacteristics(
counts = FALSE,
demographics = TRUE
) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))

task <- "summariseCharacteristics number visits before"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCharacteristics(
counts = FALSE,
demographics = FALSE,
tableIntersectCount = list("Number visits" = list(
tableName = "visit_occurrence", window = c(-Inf, 0)
))
) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))

task <- "summariseCharacteristics covariates before"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCharacteristics(
counts = FALSE,
demographics = FALSE,
cohortIntersectFlag = list("covariates before" = list(
targetCohortTable = omopgenerics::tableName(cohort),
window = c(-Inf, 0)
))
) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))
}

if ("overlap" %in% analysis) {
task <- "summariseCohortOverlap subjects"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCohortOverlap() |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))

task <- "summariseCohortOverlap subjects and cohort_start_date"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCohortOverlap(overlapBy = c("subject_id", "cohort_start_date")) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))
}

if ("timing" %in% analysis) {
task <- "summariseCohortTiming no density"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCohortTiming(estimates = c("min", "q25", "median", "q75", "max")) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))

task <- "summariseCohortTiming density"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseCohortTiming(estimates = c("density")) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))
}

if ("large scale characteristics" %in% analysis) {
windows <- list(c(-Inf, -1), c(0, 0), c(1, Inf))

task <- "summariseLargeScaleCharacteristics event condition_occurrence"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseLargeScaleCharacteristics(
window = windows,
eventInWindow = "condition_occurrence"
) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))

task <- "summariseLargeScaleCharacteristics episode drug_exposure"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseLargeScaleCharacteristics(
window = windows,
episodeInWindow = "drug_exposure"
) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))

task <- "summariseLargeScaleCharacteristics source measurement"
benchmarkMessage(task)
t0 <- Sys.time()
cohort |>
summariseLargeScaleCharacteristics(
window = windows,
eventInWindow = "measurement",
includeSource = TRUE
) |>
suppressMessages() |>
invisible()
time <- as.numeric(Sys.time() - t0)
result <- result |>
dplyr::union_all(dplyr::tibble(task = task, time = time))
}

result |>
omopgenerics::uniteGroup(cols = "task") |>
omopgenerics::uniteStrata() |>
omopgenerics::uniteAdditional() |>
dplyr::mutate(
result_id = 1L,
cdm_name = omopgenerics::cdmName(cohort),
variable_name = "overall",
variable_level = "overall",
estimate_name = "time_in_seconds",
estimate_type = "numeric",
estimate_value = as.character(round(.data$time, 3))
) |>
dplyr::select(!"time") |>
omopgenerics::newSummarisedResult(settings = set)
}
listCounts <- function(cohort) {
counts <- omopgenerics::settings(cohort) |>
dplyr::select("cohort_name", "cohort_definition_id") |>
dplyr::left_join(
omopgenerics::cohortCount(cohort) |>
dplyr::select("cohort_definition_id", "number_records"),
by = "cohort_definition_id"
) |>
dplyr::mutate(number_records = dplyr::coalesce(.data$number_records, 0L))
counts <- counts$number_records |>
as.list() |>
rlang::set_names(nm = counts$cohort_name)
counts$total_counts <- sum(unlist(counts))
purrr::map(counts, as.character)
}
benchmarkMessage <- function(msg, nm = NULL) {
date <- format(Sys.time(), "%d-%m-%Y %H:%M:%S")
msg <- paste0("{.pkg {date}} Benchmark ", msg) |>
rlang::set_names(nm = nm)
cli::cli_inform(msg)
}
Loading

0 comments on commit 0e60731

Please sign in to comment.