Merge pull request #1 from darwin-eu/release_040

Release 040
darwin-eu · Nov 26, 2024 · 0e60731 · 0e60731
2 parents d7a8608 + 44d733f
commit 0e60731
Show file tree

Hide file tree

Showing 98 changed files with 2,202 additions and 1,228 deletions.
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 0.1.0
-Date: 2024-04-16 09:31:59 UTC
-SHA: 9f211b1219098ced0c1f760a82a2870b3caf7cfa
+Version: 0.4.0
+Date: 2024-11-26 21:37:09 UTC
+SHA: d15456e2ef022d0d6a4af9338a56cb1c383d5bc6
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: CohortCharacteristics
 Type: Package
 Title: Summarise and Visualise Characteristics of Patients in the OMOP CDM
-Version: 0.3.0.900
+Version: 0.4.0
 Authors@R: c(
     person("Marti", "Catala", , "[email protected]", 
     role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3308-9905")),
@@ -27,14 +27,14 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.3.2
 Imports: 
-    CDMConnector (>= 1.3.2),
+    CDMConnector (>= 1.6.0),
     dplyr,
     tidyr,
     rlang,
     cli,
     stringr,
-    omopgenerics (>= 0.3.0),
-    visOmopResults (>= 0.4.0),
+    omopgenerics (>= 0.4.0),
+    visOmopResults (>= 0.5.0),
     PatientProfiles (>= 1.2.0),
     snakecase,
     lifecycle,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,12 @@
 # Generated by roxygen2: do not edit by hand
 
+export(additionalColumns)
+export(availablePlotColumns)
+export(availableTableColumns)
+export(benchmarkCohortCharacteristics)
 export(bind)
 export(exportSummarisedResult)
+export(groupColumns)
 export(importSummarisedResult)
 export(mockCohortCharacteristics)
 export(mockDisconnect)
@@ -13,6 +18,8 @@ export(plotCohortTiming)
 export(plotComparedLargeScaleCharacteristics)
 export(plotLargeScaleCharacteristics)
 export(settings)
+export(settingsColumns)
+export(strataColumns)
 export(summariseCharacteristics)
 export(summariseCohortAttrition)
 export(summariseCohortCount)
@@ -27,18 +34,20 @@ export(tableCohortOverlap)
 export(tableCohortTiming)
 export(tableLargeScaleCharacteristics)
 export(tidy)
-export(tidyColumns)
 importFrom(PatientProfiles,mockDisconnect)
 importFrom(dplyr,"%>%")
 importFrom(lifecycle,deprecated)
+importFrom(omopgenerics,additionalColumns)
 importFrom(omopgenerics,bind)
 importFrom(omopgenerics,exportSummarisedResult)
+importFrom(omopgenerics,groupColumns)
 importFrom(omopgenerics,importSummarisedResult)
 importFrom(omopgenerics,settings)
+importFrom(omopgenerics,settingsColumns)
+importFrom(omopgenerics,strataColumns)
 importFrom(omopgenerics,suppress)
+importFrom(omopgenerics,tidy)
 importFrom(rlang,"%||%")
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(rlang,.env)
-importFrom(visOmopResults,tidy)
-importFrom(visOmopResults,tidyColumns)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,24 @@
+# CohortCharacteristics 0.4.0
+
+* Update links darwin-eu-dev -> darwin-eu @catalamarti
+* Typo in plotCohortAttrition by @martaalcalde
+* uniqueCombination paramters to work in a general way @catalamarti
+* minimum 5 days in x axis for density plots @catalamarti
+* improve documentation of minimumFrequency by @catalamarti
+* add show argument to plotCohortAttrition by @catalamarti
+* simplify code for overlap and fix edge case with 0 overlap by @catalamarti
+* arrange ageGroups by order that they are provided in summariseCharacteristics by @catalamarti
+* otherVariablesEstimates -> estimates in summariseCharacteristics by @catalamarti
+* add overlapBy argument to summariseCohortOverlap by @catalamarti
+* Compatibility with visOmopResults 0.5.0 and omopgenerics 0.4.0 by @catalamarti
+* add message if different pkg versions by @catalamarti
+* make sure settings are characters by @catalamarti
+* use requireEunomia and CDMConnector 1.6.0 by @catalamarti
+* add benchmark function by @catalamarti
+* Consistent documentation by @catalamarti
+* Use subjects only when overlapBy = "subject_id" by @catalamarti
+* add cohortId to LSC by @catalamarti
+
 # CohortCharacteristics 0.3.0
 
 * **breaking change** Complete refactor of `table*` and `plot*` functions 

diff --git a/R/benchmarkCohortCharacteristics.R b/R/benchmarkCohortCharacteristics.R
@@ -0,0 +1,264 @@
+
+#' Benchmark the main functions of CohortCharacteristics package.
+#'
+#' @param cohort A cohort_table from a cdm_reference.
+#' @param analysis Set of analysis to perform, must be a subset of: "count",
+#' "attrition", "characteristics", "overlap", "timing" and
+#' "large scale characteristics".
+#'
+#' @return A summarised_result object.
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' CDMConnector::requireEunomia()
+#' con <- duckdb::dbConnect(duckdb::duckdb(), CDMConnector::eunomiaDir())
+#' cdm <- CDMConnector::cdmFromCon(
+#'   con = con, cdmSchema = "main", writeSchema = "main"
+#' )
+#'
+#' cdm <- CDMConnector::generateConceptCohortSet(
+#'   cdm = cdm,
+#'   conceptSet = list(sinusitis = 40481087, pharyngitis = 4112343),
+#'   name = "my_cohort"
+#' )
+#'
+#' benchmarkCohortCharacteristics(cdm$my_cohort)
+#'
+#' }
+benchmarkCohortCharacteristics <- function(cohort,
+                                           analysis = c("count", "attrition", "characteristics", "overlap", "timing", "large scale characteristics")) {
+  # initial checks
+  cohort <- omopgenerics::validateCohortArgument(cohort)
+  omopgenerics::assertChoice(analysis, choices = c(
+    "count", "attrition", "characteristics", "overlap", "timing",
+    "large scale characteristics"), unique = TRUE)
+
+  nPerson <- omopgenerics::cdmReference(cohort)$person |>
+    dplyr::ungroup() |>
+    dplyr::tally() |>
+    dplyr::pull() |>
+    as.character()
+
+  set <- dplyr::tibble(
+    result_id = 1L,
+    result_type = "benchmark_cohort_characteristics",
+    package_name = "CohortCharacteristics",
+    package_version = pkgVersion(),
+    cohort = dplyr::coalesce(omopgenerics::tableName(cohort), "temp"),
+    source_type = omopgenerics::sourceType(cohort),
+    person_n = nPerson,
+    !!!listCounts(cohort)
+  )
+
+  result <- dplyr::tibble(task = character(), time = numeric())
+
+  if ("count" %in% analysis) {
+    task <- "summariseCohortCount"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCohortCount() |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+  }
+
+  if ("attrition" %in% analysis) {
+    task <- "summariseCohortAttrition"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCohortAttrition() |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+  }
+
+  if ("characteristics" %in% analysis) {
+    task <- "summariseCharacteristics demographics"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCharacteristics(
+        counts = FALSE,
+        demographics = TRUE
+      ) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+
+    task <- "summariseCharacteristics number visits before"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCharacteristics(
+        counts = FALSE,
+        demographics = FALSE,
+        tableIntersectCount = list("Number visits" = list(
+          tableName = "visit_occurrence", window = c(-Inf, 0)
+        ))
+      ) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+
+    task <- "summariseCharacteristics covariates before"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCharacteristics(
+        counts = FALSE,
+        demographics = FALSE,
+        cohortIntersectFlag = list("covariates before" = list(
+          targetCohortTable = omopgenerics::tableName(cohort),
+          window = c(-Inf, 0)
+        ))
+      ) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+  }
+
+  if ("overlap" %in% analysis) {
+    task <- "summariseCohortOverlap subjects"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCohortOverlap() |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+
+    task <- "summariseCohortOverlap subjects and cohort_start_date"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCohortOverlap(overlapBy = c("subject_id", "cohort_start_date")) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+  }
+
+  if ("timing" %in% analysis) {
+    task <- "summariseCohortTiming no density"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCohortTiming(estimates = c("min", "q25", "median", "q75", "max")) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+
+    task <- "summariseCohortTiming density"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseCohortTiming(estimates = c("density")) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+  }
+
+  if ("large scale characteristics" %in% analysis) {
+    windows <- list(c(-Inf, -1), c(0, 0), c(1, Inf))
+
+    task <- "summariseLargeScaleCharacteristics event condition_occurrence"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseLargeScaleCharacteristics(
+        window = windows,
+        eventInWindow = "condition_occurrence"
+      ) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+
+    task <- "summariseLargeScaleCharacteristics episode drug_exposure"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseLargeScaleCharacteristics(
+        window = windows,
+        episodeInWindow = "drug_exposure"
+      ) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+
+    task <- "summariseLargeScaleCharacteristics source measurement"
+    benchmarkMessage(task)
+    t0 <- Sys.time()
+    cohort |>
+      summariseLargeScaleCharacteristics(
+        window = windows,
+        eventInWindow = "measurement",
+        includeSource = TRUE
+      ) |>
+      suppressMessages() |>
+      invisible()
+    time <- as.numeric(Sys.time() - t0)
+    result <- result |>
+      dplyr::union_all(dplyr::tibble(task = task, time = time))
+  }
+
+  result |>
+    omopgenerics::uniteGroup(cols = "task") |>
+    omopgenerics::uniteStrata() |>
+    omopgenerics::uniteAdditional() |>
+    dplyr::mutate(
+      result_id = 1L,
+      cdm_name = omopgenerics::cdmName(cohort),
+      variable_name = "overall",
+      variable_level = "overall",
+      estimate_name = "time_in_seconds",
+      estimate_type = "numeric",
+      estimate_value = as.character(round(.data$time, 3))
+    ) |>
+    dplyr::select(!"time") |>
+    omopgenerics::newSummarisedResult(settings = set)
+}
+listCounts <- function(cohort) {
+  counts <- omopgenerics::settings(cohort) |>
+    dplyr::select("cohort_name", "cohort_definition_id") |>
+    dplyr::left_join(
+      omopgenerics::cohortCount(cohort) |>
+        dplyr::select("cohort_definition_id", "number_records"),
+      by = "cohort_definition_id"
+    ) |>
+    dplyr::mutate(number_records = dplyr::coalesce(.data$number_records, 0L))
+  counts <- counts$number_records |>
+    as.list() |>
+    rlang::set_names(nm = counts$cohort_name)
+  counts$total_counts <- sum(unlist(counts))
+  purrr::map(counts, as.character)
+}
+benchmarkMessage <- function(msg, nm = NULL) {
+  date <- format(Sys.time(), "%d-%m-%Y %H:%M:%S")
+  msg <- paste0("{.pkg {date}} Benchmark ", msg) |>
+    rlang::set_names(nm = nm)
+  cli::cli_inform(msg)
+}