-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from darwin-eu/release_040
Release 040
- Loading branch information
Showing
98 changed files
with
2,202 additions
and
1,228 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
Version: 0.1.0 | ||
Date: 2024-04-16 09:31:59 UTC | ||
SHA: 9f211b1219098ced0c1f760a82a2870b3caf7cfa | ||
Version: 0.4.0 | ||
Date: 2024-11-26 21:37:09 UTC | ||
SHA: d15456e2ef022d0d6a4af9338a56cb1c383d5bc6 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: CohortCharacteristics | ||
Type: Package | ||
Title: Summarise and Visualise Characteristics of Patients in the OMOP CDM | ||
Version: 0.3.0.900 | ||
Version: 0.4.0 | ||
Authors@R: c( | ||
person("Marti", "Catala", , "[email protected]", | ||
role = c("aut", "cre"), comment = c(ORCID = "0000-0003-3308-9905")), | ||
|
@@ -27,14 +27,14 @@ Encoding: UTF-8 | |
LazyData: true | ||
RoxygenNote: 7.3.2 | ||
Imports: | ||
CDMConnector (>= 1.3.2), | ||
CDMConnector (>= 1.6.0), | ||
dplyr, | ||
tidyr, | ||
rlang, | ||
cli, | ||
stringr, | ||
omopgenerics (>= 0.3.0), | ||
visOmopResults (>= 0.4.0), | ||
omopgenerics (>= 0.4.0), | ||
visOmopResults (>= 0.5.0), | ||
PatientProfiles (>= 1.2.0), | ||
snakecase, | ||
lifecycle, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,264 @@ | ||
|
||
#' Benchmark the main functions of CohortCharacteristics package. | ||
#' | ||
#' @param cohort A cohort_table from a cdm_reference. | ||
#' @param analysis Set of analysis to perform, must be a subset of: "count", | ||
#' "attrition", "characteristics", "overlap", "timing" and | ||
#' "large scale characteristics". | ||
#' | ||
#' @return A summarised_result object. | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' CDMConnector::requireEunomia() | ||
#' con <- duckdb::dbConnect(duckdb::duckdb(), CDMConnector::eunomiaDir()) | ||
#' cdm <- CDMConnector::cdmFromCon( | ||
#' con = con, cdmSchema = "main", writeSchema = "main" | ||
#' ) | ||
#' | ||
#' cdm <- CDMConnector::generateConceptCohortSet( | ||
#' cdm = cdm, | ||
#' conceptSet = list(sinusitis = 40481087, pharyngitis = 4112343), | ||
#' name = "my_cohort" | ||
#' ) | ||
#' | ||
#' benchmarkCohortCharacteristics(cdm$my_cohort) | ||
#' | ||
#' } | ||
benchmarkCohortCharacteristics <- function(cohort, | ||
analysis = c("count", "attrition", "characteristics", "overlap", "timing", "large scale characteristics")) { | ||
# initial checks | ||
cohort <- omopgenerics::validateCohortArgument(cohort) | ||
omopgenerics::assertChoice(analysis, choices = c( | ||
"count", "attrition", "characteristics", "overlap", "timing", | ||
"large scale characteristics"), unique = TRUE) | ||
|
||
nPerson <- omopgenerics::cdmReference(cohort)$person |> | ||
dplyr::ungroup() |> | ||
dplyr::tally() |> | ||
dplyr::pull() |> | ||
as.character() | ||
|
||
set <- dplyr::tibble( | ||
result_id = 1L, | ||
result_type = "benchmark_cohort_characteristics", | ||
package_name = "CohortCharacteristics", | ||
package_version = pkgVersion(), | ||
cohort = dplyr::coalesce(omopgenerics::tableName(cohort), "temp"), | ||
source_type = omopgenerics::sourceType(cohort), | ||
person_n = nPerson, | ||
!!!listCounts(cohort) | ||
) | ||
|
||
result <- dplyr::tibble(task = character(), time = numeric()) | ||
|
||
if ("count" %in% analysis) { | ||
task <- "summariseCohortCount" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCohortCount() |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
} | ||
|
||
if ("attrition" %in% analysis) { | ||
task <- "summariseCohortAttrition" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCohortAttrition() |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
} | ||
|
||
if ("characteristics" %in% analysis) { | ||
task <- "summariseCharacteristics demographics" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCharacteristics( | ||
counts = FALSE, | ||
demographics = TRUE | ||
) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
|
||
task <- "summariseCharacteristics number visits before" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCharacteristics( | ||
counts = FALSE, | ||
demographics = FALSE, | ||
tableIntersectCount = list("Number visits" = list( | ||
tableName = "visit_occurrence", window = c(-Inf, 0) | ||
)) | ||
) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
|
||
task <- "summariseCharacteristics covariates before" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCharacteristics( | ||
counts = FALSE, | ||
demographics = FALSE, | ||
cohortIntersectFlag = list("covariates before" = list( | ||
targetCohortTable = omopgenerics::tableName(cohort), | ||
window = c(-Inf, 0) | ||
)) | ||
) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
} | ||
|
||
if ("overlap" %in% analysis) { | ||
task <- "summariseCohortOverlap subjects" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCohortOverlap() |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
|
||
task <- "summariseCohortOverlap subjects and cohort_start_date" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCohortOverlap(overlapBy = c("subject_id", "cohort_start_date")) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
} | ||
|
||
if ("timing" %in% analysis) { | ||
task <- "summariseCohortTiming no density" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCohortTiming(estimates = c("min", "q25", "median", "q75", "max")) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
|
||
task <- "summariseCohortTiming density" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseCohortTiming(estimates = c("density")) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
} | ||
|
||
if ("large scale characteristics" %in% analysis) { | ||
windows <- list(c(-Inf, -1), c(0, 0), c(1, Inf)) | ||
|
||
task <- "summariseLargeScaleCharacteristics event condition_occurrence" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseLargeScaleCharacteristics( | ||
window = windows, | ||
eventInWindow = "condition_occurrence" | ||
) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
|
||
task <- "summariseLargeScaleCharacteristics episode drug_exposure" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseLargeScaleCharacteristics( | ||
window = windows, | ||
episodeInWindow = "drug_exposure" | ||
) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
|
||
task <- "summariseLargeScaleCharacteristics source measurement" | ||
benchmarkMessage(task) | ||
t0 <- Sys.time() | ||
cohort |> | ||
summariseLargeScaleCharacteristics( | ||
window = windows, | ||
eventInWindow = "measurement", | ||
includeSource = TRUE | ||
) |> | ||
suppressMessages() |> | ||
invisible() | ||
time <- as.numeric(Sys.time() - t0) | ||
result <- result |> | ||
dplyr::union_all(dplyr::tibble(task = task, time = time)) | ||
} | ||
|
||
result |> | ||
omopgenerics::uniteGroup(cols = "task") |> | ||
omopgenerics::uniteStrata() |> | ||
omopgenerics::uniteAdditional() |> | ||
dplyr::mutate( | ||
result_id = 1L, | ||
cdm_name = omopgenerics::cdmName(cohort), | ||
variable_name = "overall", | ||
variable_level = "overall", | ||
estimate_name = "time_in_seconds", | ||
estimate_type = "numeric", | ||
estimate_value = as.character(round(.data$time, 3)) | ||
) |> | ||
dplyr::select(!"time") |> | ||
omopgenerics::newSummarisedResult(settings = set) | ||
} | ||
listCounts <- function(cohort) { | ||
counts <- omopgenerics::settings(cohort) |> | ||
dplyr::select("cohort_name", "cohort_definition_id") |> | ||
dplyr::left_join( | ||
omopgenerics::cohortCount(cohort) |> | ||
dplyr::select("cohort_definition_id", "number_records"), | ||
by = "cohort_definition_id" | ||
) |> | ||
dplyr::mutate(number_records = dplyr::coalesce(.data$number_records, 0L)) | ||
counts <- counts$number_records |> | ||
as.list() |> | ||
rlang::set_names(nm = counts$cohort_name) | ||
counts$total_counts <- sum(unlist(counts)) | ||
purrr::map(counts, as.character) | ||
} | ||
benchmarkMessage <- function(msg, nm = NULL) { | ||
date <- format(Sys.time(), "%d-%m-%Y %H:%M:%S") | ||
msg <- paste0("{.pkg {date}} Benchmark ", msg) |> | ||
rlang::set_names(nm = nm) | ||
cli::cli_inform(msg) | ||
} |
Oops, something went wrong.