Skip to content

Commit

Permalink
Merge branch 'main' of github.com:darwin-eu-dev/CDMConnector
Browse files Browse the repository at this point in the history
  • Loading branch information
ablack3 committed Mar 13, 2024
2 parents aed2118 + c8fdedd commit 90cb508
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 23 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ URL: https://darwin-eu.github.io/CDMConnector/, https://github.com/darwin-eu/CDM
BugReports: https://github.com/darwin-eu/CDMConnector/issues
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Depends:
R (>= 4.0)
Imports:
Expand Down
14 changes: 10 additions & 4 deletions R/cdm.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#'. field in the CDM_SOURCE table will be used.
#' @param achilles_schema,achillesSchema An optional schema in the CDM database
#' that contains achilles tables.
#' @param .soft_validation,.softValidation If TRUE fewer validation checks will
#' be performed.
#'
#' @return A list of dplyr database table references pointing to CDM tables
#' @importFrom dplyr all_of matches starts_with ends_with contains
Expand All @@ -25,7 +27,8 @@ cdm_from_con <- function(con,
cohort_tables = NULL,
cdm_version = "5.3",
cdm_name = NULL,
achilles_schema = NULL) {
achilles_schema = NULL,
.soft_validation = FALSE) {

if (!DBI::dbIsValid(con)) {
cli::cli_abort("The connection is not valid. Is the database connection open?")
Expand Down Expand Up @@ -124,7 +127,8 @@ cdm_from_con <- function(con,
cdm[[cohort_table]] <- cdm[[cohort_table]] |>
omopgenerics::newCohortTable(
cohortSetRef = x[[2]],
cohortAttritionRef = x[[3]]
cohortAttritionRef = x[[3]],
.softValidation = .soft_validation
)
}

Expand Down Expand Up @@ -180,15 +184,17 @@ cdmFromCon <- function(con,
cohortTables = NULL,
cdmVersion = "5.3",
cdmName = NULL,
achillesSchema = NULL) {
achillesSchema = NULL,
.softValidation = FALSE) {
cdm_from_con(
con = con,
cdm_schema = cdmSchema,
write_schema = writeSchema,
cohort_tables = cohortTables,
cdm_version = cdmVersion,
cdm_name = cdmName,
achilles_schema = achillesSchema
achilles_schema = achillesSchema,
.soft_validation = .softValidation
)
}

Expand Down
35 changes: 23 additions & 12 deletions R/cdmSubset.R
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,10 @@ cdm_subset_cohort <- function(cdm,
#'
#' `r lifecycle::badge("experimental")`
#'
#' @param cdm A cdm_reference object
#' @param n Number of persons to include in the cdm
#' @param cdm A cdm_reference object.
#' @param n Number of persons to include in the cdm.
#' @param seed Seed for the random number generator.
#' @param name Name of the table that will contain the sample of persons.
#'
#' @return A modified cdm_reference object where all clinical tables are lazy
#' queries pointing to subset
Expand Down Expand Up @@ -210,21 +212,30 @@ cdm_subset_cohort <- function(cdm,
#'
#' DBI::dbDisconnect(con, shutdown = TRUE)
#' }
cdmSample <- function(cdm, n) {
cdmSample <- function(cdm,
n,
seed = sample.int(1e6, 1),
name = "person_sample") {
checkmate::assertClass(cdm, "cdm_reference")
checkmate::assertIntegerish(n, len = 1, lower = 1, upper = 1e9, null.ok = FALSE)
checkmate::assertIntegerish(seed, len = 1, lower = 1, null.ok = FALSE)
checkmate::assertCharacter(name, len = 1, any.missing = FALSE)

assert_tables(cdm, "person")
subset <- cdm[["person"]] |>
dplyr::pull("person_id") |>
unique() |>
sort()

# Note temporary = TRUE in dbWriteTable does not work on all dbms but we want a temp table here.
person_subset <- cdm[["person"]] %>%
dplyr::select("person_id") %>%
dplyr::distinct() %>%
dplyr::slice_sample(n = n) %>%
dplyr::rename_all(tolower) %>%
dplyr::compute()
if (length(subset) > n) {
set.seed(seed)
subset <- sample(x = subset, size = n, replace = FALSE)
}

cdm_sample_person(cdm, person_subset)
subset <- dplyr::tibble("person_id" = subset)

cdm <- omopgenerics::insertTable(cdm = cdm, name = name, table = subset)

cdm_sample_person(cdm, cdm[[name]])
}


Expand Down
17 changes: 17 additions & 0 deletions R/generateConceptCohortSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,28 @@ generateConceptCohortSet <- function(cdm,
excluded_records = 0,
excluded_subjects = 0)

if(utils::packageVersion("omopgenerics") < 0.1){
cdm[[name]] <- omopgenerics::newCohortTable(
table = cohortRef,
cohortSetRef = cohortSetRef,
cohortAttritionRef = cohortAttritionRef
)
} else {

cohortCodelistRef <-df %>%
dplyr::mutate(type = "index event") %>%
dplyr::select("cohort_definition_id",
"codelist_name" = "cohort_name",
"concept_id",
"type")

cdm[[name]] <- omopgenerics::newCohortTable(
table = cohortRef,
cohortSetRef = cohortSetRef,
cohortAttritionRef = cohortAttritionRef,
cohortCodelistRef = cohortCodelistRef
)
}

return(cdm)
}
Expand Down
12 changes: 8 additions & 4 deletions man/cdmSample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions man/cdm_from_con.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions tests/testthat/test-db-cdm.R
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,41 @@ test_that("adding achilles", {

DBI::dbDisconnect(con, shutdown = TRUE)
})

test_that("adding achilles", {
skip_if_not(eunomia_is_available())
skip_if_not_installed("duckdb")

con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomia_dir())

cohorts <- data.frame(
cohortId = c(1, 2, 3),
cohortName = c("X", "A", "B"),
type = c("target", "event", "event")
)

cohort_table <- dplyr::tribble(
~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
1, 5, as.Date("2020-01-01"), as.Date("2020-01-01"),
2, 5, as.Date("2020-01-10"), as.Date("2020-03-10")
)

dplyr::copy_to(dest = con,
df = cohort_table,
name = "test_cohort_table",
overwrite = TRUE)

expect_error(cdmFromCon(con,
cdmSchema = "main",
writeSchema = c(schema = "main"),
cohortTables = "test_cohort_table",
.softValidation = FALSE)) # error because cohorts out of obs

expect_no_error(cdmFromCon(con,
cdmSchema = "main",
writeSchema = c(schema = "main"),
cohortTables = "test_cohort_table",
.softValidation = TRUE)) # passes without validation

DBI::dbDisconnect(con, shutdown = TRUE)
})
49 changes: 49 additions & 0 deletions tests/testthat/test-db-sample.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
test_sample <- function(con, cdm_schema, write_schema) {

cdm <- cdm_from_con(
con = con, cdm_name = "test", cdm_schema = cdm_schema,
write_schema = write_schema
)

cdmSampled1 <- cdmSample(cdm = cdm, n = 100, seed = 123)
expect_true(cdmSampled1$person |> dplyr::tally() |> dplyr::pull() == 100)
expect_true("person_sample" %in% names(cdmSampled1))
expect_false("person_sample" %in% names(cdm))

cdmSampled2 <- cdmSample(cdm = cdm, n = 200, name = "sample_person")
expect_true(cdmSampled2$person |> dplyr::tally() |> dplyr::pull() == 200)
expect_true("sample_person" %in% names(cdmSampled2))
expect_false("person_sample" %in% names(cdmSampled2))

cdmSampled3 <- cdmSample(cdm = cdm, n = 100, seed = 123, name = "sample3")
expect_true(cdmSampled3$person |> dplyr::tally() |> dplyr::pull() == 100)
expect_true("sample3" %in% names(cdmSampled3))

cdmSampled4 <- cdmSample(cdm = cdm, n = 100, seed = 1234, name = "sample4")
expect_true(cdmSampled4$person |> dplyr::tally() |> dplyr::pull() == 100)
expect_true("sample4" %in% names(cdmSampled4))

expect_true(identical(
cdmSampled1$person |> dplyr::collect() |> dplyr::arrange(.data$person_id),
cdmSampled3$person |> dplyr::collect() |> dplyr::arrange(.data$person_id)
))

expect_false(identical(
cdmSampled1$person |> dplyr::collect() |> dplyr::arrange(.data$person_id),
cdmSampled4$person |> dplyr::collect() |> dplyr::arrange(.data$person_id)
))

}

for (dbtype in dbToTest) {
test_that(glue::glue("{dbtype} - sample database"), {
if (!(dbtype %in% ciTestDbs)) skip_on_ci()
if (dbtype != "duckdb") skip_on_cran() else skip_if_not_installed("duckdb")
con <- get_connection(dbtype)
cdm_schema <- get_cdm_schema(dbtype)
write_schema <- get_write_schema(dbtype)
skip_if(any(write_schema == "") || any(cdm_schema == "") || is.null(con))
test_sample(con, cdm_schema, write_schema)
disconnect(con)
})
}

0 comments on commit 90cb508

Please sign in to comment.