From ab11f53224022d668c65ba8f98d592a7c88fea20 Mon Sep 17 00:00:00 2001 From: Najko Jahn Date: Fri, 5 Mar 2021 14:42:37 +0100 Subject: [PATCH] metrics: hack fix missing funder and link columns #183 --- R/cr_md_funder.R | 36 ++++++++++++++++++++++++------------ R/cr_md_overview.R | 8 ++++---- R/cr_md_tdm.R | 26 ++++++++++++++++++++------ R/metrics_funder.R | 24 +++++++++++------------- R/metrics_tdm.R | 2 +- 5 files changed, 60 insertions(+), 36 deletions(-) diff --git a/R/cr_md_funder.R b/R/cr_md_funder.R index 8cada8b1..cd557fb7 100644 --- a/R/cr_md_funder.R +++ b/R/cr_md_funder.R @@ -4,20 +4,32 @@ #' @family transform #' @export cr_funder_df <- function(cr) { - if (!"funder" %in% colnames(cr)) { - out <- NULL - } else { - out <- cr %>% + empty_funder(cr) %>% dplyr::select( - doi, - container_title = container.title, - publisher, - issued, - issued_year, - funder + .data$doi, + container_title = .data$container.title, + .data$publisher, + .data$issued, + .data$issued_year, + .data$funder ) %>% - tidyr::unnest(c(funder), keep_empty = TRUE) %>% - dplyr::rename(fundref_doi = DOI, doi_asserted_by = doi.asserted.by) + tidyr::unnest("funder", keep_empty = TRUE) %>% + dplyr::rename(fundref_doi = .data$DOI, doi_asserted_by = .data$doi.asserted.by) +} + +#' Dirty hack: add an all-NA placeholder `funder` list-column when funder JSON nodes are missing +#' https://github.com/subugoe/metacheck/issues/183 +#' +#' @noRd +empty_funder <- function(cr) { + if (!"funder" %in% colnames(cr)) { + funder <- list(tibble::tibble( + DOI = as.character(NA), name = as.character(NA), doi.asserted.by = as.character(NA) + )) + out <- dplyr::mutate(cr, funder = funder) + } else { + out <- cr } out } + diff --git a/R/cr_md_overview.R b/R/cr_md_overview.R index 69a9d20b..9a36076b 100644 --- a/R/cr_md_overview.R +++ b/R/cr_md_overview.R @@ -11,8 +11,10 @@ cr_compliance_overview <- function(cr) { tdm_df <- cr_tdm_df(cr) compliant_tdm <- filter(tdm_df, - is_tdm_compliant == TRUE) + .data$is_tdm_compliant == TRUE) funder_df <- cr_funder_df(cr) + 
has_funder <- funder_df %>% + filter(!is.na(.data$name)) orcid_df <- cr_has_orcid(cr) cr_overview <- cr %>% @@ -22,9 +24,7 @@ cr_compliance_overview <- function(cr) { has_compliant_cc = .data$doi %in% filter(cc_df, .data$check_result == "All fine!")$doi, has_tdm_links = .data$doi %in% compliant_tdm$doi, - has_funder_info = unlist(across( - any_of("funder"), ~ sapply(.x, empty_list) - )), + has_funder_info = .data$doi %in% has_funder$doi, has_orcid = .data$doi %in% orcid_df$doi, has_open_abstract = unlist(across(any_of("abstract"), ~ !is.na(.x))), has_open_refs = unlist(across( diff --git a/R/cr_md_tdm.R b/R/cr_md_tdm.R index 279abc82..7d614db6 100644 --- a/R/cr_md_tdm.R +++ b/R/cr_md_tdm.R @@ -6,9 +6,8 @@ #' @family transform #' @export cr_tdm_df <- function(cr) { - if ("link" %in% colnames(cr)) { - out <- cr %>% - select( + empty_tdm(cr) %>% + dplyr::select( .data$doi, container_title = .data$container.title, .data$publisher, @@ -16,8 +15,8 @@ cr_tdm_df <- function(cr) { .data$issued_year, .data$link ) %>% - unnest(cols = "link", keep_empty = TRUE) %>% - mutate( + tidyr::unnest(cols = "link", keep_empty = TRUE) %>% + dplyr::mutate( is_tdm_compliant = ifelse( .data$content.version == "vor" & .data$intended.application == "text-mining", @@ -25,8 +24,23 @@ cr_tdm_df <- function(cr) { FALSE ) ) +} + +#' Dirty hack: add an all-NA placeholder `link` list-column when TDM JSON nodes are missing +#' https://github.com/subugoe/metacheck/issues/183 +#' +#' @noRd +empty_tdm <- function(cr) { + if (!"link" %in% colnames(cr)) { + link <- list(tibble::tibble( + URL = as.character(NA), + content.type = as.character(NA), + content.version = as.character(NA), + intended.application = as.character(NA) + )) + out <- dplyr::mutate(cr, link = link) } else { - out <- NULL + out <- cr } out } diff --git a/R/metrics_funder.R b/R/metrics_funder.R index 09dcf4ba..c3218076 100644 --- a/R/metrics_funder.R +++ b/R/metrics_funder.R @@ -29,29 +29,27 @@ funder_metrics <- function(funder_info = NULL) { is_cr_funder_df(funder_info) - - 
if(length(unique(funder_info$name)) > 5) { + if (all(is.na(funder_info$name))) { + out <- dplyr::mutate(funder_info, name = "No funding info") + } else if (length(unique(funder_info$name)) > 5) { out <- funder_info %>% mutate(name = ifelse(is.na(.data$name), "No funding info", .data$name)) %>% mutate(name = forcats::fct_lump_n(.data$name, 5, other_level = "Other funders")) %>% mutate(name = forcats::fct_infreq(.data$name)) %>% mutate(name = forcats::fct_relevel(.data$name, "Other funders", after = Inf)) %>% - mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf)) %>% - group_by(indicator = .data$name) %>% - summarise(value = n_distinct(.data$doi)) %>% - dplyr::ungroup() %>% - mutate(prop = .data$value / length(unique(funder_info$doi)) * 100) + mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf)) } else { out <-funder_info %>% mutate(name = ifelse(is.na(.data$name), "No funding info", .data$name)) %>% mutate(name = forcats::fct_infreq(.data$name)) %>% - mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf)) %>% - group_by(indicator = .data$name) %>% - summarise(value = n_distinct(.data$doi)) %>% - dplyr::ungroup() %>% - mutate(prop = .data$value / length(unique(funder_info$doi)) * 100) + mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf)) } - return(out) + ind <- out %>% + group_by(indicator = .data$name) %>% + summarise(value = n_distinct(.data$doi)) %>% + dplyr::ungroup() %>% + mutate(prop = .data$value / length(unique(funder_info$doi)) * 100) + return(ind) } #' Check if funder compliance data is provided diff --git a/R/metrics_tdm.R b/R/metrics_tdm.R index 50daecf0..85986335 100644 --- a/R/metrics_tdm.R +++ b/R/metrics_tdm.R @@ -31,7 +31,7 @@ tdm_metrics <- function(tdm = NULL) { is_cr_tdm_df(tdm) compliant_tdm <- tdm %>% - filter(is_tdm_compliant == TRUE) + filter(.data$is_tdm_compliant == TRUE) non_compliant_tdm <- tdm[!tdm$doi 
%in% compliant_tdm$doi,]