Skip to content

Commit

Permalink
metrics: hack fix missing funder and link columns #183
Browse files Browse the repository at this point in the history
  • Loading branch information
njahn82 committed Mar 5, 2021
1 parent 0f61ce7 commit ab11f53
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 36 deletions.
36 changes: 24 additions & 12 deletions R/cr_md_funder.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,32 @@
#' @family transform
#' @export
cr_funder_df <- function(cr) {
if (!"funder" %in% colnames(cr)) {
out <- NULL
} else {
out <- cr %>%
empty_funder(cr) %>%
dplyr::select(
doi,
container_title = container.title,
publisher,
issued,
issued_year,
funder
.data$doi,
container_title = .data$container.title,
.data$publisher,
.data$issued,
.data$issued_year,
.data$funder
) %>%
tidyr::unnest(c(funder), keep_empty = TRUE) %>%
dplyr::rename(fundref_doi = DOI, doi_asserted_by = doi.asserted.by)
tidyr::unnest("funder", keep_empty = TRUE) %>%
dplyr::rename(fundref_doi = .data$DOI, doi_asserted_by = .data$doi.asserted.by)
}

#' dirty hack to prevent for missing funder json nodes
#' https://github.com/subugoe/metacheck/issues/183
#'
#' @noRd
empty_funder <- function(cr) {
if (!"funder" %in% colnames(cr)) {
funder <- list(tibble::tibble(
DOI = as.character(NA), name = as.character(NA), doi.asserted.by = as.character(NA)
))
out <- mutate(cr, funder = funder)
} else {
out <- cr
}
out
}

8 changes: 4 additions & 4 deletions R/cr_md_overview.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ cr_compliance_overview <- function(cr) {

tdm_df <- cr_tdm_df(cr)
compliant_tdm <- filter(tdm_df,
is_tdm_compliant == TRUE)
.data$is_tdm_compliant == TRUE)
funder_df <- cr_funder_df(cr)
has_funder <- funder_df %>%
filter(!is.na(.data$name))
orcid_df <- cr_has_orcid(cr)

cr_overview <- cr %>%
Expand All @@ -22,9 +24,7 @@ cr_compliance_overview <- function(cr) {
has_compliant_cc = .data$doi %in%
filter(cc_df, .data$check_result == "All fine!")$doi,
has_tdm_links = .data$doi %in% compliant_tdm$doi,
has_funder_info = unlist(across(
any_of("funder"), ~ sapply(.x, empty_list)
)),
has_funder_info = .data$doi %in% has_funder$doi,
has_orcid = .data$doi %in% orcid_df$doi,
has_open_abstract = unlist(across(any_of("abstract"), ~ !is.na(.x))),
has_open_refs = unlist(across(
Expand Down
26 changes: 20 additions & 6 deletions R/cr_md_tdm.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,41 @@
#' @family transform
#' @export
cr_tdm_df <- function(cr) {
if ("link" %in% colnames(cr)) {
out <- cr %>%
select(
empty_tdm(cr) %>%
dplyr::select(
.data$doi,
container_title = .data$container.title,
.data$publisher,
.data$issued,
.data$issued_year,
.data$link
) %>%
unnest(cols = "link", keep_empty = TRUE) %>%
mutate(
tidyr::unnest(cols = "link", keep_empty = TRUE) %>%
dplyr::mutate(
is_tdm_compliant = ifelse(
.data$content.version == "vor" &
.data$intended.application == "text-mining",
TRUE,
FALSE
)
)
}

#' dirty hack to prevent for missing TDM json nodes
#' https://github.com/subugoe/metacheck/issues/183
#'
#' @noRd
empty_tdm <- function(cr) {
if (!"link" %in% colnames(cr)) {
link <- list(tibble::tibble(
URL = as.character(NA),
content.type = as.character(NA),
content.version = as.character(NA),
intended.application = as.character(NA)
))
out <- dplyr::mutate(cr, link = link)
} else {
out <- NULL
out <- cr
}
out
}
24 changes: 11 additions & 13 deletions R/metrics_funder.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,29 +29,27 @@
funder_metrics <- function(funder_info = NULL) {
is_cr_funder_df(funder_info)


if(length(unique(funder_info$name)) > 5) {
if(nrow(funder_info[all(!is.na("name")),]) == nrow(funder_info)) {
out <- dplyr::mutate(funder_info, name = "No funding info")
} else if (length(unique(funder_info$name)) > 5) {
out <- funder_info %>%
mutate(name = ifelse(is.na(.data$name), "No funding info", .data$name)) %>%
mutate(name = forcats::fct_lump_n(.data$name, 5, other_level = "Other funders")) %>%
mutate(name = forcats::fct_infreq(.data$name)) %>%
mutate(name = forcats::fct_relevel(.data$name, "Other funders", after = Inf)) %>%
mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf)) %>%
group_by(indicator = .data$name) %>%
summarise(value = n_distinct(.data$doi)) %>%
dplyr::ungroup() %>%
mutate(prop = .data$value / length(unique(funder_info$doi)) * 100)
mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf))
} else {
out <-funder_info %>%
mutate(name = ifelse(is.na(.data$name), "No funding info", .data$name)) %>%
mutate(name = forcats::fct_infreq(.data$name)) %>%
mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf)) %>%
group_by(indicator = .data$name) %>%
summarise(value = n_distinct(.data$doi)) %>%
dplyr::ungroup() %>%
mutate(prop = .data$value / length(unique(funder_info$doi)) * 100)
mutate(name = forcats::fct_relevel(.data$name, "No funding info", after = Inf))
}
return(out)
ind <- out %>%
group_by(indicator = .data$name) %>%
summarise(value = n_distinct(.data$doi)) %>%
dplyr::ungroup() %>%
mutate(prop = .data$value / length(unique(funder_info$doi)) * 100)
return(ind)
}

#' Check if funder compliance data is provided
Expand Down
2 changes: 1 addition & 1 deletion R/metrics_tdm.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
tdm_metrics <- function(tdm = NULL) {
is_cr_tdm_df(tdm)
compliant_tdm <- tdm %>%
filter(is_tdm_compliant == TRUE)
filter(.data$is_tdm_compliant == TRUE)

non_compliant_tdm <- tdm[!tdm$doi %in% compliant_tdm$doi,]

Expand Down

0 comments on commit ab11f53

Please sign in to comment.