Skip to content

Commit

Permalink
major update aka skipping excel
Browse files Browse the repository at this point in the history
  • Loading branch information
SchmidtPaul committed Apr 13, 2021
1 parent 6a8ad47 commit f6fcb20
Show file tree
Hide file tree
Showing 69 changed files with 2,349 additions and 468 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CitaviR
Type: Package
Title: A set of tools for dealing with Citavi data
Version: 0.3.0
Version: 0.4.0
Authors@R:
c(person(given = "Paul",
family = "Schmidt",
Expand Down Expand Up @@ -33,7 +33,9 @@ Imports:
purrr,
stringdist,
RcppAlgos,
textcat
textcat,
DBI,
RSQLite
Suggests:
testthat (>= 3.0.0),
knitr,
Expand Down
13 changes: 12 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,25 @@
export("%>%")
export("%not_in%")
export(detect_language)
export(example_xlsx)
export(example_file)
export(find_obvious_dups)
export(find_potential_dups)
export(handle_obvious_dups)
export(read_Citavi_ctv6)
export(read_Citavi_xlsx)
export(update_Citavi_ctv6)
export(write_Citavi_xlsx)
import(crayon)
import(dplyr)
importFrom(DBI,dbClearResult)
importFrom(DBI,dbConnect)
importFrom(DBI,dbDisconnect)
importFrom(DBI,dbListTables)
importFrom(DBI,dbReadTable)
importFrom(DBI,dbSendQuery)
importFrom(RSQLite,SQLite)
importFrom(RcppAlgos,comboGeneral)
importFrom(dplyr,as_tibble)
importFrom(janitor,make_clean_names)
importFrom(magrittr,"%>%")
importFrom(openxlsx,write.xlsx)
Expand All @@ -29,6 +39,7 @@ importFrom(stringr,str_split)
importFrom(textcat,textcat)
importFrom(tidyr,fill)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,replace_na)
importFrom(tidyr,separate_rows)
importFrom(tidyr,unite)
importFrom(utils,tail)
8 changes: 0 additions & 8 deletions R/data.R

This file was deleted.

5 changes: 3 additions & 2 deletions R/detect_language.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,17 @@
#'
#' @details
#' `r lifecycle::badge("experimental")` \cr
#' The underyling core function determining the language is \code{textcat::textcat()}.
#' The underlying core function determining the language is \code{textcat::textcat()}.
#'
#' @examples
#' \dontrun{
#' CitDat <- CitaviR::diabetesprevalence %>%
#' dplyr::slice(1952:1955, 4390:4393)
#'
#' CitDat %>%
#' detect_language() %>%
#' dplyr::select(Abstract, det_lang, det_lang_wanted)
#'
#' }
#' @return A tibble containing at least one additional column: \code{det_lang}.
#' @importFrom textcat textcat
#' @importFrom tidyr unite
Expand Down
11 changes: 6 additions & 5 deletions R/example_xlsx.R → R/example_file.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @title Get path to example xlsx exported from Citavi
#' @title Get path to example file
#'
#' @description
#' `r lifecycle::badge("stable")` \cr
Expand All @@ -8,12 +8,13 @@
#' @param file Name of file. If `NULL`, all example files will be listed.
#' @export
#' @examples
#' example_xlsx()
#' example_xlsx("3dupsin5refs.xlsx")
#' example_file()
#' example_file("3dupsin5refs.xlsx")
#' example_file("3dupsin5refs/3dupsin5refs.ctv6")

example_xlsx <- function(file = NULL) {
example_file <- function(file = NULL) {
if (is.null(file)) {
dir(system.file("extdata", package = "CitaviR"))
dir(system.file("extdata", package = "CitaviR"), recursive = TRUE)
} else {
system.file("extdata", file, package = "CitaviR", mustWork = TRUE)
}
Expand Down
26 changes: 20 additions & 6 deletions R/find_obvious_dups.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
#' The following columns \bold{must be present}: \code{ID}, \code{Title}, \code{Year}.
#' @param dupInfoAfterID If TRUE (default), the newly created columns
#' \code{clean_title}, \code{clean_title_id}, \code{has_obv_dup} and \code{obv_dup_id}
#' are moved right next to the \code{ID} column.
#' are moved right next to the \code{ID} column. Additionally, the \code{ID} column is
#' moved to the first position.
#' @param preferDupsWithPDF If TRUE (default), obvious duplicates are sorted by their info
#' in columns \code{has_attachment} and/or \code{Locations} (given they are present in the dataset).
#' After sorting, duplicates with the most occurrences of \code{".pdf"} in \code{Locations} and a
Expand All @@ -16,8 +17,8 @@
#' was set to "English" so that column names are "Short Title" etc.
#'
#' @examples
#' path <- example_xlsx("3dupsin5refs.xlsx")
#' read_Citavi_xlsx(path) %>%
#' example_path <- example_file("3dupsin5refs/3dupsin5refs.ctv6")
#' read_Citavi_ctv6(example_path) %>%
#' find_obvious_dups() %>%
#' dplyr::select(clean_title:obv_dup_id)
#'
Expand All @@ -32,9 +33,17 @@

find_obvious_dups <- function(CitDat, dupInfoAfterID = TRUE, preferDupsWithPDF = TRUE) {

required_cols <- c("ID", "Title", "Year")
if ("StaticIDs" %in% names(CitDat)) {
CitDat <- CitDat %>% rename("RefID" = "StaticIDs")
OriginalRefIDLabel <- "StaticIDs"
} else {
CitDat <- CitDat %>% rename("RefID" = "ID")
OriginalRefIDLabel <- "ID"
}

required_cols <- c("RefID", "Title", "Year")

# ID, Title & Year present? -----------------------------------------------
# RefID, Title & Year present? --------------------------------------------
for (col_name_i in required_cols) {
if (col_name_i %not_in% names(CitDat)) {
stop(paste(col_name_i, "column is missing!"))
Expand Down Expand Up @@ -96,16 +105,21 @@ find_obvious_dups <- function(CitDat, dupInfoAfterID = TRUE, preferDupsWithPDF =
# dupInfoAfterID ----------------------------------------------------------
if (dupInfoAfterID) {
CitDat <- CitDat %>%
relocate("RefID", dplyr::everything()) %>%
relocate(c(
"clean_title",
"clean_title_id",
"has_obv_dup",
"obv_dup_id"
),
.after = "ID")
.after = "RefID")
}


# Original RefID label ----------------------------------------------------
names(CitDat)[names(CitDat) == "RefID"] <- OriginalRefIDLabel


# return tibble -----------------------------------------------------------
CitDat

Expand Down
4 changes: 2 additions & 2 deletions R/find_potential_dups.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#' was set to "English" so that column names are "Short Title" etc.
#'
#' @examples
#' path <- example_xlsx("3dupsin5refs.xlsx")
#' CitDat <- read_Citavi_xlsx(path) %>%
#' example_path <- example_file("3dupsin5refs/3dupsin5refs.ctv6")
#' CitDat <- read_Citavi_ctv6(example_path) %>%
#' find_obvious_dups() %>%
#' find_potential_dups()
#'
Expand Down
10 changes: 5 additions & 5 deletions R/handle_obvious_dups.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@
#'
#'
#' @examples
#' path <- example_xlsx("3dupsin5refs.xlsx")
#' CitDat <- read_Citavi_xlsx(path) %>%
#' example_path <- example_file("3dupsin5refs/3dupsin5refs.ctv6")
#' CitDat <- read_Citavi_ctv6(example_path) %>%
#' find_obvious_dups()
#'
#' # before
#' CitDat %>%
#' dplyr::select("clean_title", "clean_title_id", "obv_dup_id", "DOI name", "PubMed ID")
#' dplyr::select("clean_title", "clean_title_id", "obv_dup_id", "DOI", "PubMedID")
#'
#' # after
#' CitDat %>%
#' handle_obvious_dups(fieldsToHandle = c("DOI name", "PubMed ID")) %>%
#' dplyr::select("clean_title", "clean_title_id", "obv_dup_id", "DOI name", "PubMed ID")
#' handle_obvious_dups(fieldsToHandle = c("DOI", "PubMedID")) %>%
#' dplyr::select("clean_title", "clean_title_id", "obv_dup_id", "DOI", "PubMedID")
#'
#' @return A tibble where information from obvious duplicates was brought together for \code{dup_01}, respectively.
#' @importFrom purrr map
Expand Down
60 changes: 60 additions & 0 deletions R/read_Citavi_ctv6.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#' @title Read table from Citavi database (via SQL)
#'
#' @param path Path to the local Citavi project file (.ctv6). Must be a single
#' character string pointing to an existing file.
#' @param CitDBTableName Name of the table to be read from the connected Citavi database (via \code{DBI::dbReadTable()}).
#' Set to "Reference" by default. Shows all table names when set to NULL (via \code{DBI::dbListTables()}).
#'
#' @details
#' `r lifecycle::badge("experimental")` \cr
#' The underlying core functions are \code{DBI::dbConnect()}, \code{RSQLite::SQLite()},
#' \code{DBI::dbReadTable()} and \code{DBI::dbListTables()}. \cr
#' The database connection is closed when the function exits, also when
#' reading fails with an error.
#'
#' @examples
#' # example Citavi project
#' example_path <- example_file("3dupsin5refs/3dupsin5refs.ctv6")
#'
#' # import reference (=default) table
#' CitDat <- read_Citavi_ctv6(example_path)
#' CitDat %>% dplyr::select(Title, Year, Abstract, DOI)
#'
#' # show table names
#' read_Citavi_ctv6(example_path, CitDBTableName = NULL)
#'
#' @return A tibble holding the requested table. If \code{CitDBTableName} is
#' \code{NULL}, the result of \code{DBI::dbListTables()} (the table names) is
#' returned instead.
#' @importFrom RSQLite SQLite
#' @importFrom DBI dbConnect
#' @importFrom DBI dbDisconnect
#' @importFrom DBI dbListTables
#' @importFrom DBI dbReadTable
#' @importFrom dplyr as_tibble
#' @export

read_Citavi_ctv6 <- function(path = NULL, CitDBTableName = "Reference") {

  # validate path -----------------------------------------------------------
  if (!is.character(path) || length(path) != 1L || is.na(path)) {
    stop(
      "`path` must be a single character string pointing to a Citavi project file (.ctv6).",
      call. = FALSE
    )
  }

  # Check before connecting: connecting to a path that does not exist would
  # otherwise leave a new, empty database file behind and only fail later
  # with a "no such table" error.
  if (!file.exists(path)) {
    stop("Citavi project file not found: ", path, call. = FALSE)
  }


  # connect (and guarantee disconnect) --------------------------------------
  Citcon <- DBI::dbConnect(RSQLite::SQLite(), path)
  on.exit(DBI::dbDisconnect(Citcon), add = TRUE)


  # only list table names? --------------------------------------------------
  if (is.null(CitDBTableName)) {
    return(DBI::dbListTables(conn = Citcon))
  }


  # read table --------------------------------------------------------------
  CitDBTable <- DBI::dbReadTable(conn = Citcon, name = CitDBTableName)


  # return tibble -----------------------------------------------------------
  dplyr::as_tibble(CitDBTable)
}
6 changes: 3 additions & 3 deletions R/read_Citavi_xlsx.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
#' was set to "English" so that column names are "Short title" etc.
#'
#' @examples
#' path <- example_xlsx("3dupsin5refs.xlsx") # use this package's example xlsx file
#' read_Citavi_xlsx(path)
#' example_path <- example_file("3dupsin5refs.xlsx") # use this package's example xlsx file
#' read_Citavi_xlsx(example_path)
#'
#' \dontrun{
#' CitDat <- read_Citavi_xlsx("data/yourCitaviExport.xlsx")
Expand All @@ -41,7 +41,7 @@
read_Citavi_xlsx <- function(path = NULL, keepMarksCols = TRUE, useYearDerived = TRUE, setSuggestedColOrder = TRUE, setSuggestedColTypes = TRUE, ...) {

if (is.null(path)) {
stop("You did not provide a path to the Excel file.\n If you want to use an example file provided in this package instead, try\n read_Citavi_xlsx(example_xlsx('3dupsin5refs.xlsx'))")
stop("You did not provide a path to the Excel file.\n If you want to use an example file provided in this package instead, try\n read_Citavi_xlsx(example_file('3dupsin5refs.xlsx'))")
}


Expand Down
Loading

0 comments on commit f6fcb20

Please sign in to comment.