Skip to content

Commit

Permalink
Merge pull request #33 from inbo/review
Browse files Browse the repository at this point in the history
Upgrade to version 0.0.4. Ready for review in ropensci/software-review#263
  • Loading branch information
ThierryO authored May 16, 2019
2 parents 4b1f2fd + d8680a7 commit df1ba32
Show file tree
Hide file tree
Showing 56 changed files with 2,301 additions and 644 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
^_pkgdown.yml$
^appveyor\.yml$
^codemeta\.json$
^.zenodo\.json$
^docs$
^man-roxygen$
^pkgdown$
Expand Down
34 changes: 34 additions & 0 deletions .zenodo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{
"creators": [
{
"affiliation": "Research Institute for Nature and Forest (INBO)",
"name": "Onkelinx, Thierry",
"orcid": "0000-0001-8804-4216"
}
],
"contributors": [
{
"affiliation": "Research Institute for Nature and Forest (INBO)",
"name": "Onkelinx, Thierry",
"orcid": "0000-0001-8804-4216",
"type": ["Contactperson", "ProjectLeader"]
},
{
"affiliation": "Research Institute for Nature and Forest (INBO)",
"name": "Vanderhaeghe, Floris",
"orcid": "0000-0002-6378-6229",
"type": "Projectmember"
},
{
"name": "Research Institute for Nature and Forest (INBO)",
"type": "Rightsholder"
}
],
"keywords": [
"r",
"version control",
"data.frame",
"plain text"
],
"license": "GPL-3"
}
9 changes: 6 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: git2rdata
Title: Store and Retrieve Data.frames in a Git Repository
Version: 0.0.3
Version: 0.0.4
Authors@R: c(
person(
"Thierry", "Onkelinx", role = c("aut", "cre"),
Expand All @@ -14,7 +14,7 @@ Authors@R: c(
"Research Institute for Nature and Forest",
role = c("cph", "fnd"), email = "[email protected]"))
Description: Make versioning of data.frame easy and efficient using git repositories.
Depends: R (>= 3.4.0)
Depends: R (>= 3.5.0)
Imports:
assertthat,
git2r (>= 0.23.0),
Expand All @@ -36,12 +36,15 @@ BugReports: https://github.com/inbo/git2rdata/issues
Collate:
'clean_data_path.R'
'git2rdata-package.R'
'write_vc.R'
'is_git2rdata.R'
'is_git2rmeta.R'
'list_data.R'
'meta.R'
'write_vc.R'
'prune.R'
'read_vc.R'
'recent_commit.R'
'reexport.R'
'relabel.R'
'upgrade_data.R'
VignetteBuilder: knitr
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

S3method(format,meta_detail)
S3method(format,meta_list)
S3method(is_git2rdata,character)
S3method(is_git2rdata,default)
S3method(is_git2rdata,git_repository)
S3method(is_git2rmeta,character)
S3method(is_git2rmeta,default)
S3method(is_git2rmeta,git_repository)
S3method(list_data,character)
S3method(list_data,default)
S3method(list_data,git_repository)
Expand All @@ -22,17 +28,23 @@ S3method(prune_meta,git_repository)
S3method(read_vc,character)
S3method(read_vc,default)
S3method(read_vc,git_repository)
S3method(recent_commit,default)
S3method(recent_commit,git_repository)
S3method(relabel,data.frame)
S3method(relabel,default)
S3method(relabel,list)
S3method(rm_data,character)
S3method(rm_data,default)
S3method(rm_data,git_repository)
S3method(upgrade_data,character)
S3method(upgrade_data,default)
S3method(upgrade_data,git_repository)
S3method(write_vc,character)
S3method(write_vc,default)
S3method(write_vc,git_repository)
export(commit)
export(is_git2rdata)
export(is_git2rmeta)
export(list_data)
export(meta)
export(prune_meta)
Expand All @@ -44,6 +56,7 @@ export(relabel)
export(repository)
export(rm_data)
export(status)
export(upgrade_data)
export(write_vc)
importFrom(assertthat,"on_failure<-")
importFrom(assertthat,assert_that)
Expand All @@ -64,6 +77,7 @@ importFrom(git2r,status)
importFrom(git2r,workdir)
importFrom(methods,setOldClass)
importFrom(stats,setNames)
importFrom(utils,packageVersion)
importFrom(utils,read.table)
importFrom(utils,write.table)
importFrom(yaml,as.yaml)
Expand Down
97 changes: 68 additions & 29 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,47 +1,86 @@
git2rdata 0.0.4 (2019-05-16)
============================

### BREAKING FEATURES

* `write_vc()` stores the `git2rdata` version number to the metadata. Use `upgrade_data()` to update existing data.

### NEW FEATURES

* `read_vc()` checks the meta data hash. A mismatch results in an error.
* The meta data gains a data hash. A mismatch throws a warning when reading the object. This tolerates updating the data by other software, while informing the user that such change occurred.
* `is_git2rmeta()` validates metadata.
* `list_data()` lists files with valid metadata.
* `rm_data()` and `prune_meta()` remove files with valid metadata. Other files are untouched.
* Files with invalid metadata yield a warning with `list_data()`, `rm_data()` and `prune_meta()`.

### Bugfixes

* `write_vc()` and `relabel()` handle empty strings (`''`) in characters and factors (#24).
* `read_vc()` no longer treats `#` as a comment character.
* `read_vc()` handles non ASCII characters on Windows.

### Other changes

* Use a faster algorithm to detect duplicates (suggestion by @brodieG).
* Improve documentation.
* Fix typos in documentation, vignettes and README.
* Add a ROpenSci review badge to the README.
* The README mentions an upper bound on the size of dataframes.
* Set lifecycle to "maturing" and repo status to "active".
* The functions handle `root` containing regex expressions.
* Rework `vignette("workflow", package = "git2rdata")`.
* Update timings in `vignette("efficiency", package = "git2rdata")`
* Minor tweaks in `vignette("plain_text", package = "git2rdata")`

git2rdata 0.0.3 (2019-03-12)
============================

* Fix typos in documentation, vignettes and README.

git2rdata 0.0.2 (2019-02-26)
============================

### BREAKING CHANGES

* metadata is added as a list to the objects rather than in YAML format.
* the [yaml](https://cran.r-project.org/package=yaml) package is used to store the metadata list in YAML format.
* `write_vc()` now uses the 'strict' argument instead of 'override'
* the functionality `rm_data()` is split into `rm_data()` and `prune_meta()` (#9)
* `meta()` appends the metadata as a list to the objects rather than in YAML format.
* `yaml::write_yaml()` writes the metadata list in YAML format.
* `write_vc()` now uses the 'strict' argument instead of 'override'.
* `rm_data()` removes the data files. Use `prune_meta()` to remove left-over metadata files (#9).

### NEW FEATURES

* vignette on [efficiency](../articles/efficiency.html) added (#2)
* existing vignette was split over three vignettes
* focus on the [plain text format](../articles/plain_text.html)
* focus on [version control](../articles/version_control.html)
* focus on [workflows](../articles/workflow.html)
* S4 methods are replaced by S3 methods (#8)
* optimized factors use stable indices, resulting in smaller diffs when levels are added or removed (#13)
* use `relabel()` to alter factor levels without changing their index (#13)
* the raw data is written and read by base R functions instead of `readr` functions (#7)
* `write_vc()` and `read_vc()` use the current working directory as default root (#6, @florisvdh)
* the user can specify a string to code missing values (default = `NA`). This allows the storage of the character string `"NA"`.
* Vignette on [efficiency](../articles/efficiency.html) added (#2).
* Three separate vignettes instead of one large vignette.
* Focus on the [plain text format](../articles/plain_text.html).
* Focus on [version control](../articles/version_control.html).
* Focus on [workflows](../articles/workflow.html).
* S3 methods replace the old S4 methods (#8).
* Optimized factors use stable indices. Adding or removing levels results in smaller diffs (#13).
* Use `relabel()` to alter factor levels without changing their index (#13).
* `write.table()` stores the raw data instead of `readr::write_tsv()` (#7). This avoids the `readr` dependency.
* `write_vc()` and `read_vc()` use the current working directory as default root (#6, @florisvdh).
* The user can specify a string to code missing values (default = `NA`). This allows the storage of the character string `"NA"`.
* `write_vc()` returns a list of issues which potentially result in large diffs.
* `list_data()` returns a vector with dataframes in the repository
* `list_data()` returns a vector with dataframes in the repository.

### Other changes

* `write_vc()` allows to use a custom NA string
* each helpfile contains a working example (#11)
* README updated (#12)
* Updated the rationale with links to the vignettes
* `git2rdata` has a hexsticker logo
* A DOI is added
* The installation instructions use `remotes` and build the vignettes
* `auto_commit()` was removed because of limited extra functionality over `git2r::commit()`
* dataframes are read and written by base R functions instead of `readr` functions
* `write_vc()` allows to use a custom `NA` string.
* Each helpfile contains a working example (#11).
* README updated (#12).
* Updated the rationale with links to the vignettes.
* `git2rdata` has a hexsticker logo.
* Add the [![DOI](https://zenodo.org/badge/147685405.svg)](https://zenodo.org/badge/latestdoi/147685405).
* The installation instructions use `remotes` and build the vignettes.
* We removed `auto_commit()` because of limited extra functionality over `git2r::commit()`.

git2rdata 0.0.1 (2018-11-12)
============================

### NEW FEATURES

* use readr to write and read plain text files
* allows storage of strings with "NA" or special characters
* handle ordered factors
* stop handling complex numbers
* Use `readr` to write and read plain text files.
* Allow storage of strings with "NA" or special characters.
* Handle ordered factors.
* Stop handling complex numbers.
12 changes: 7 additions & 5 deletions R/clean_data_path.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
#' Clean the data path
#' Clean the Data Path
#' Strips any file extension from the path and adds the `".tsv"` and `".yml"`
#' file extensions
#' @inheritParams write_vc
#' @param normalize normalize the path? Defaults to TRUE
#' @return a named vector with "raw_file" and "meta_file", refering to the
#' `".tsv"` and `".yml"` files
#' @param normalize Normalize the path? Defaults to TRUE
#' @return A named vector with "raw_file" and "meta_file", referring to the
#' `".tsv"` and `".yml"` files.
#' @noRd
#' @family internal
#' @importFrom assertthat assert_that is.flag noNA
clean_data_path <- function(root, file, normalize = TRUE) {
assert_that(is.flag(normalize), noNA(normalize))
dir_name <- dirname(file)
file <- gsub("\\..*$", "", basename(file))
if (dir_name == ".") {
path <- file.path(root, file)
} else {
path <- file.path(root, dir_name, file)
}
if (isTRUE(normalize)) {
if (normalize) {
path <- normalizePath(path, winslash = "/", mustWork = FALSE)
}
c(raw_file = paste0(path, ".tsv"), meta_file = paste0(path, ".yml"))
Expand Down
72 changes: 72 additions & 0 deletions R/is_git2rdata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#' Check Whether a Git2rdata Object is Valid
#'
#' A valid git2rdata object has valid metadata. The data hash must match the
#' data hash stored in the metadata.
#'
#' `is_git2rdata()` is an S3 generic which dispatches on the class of `root`.
#' The method for a character `root` runs the following checks in this order
#' and stops at the first failure:
#'
#' 1. the metadata is valid according to `is_git2rmeta()`;
#' 1. the data file exists;
#' 1. the first line of the data file equals the tab separated variable names
#'    stored in the metadata;
#' 1. the hash of the data file equals the data hash stored in the metadata.
#'
#' The method for a `git_repository` applies the same checks relative to the
#' working directory of the repository. Any other class of `root` yields an
#' error.
#' @inheritParams write_vc
#' @inheritParams is_git2rmeta
#' @return A logical value. `TRUE` in case of a valid git2rdata object.
#' Otherwise `FALSE`.
#' @rdname is_git2rdata
#' @export
#' @family internal
#' @template example-isgit2r
is_git2rdata <- function(file, root = ".",
message = c("none", "warning", "error")) {
# dispatch on the class of 'root' rather than on the first argument 'file'
UseMethod("is_git2rdata", root)
}

#' @export
is_git2rdata.default <- function(file, root, message) {
  # Fallback method: reached when no method exists for the class of 'root'.
  # class() can return several class names; collapse them so the error message
  # stays readable instead of gluing the names together without a separator.
  stop(
    "a 'root' of class ", paste(class(root), collapse = "/"),
    " is not supported"
  )
}

#' @export
#' @importFrom assertthat assert_that is.string
#' @importFrom yaml read_yaml as.yaml
#' @importFrom utils packageVersion
#' @importFrom git2r hash hashfile
is_git2rdata.character <- function(file, root = ".",
                                   message = c("none", "warning", "error")) {
  # Validate a git2rdata object stored below the directory 'root'.
  # Returns TRUE when every check passes. On the first failing check the
  # function returns FALSE, after raising a warning or an error when 'message'
  # requests one.
  assert_that(is.string(file), is.string(root))
  message <- match.arg(message)
  root <- normalizePath(root, winslash = "/", mustWork = TRUE)

  # the metadata must be valid before the data itself is inspected
  check_meta <- is_git2rmeta(file = file, root = root, message = message)
  if (!check_meta) {
    return(FALSE)
  }
  file <- clean_data_path(root = root, file = file)

  if (!file.exists(file["raw_file"])) {
    msg <- "Data file missing."
    switch(message, error = stop(msg), warning = warning(msg))
    return(FALSE)
  }

  # read the metadata
  meta_data <- read_yaml(file["meta_file"])

  # the expected header holds every metadata entry except the "..generic" one
  correct <- names(meta_data)
  correct <- paste(correct[correct != "..generic"], collapse = "\t")
  header <- readLines(file["raw_file"], n = 1, encoding = "UTF-8")
  # readLines() returns character(0) for an empty file; guard the length so
  # this is reported as a corrupt header instead of failing inside if()
  if (length(header) != 1 || correct != header) {
    msg <- paste("Corrupt data, incorrect header. Expecting:", correct)
    switch(message, error = stop(msg), warning = warning(msg))
    return(FALSE)
  }

  # compare the hash stored in the metadata with the hash of the data file
  if (meta_data[["..generic"]][["data_hash"]] != hashfile(file[["raw_file"]])) {
    msg <- "Corrupt data, mismatching data hash."
    switch(message, error = stop(msg), warning = warning(msg))
    return(FALSE)
  }

  TRUE
}

#' @export
#' @importFrom git2r workdir
#' @include write_vc.R
is_git2rdata.git_repository <- function(
  file, root, message = c("none", "warning", "error")
) {
  # Replace the repository by its working directory and hand over to the
  # generic, which dispatches again on the class of that value.
  repo_path <- workdir(root)
  is_git2rdata(file = file, root = repo_path, message = message)
}
Loading

0 comments on commit df1ba32

Please sign in to comment.