Skip to content

Commit

Permalink
Fix future bug on sameShape (#55)
Browse files Browse the repository at this point in the history
* Bug fix : was an issue with future matrix class;
* Add documentation for change and change package version
  • Loading branch information
ELToulemonde authored Feb 12, 2020
1 parent 058af69 commit b7efada
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 9 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: dataPreparation
Title: Automated Data Preparation
Version: 0.4.2
Version: 0.4.3
Authors@R: person("Emmanuel-Lin", "Toulemonde", email = "[email protected]", role = c("aut", "cre"))
Description: Do most of the painful data preparation for a data science project with a minimum amount of code; Take advantage of data.table efficiency and use some algorithmic tricks in order to perform data preparation in a time and RAM efficient way.
Depends:
Expand Down
7 changes: 6 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
V 0.4.3
=======
- Fix :
- In *sameShape*: there was a forward-compatibility bug caused by the upcoming change in the class of matrices (c("matrix", "array") instead of "matrix"). Fixed it by implementing 2 functions to check the class.

V 0.4.2
=======
- Fix test :
- Case in "build_encoding: min_frequency allows to drop rare values" was not built correctly.
- Case in *build_encoding*: "min_frequency allows to drop rare values" was not built correctly.

V 0.4.1
=======
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
V 0.4.3
=======
- Fix :
- In *sameShape*: there was a forward-compatibility bug caused by the upcoming change in the class of matrices (c("matrix", "array") instead of "matrix"). Fixed it by implementing 2 functions to check the class.

V 0.4.2
=======
- Fix test :
- Case in "build_encoding: min_frequency allows to drop rare values" was not built correctly.
- Case in *build_encoding*: "min_frequency allows to drop rare values" was not built correctly.

V 0.4.1
=======
Expand Down
35 changes: 29 additions & 6 deletions R/sameShape.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ sameShape <- function(dataSet, referenceSet, verbose = TRUE){
# Store class of reference set and transform it into data.table to make computation faster
referenceSet_class <- class(referenceSet)
referenceSet <- checkAndReturnDataTable(referenceSet, name = "referenceSet")

## Computation
# Complete list of columns
if (verbose){
Expand Down Expand Up @@ -93,7 +92,6 @@ sameShape <- function(dataSet, referenceSet, verbose = TRUE){
ref_class, ".")
}
set(dataSet, NULL, col, get(transfo_function)(dataSet[[col]]))

# Control
if (! all(class(dataSet[[col]]) == ref_class)){
warning(paste0(function_name, ": transformation didn't work. Please control that function ",
Expand All @@ -110,16 +108,17 @@ sameShape <- function(dataSet, referenceSet, verbose = TRUE){
}
}
gc(verbose = FALSE)

# Factor levels
if (verbose){
printl(function_name, ": verify that every factor as the right number of levels.")
pb <- initPB(function_name, names(dataSet))
}
for (col in names(dataSet)){
print(is.factor(dataSet[[col]]))
if (is.factor(dataSet[[col]])){
transfo_levels <- levels(dataSet[[col]])
ref_levels <- levels(referenceSet[[col]])
print( identical(transfo_levels, ref_levels))
if (! identical(transfo_levels, ref_levels)){
set(dataSet, NULL, col, factor(dataSet[[col]], levels = ref_levels))
if (verbose){
Expand All @@ -138,15 +137,39 @@ sameShape <- function(dataSet, referenceSet, verbose = TRUE){

# Set class
if (! identical(referenceSet_class, class(dataSet))){
if (referenceSet_class == "data.frame"){
if (is_class_dataframe(referenceSet_class)){
setDF(dataSet)
}
if (referenceSet_class == "matrix"){
if (is_class_matrix(referenceSet_class)){
dataSet <- as.matrix(dataSet)
}
}

## Wrapp-up
return(dataSet)
}


# Tell whether a class vector (as returned by class()) describes a plain
# data.frame, i.e. is exactly the single string "data.frame".
#
# @param some_class Character vector, typically the result of class(x).
# @return TRUE if some_class is exactly "data.frame", FALSE otherwise.
#   Multi-class vectors such as c("data.table", "data.frame") give FALSE,
#   and so do empty (NULL, character(0)) or NA inputs.
is_class_dataframe <- function(some_class){
  # Anything that is not a single, non-missing value cannot be a plain
  # data.frame class; checking it first keeps the `if` below from failing on
  # zero-length or NA conditions.
  if (length(some_class) != 1 || is.na(some_class)){
    return(FALSE)
  }
  if (some_class == "data.frame"){
    return(TRUE)
  }
  return(FALSE)
}

# Tell whether a class vector (as returned by class()) describes a matrix.
#
# Two forms are accepted: the single string "matrix", and the two-element
# vector c("matrix", "array") (the "future matrix" class).
#
# @param some_class Character vector, typically the result of class(x).
# @return TRUE if some_class is exactly "matrix" or exactly
#   c("matrix", "array"), FALSE otherwise (including empty or NA inputs).
is_class_matrix <- function(some_class){
  n_classes <- length(some_class)
  # Empty or missing input is never a matrix class; checking it first keeps
  # the `if` conditions below from failing on zero-length or NA values.
  if (n_classes == 0 || anyNA(some_class)){
    return(FALSE)
  }
  if (n_classes == 1){
    if (some_class == "matrix"){
      return(TRUE)
    }
    return(FALSE)
  }
  # Might be future matrix. The length must be exactly 2: `==` recycles its
  # shorter operand, so a longer vector repeating "matrix", "array" would
  # otherwise be accepted.
  if (n_classes == 2 && all(some_class == c("matrix", "array"))){
    return(TRUE)
  }
  return(FALSE)
}
45 changes: 45 additions & 0 deletions tests/testthat/test_sameShape.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,48 @@ test_that("sameShape: transform shape into data.frame",
# Then
expect_true(is.data.frame(reshaped_dataSet_2))
})

# Internal class check function
test_that("is_class_dataframe: matrix and data.table are not data.frame",
          {
            # Given: class vectors that do not describe a plain data.frame
            legacy_matrix_class <- "matrix"
            two_part_matrix_class <- c("matrix", "array")
            dt_class <- c("data.table", "data.frame")

            # When + Then: each of them is rejected
            expect_false(is_class_dataframe(legacy_matrix_class))
            expect_false(is_class_dataframe(two_part_matrix_class))
            expect_false(is_class_dataframe(dt_class))
          })

test_that("is_class_dataframe: data.frame is data.frame",
          {
            # Given: the class vector of a plain data.frame
            df_class <- "data.frame"

            # When + Then: it is accepted
            expect_true(is_class_dataframe(df_class))
          })

test_that("is_class_matrix: data.frame and data.table are not matrix",
          {
            # Given: class vectors that do not describe a matrix
            df_class <- "data.frame"
            dt_class <- c("data.table", "data.frame")

            # When + Then: each of them is rejected
            expect_false(is_class_matrix(df_class))
            expect_false(is_class_matrix(dt_class))
          })

test_that("is_class_matrix: matrix and future matrix is matrix",
          {
            # Given: the single-string matrix class and the two-element one
            legacy_matrix_class <- "matrix"
            two_part_matrix_class <- c("matrix", "array")

            # When + Then: both are accepted
            expect_true(is_class_matrix(legacy_matrix_class))
            expect_true(is_class_matrix(two_part_matrix_class))
          })

0 comments on commit b7efada

Please sign in to comment.