Primer prep2 (#211)

* # Added 2 arguments to estimate_pls() maxIt and stopCriterion. Caught a bug where estimate_pls() called within HOC and bootstrap was passing the original model settings. created a new settings return object within seminr_model objects. Created a bootstrap test to test how model performs when processing missing values. * Updates to primer demos * Modified confidence_interval() function to support boot mean, SD, and confidence intervals. Updated Primer Chap 7. * Minor changes to output * Updated data files * Update Chap 3 * Updated text in chap 5 demo * final commit to primer prep2 * REconciling develop and Primer_prep2 * Final * Final commit for Primer_prep2. Achieved consistency with Primer
sem-in-r · Apr 27, 2021 · b296d4b · b296d4b
1 parent ee07b22
commit b296d4b
Show file tree

Hide file tree

Showing 33 changed files with 513 additions and 169 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,3 @@ _*
 .DS*
 .R*
 Meta
-doc
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: seminr
 Type: Package
 Title: Building and Estimating Structural Equation Models
-Version: 2.0.2
-Date: 2021-04-01
+Version: 2.0.3
+Date: 2021-04-27
 Authors@R: c(person("Soumya", "Ray", email = "[email protected]", role = c("aut", "ths")),
              person("Nicholas Patrick", "Danks", email = "[email protected]", role = c("aut","cre")),
              person("André", "Calero Valdez", role = "aut", email = "[email protected]"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -37,7 +37,6 @@ export(browse_plot)
 export(check_test_plot)
 export(composite)
 export(compute_itcriteria_weights)
-export(confidence_interval)
 export(constructs)
 export(correlation_weights)
 export(csem2seminr)
@@ -88,6 +87,7 @@ export(set_last_seminr_plot)
 export(simplePLS)
 export(single_item)
 export(slope_analysis)
+export(specific_effect_significance)
 export(specify_model)
 export(total_indirect_ci)
 export(two_stage)
diff --git a/R/data.R b/R/data.R
@@ -155,3 +155,47 @@
 #' @examples data("corp_rep_data2")
 #'
 "corp_rep_data2"
+
+#' Measurement Instrument for the Influencer Model
+#'
+#' The data set is used as measurement instrument for the Influencer Model which is used in
+#' Partial Least Squares Structural Equation Modeling (PLS-SEM) Using R - A Workbook (2021)
+#' Hair, J.F. (Jr), Hult, T.M., Ringle, C.M., Sarstedt, M., Danks, N.P., and Ray, S.
+#'
+#' @format A data frame with 250 rows and 24 variables:
+#' \describe{
+#'
+#'   \item{sic_1}{The influencer reflects who I am.}
+#'   \item{sic_2}{I can identify with the influencer.}
+#'   \item{sic_3}{I feel a personal connection to the influencer.}
+#'   \item{sic_4}{I (can) use the influencer to communicate who I am to other people.}
+#'   \item{sic_5}{I think the influencer (could) help(s) me become the type of person I want to be.}
+#'   \item{sic_6}{I consider the influencer to be "me".}
+#'   \item{sic_7}{The influencer suits me well.}
+#'   \item{sic_global}{My personality and the personality of the influencer relate accordingly to one another.}
+#'   \item{pq_1}{The product has excellent quality.}
+#'   \item{pq_2}{The product looks to be reliable and durable.}
+#'   \item{pq_3}{The product will have fewer problems.}
+#'   \item{pq_4}{The product has excellent quality features.}
+#'   \item{pl_1}{I dislike the product (reverse coded).}
+#'   \item{pl_2}{The product is appealing to me.}
+#'   \item{pl_3}{The presented product raises a positive feeling in me.}
+#'   \item{pl_4}{The product is interesting to me.}
+#'   \item{pi_1}{It is very likely that I will purchase this product.}
+#'   \item{pi_2}{I will purchase this product the next time I need it.}
+#'   \item{pi_3}{I would definitely try the product out.}
+#'   \item{pi_4}{I would recommend this product to my friends.}
+#'   \item{pi_5}{I am willing to purchase this product.}
+#'   \item{pic_1}{The influencer is qualified.}
+#'   \item{pic_2}{The influencer is competent.}
+#'   \item{pic_3}{The influencer is an expert.}
+#'   \item{pic_4}{The influencer is experienced.}
+#'   \item{pic_5}{The influencer is knowledgeable.}
+#'   \item{wtp}{Please state your willingness to pay (in Euro) for the presented product.}
+#'   \item{influencer_group}{A binary variable indicating which group the influencer belongs to.}
+#' }
+#'
+#' @details The data frame influencer_data contains the observed data for the model specified in the Influencer Model.
+#' @examples data("influencer_data")
+#'
+"influencer_data"
diff --git a/R/estimate_bootstrap.R b/R/estimate_bootstrap.R
@@ -78,6 +78,10 @@ bootstrap_model <- function(seminr_model, nboot = 500, cores = NULL, seed = NULL
       measurement_model <- seminr_model$measurement_model
       structural_model <- seminr_model$smMatrix
       inner_weights <- seminr_model$inner_weights
+      missing_value <- seminr_model$settings$missing_value
+      maxIt <- seminr_model$settings$maxIt
+      stopCriterion <- seminr_model$settings$stopCriterion
+      missing <- seminr_model$settings$missing
 
       if (nboot > 0) {
         # Initialize the cluster
@@ -90,15 +94,30 @@ bootstrap_model <- function(seminr_model, nboot = 500, cores = NULL, seed = NULL
         if (is.null(seed)) {seed <- sample.int(100000, size = 1)}
 
         # Export variables and functions to cluster
-        parallel::clusterExport(cl=cl, varlist=c("measurement_model", "structural_model", "inner_weights", "getRandomIndex", "d", "HTMT", "seed","total_effects"), envir=environment())
+        parallel::clusterExport(cl=cl, varlist=c("measurement_model",
+                                                 "structural_model",
+                                                 "inner_weights",
+                                                 "getRandomIndex",
+                                                 "d",
+                                                 "HTMT",
+                                                 "seed",
+                                                 "total_effects",
+                                                 "missing_value",
+                                                 "maxIt",
+                                                 "stopCriterion",
+                                                 "missing"), envir=environment())
 
         # Function to get PLS estimate results
         getEstimateResults <- function(i, d = d) {
           set.seed(seed + i)
           boot_model <- seminr::estimate_pls(data = d[getRandomIndex(d),],
-                               measurement_model,
-                               structural_model,
-                               inner_weights = inner_weights)
+                               measurement_model = measurement_model,
+                               structural_model = structural_model,
+                               inner_weights = inner_weights,
+                               missing = missing,
+                               missing_value = missing_value,
+                               stopCriterion = stopCriterion,
+                               maxIt = maxIt)
           boot_htmt <- HTMT(boot_model)
           boot_total <- total_effects(boot_model$path_coef)
           return(as.matrix(c(c(boot_model$path_coef),

diff --git a/R/estimate_pls.R b/R/estimate_pls.R
@@ -26,6 +26,12 @@
 #' @param missing_value Value in dataset that indicates missing values.
 #'   NA is used by default.
 #'
+#' @param maxIt A parameter that specifies that maximum number of iterations when estimating the
+#'   PLS model. Default value is 300.
+#'
+#' @param stopCriterion A parameter specifying the stop criterion for estimating the PLS model.
+#'   Default value is 7.
+#'
 #' @return A list of the estimated parameters for the SEMinR model including:
 #'  \item{meanData}{A vector of the indicator means.}
 #'  \item{sdData}{A vector of the indicator standard deviations}
@@ -50,7 +56,9 @@
 #'              measurement_model = NULL, structural_model = NULL, model = NULL,
 #'              inner_weights = path_weighting,
 #'              missing = mean_replacement,
-#'              missing_value = NA)
+#'              missing_value = NA,
+#'              maxIt = 300,
+#'              stopCriterion = 7)
 #'
 #' @seealso \code{\link{specify_model}} \code{\link{relationships}} \code{\link{constructs}} \code{\link{paths}} \code{\link{interaction_term}}
 #'          \code{\link{bootstrap_model}}
@@ -87,12 +95,20 @@
 #' summary(mobi_pls)
 #' plot_scores(mobi_pls)
 #' @export
-estimate_pls <- function(data, measurement_model=NULL, structural_model=NULL, model=NULL, inner_weights = path_weighting, missing = mean_replacement, missing_value = NA) {
+estimate_pls <- function(data,
+                         measurement_model = NULL,
+                         structural_model = NULL,
+                         model = NULL,
+                         inner_weights = path_weighting,
+                         missing = mean_replacement,
+                         missing_value = NA,
+                         maxIt=300,
+                         stopCriterion=7) {
   message("Generating the seminr model")
   data[data == missing_value] <- NA
+  rawdata <- data
   data <- missing(data)
   data <- stats::na.omit(data)
-  rawdata <- data
 
   # Extract model specifications
   specified_model <- extract_models(model, measurement_model, structural_model)
@@ -107,7 +123,9 @@ estimate_pls <- function(data, measurement_model=NULL, structural_model=NULL, mo
                                       sm = structural_model,
                                       mm = measurement_model,
                                       inners = inner_weights,
-                                      HOCs = HOCs)
+                                      HOCs = HOCs,
+                                      maxIt=maxIt,
+                                      stopCriterion=stopCriterion)
     measurement_model <- HOM$mm
     structural_model <- HOM$sm
     data <- HOM$data
@@ -125,10 +143,20 @@ estimate_pls <- function(data, measurement_model=NULL, structural_model=NULL, mo
   measurement_mode_scheme <- sapply(unique(c(structural_model[,1], structural_model[,2])), get_measure_mode, mmMatrix, USE.NAMES = TRUE)
 
   # Run the model in simplePLS
-  seminr_model = seminr::simplePLS(obsData = data, smMatrix = structural_model, mmMatrix = mmMatrix, inner_weights = inner_weights, measurement_mode_scheme = measurement_mode_scheme)
+  seminr_model = seminr::simplePLS(obsData = data,
+                                   smMatrix = structural_model,
+                                   mmMatrix = mmMatrix,
+                                   inner_weights = inner_weights,
+                                   maxIt=maxIt,
+                                   stopCriterion=stopCriterion,
+                                   measurement_mode_scheme = measurement_mode_scheme)
   seminr_model$data <- data
   seminr_model$rawdata <- rawdata
   seminr_model$measurement_model <- measurement_model
+  seminr_model$settings$missing_value <- missing_value
+  seminr_model$settings$maxIt <- maxIt
+  seminr_model$settings$stopCriterion <- stopCriterion
+  seminr_model$settings$missing <- missing
 
   # Correct for Bias in Reflective models using PLS Consistent
   seminr_model <- model_consistent(seminr_model)

diff --git a/R/evaluate_effects.R b/R/evaluate_effects.R
@@ -59,10 +59,15 @@ fSquared <- function(seminr_model, iv, dv) {
   #   lm <- stats::lm(formula = frmla, data = data)
   #   summary(lm)
   #   }
-  utils::capture.output(
+  suppressMessages(
     without_pls <- estimate_pls(data = seminr_model$rawdata,
                                 measurement_model = seminr_model$measurement_model,
-                                structural_model = without_sm)
+                                structural_model = without_sm,
+                                inner_weights = seminr_model$inner_weights,
+                                missing = seminr_model$settings$missing,
+                                missing_value = seminr_model$settings$missing_value,
+                                maxIt = seminr_model$settings$maxIt,
+                                stopCriterion = seminr_model$settings$stopCriterion)
   )
   with_r2 <- seminr_model$rSquared["Rsq", dv]
   ifelse(any(without_sm[,"target"] == dv),

diff --git a/R/evaluate_reliability.R b/R/evaluate_reliability.R
@@ -147,5 +147,5 @@ cronbachs_alpha <- function(seminr_model) {
       alpha_vec[[i]] <- 1
     }
   }
-  return(alpha_vec)
+  return(unlist(alpha_vec))
 }
diff --git a/R/feature_higher_order.R b/R/feature_higher_order.R
@@ -30,7 +30,7 @@ remove_HOC_in_measurement_model <- function(construct, mm) {
 }
 
 # Function to parse measurement and structural model and create the higher order model with complete information
-prepare_higher_order_model <- function(data, sm , mm, inners, HOCs) {
+prepare_higher_order_model <- function(data, sm , mm, inners, HOCs, maxIt, stopCriterion) {
   #retain the mm and sm
   orig_mm <- mm
   new_mm <- matrix(unlist(mm[!(substr(names(mm), nchar(names(mm))-10, nchar(names(mm))) == "interaction") & !(names(mm) == "higher_order_composite")]), ncol = 3, byrow = TRUE,
@@ -59,7 +59,9 @@ prepare_higher_order_model <- function(data, sm , mm, inners, HOCs) {
   new_model <- estimate_pls(data = data,
                             measurement_model = mm[!(substr(names(mm), nchar(names(mm))-10, nchar(names(mm))) == "interaction") & !(names(mm) == "higher_order_composite")],
                             structural_model = sm,
-                            inner_weights = inners)
+                            inner_weights = inners,
+                            maxIt = maxIt,
+                            stopCriterion = stopCriterion)
 
   # Add the construct scores to data
   data <- cbind(data, new_model$construct_scores[, dimensions])

diff --git a/R/feature_plspredict.R b/R/feature_plspredict.R
@@ -184,8 +184,8 @@ item_metrics <- function(pls_prediction_kfold) {
     apply(pls_prediction_kfold$lm_out_of_sample_residuals, 2, prediction_metrics))
 
   # Assign rownames to matrices
-  rownames(PLS_item_prediction_metrics_IS) <- rownames(PLS_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_OOS) <- c("RMSE","MAD")
-  rownames(LM_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_IS) <- c("RMSE","MAD")
+  rownames(PLS_item_prediction_metrics_IS) <- rownames(PLS_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_OOS) <- c("RMSE","MAE")
+  rownames(LM_item_prediction_metrics_OOS) <- rownames(LM_item_prediction_metrics_IS) <- c("RMSE","MAE")
 
   return(list(PLS_item_prediction_metrics_IS = PLS_item_prediction_metrics_IS,
               PLS_item_prediction_metrics_OOS = PLS_item_prediction_metrics_OOS,
@@ -277,10 +277,14 @@ in_and_out_sample_predictions <- function(x, folds, ordered_data, model,techniqu
   PLS_predicted_insample_item <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
   PLS_predicted_insample_item_residuals <- matrix(0,nrow = nrow(ordered_data),ncol = length(model$mmVariables),dimnames = list(rownames(ordered_data),model$mmVariables))
   #PLS prediction on testset model
-  utils::capture.output(train_model <- seminr::estimate_pls(data = trainingData,
-                                                            measurement_model = model$measurement_model,
-                                                            structural_model = model$smMatrix,
-                                                            inner_weights = model$inner_weights))
+  suppressMessages(train_model <- seminr::estimate_pls(data = trainingData,
+                                                       measurement_model = model$measurement_model,
+                                                       structural_model = model$smMatrix,
+                                                       inner_weights = model$inner_weights,
+                                                       missing = model$settings$missing,
+                                                       missing_value = model$settings$missing_value,
+                                                       maxIt = model$settings$maxIt,
+                                                       stopCriterion = model$settings$stopCriterion))
   test_predictions <- stats::predict(object = train_model,
                                      testData = testingData,
                                      technique = technique)

diff --git a/R/library.R b/R/library.R
@@ -296,14 +296,16 @@ skew <- function(x, na.rm = FALSE) {
 }
 
 desc <- function(data, na.rm = na.rm) {
-  Mean <- apply(data, 2, mean)
-  Std.Dev. <- apply(data, 2, stats::sd)
+  Mean <- apply(data, 2, mean, na.rm = na.rm)
+  Std.Dev. <- apply(data, 2, stats::sd, na.rm = na.rm)
   Kurtosis <- kurt(data, na.rm = na.rm)
-  Min <- apply(data, 2, min)
-  Max <- apply(data, 2, max)
-  Median <- apply(data, 2, stats::median)
+  Min <- apply(data, 2, min, na.rm = na.rm)
+  Max <- apply(data, 2, max, na.rm = na.rm)
+  Median <- apply(data, 2, stats::median, na.rm = na.rm)
   Skewness <- skew(data, na.rm = na.rm)
   Missing <- apply(data, 2, function(x) sum(stats::complete.cases(x)==FALSE))
+  # Missing <- attributes(data)$Missing
+  # Missing <- apply(data, 2, function(x) sum(stats::complete.cases(x)==FALSE))
   No. <- 1:ncol(data)
   cbind(No., Missing, Mean, Median, Min, Max, Std.Dev., Kurtosis, Skewness)
 }

diff --git a/R/plot_results.R b/R/plot_results.R
@@ -150,7 +150,7 @@ slope_analysis <- function(moderated_model, dv, moderator, iv, leg_place = "bott
   graphics::lines(c(-1,0,1), res[c(2,5,8)], lty = 1)
   graphics::lines(c(-1,0,1), res[c(3,6,9)], lty = 3)
   graphics::grid()
-  graphics::legend(leg_place, c("Mod at -1SD", "Mod at Mean", "Mod at 1SD"), lty=c(2,1,3),
+  graphics::legend(leg_place, c("Mod at -1SD", "Mod at Mean", "Mod at +1SD"), lty=c(2,1,3),
          horiz=FALSE, bty="n", cex = 0.8
   )
 }

diff --git a/R/report_descriptives.R b/R/report_descriptives.R
@@ -1,7 +1,7 @@
 # A function to take a seminr model and return item and construct descriptives
 descriptives <- function(seminr_model, na.rm = TRUE) {
   #items
-  item_descriptives <- desc(seminr_model$data, na.rm = na.rm)
+  item_descriptives <- desc(seminr_model$rawdata, na.rm = na.rm)
   item_correlations <- stats::cor(seminr_model$data)
   #constructs
   construct_descriptives <- desc(seminr_model$construct_scores, na.rm = na.rm)
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,4 +6,3 @@ _* @@
     .DS*
     .R*
     Meta
-    doc