library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
library(semPlot)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(RColorBrewer)
library(semTools)
##  
## ###############################################################################
## This is semTools 0.5-6
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
## 
## Attaching package: 'semTools'
## 
## The following object is masked from 'package:PerformanceAnalytics':
## 
##     kurtosis
## 
## The following object is masked from 'package:readr':
## 
##     clipboard

Cargando los datos

data_GRV <- read_csv("data/Data_referral_marketing_Sebas.csv")
## Rows: 240 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (28): A1, A2, A3, A4, A5, C6, C7, C8, C9, E10, E11, E12, O13, O14, O15, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

SEM

Model

ref_mark_1_3 <- "SAT =~ SAT19 + SAT20 + SAT21
               MI =~ MI22 + MI23
               TS =~ TS16 + TS17 + TS18
               REC =~ REC24 + REC28 + REC25 + REC26
               MI ~ SAT
               TS ~ SAT
               REC ~ SAT 
               REC ~ MI
               REC ~ TS
              "

Modelo

ref_mark_fit_1_3 <- lavaan::cfa(ref_mark_1_3, data=data_GRV)

Primero se mira las cargas cruzadas. Cada uno de los items cuadren con cada constructo

inspect(ref_mark_fit_1_3, what = "std")$lambda
##         SAT    MI    TS   REC
## SAT19 0.878 0.000 0.000 0.000
## SAT20 0.984 0.000 0.000 0.000
## SAT21 0.943 0.000 0.000 0.000
## MI22  0.000 0.959 0.000 0.000
## MI23  0.000 0.877 0.000 0.000
## TS16  0.000 0.000 0.594 0.000
## TS17  0.000 0.000 0.812 0.000
## TS18  0.000 0.000 0.854 0.000
## REC24 0.000 0.000 0.000 0.887
## REC28 0.000 0.000 0.000 0.770
## REC25 0.000 0.000 0.000 0.967
## REC26 0.000 0.000 0.000 0.534

Después se revisa la validez de los constructos con alpha de cronbach que debe ser mayor a .7 y el AVE mayor a .5. Idealmente, también el composite reliability que debe ser mayor a 0.7

reliability(ref_mark_fit_1_3)
##              SAT        MI        TS       REC
## alpha  0.9521594 0.9136440 0.7887447 0.8463513
## omega  0.9519718 0.9145252 0.7967289 0.8489612
## omega2 0.9519718 0.9145252 0.7967289 0.8489612
## omega3 0.9512951 0.9145252 0.7985576 0.8509266
## avevar 0.8685910 0.8426395 0.5703285 0.5871115

Modelo externo son las cargas de las preguntas y la validez de los constructos. El modelo interno con las hipótesis

Validando el modelo interno

summary(ref_mark_fit_1_3, fit.measures=TRUE, standardized=T,rsquare=T)
## lavaan 0.6.15 ended normally after 53 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        29
## 
##   Number of observations                           240
## 
## Model Test User Model:
##                                                       
##   Test statistic                               146.191
##   Degrees of freedom                                49
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              2280.096
##   Degrees of freedom                                66
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.956
##   Tucker-Lewis Index (TLI)                       0.941
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -2735.782
##   Loglikelihood unrestricted model (H1)      -2662.686
##                                                       
##   Akaike (AIC)                                5529.564
##   Bayesian (BIC)                              5630.503
##   Sample-size adjusted Bayesian (SABIC)       5538.580
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.091
##   90 Percent confidence interval - lower         0.074
##   90 Percent confidence interval - upper         0.108
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    0.862
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.044
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   SAT =~                                                                
##     SAT19             1.000                               0.664    0.878
##     SAT20             1.008    0.039   25.704    0.000    0.669    0.984
##     SAT21             0.961    0.041   23.491    0.000    0.638    0.943
##   MI =~                                                                 
##     MI22              1.000                               0.835    0.959
##     MI23              0.940    0.058   16.323    0.000    0.784    0.877
##   TS =~                                                                 
##     TS16              1.000                               0.696    0.594
##     TS17              1.258    0.145    8.689    0.000    0.875    0.812
##     TS18              1.363    0.159    8.589    0.000    0.949    0.854
##   REC =~                                                                
##     REC24             1.000                               0.730    0.887
##     REC28             1.028    0.067   15.349    0.000    0.751    0.770
##     REC25             1.156    0.051   22.488    0.000    0.845    0.967
##     REC26             0.847    0.094    9.046    0.000    0.619    0.534
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   MI ~                                                                  
##     SAT               0.774    0.074   10.426    0.000    0.616    0.616
##   TS ~                                                                  
##     SAT               0.161    0.076    2.119    0.034    0.153    0.153
##   REC ~                                                                 
##     SAT               0.282    0.081    3.462    0.001    0.256    0.256
##     MI                0.292    0.067    4.372    0.000    0.333    0.333
##     TS                0.221    0.067    3.299    0.001    0.211    0.211
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .SAT19             0.131    0.013    9.762    0.000    0.131    0.229
##    .SAT20             0.014    0.006    2.383    0.017    0.014    0.031
##    .SAT21             0.051    0.007    7.152    0.000    0.051    0.111
##    .MI22              0.060    0.033    1.801    0.072    0.060    0.080
##    .MI23              0.185    0.034    5.479    0.000    0.185    0.231
##    .TS16              0.891    0.091    9.750    0.000    0.891    0.648
##    .TS17              0.396    0.072    5.487    0.000    0.396    0.341
##    .TS18              0.333    0.079    4.200    0.000    0.333    0.270
##    .REC24             0.144    0.019    7.605    0.000    0.144    0.213
##    .REC28             0.388    0.039    9.961    0.000    0.388    0.407
##    .REC25             0.050    0.018    2.705    0.007    0.050    0.065
##    .REC26             0.961    0.090   10.726    0.000    0.961    0.715
##     SAT               0.441    0.051    8.616    0.000    1.000    1.000
##    .MI                0.432    0.053    8.205    0.000    0.621    0.621
##    .TS                0.473    0.102    4.640    0.000    0.977    0.977
##    .REC               0.343    0.041    8.362    0.000    0.644    0.644
## 
## R-Square:
##                    Estimate
##     SAT19             0.771
##     SAT20             0.969
##     SAT21             0.889
##     MI22              0.920
##     MI23              0.769
##     TS16              0.352
##     TS17              0.659
##     TS18              0.730
##     REC24             0.787
##     REC28             0.593
##     REC25             0.935
##     REC26             0.285
##     MI                0.379
##     TS                0.023
##     REC               0.356
fitMeasures(ref_mark_fit_1_3)
##                  npar                  fmin                 chisq 
##                29.000                 0.305               146.191 
##                    df                pvalue        baseline.chisq 
##                49.000                 0.000              2280.096 
##           baseline.df       baseline.pvalue                   cfi 
##                66.000                 0.000                 0.956 
##                   tli                  nnfi                   rfi 
##                 0.941                 0.941                 0.914 
##                   nfi                  pnfi                   ifi 
##                 0.936                 0.695                 0.956 
##                   rni                  logl     unrestricted.logl 
##                 0.956             -2735.782             -2662.686 
##                   aic                   bic                ntotal 
##              5529.564              5630.503               240.000 
##                  bic2                 rmsea        rmsea.ci.lower 
##              5538.580                 0.091                 0.074 
##        rmsea.ci.upper        rmsea.ci.level          rmsea.pvalue 
##                 0.108                 0.900                 0.000 
##        rmsea.close.h0 rmsea.notclose.pvalue     rmsea.notclose.h0 
##                 0.050                 0.862                 0.080 
##                   rmr            rmr_nomean                  srmr 
##                 0.041                 0.041                 0.044 
##          srmr_bentler   srmr_bentler_nomean                  crmr 
##                 0.044                 0.044                 0.048 
##           crmr_nomean            srmr_mplus     srmr_mplus_nomean 
##                 0.048                 0.044                 0.044 
##                 cn_05                 cn_01                   gfi 
##               109.907               123.994                 0.916 
##                  agfi                  pgfi                   mfi 
##                 0.866                 0.575                 0.817 
##                  ecvi 
##                 0.851
fitMeasures(ref_mark_fit_1_3)["chisq"] / fitMeasures(ref_mark_fit_1_3)["df"]
##    chisq 
## 2.983499

Resultado final

Chisquare = 747.047

Cmin/df = 2.87

nfi = 0.840

nnfi = 0.871

CFI = 0.889

AIC = 11922.825

RMSEA = 0.088

semPlot::semPaths(ref_mark_fit_1_3, what="est", fade=FALSE, residuals=FALSE, 
         layout="spring", structural=TRUE, nCharNodes=7, edge.label.cex = 1)

lavInspect(ref_mark_fit_1_3, "cor.lv")
##       SAT    MI    TS   REC
## SAT 1.000                  
## MI  0.616 1.000            
## TS  0.153 0.094 1.000      
## REC 0.494 0.511 0.281 1.000

VIF - Multicolinealidad

# # Assume your data frame is df
# model1 <- lm(SAT19 ~ SAT20 + SAT21 + MI22 + MI23 + TS16 + TS17 + TS18 + REC24 + REC28 + REC25 + REC26, data = df)
# model2 <- lm(SAT20 ~ SAT19 + SAT21 + MI22 + MI23 + TS16 + TS17 + TS18 + REC24 + REC28 + REC25 + REC26, data = df)
# model3 <- lm(SAT21 ~ SAT19 + SAT20 + MI22 + MI23 + TS16 + TS17 + TS18 + REC24 + REC28 + REC25 + REC26, data = df)
# #... repeat for each variable ...
# 
# vif1 <- vif(model1)
# vif2 <- vif(model2)
# vif3 <- vif(model3)
# #... repeat for each variable ...
# 
# # Then combine the VIFs into a single data frame or list for easier viewing
# vif_all <- data.frame(cbind(vif1, vif2, vif3, ...))
# library(car)
# latent_vars <- c("SAT", "SAT", "SAT", "MI", "MI", "TS", "TS", "TS", "REC", "REC", "REC", "REC")
# observed_vars <- c("SAT19", "SAT20", "SAT21", "MI22", "MI23", "TS16", "TS17", "TS18", "REC24", "REC28", "REC25", "REC26")
# vars_df <- data.frame(latent_vars, observed_vars)
# get_vif <- function(df, latent_var, observed_vars) {
#   formula <- as.formula(paste(observed_vars[1], "~", paste(observed_vars[observed_vars != observed_vars[1]], collapse = " + ")))
#   model <- lm(formula, data = df)
#   vif_vals <- vif(model)
#   return(vif_vals)
# }
# vif_df <- data.frame()
# 
# for (latent_var in unique(vars_df$latent_vars)) {
#   observed_vars <- vars_df$observed_vars[vars_df$latent_vars == latent_var]
#   vif_vals <- get_vif(data_GRV, latent_var, observed_vars)
#   vif_df <- rbind(vif_df, t(vif_vals))
# }
# 
# # Naming the row
# rownames(vif_df) <- "VIF"
# Define the function
# get_vif <- function(df, latent_var, observed_vars) {
#   if(length(observed_vars) > 1) {
#     formula <- as.formula(paste(observed_vars[1], "~", paste(observed_vars[observed_vars != observed_vars[1]], collapse = " + ")))
#     model <- lm(formula, data = df)
#     vif_vals <- vif(model)
#     return(vif_vals)
#   }
#   else {
#     return(NA)
#   }
# }
# 
# # Call the function for each latent variable
# vif_df <- data.frame()
# 
# for (latent_var in unique(vars_df$latent_vars)) {
#   observed_vars <- vars_df$observed_vars[vars_df$latent_vars == latent_var]
#   vif_vals <- get_vif(data_GRV, latent_var, observed_vars)
#   vif_df <- rbind(vif_df, t(vif_vals))
# }
# 
# # Naming the row
# rownames(vif_df) <- "VIF"