library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
library(semPlot)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(RColorBrewer)
library(semTools)
##
## ###############################################################################
## This is semTools 0.5-6
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
##
## Attaching package: 'semTools'
##
## The following object is masked from 'package:PerformanceAnalytics':
##
## kurtosis
##
## The following object is masked from 'package:readr':
##
## clipboard
data_GRV <- read_csv("data/Data_referral_marketing_Sebas.csv")
## Rows: 240 Columns: 28
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (28): A1, A2, A3, A4, A5, C6, C7, C8, C9, E10, E11, E12, O13, O14, O15, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ref_mark_1_3 <- "SAT =~ SAT19 + SAT20 + SAT21
MI =~ MI22 + MI23
TS =~ TS16 + TS17 + TS18
REC =~ REC24 + REC28 + REC25 + REC26
MI ~ SAT
TS ~ SAT
REC ~ SAT
REC ~ MI
REC ~ TS
"
Modelo
ref_mark_fit_1_3 <- lavaan::cfa(ref_mark_1_3, data=data_GRV)
Primero se mira las cargas cruzadas. Cada uno de los items cuadren con cada constructo
inspect(ref_mark_fit_1_3, what = "std")$lambda
## SAT MI TS REC
## SAT19 0.878 0.000 0.000 0.000
## SAT20 0.984 0.000 0.000 0.000
## SAT21 0.943 0.000 0.000 0.000
## MI22 0.000 0.959 0.000 0.000
## MI23 0.000 0.877 0.000 0.000
## TS16 0.000 0.000 0.594 0.000
## TS17 0.000 0.000 0.812 0.000
## TS18 0.000 0.000 0.854 0.000
## REC24 0.000 0.000 0.000 0.887
## REC28 0.000 0.000 0.000 0.770
## REC25 0.000 0.000 0.000 0.967
## REC26 0.000 0.000 0.000 0.534
Después se revisa la validez de los constructos con alpha de cronbach que debe ser mayor a .7 y el AVE mayor a .5. Idealmente, también el composite reliability que debe ser mayor a 0.7
reliability(ref_mark_fit_1_3)
## SAT MI TS REC
## alpha 0.9521594 0.9136440 0.7887447 0.8463513
## omega 0.9519718 0.9145252 0.7967289 0.8489612
## omega2 0.9519718 0.9145252 0.7967289 0.8489612
## omega3 0.9512951 0.9145252 0.7985576 0.8509266
## avevar 0.8685910 0.8426395 0.5703285 0.5871115
Modelo externo son las cargas de las preguntas y la validez de los constructos. El modelo interno con las hipótesis
Validando el modelo interno
summary(ref_mark_fit_1_3, fit.measures=TRUE, standardized=T,rsquare=T)
## lavaan 0.6.15 ended normally after 53 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 29
##
## Number of observations 240
##
## Model Test User Model:
##
## Test statistic 146.191
## Degrees of freedom 49
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 2280.096
## Degrees of freedom 66
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.956
## Tucker-Lewis Index (TLI) 0.941
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2735.782
## Loglikelihood unrestricted model (H1) -2662.686
##
## Akaike (AIC) 5529.564
## Bayesian (BIC) 5630.503
## Sample-size adjusted Bayesian (SABIC) 5538.580
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.091
## 90 Percent confidence interval - lower 0.074
## 90 Percent confidence interval - upper 0.108
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.862
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.044
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## SAT =~
## SAT19 1.000 0.664 0.878
## SAT20 1.008 0.039 25.704 0.000 0.669 0.984
## SAT21 0.961 0.041 23.491 0.000 0.638 0.943
## MI =~
## MI22 1.000 0.835 0.959
## MI23 0.940 0.058 16.323 0.000 0.784 0.877
## TS =~
## TS16 1.000 0.696 0.594
## TS17 1.258 0.145 8.689 0.000 0.875 0.812
## TS18 1.363 0.159 8.589 0.000 0.949 0.854
## REC =~
## REC24 1.000 0.730 0.887
## REC28 1.028 0.067 15.349 0.000 0.751 0.770
## REC25 1.156 0.051 22.488 0.000 0.845 0.967
## REC26 0.847 0.094 9.046 0.000 0.619 0.534
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## MI ~
## SAT 0.774 0.074 10.426 0.000 0.616 0.616
## TS ~
## SAT 0.161 0.076 2.119 0.034 0.153 0.153
## REC ~
## SAT 0.282 0.081 3.462 0.001 0.256 0.256
## MI 0.292 0.067 4.372 0.000 0.333 0.333
## TS 0.221 0.067 3.299 0.001 0.211 0.211
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .SAT19 0.131 0.013 9.762 0.000 0.131 0.229
## .SAT20 0.014 0.006 2.383 0.017 0.014 0.031
## .SAT21 0.051 0.007 7.152 0.000 0.051 0.111
## .MI22 0.060 0.033 1.801 0.072 0.060 0.080
## .MI23 0.185 0.034 5.479 0.000 0.185 0.231
## .TS16 0.891 0.091 9.750 0.000 0.891 0.648
## .TS17 0.396 0.072 5.487 0.000 0.396 0.341
## .TS18 0.333 0.079 4.200 0.000 0.333 0.270
## .REC24 0.144 0.019 7.605 0.000 0.144 0.213
## .REC28 0.388 0.039 9.961 0.000 0.388 0.407
## .REC25 0.050 0.018 2.705 0.007 0.050 0.065
## .REC26 0.961 0.090 10.726 0.000 0.961 0.715
## SAT 0.441 0.051 8.616 0.000 1.000 1.000
## .MI 0.432 0.053 8.205 0.000 0.621 0.621
## .TS 0.473 0.102 4.640 0.000 0.977 0.977
## .REC 0.343 0.041 8.362 0.000 0.644 0.644
##
## R-Square:
## Estimate
## SAT19 0.771
## SAT20 0.969
## SAT21 0.889
## MI22 0.920
## MI23 0.769
## TS16 0.352
## TS17 0.659
## TS18 0.730
## REC24 0.787
## REC28 0.593
## REC25 0.935
## REC26 0.285
## MI 0.379
## TS 0.023
## REC 0.356
fitMeasures(ref_mark_fit_1_3)
## npar fmin chisq
## 29.000 0.305 146.191
## df pvalue baseline.chisq
## 49.000 0.000 2280.096
## baseline.df baseline.pvalue cfi
## 66.000 0.000 0.956
## tli nnfi rfi
## 0.941 0.941 0.914
## nfi pnfi ifi
## 0.936 0.695 0.956
## rni logl unrestricted.logl
## 0.956 -2735.782 -2662.686
## aic bic ntotal
## 5529.564 5630.503 240.000
## bic2 rmsea rmsea.ci.lower
## 5538.580 0.091 0.074
## rmsea.ci.upper rmsea.ci.level rmsea.pvalue
## 0.108 0.900 0.000
## rmsea.close.h0 rmsea.notclose.pvalue rmsea.notclose.h0
## 0.050 0.862 0.080
## rmr rmr_nomean srmr
## 0.041 0.041 0.044
## srmr_bentler srmr_bentler_nomean crmr
## 0.044 0.044 0.048
## crmr_nomean srmr_mplus srmr_mplus_nomean
## 0.048 0.044 0.044
## cn_05 cn_01 gfi
## 109.907 123.994 0.916
## agfi pgfi mfi
## 0.866 0.575 0.817
## ecvi
## 0.851
fitMeasures(ref_mark_fit_1_3)["chisq"] / fitMeasures(ref_mark_fit_1_3)["df"]
## chisq
## 2.983499
Resultado final
Chisquare = 747.047
Cmin/df = 2.87
nfi = 0.840
nnfi = 0.871
CFI = 0.889
AIC = 11922.825
RMSEA = 0.088
semPlot::semPaths(ref_mark_fit_1_3, what="est", fade=FALSE, residuals=FALSE,
layout="spring", structural=TRUE, nCharNodes=7, edge.label.cex = 1)
lavInspect(ref_mark_fit_1_3, "cor.lv")
## SAT MI TS REC
## SAT 1.000
## MI 0.616 1.000
## TS 0.153 0.094 1.000
## REC 0.494 0.511 0.281 1.000
VIF - Multicolinealidad
# # Assume your data frame is df
# model1 <- lm(SAT19 ~ SAT20 + SAT21 + MI22 + MI23 + TS16 + TS17 + TS18 + REC24 + REC28 + REC25 + REC26, data = df)
# model2 <- lm(SAT20 ~ SAT19 + SAT21 + MI22 + MI23 + TS16 + TS17 + TS18 + REC24 + REC28 + REC25 + REC26, data = df)
# model3 <- lm(SAT21 ~ SAT19 + SAT20 + MI22 + MI23 + TS16 + TS17 + TS18 + REC24 + REC28 + REC25 + REC26, data = df)
# #... repeat for each variable ...
#
# vif1 <- vif(model1)
# vif2 <- vif(model2)
# vif3 <- vif(model3)
# #... repeat for each variable ...
#
# # Then combine the VIFs into a single data frame or list for easier viewing
# vif_all <- data.frame(cbind(vif1, vif2, vif3, ...))
# library(car)
# latent_vars <- c("SAT", "SAT", "SAT", "MI", "MI", "TS", "TS", "TS", "REC", "REC", "REC", "REC")
# observed_vars <- c("SAT19", "SAT20", "SAT21", "MI22", "MI23", "TS16", "TS17", "TS18", "REC24", "REC28", "REC25", "REC26")
# vars_df <- data.frame(latent_vars, observed_vars)
# get_vif <- function(df, latent_var, observed_vars) {
# formula <- as.formula(paste(observed_vars[1], "~", paste(observed_vars[observed_vars != observed_vars[1]], collapse = " + ")))
# model <- lm(formula, data = df)
# vif_vals <- vif(model)
# return(vif_vals)
# }
# vif_df <- data.frame()
#
# for (latent_var in unique(vars_df$latent_vars)) {
# observed_vars <- vars_df$observed_vars[vars_df$latent_vars == latent_var]
# vif_vals <- get_vif(data_GRV, latent_var, observed_vars)
# vif_df <- rbind(vif_df, t(vif_vals))
# }
#
# # Naming the row
# rownames(vif_df) <- "VIF"
# Define the function
# get_vif <- function(df, latent_var, observed_vars) {
# if(length(observed_vars) > 1) {
# formula <- as.formula(paste(observed_vars[1], "~", paste(observed_vars[observed_vars != observed_vars[1]], collapse = " + ")))
# model <- lm(formula, data = df)
# vif_vals <- vif(model)
# return(vif_vals)
# }
# else {
# return(NA)
# }
# }
#
# # Call the function for each latent variable
# vif_df <- data.frame()
#
# for (latent_var in unique(vars_df$latent_vars)) {
# observed_vars <- vars_df$observed_vars[vars_df$latent_vars == latent_var]
# vif_vals <- get_vif(data_GRV, latent_var, observed_vars)
# vif_df <- rbind(vif_df, t(vif_vals))
# }
#
# # Naming the row
# rownames(vif_df) <- "VIF"