library(pacman)
p_load(lavaan, qgraph, psych, qpcR, ggplot2, igraph, semPlot)
#d = read.csv("mcs_cfa_data.csv")
#Let's see if we have all 24 variables we need (i.e., B1, B2, B4, B5, B6, B7, M2, M3, M4, M5, M6, M7, S1, S2, S3, S5, S6, S7, V2, V3, V4, V5, V6, V7)
psych::describe(d)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## B1 1 330 3.06 1.93 3.0 2.95 2.97 1 6 5 0.29 -1.49 0.11
## S1 2 330 4.64 1.47 5.0 4.87 1.48 1 6 5 -1.00 0.15 0.08
## V2 3 330 2.82 1.84 2.0 2.66 1.48 1 6 5 0.50 -1.26 0.10
## B2 4 330 3.52 1.86 4.0 3.53 2.97 1 6 5 -0.14 -1.46 0.10
## M2 5 330 3.80 1.70 4.0 3.88 1.48 1 6 5 -0.43 -1.08 0.09
## S2 6 330 3.41 1.87 4.0 3.39 2.97 1 6 5 -0.02 -1.49 0.10
## V3 7 330 3.46 1.87 4.0 3.45 2.97 1 6 5 -0.04 -1.50 0.10
## M3 8 330 4.33 1.67 5.0 4.54 1.48 1 6 5 -0.84 -0.52 0.09
## S3 9 330 3.35 1.79 3.0 3.32 2.97 1 6 5 0.08 -1.36 0.10
## V4 10 330 3.37 1.72 4.0 3.34 1.48 1 6 5 -0.04 -1.33 0.09
## B4 11 330 3.02 1.83 3.0 2.91 2.97 1 6 5 0.26 -1.41 0.10
## M4 12 330 4.36 1.61 5.0 4.58 1.48 1 6 5 -0.92 -0.22 0.09
## V5 13 330 3.70 1.75 4.0 3.75 1.48 1 6 5 -0.27 -1.23 0.10
## B5 14 330 3.08 1.83 3.0 2.98 2.97 1 6 5 0.28 -1.42 0.10
## M5 15 330 4.13 1.72 5.0 4.28 1.48 1 6 5 -0.66 -0.86 0.09
## S5 16 330 3.82 1.83 4.0 3.90 2.22 1 6 5 -0.42 -1.27 0.10
## V6 17 330 3.66 1.75 4.0 3.70 1.48 1 6 5 -0.21 -1.22 0.10
## B6 18 330 3.49 1.80 4.0 3.48 2.97 1 6 5 -0.09 -1.38 0.10
## M6 19 330 4.18 1.74 5.0 4.36 1.48 1 6 5 -0.69 -0.83 0.10
## S6 20 330 3.37 1.93 3.5 3.33 2.22 1 6 5 0.02 -1.54 0.11
## V7 21 330 3.85 1.80 4.0 3.93 2.22 1 6 5 -0.42 -1.23 0.10
## B7 22 330 3.55 1.90 4.0 3.56 2.97 1 6 5 -0.11 -1.51 0.10
## M7 23 330 4.06 1.70 4.0 4.20 1.48 1 6 5 -0.65 -0.84 0.09
## S7 24 330 3.76 1.89 4.0 3.83 2.97 1 6 5 -0.26 -1.43 0.10
All 24 variables are present, and everyone has complete cases (n = 330 throughout), so this is the sample from Study 2.
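This can be verified quickly (assuming d is the data frame loaded above):
#Confirm there are no missing values
stopifnot(sum(complete.cases(d)) == nrow(d))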
#I'm using the model you provided in your preprint. Let's run that first. It's known as a four-factor congeneric model.
MCFF.model <- '
ML1 =~ B1 + B2 + B4 + B5 + B6 + B7
ML2 =~ M2 + M3 + M4 + M5 + M6 + M7
ML3 =~ S1 + S2 + S3 + S5 + S6 + S7
ML4 =~ V2 + V3 + V4 + V5 + V6 + V7'
MCFF.fit <- cfa(MCFF.model, data = d, orthogonal = F, std.lv = T)
summary(MCFF.fit, stand = T, fit = T)
## lavaan 0.6-5 ended normally after 30 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of free parameters 54
##
## Number of observations 330
##
## Model Test User Model:
##
## Test statistic 778.228
## Degrees of freedom 246
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 6567.440
## Degrees of freedom 276
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.915
## Tucker-Lewis Index (TLI) 0.905
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -12896.406
## Loglikelihood unrestricted model (H1) -12507.292
##
## Akaike (AIC) 25900.811
## Bayesian (BIC) 26105.962
## Sample-size adjusted Bayesian (BIC) 25934.674
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.081
## 90 Percent confidence interval - lower 0.075
## 90 Percent confidence interval - upper 0.087
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.056
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ML1 =~
## B1 1.582 0.088 17.935 0.000 1.582 0.821
## B2 1.536 0.085 18.079 0.000 1.536 0.825
## B4 1.420 0.086 16.478 0.000 1.420 0.776
## B5 1.572 0.082 19.266 0.000 1.572 0.859
## B6 1.394 0.085 16.496 0.000 1.394 0.776
## B7 1.510 0.088 17.142 0.000 1.510 0.797
## ML2 =~
## M2 1.371 0.078 17.602 0.000 1.371 0.809
## M3 1.382 0.075 18.327 0.000 1.382 0.831
## M4 1.313 0.074 17.863 0.000 1.313 0.817
## M5 1.510 0.075 20.052 0.000 1.510 0.879
## M6 1.352 0.081 16.635 0.000 1.352 0.779
## M7 1.511 0.073 20.580 0.000 1.511 0.893
## ML3 =~
## S1 0.968 0.074 13.136 0.000 0.968 0.661
## S2 1.443 0.089 16.277 0.000 1.443 0.773
## S3 1.405 0.084 16.706 0.000 1.405 0.787
## S5 1.519 0.084 18.190 0.000 1.519 0.833
## S6 1.483 0.092 16.153 0.000 1.483 0.769
## S7 1.569 0.087 18.139 0.000 1.569 0.831
## ML4 =~
## V2 1.397 0.088 15.892 0.000 1.397 0.759
## V3 1.490 0.087 17.065 0.000 1.490 0.797
## V4 1.278 0.083 15.427 0.000 1.278 0.743
## V5 1.436 0.080 17.880 0.000 1.436 0.822
## V6 1.156 0.088 13.184 0.000 1.156 0.662
## V7 1.435 0.084 17.126 0.000 1.435 0.799
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ML1 ~~
## ML2 0.752 0.028 26.536 0.000 0.752 0.752
## ML3 0.756 0.029 26.065 0.000 0.756 0.756
## ML4 0.880 0.019 47.426 0.000 0.880 0.880
## ML2 ~~
## ML3 0.713 0.032 22.313 0.000 0.713 0.713
## ML4 0.785 0.026 29.764 0.000 0.785 0.785
## ML3 ~~
## ML4 0.746 0.031 24.286 0.000 0.746 0.746
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .B1 1.212 0.110 11.038 0.000 1.212 0.326
## .B2 1.107 0.101 10.979 0.000 1.107 0.319
## .B4 1.335 0.116 11.529 0.000 1.335 0.398
## .B5 0.878 0.085 10.374 0.000 0.878 0.262
## .B6 1.282 0.111 11.524 0.000 1.282 0.398
## .B7 1.313 0.116 11.328 0.000 1.313 0.365
## .M2 0.990 0.087 11.366 0.000 0.990 0.345
## .M3 0.857 0.077 11.105 0.000 0.857 0.310
## .M4 0.858 0.076 11.278 0.000 0.858 0.332
## .M5 0.673 0.066 10.184 0.000 0.673 0.228
## .M6 1.183 0.102 11.644 0.000 1.183 0.393
## .M7 0.583 0.060 9.770 0.000 0.583 0.203
## .S1 1.204 0.101 11.963 0.000 1.204 0.563
## .S2 1.397 0.125 11.145 0.000 1.397 0.402
## .S3 1.211 0.110 10.981 0.000 1.211 0.380
## .S5 1.020 0.100 10.248 0.000 1.020 0.306
## .S6 1.517 0.136 11.189 0.000 1.517 0.408
## .S7 1.100 0.107 10.278 0.000 1.100 0.309
## .V2 1.436 0.126 11.430 0.000 1.436 0.424
## .V3 1.276 0.116 11.026 0.000 1.276 0.365
## .V4 1.322 0.114 11.562 0.000 1.322 0.447
## .V5 0.990 0.093 10.662 0.000 0.990 0.324
## .V6 1.717 0.143 12.040 0.000 1.717 0.562
## .V7 1.168 0.106 11.001 0.000 1.168 0.362
## ML1 1.000 1.000 1.000
## ML2 1.000 1.000 1.000
## ML3 1.000 1.000 1.000
## ML4 1.000 1.000 1.000
Fits all match those described in the preprint, but they're subpar, and the factor covariances are quite high, indicating that there might be a higher-order or (nested) orthogonal factor structure here. To compare these models, since they're all nested, I'll provide Schwarz weights (see Wagenmakers & Farrell, 2004). The normalized model probabilities I will plot come from their Eq. (4), with BIC in place of AIC: for model \(i\) among \(K\) candidates,
\[w_{i}(BIC) = \frac{\exp\{-\tfrac{1}{2}\Delta_{i}(BIC)\}}{\sum_{k=1}^{K}\exp\{-\tfrac{1}{2}\Delta_{k}(BIC)\}}, \quad \text{where } \Delta_{i}(BIC) = BIC_{i} - \min_{k} BIC_{k}\]
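A minimal sketch of these weights by hand (equivalent to what qpcR::akaike.weights does below when fed BIC values):
#Schwarz weights from a vector of BICs
schwarz.weights <- function(bic) {
  delta <- bic - min(bic) #BIC differences from the best model
  exp(-delta/2)/sum(exp(-delta/2)) #normalized model probabilities
}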
#Higher-order model
MCHOF.model <- '
ML1 =~ B1 + B2 + B4 + B5 + B6 + B7
ML2 =~ M2 + M3 + M4 + M5 + M6 + M7
ML3 =~ S1 + S2 + S3 + S5 + S6 + S7
ML4 =~ V2 + V3 + V4 + V5 + V6 + V7
MLG =~ ML1 + ML2 + ML3 + ML4'
#Orthogonal (AKA nested or bifactor) model
MCO.model <- '
ML1 =~ B1 + B2 + B4 + B5 + B6 + B7
ML2 =~ M2 + M3 + M4 + M5 + M6 + M7
ML3 =~ S1 + S2 + S3 + S5 + S6 + S7
ML4 =~ V2 + V3 + V4 + V5 + V6 + V7
MLG =~ B1 + B2 + B4 + B5 + B6 + B7 + M2 + M3 + M4 + M5 + M6 + M7 + S1 + S2 + S3 + S5 + S6 + S7 + V2 + V3 + V4 + V5 + V6 + V7'
MCHOF.fit <- cfa(MCHOF.model, data = d, orthogonal = T, std.lv = T)
MCO.fit <- cfa(MCO.model, data = d, orthogonal = T, std.lv = T)
#Now let's compare
#MM = Measurement model, HOF = Higher-order model, O = Orthogonal model
round(cbind(MM = lavInspect(MCFF.fit, 'fit.measures'),
HOF = lavInspect(MCHOF.fit, 'fit.measures'),
O = lavInspect(MCO.fit, 'fit.measures')),3)
## MM HOF O
## npar 54.000 52.000 72.000
## fmin 1.179 1.188 0.877
## chisq 778.228 784.076 578.764
## df 246.000 248.000 228.000
## pvalue 0.000 0.000 0.000
## baseline.chisq 6567.440 6567.440 6567.440
## baseline.df 276.000 276.000 276.000
## baseline.pvalue 0.000 0.000 0.000
## cfi 0.915 0.915 0.944
## tli 0.905 0.905 0.933
## nnfi 0.905 0.905 0.933
## rfi 0.867 0.867 0.893
## nfi 0.882 0.881 0.912
## pnfi 0.786 0.791 0.753
## ifi 0.916 0.915 0.945
## rni 0.915 0.915 0.944
## logl -12896.406 -12899.330 -12796.674
## unrestricted.logl -12507.292 -12507.292 -12507.292
## aic 25900.811 25902.659 25737.347
## bic 26105.962 26100.212 26010.882
## ntotal 330.000 330.000 330.000
## bic2 25934.674 25935.267 25782.497
## rmsea 0.081 0.081 0.068
## rmsea.ci.lower 0.075 0.075 0.061
## rmsea.ci.upper 0.087 0.087 0.075
## rmsea.pvalue 0.000 0.000 0.000
## rmr 0.181 0.182 0.118
## rmr_nomean 0.181 0.182 0.118
## srmr 0.056 0.057 0.037
## srmr_bentler 0.056 0.057 0.037
## srmr_bentler_nomean 0.056 0.057 0.037
## crmr 0.059 0.059 0.039
## crmr_nomean 0.059 0.059 0.039
## srmr_mplus 0.056 0.057 0.037
## srmr_mplus_nomean 0.056 0.057 0.037
## cn_05 121.252 121.259 151.655
## cn_01 128.433 128.413 160.991
## gfi 0.826 0.823 0.869
## agfi 0.788 0.786 0.828
## pgfi 0.678 0.681 0.661
## mfi 0.446 0.444 0.588
## ecvi 2.686 2.691 2.190
#Account for parsimony
print(list(fitMeasures(MCFF.fit, "chisq")/fitMeasures(MCFF.fit, "df"),
           fitMeasures(MCHOF.fit, "chisq")/fitMeasures(MCHOF.fit, "df"),
           fitMeasures(MCO.fit, "chisq")/fitMeasures(MCO.fit, "df")))
## [[1]]
## chisq
## 3.164
##
## [[2]]
## chisq
## 3.162
##
## [[3]]
## chisq
## 2.538
#Alternatively, just ANOVA the models
anova(MCFF.fit, MCHOF.fit, MCO.fit)
## Chi-Squared Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## MCO.fit 228 25737 26011 578.76
## MCFF.fit 246 25901 26106 778.23 199.464 18 < 2e-16 ***
## MCHOF.fit 248 25903 26100 784.08 5.848 2 0.05372 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The HOF is more parsimonious than the measurement model and fits non-significantly worse, but the orthogonal model is still better: although it has fewer degrees of freedom, its \(\chi^2\) is more than proportionally smaller as well.
#Let's check the model probabilities
#Vector of BIC values, followed by weights from qpcR
BICs <- c(fitMeasures(MCFF.fit, "bic"), fitMeasures(MCHOF.fit, "bic"), fitMeasures(MCO.fit, "bic"))
BICweights <- akaike.weights(BICs)$weights
#Data frame of model names, criterion, and weights
Mods <- data.frame(Model = factor(rep(c('Measurement', 'Higher-Order', 'Orthogonal'))),
CRIT = factor(rep('BIC', each = 3)),
values = BICweights)
#Plot and output
ggplot(Mods, aes(Model, values, fill = Model)) + geom_bar(stat = "identity", position = "dodge", show.legend = F) + coord_cartesian(ylim = c(0,1)) + xlab('Schwarz Weights') + ylab('Normalized Probability') + theme_bw()
Mods
## Model CRIT values
## 1 Measurement BIC 2.257181e-21
## 2 Higher-Order BIC 4.001779e-20
## 3 Orthogonal BIC 1.000000e+00
The orthogonal model has far better fit, but this is to be expected regardless, because the orthogonal model can fit noise or common method variance (Murray & Johnson, 2013; Mansolf & Reise, 2017; Yang et al., 2017). These models can be hard to discriminate between, so substantive model tests are usually necessary to decide among orthogonal, higher-order, and measurement models. The problem is so bad that the orthogonal factor model often fits better than the correct model for many types of simulated data (see e.g., Morgan et al., 2015). Nonetheless, it can be useful for assessing the strengths of group factor models, if we assume they are the true models underlying an assessment/questionnaire, because it lets us see how unidimensional the indicators are, for whatever reason.
We can use the loadings from the orthogonal factor model to compute several structural fit indices, including H (Hancock & Mueller, 2001; Rodriguez et al., 2016), PUC (Bonifay et al., 2015), and \(\omega_t\) and \(\omega_h\) (Watkins, 2017).
Because I’m too lazy to make a function for these in R right now, I’ll use http://edpsychassociates.com/Software/Omega.app.zip.
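For reference, here is a minimal sketch of these indices computed directly from the standardized loadings of the orthogonal model fit above (MCO.fit, group factors ML1-ML4, general factor MLG); treat it as an approximation of, not a substitute for, the app's output:
#Bifactor indices from the standardized loading matrix
L <- abs(lavInspect(MCO.fit, "std")$lambda) #reflect the one negative group loading (ML4 to V6; see below)
u <- 1 - rowSums(L^2) #item uniquenesses
common <- sum(colSums(L)^2) #common variance from all factors
omega.t <- common/(common + sum(u)) #omega total
omega.h <- sum(L[, "MLG"])^2/(common + sum(u)) #omega hierarchical
ECV <- sum(L[, "MLG"]^2)/sum(L^2) #explained common variance of the general factor
H <- 1/(1 + 1/sum(L[, "MLG"]^2/(1 - L[, "MLG"]^2))) #construct replicability (H) for the general factor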
#Loadings for computing bifactor fit indices, and also loadings for the higher-order model.
#The latter can be Schmid-Leiman transformed (see the sketch after the higher-order output below)
#to assess the discrepancy (artefactual or substantive; distinguishing these requires more tests)
#between the orthogonal and higher-order models. This can indicate how much the difference in fit
#reflects modeling noise versus modeling loadings that aren't forced to be proportional.
#I prefer using summary() but some people like parameterEstimates(), e.g., parameterEstimates(MCO.fit, stand = T)
summary(MCO.fit, stand = T)
## lavaan 0.6-5 ended normally after 30 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of free parameters 72
##
## Number of observations 330
##
## Model Test User Model:
##
## Test statistic 578.764
## Degrees of freedom 228
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ML1 =~
## B1 0.618 0.094 6.586 0.000 0.618 0.321
## B2 0.777 0.093 8.358 0.000 0.777 0.417
## B4 0.425 0.094 4.512 0.000 0.425 0.232
## B5 0.865 0.089 9.699 0.000 0.865 0.473
## B6 0.447 0.091 4.898 0.000 0.447 0.249
## B7 0.160 0.093 1.725 0.085 0.160 0.084
## ML2 =~
## M2 0.463 0.069 6.687 0.000 0.463 0.273
## M3 0.901 0.071 12.601 0.000 0.901 0.541
## M4 0.915 0.070 12.998 0.000 0.915 0.569
## M5 0.830 0.068 12.235 0.000 0.830 0.483
## M6 0.732 0.079 9.231 0.000 0.732 0.422
## M7 0.813 0.064 12.621 0.000 0.813 0.480
## ML3 =~
## S1 0.566 0.076 7.408 0.000 0.566 0.387
## S2 0.387 0.079 4.879 0.000 0.387 0.207
## S3 0.735 0.081 9.073 0.000 0.735 0.412
## S5 0.859 0.079 10.878 0.000 0.859 0.471
## S6 0.860 0.089 9.667 0.000 0.860 0.446
## S7 1.278 0.084 15.249 0.000 1.278 0.677
## ML4 =~
## V2 0.265 0.099 2.678 0.007 0.265 0.144
## V3 0.781 0.101 7.694 0.000 0.781 0.417
## V4 0.105 0.094 1.128 0.259 0.105 0.061
## V5 0.721 0.092 7.812 0.000 0.721 0.413
## V6 -0.077 0.103 -0.750 0.453 -0.077 -0.044
## V7 0.712 0.095 7.525 0.000 0.712 0.396
## MLG =~
## B1 1.445 0.093 15.572 0.000 1.445 0.750
## B2 1.376 0.090 15.224 0.000 1.376 0.739
## B4 1.333 0.089 14.950 0.000 1.333 0.728
## B5 1.396 0.088 15.919 0.000 1.396 0.763
## B6 1.317 0.087 15.089 0.000 1.317 0.733
## B7 1.512 0.089 17.041 0.000 1.512 0.798
## M2 1.316 0.080 16.450 0.000 1.316 0.777
## M3 1.082 0.084 12.889 0.000 1.082 0.650
## M4 1.002 0.082 12.210 0.000 1.002 0.623
## M5 1.252 0.083 15.013 0.000 1.252 0.729
## M6 1.130 0.087 12.919 0.000 1.130 0.651
## M7 1.269 0.081 15.622 0.000 1.269 0.750
## S1 0.756 0.078 9.754 0.000 0.756 0.517
## S2 1.418 0.089 15.959 0.000 1.418 0.760
## S3 1.178 0.090 13.158 0.000 1.178 0.660
## S5 1.243 0.091 13.712 0.000 1.243 0.681
## S6 1.226 0.098 12.530 0.000 1.226 0.636
## S7 1.139 0.097 11.720 0.000 1.139 0.603
## V2 1.354 0.089 15.184 0.000 1.354 0.736
## V3 1.341 0.092 14.594 0.000 1.341 0.717
## V4 1.279 0.083 15.415 0.000 1.279 0.744
## V5 1.293 0.085 15.248 0.000 1.293 0.740
## V6 1.211 0.087 13.970 0.000 1.211 0.693
## V7 1.317 0.087 15.059 0.000 1.317 0.733
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ML1 ~~
## ML2 0.000 0.000 0.000
## ML3 0.000 0.000 0.000
## ML4 0.000 0.000 0.000
## MLG 0.000 0.000 0.000
## ML2 ~~
## ML3 0.000 0.000 0.000
## ML4 0.000 0.000 0.000
## MLG 0.000 0.000 0.000
## ML3 ~~
## ML4 0.000 0.000 0.000
## MLG 0.000 0.000 0.000
## ML4 ~~
## MLG 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .B1 1.244 0.113 11.042 0.000 1.244 0.335
## .B2 0.971 0.110 8.855 0.000 0.971 0.280
## .B4 1.394 0.117 11.933 0.000 1.394 0.416
## .B5 0.652 0.109 6.003 0.000 0.652 0.195
## .B6 1.292 0.109 11.824 0.000 1.292 0.401
## .B7 1.281 0.112 11.420 0.000 1.281 0.356
## .M2 0.924 0.079 11.745 0.000 0.924 0.322
## .M3 0.785 0.078 10.027 0.000 0.785 0.284
## .M4 0.742 0.077 9.693 0.000 0.742 0.287
## .M5 0.696 0.068 10.196 0.000 0.696 0.236
## .M6 1.198 0.103 11.661 0.000 1.198 0.398
## .M7 0.595 0.060 9.852 0.000 0.595 0.207
## .S1 1.248 0.103 12.086 0.000 1.248 0.583
## .S2 1.317 0.111 11.816 0.000 1.317 0.379
## .S3 1.257 0.110 11.475 0.000 1.257 0.394
## .S5 1.045 0.099 10.527 0.000 1.045 0.314
## .S6 1.476 0.131 11.276 0.000 1.476 0.397
## .S7 0.631 0.124 5.099 0.000 0.631 0.177
## .V2 1.484 0.124 11.946 0.000 1.484 0.438
## .V3 1.089 0.131 8.311 0.000 1.089 0.312
## .V4 1.309 0.111 11.782 0.000 1.309 0.443
## .V5 0.861 0.108 7.956 0.000 0.861 0.282
## .V6 1.580 0.137 11.502 0.000 1.580 0.517
## .V7 0.985 0.113 8.706 0.000 0.985 0.305
## ML1 1.000 1.000 1.000
## ML2 1.000 1.000 1.000
## ML3 1.000 1.000 1.000
## ML4 1.000 1.000 1.000
## MLG 1.000 1.000 1.000
summary(MCHOF.fit, stand = T)
## lavaan 0.6-5 ended normally after 59 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of free parameters 52
##
## Number of observations 330
##
## Model Test User Model:
##
## Test statistic 784.076
## Degrees of freedom 248
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Information Expected
## Information saturated (h1) model Structured
## Standard errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## ML1 =~
## B1 0.596 0.063 9.441 0.000 1.579 0.819
## B2 0.580 0.061 9.469 0.000 1.537 0.825
## B4 0.535 0.058 9.214 0.000 1.417 0.774
## B5 0.593 0.062 9.614 0.000 1.571 0.858
## B6 0.527 0.057 9.229 0.000 1.396 0.777
## B7 0.571 0.061 9.341 0.000 1.514 0.799
## ML2 =~
## M2 0.761 0.053 14.433 0.000 1.371 0.810
## M3 0.768 0.052 14.821 0.000 1.383 0.831
## M4 0.730 0.050 14.585 0.000 1.314 0.818
## M5 0.838 0.053 15.671 0.000 1.510 0.879
## M6 0.750 0.054 13.882 0.000 1.352 0.779
## M7 0.838 0.053 15.897 0.000 1.510 0.892
## ML3 =~
## S1 0.560 0.048 11.556 0.000 0.966 0.660
## S2 0.837 0.062 13.553 0.000 1.443 0.774
## S3 0.815 0.059 13.793 0.000 1.406 0.788
## S5 0.880 0.060 14.556 0.000 1.518 0.832
## S6 0.862 0.064 13.507 0.000 1.487 0.771
## S7 0.909 0.063 14.533 0.000 1.568 0.831
## ML4 =~
## V2 0.471 0.064 7.407 0.000 1.397 0.759
## V3 0.502 0.067 7.508 0.000 1.489 0.796
## V4 0.429 0.058 7.354 0.000 1.275 0.742
## V5 0.483 0.064 7.565 0.000 1.433 0.820
## V6 0.391 0.055 7.084 0.000 1.161 0.664
## V7 0.484 0.064 7.519 0.000 1.439 0.801
## MLG =~
## ML1 2.454 0.294 8.356 0.000 0.926 0.926
## ML2 1.498 0.131 11.468 0.000 0.832 0.832
## ML3 1.405 0.127 11.041 0.000 0.815 0.815
## ML4 2.796 0.406 6.878 0.000 0.942 0.942
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .B1 1.221 0.110 11.053 0.000 1.221 0.329
## .B2 1.106 0.101 10.970 0.000 1.106 0.319
## .B4 1.342 0.116 11.535 0.000 1.342 0.400
## .B5 0.881 0.085 10.379 0.000 0.881 0.263
## .B6 1.277 0.111 11.511 0.000 1.277 0.396
## .B7 1.301 0.115 11.300 0.000 1.301 0.362
## .M2 0.989 0.087 11.359 0.000 0.989 0.345
## .M3 0.856 0.077 11.096 0.000 0.856 0.309
## .M4 0.854 0.076 11.263 0.000 0.854 0.331
## .M5 0.673 0.066 10.175 0.000 0.673 0.228
## .M6 1.184 0.102 11.641 0.000 1.184 0.393
## .M7 0.587 0.060 9.785 0.000 0.587 0.205
## .S1 1.208 0.101 11.965 0.000 1.208 0.564
## .S2 1.396 0.125 11.135 0.000 1.396 0.401
## .S3 1.210 0.110 10.970 0.000 1.210 0.380
## .S5 1.024 0.100 10.252 0.000 1.024 0.308
## .S6 1.507 0.135 11.163 0.000 1.507 0.405
## .S7 1.104 0.107 10.279 0.000 1.104 0.310
## .V2 1.435 0.126 11.422 0.000 1.435 0.424
## .V3 1.279 0.116 11.024 0.000 1.279 0.366
## .V4 1.329 0.115 11.568 0.000 1.329 0.450
## .V5 0.998 0.093 10.676 0.000 0.998 0.327
## .V6 1.706 0.142 12.024 0.000 1.706 0.559
## .V7 1.158 0.106 10.968 0.000 1.158 0.359
## .ML1 1.000 0.142 0.142
## .ML2 1.000 0.308 0.308
## .ML3 1.000 0.336 0.336
## .ML4 1.000 0.113 0.113
## MLG 1.000 1.000 1.000
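As flagged above, here is a minimal sketch of a Schmid-Leiman transform of this higher-order solution (assuming MCHOF.fit from above): for an item with standardized first-order loading \(l\) on factor \(f\), where \(f\) loads \(g\) on MLG, the SL general loading is \(lg\) and the residualized group loading is \(l\sqrt{1-g^{2}}\).
#Schmid-Leiman transform from the standardized higher-order solution
SS <- standardizedSolution(MCHOF.fit)
LD <- SS[SS$op == "=~", c("lhs", "rhs", "est.std")]
g <- setNames(LD$est.std[LD$lhs == "MLG"], LD$rhs[LD$lhs == "MLG"]) #higher-order loadings of ML1-ML4
FO <- LD[LD$lhs != "MLG", ] #first-order item loadings
SL <- data.frame(item = FO$rhs, factor = FO$lhs,
                 general = FO$est.std * g[FO$lhs], #loading on the general dimension
                 group = FO$est.std * sqrt(1 - g[FO$lhs]^2)) #residualized group loading
SL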
It appears that the covariances among the factors are more likely due to a higher-order factor than to individualised, direct relations. The higher-order factor actually explains more of the variance in the indicators than do the group factors independent of it (average \(r^{2}\) = 0.78). More importantly, the bifactor has nullified the loadings from ML1 to B7 and from ML4 to V4 and V6 (since we are not dealing with putatively formative/emergent factors, the statement is not “B7 to ML1”), meaning those indicators relate to the general dimension rather than to their group factors (see Gignac, 2015; Benson et al., 2018). For calculating the bifactor fit indices, I will take the loadings as they are and reflect the now-negative ML4 to V6 loading. Note that this slightly overestimates that factor’s validity and clearly overstates its relationship to those indicators, which may be marginal (a fact made especially salient by the sign-reversal for V6), and it leads to a (here, marginal) understatement of the model fit and the general dimension (i.e., “latent morbid curiosity”). If the sex/ethnicity labels were included, measurement invariance could be assessed with ease. Bifactor fit indices are displayed in Table 1 below.
| Factor | H | PUC | \(\omega_t\) | \(\omega_h\) | ECV | Total Variance |
|---|---|---|---|---|---|---|
| MC | 0.962 | 0.783 | 0.974 | 0.911 | 0.763 | 0.501 |
| ML1 | 0.426 | - | 0.922 | 0.124 | 0.040 | 0.026 |
| ML2 | 0.642 | - | 0.935 | 0.285 | 0.085 | 0.056 |
| ML3 | 0.643 | - | 0.906 | 0.283 | 0.079 | 0.052 |
| ML4 | 0.386 | - | 0.902 | 0.093 | 0.034 | 0.022 |
The battery is primarily unidimensional: none of the group factors is reliable, and all of them fail to reach determinacy. The battery should only be interpreted at the level of its sumscore. Subscales will reflect the single general dimension and error more than they will variance unique to those subscales and independent of the general dimension. There is more reliable unique variance in each subscale than there is group factor variance (ML1: 0.078 versus 0.026; ML2: 0.065 versus 0.056; ML3: 0.094 versus 0.052; ML4: 0.098 versus 0.022), and 79.8% of the common variance in ML1’s indicators, 65% in ML2’s, 62.3% in ML3’s, and 81% in ML4’s is explained by a single dimension. This should be confirmed in the other samples, and invariance with respect to these factors should at least be assessed with respect to sex.
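Those per-subscale percentages are just the general factor’s share of the common variance among each group factor’s indicators; a minimal sketch, reusing the standardized loading matrix L from the index sketch above:
#General factor's share of common variance within each subscale
sapply(c("ML1", "ML2", "ML3", "ML4"), function(f) {
  idx <- L[, f] != 0 #items assigned to this group factor
  sum(L[idx, "MLG"]^2)/sum(L[idx, c(f, "MLG")]^2)
})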
#Orthogonal factor model plot
suppressWarnings(semPaths(MCO.fit, bifactor = "MLG", title = F, label.cex = 0.5, sizeLat = 5, sizeMan = 2, edge.label.cex = 0.4, minimum = 0.0001, sizeInt = 0.5, mar = c(1, 1, 1, 1), residuals = F, intercepts = F, thresholds = F, layout = "circle3", "std", groups = "lat", theme = "Reddit", fade = F))
Exploratory graph analysis (EGA) can be preferable to EFA for many reasons (see Golino & Epskamp, 2017; some of this code comes from their supplement). A relevant one here is that its output is not debatable on the basis of extraction preferences: it simply returns what the data show, and it does not tend to over- or underfactor the way other common methods for determining the number of factors do. This section starts with an EGA and then fits an EBICglasso partial correlation network model with the same parsimony as the orthogonal factor model, for comparison. TL;DR: the theory underlying network models is that, rather than a common cause underlying the items, the common variance is formed by direct interactions between whatever the indicators represent. For instance, we don’t need to say that “latent depression” underlies the relationship between tiredness and lack of sleep in depression; we can just say that lack of sleep causes tiredness, and so on.
EGA <- function(data, plot.EGA){
  require(qgraph)
  require(igraph)
  #Correlations (polychoric where appropriate), then an EBIC-tuned glasso network
  cor.data <- cor_auto(data)
  glasso.ebic <- EBICglasso(S = cor.data, n = nrow(data))
  #Detect dimensions as walktrap communities in the network
  graph.glasso <- as.igraph(qgraph(glasso.ebic, layout = "spring", vsize = 3))
  wc <- walktrap.community(graph.glasso)
  n.dim <- max(wc$membership)
  if (plot.EGA == TRUE) {
    plot.ega <- qgraph(glasso.ebic, layout = "spring", vsize = 4, groups = as.factor(wc$membership))
  }
  #Collect output
  a <- list()
  a$n.dim <- n.dim
  a$correlation <- cor.data
  a$glasso <- glasso.ebic
  a$wc <- wc$membership
  #Items with their detected dimensions, ordered by dimension
  dim.variables <- data.frame(items = names(data), dimension = a$wc)
  dim.variables <- dim.variables[order(dim.variables$dimension), ]
  a$dim.variables <- dim.variables
  return(a)
}
#EGA and plot
MCEGA <- EGA(d, plot.EGA = TRUE)
## Variables detected as ordinal: B1; S1; V2; B2; M2; S2; V3; M3; S3; V4; B4; M4; V5; B5; M5; S5; V6; B6; M6; S6; V7; B7; M7; S7
## Warning in EBICglassoCore(S = S, n = n, gamma = gamma, penalize.diagonal =
## penalize.diagonal, : A dense regularized network was selected (lambda < 0.1 *
## lambda.max). Recent work indicates a possible drop in specificity. Interpret the
## presence of the smallest edges with care. Setting threshold = TRUE will enforce
## higher specificity, at the cost of sensitivity.
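The detected dimension count and item assignments can be read straight off the returned list:
#Number of detected dimensions and item memberships
MCEGA$n.dim
MCEGA$dim.variables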
A more accurate depiction of the number of factors puts it at six rather than four. But these are even less likely to be determinate beyond a singular dimension (try it out! Just kidding - they can’t be identified unless you expand on them). Let’s fit that sparse Gaussian lasso network model. Note that the information criteria will not be comparable, but absolute/centrality fit will be.
#Plot groupings
GR <- list(
"Motives of Dangerous People" = c(5, 8, 12, 15, 19, 23), #M
"Supernatural Danger" = c(2, 6, 9, 16, 20, 24), #S
"Body Violation" = c(1, 4, 11, 14, 18, 22), #B
"Social Violence" = c(3, 7, 10, 13, 17, 21) #V
)
#To show item text in the plot, add nodeNames = NM, legend.cex = 0.25, legend.mode = "style2" to the qgraph call below
NM <- list(
"If a head transplant was possible, I would want to watch the procedure",
"I think the supernatural is an interesting topic",
"If I lived in Medieval Europe, I would be interested in attending a public execution",
"I would be curious to see how an autopsy is performed",
"I am curious about crime and enjoy reading detailed news accounts about murders and other violent crimes",
"I would be interested in attending or watching a video of an exorcism",
"If I lived in Ancient Rome, I would be interested in attending a gladiatorial fight",
"I would be interested in watching a documentary on motives behind real murders",
"I find the Occult interesting",
"If I saw a street fight break out, and knew I could not intervene, I would try to watch it",
"I am interested in seeing how limb amputation works",
"My favorite part of a crime show is learning about why the killer did what he did",
"I would be curious enough to watch a duel if I lived in the Wild West",
"I would like to see how bodies are prepared for funerals",
"I would be interested in watching an interview with an imprisoned serial killer talking about his crimes",
"A documentary on Voodoo would interest me",
"I prefer violent movies and TV shows to be uncensored",
"I think the preservation of bodies, like in taxidermy or mummification, is interesting",
"Being a criminal profiler who studies the personality of murderers would be an interesting job",
"I am curious how a Ouija board works",
"I am curious what a battle looked like in the Middle Ages",
"I am curious what the deadliest toxin in the world would do to the body",
"I am curious about the minds of violent people",
"I think witchcraft would be an interesting topic to learn about"
)
#Network model
congraph <- EBICglasso(cor(d), n = 330, lambda.min.ratio = 0.01, threshold = T)
congraphlay <- qgraph(congraph, layout = "spring", legend = F, palette = "colorblind", groups = GR, theme = "Reddit", fade = F)
SNetwork <- ggmFit(congraph, as.matrix(cor(d)), sampleSize = 330, nPar = fitMeasures(MCO.fit, "npar"), ebicTuning = 0.5)
## Refitting network
SNetwork
##
## ggmFit object:
## Use plot(object) to plot the network structure
## Fit measures stored under object$fitMeasures
##
## Measure Value
## 1 nvar 24
## 2 nobs 300
## 3 npar 72
## 4 df 228
## 5 fmin 1.01
## 6 chisq 663.66
## 7 pvalue < 0.0001
## 8 baseline.chisq 6567.44
## 9 baseline.df 276
## 10 baseline.pvalue 0
## 11 nfi 0.90
## 12 tli 0.92
## 13 rfi 0.88
## 14 ifi 0.93
## 15 rni 0.93
## 16 cfi 0.93
## 17 rmsea 0.076
## 18 rmsea.ci.lower 0.069
## 19 rmsea.ci.upper 0.083
## 20 rmsea.pvalue < 0.0001
## 21 rmr 0.12
## 22 srmr 0.12
## 23 logl -8274.09
## 24 unrestricted.logl -7942.26
## 25 aic.ll 16692.17
## 26 aic.ll2 16733.08
## 27 aic.x 207.66
## 28 aic.x2 807.66
## 29 bic 16965.71
## 30 bic2 16737.32
## 31 ebic 17423.35
## 32 ebicTuning 0.50
The network model fits better than the measurement and higher-order models, but worse than the orthogonal factor model. Interpret that as you will; I don’t think a network model makes much theoretical sense here unless you wish to say that belief in one thing leads to belief in another (and why those particular things?). That claim is yours to make. You can also make the factor/network comparison fairer by better configuring the sparsity, as below.
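For instance, a hypothetical re-tuning (gamma is EBICglasso’s EBIC hyperparameter; larger values select sparser networks):
#A sparser network for a more conservative comparison
congraph2 <- EBICglasso(cor(d), n = 330, gamma = 1, lambda.min.ratio = 0.1, threshold = T)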
Here are some network metrics, for interest.
centralityPlot(congraph, scale = "raw0", include = c("Strength", "ExpectedInfluence", "Betweenness", "Closeness"), orderBy = "Strength")
clusteringPlot(congraph, scale = "raw0")
Wagenmakers, E.-J., & Farrell, S. (2004). AIC model selection using Akaike weights. Psychonomic Bulletin & Review, 11(1), 192-196. https://doi.org/10.3758/BF03206482
Murray, A. L., & Johnson, W. (2013). The limitations of model fit in comparing the bi-factor versus higher-order models of human cognitive ability structure. Intelligence, 41(5), 407-422. https://doi.org/10.1016/j.intell.2013.06.004
Mansolf, M., & Reise, S. P. (2017). When and why the second-order and bifactor models are distinguishable. Intelligence, 61, 120-129. https://doi.org/10.1016/j.intell.2017.01.012
Yang, R., Spirtes, P., Scheines, R., Reise, S. P., & Mansolf, M. (2017). Finding Pure Sub-Models for Improved Differentiation of Bi-Factor and Second-Order Models. Structural Equation Modeling, 24(3), 402-413. https://doi.org/10.1080/10705511.2016.1261351
Morgan, G. B., Hodge, K. J., Wells, K. E., & Watkins, M. W. (2015). Are Fit Indices Biased in Favor of Bi-Factor Models in Cognitive Ability Research?: A Comparison of Fit in Correlated Factors, Higher-Order, and Bi-Factor Models via Monte Carlo Simulations. Journal of Intelligence, 3(1), 2-20. https://doi.org/10.3390/jintelligence3010002
Hancock, G. R., & Mueller, R. O. (2001). Rethinking construct reliability within latent variable systems. In R. Cudeck, S. du Toit, & D. Sörbom (Eds.), Structural equation modeling: Present and future (pp. 195-216). Scientific Software International.
Rodriguez, A., Reise, S. P., & Haviland, M. G. (2016). Applying bifactor statistical indices in the evaluation of psychological measures. Journal of Personality Assessment, 98, 223-237.
Bonifay, W. E., Reise, S. P., Scheines, R., & Meijer, R. R. (2015). When are multidimensional data unidimensional enough for structural equation modeling? An evaluation of the DETECT multidimensionality index. Structural Equation Modeling, 22, 504-516.
Watkins, M. W. (2017). The reliability of multidimensional neuropsychological measures: From alpha to omega. The Clinical Neuropsychologist, 31, 1113-1126.
Gignac, G. E. (2015). Estimating the Strength of a General Factor: Coefficient Omega Hierarchical. Industrial and Organizational Psychology, 8(3), 434-438. https://doi.org/10.1017/iop.2015.59
Benson, N. F., Beaujean, A. A., McGill, R. J., & Dombrowski, S. C. (2018). Revisiting Carroll’s survey of factor-analytic studies: Implications for the clinical assessment of intelligence. Psychological Assessment, 30(8), 1028-1038. https://doi.org/10.1037/pas0000556
Golino, H. F., & Epskamp, S. (2017). Exploratory graph analysis: A new approach for estimating the number of dimensions in psychological research. PLOS ONE, 12(6), e0174035. https://doi.org/10.1371/journal.pone.0174035