library(pacman)
p_load(psych, VIM, lavaan, dplyr)
HSA <- read.csv("HSA.csv")
HSA <- HSA %>% mutate(
Q30 = -Q30,
Q12 = -Q12,
Q60 = -Q60,
Q42 = -Q42,
Q24 = -Q24,
Q48 = -Q48,
Q53 = -Q53,
#Q35 = -Q35,
Q41 = -Q41,
Q59 = -Q59,
Q28 = -Q28,
Q52 = -Q52,
Q10 = -Q10,
Q46 = -Q46,
Q9 = -Q9,
Q15 = -Q15,
Q57 = -Q57,
Q21 = -Q21,
Q45 = -Q45,
Q26 = -Q26,
Q32 = -Q32,
Q14 = -Q14,
Q20 = -Q20,
Q44 = -Q44,
Q56 = -Q56,
Q1 = -Q1,
Q31 = -Q31,
Q19 = -Q19,
Q55 = -Q55)
A colleague from Germany has asked me to perform a confirmatory factor analysis of some data from a few of his colleagues from Saudi Arabia. I do not have more details on the data as of yet and I have not been told whether I have been given the go-ahead to share it. Once I receive those pieces of information, I will add them here and post/not post the data contingent on what the other researchers say is appropriate (if reminded). I will be testing
Before I begin my analysis, I think the first will be true and the second and third will not. Data are categorical, 1-5/6, so I will be using WLSMV.
# Variables
describe(HSA)
# Declare variables to be ordered
HSA[, 3:62] <- lapply(HSA[, 3:62], ordered)
# How's the missing data situation?
missing_plot <- aggr(HSA[, 3:62], col=c('steelblue', 'darkred'), numbers = T, sortVars = T, labels = names(HSA[, 3:62]), cex.axis = .7, gap = 3, ylab = c("Proportion Missing", "Pattern"))
##
## Variables sorted by number of missings:
## Variable Count
## Q1 0
## Q2 0
## Q3 0
## Q4 0
## Q5 0
## Q6 0
## Q7 0
## Q8 0
## Q9 0
## Q10 0
## Q11 0
## Q12 0
## Q13 0
## Q14 0
## Q15 0
## Q16 0
## Q17 0
## Q18 0
## Q19 0
## Q20 0
## Q21 0
## Q22 0
## Q23 0
## Q24 0
## Q25 0
## Q26 0
## Q27 0
## Q28 0
## Q29 0
## Q30 0
## Q31 0
## Q32 0
## Q33 0
## Q34 0
## Q35 0
## Q36 0
## Q37 0
## Q38 0
## Q39 0
## Q40 0
## Q41 0
## Q42 0
## Q43 0
## Q44 0
## Q45 0
## Q46 0
## Q47 0
## Q48 0
## Q49 0
## Q50 0
## Q51 0
## Q52 0
## Q53 0
## Q54 0
## Q55 0
## Q56 0
## Q57 0
## Q58 0
## Q59 0
## Q60 0
SIMPLECGF <- '
F1 =~ Q6 + Q30 + Q54 + Q12 + Q36 + Q60 + Q18 + Q42 + Q24 + Q48
F2 =~ Q5 + Q29 + Q53 + Q11 + Q35 + Q17 + Q41 + Q23 + Q47 + Q59
F3 =~ Q4 + Q28 + Q52 + Q10 + Q34 + Q58 + Q16 + Q40 + Q22 + Q46
F4 =~ Q3 + Q27 + Q9 + Q33 + Q51 + Q15 + Q39 + Q57 + Q21 + Q45
F5 =~ Q2 + Q26 + Q8 + Q32 + Q14 + Q38 + Q50 + Q20 + Q44 + Q56
F6 =~ Q1 + Q25 + Q7 + Q31 + Q13 + Q37 + Q49 + Q19 + Q43 + Q55'
SIMPLEHOF <- '
F1 =~ Q6 + Q30 + Q54 + Q12 + Q36 + Q60 + Q18 + Q42 + Q24 + Q48
F2 =~ Q5 + Q29 + Q53 + Q11 + Q35 + Q17 + Q41 + Q23 + Q47 + Q59
F3 =~ Q4 + Q28 + Q52 + Q10 + Q34 + Q58 + Q16 + Q40 + Q22 + Q46
F4 =~ Q3 + Q27 + Q9 + Q33 + Q51 + Q15 + Q39 + Q57 + Q21 + Q45
F5 =~ Q2 + Q26 + Q8 + Q32 + Q14 + Q38 + Q50 + Q20 + Q44 + Q56
F6 =~ Q1 + Q25 + Q7 + Q31 + Q13 + Q37 + Q49 + Q19 + Q43 + Q55
GP =~ F1 + F2 + F3 + F4 + F5 + F6'
COMPLEXCGF <- '
F1 =~ Q6 + Q30 + Q54
F2 =~ Q12 + Q36 + Q60
F3 =~ Q18 + Q42
F4 =~ Q24 + Q48
F5 =~ Q5 + Q29 + Q53
F6 =~ Q11 + Q35
F7 =~ Q17 + Q41
F8 =~ Q23 + Q47 + Q59
F9 =~ Q4 + Q28 + Q52
F10 =~ Q10 + Q34 + Q58
F11 =~ Q16 + Q40
F12 =~ Q22 + Q46
F13 =~ Q3 + Q27
F14 =~ Q9 + Q33 + Q51
F15 =~ Q15 + Q39 + Q57
F16 =~ Q21 + Q45
F17 =~ Q2 + Q26
F18 =~ Q8 + Q32
F19 =~ Q14 + Q38 + Q50
F20 =~ Q20 + Q44 + Q56
F21 =~ Q1 + Q25
F22 =~ Q7 + Q31
F23 =~ Q13 + Q37 + Q49
F24 =~ Q19 + Q43 + Q55
S1 =~ F1 + F2 + F3 + F4
S2 =~ F5 + F6 + F7 + F8
S3 =~ F9 + F10 + F11 + F12
S4 =~ F13 + F14 + F15 + F16
S5 =~ F17 + F18 + F19 + F20
S6 =~ F21 + F22 + F23 + F24'
COMPLEXHOF <- '
F1 =~ Q6 + Q30 + Q54
F2 =~ Q12 + Q36 + Q60
F3 =~ Q18 + Q42
F4 =~ Q24 + Q48
F5 =~ Q5 + Q29 + Q53
F6 =~ Q11 + Q35
F7 =~ Q17 + Q41
F8 =~ Q23 + Q47 + Q59
F9 =~ Q4 + Q28 + Q52
F10 =~ Q10 + Q34 + Q58
F11 =~ Q16 + Q40
F12 =~ Q22 + Q46
F13 =~ Q3 + Q27
F14 =~ Q9 + Q33 + Q51
F15 =~ Q15 + Q39 + Q57
F16 =~ Q21 + Q45
F17 =~ Q2 + Q26
F18 =~ Q8 + Q32
F19 =~ Q14 + Q38 + Q50
F20 =~ Q20 + Q44 + Q56
F21 =~ Q1 + Q25
F22 =~ Q7 + Q31
F23 =~ Q13 + Q37 + Q49
F24 =~ Q19 + Q43 + Q55
S1 =~ F1 + F2 + F3 + F4
S2 =~ F5 + F6 + F7 + F8
S3 =~ F9 + F10 + F11 + F12
S4 =~ F13 + F14 + F15 + F16
S5 =~ F17 + F18 + F19 + F20
S6 =~ F21 + F22 + F23 + F24
GP =~ S1 + S2 + S3 + S4 + S5 + S6'
SimpleC <- cfa(SIMPLECGF, data = HSA, std.lv = T, estimator = "WLSMV")
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -2.361731e-16) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
SimpleH <- cfa(SIMPLEHOF, data = HSA, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -2.429199e-16) is smaller than zero. This may be a symptom that
## the model is not identified.
ComplexC <- cfa(COMPLEXCGF, data = HSA, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -2.234788e-16) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
ComplexH <- cfa(COMPLEXHOF, data = HSA, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -2.994651e-16) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
round(cbind("Simple CGF" = fitMeasures(SimpleC, FITM),
"Simple HOF" = fitMeasures(SimpleH, FITM),
"Complex CGF" = fitMeasures(ComplexC, FITM),
"Complex HOF" = fitMeasures(ComplexH, FITM)), 3)
## Simple CGF Simple HOF Complex CGF Complex HOF
## chisq 20952.936 21087.858 20406.471 20596.961
## df 1695.000 1704.000 1671.000 1680.000
## npar 323.000 314.000 347.000 338.000
## cfi 0.738 0.736 0.745 0.743
## rmsea 0.064 0.064 0.064 0.064
## rmsea.ci.lower 0.064 0.064 0.063 0.063
## rmsea.ci.upper 0.065 0.065 0.065 0.065
## aic NA NA NA NA
## bic NA NA NA NA
## tli 0.726 0.726 0.730 0.729
These theoretical models fit horribly. If these (supplied to me by the person who gave me the data) are realistic, then there's something plainly wrong with the HEXACO, or at least with its implementation in this Arab-speaking sample. So, what we need to do is to perform a parallel analysis, do a factor analysis, and see if the theoretically-expected factors emerge. If a large number of first-order factors emerge, parallel analysis of the resulting latent covariances can take place. Without a hold-out sample, this may end up overfitted. To that end, I have randomly removed 500 people from the sample and left them out as a hold-out to confirm the model structure.
inspect(SimpleC, 'r2')
## Q6 Q30 Q54 Q12 Q36 Q60 Q18 Q42 Q24 Q48 Q5 Q29 Q53
## 0.007 0.244 0.063 0.248 0.004 0.305 0.060 0.165 0.221 0.158 0.023 0.112 0.175
## Q11 Q35 Q17 Q41 Q23 Q47 Q59 Q4 Q28 Q52 Q10 Q34 Q58
## 0.084 0.181 0.057 0.047 0.097 0.049 0.076 0.002 0.167 0.258 0.080 0.141 0.142
## Q16 Q40 Q22 Q46 Q3 Q27 Q9 Q33 Q51 Q15 Q39 Q57 Q21
## 0.047 0.077 0.028 0.163 0.017 0.078 0.111 0.216 0.136 0.031 0.022 0.150 0.084
## Q45 Q2 Q26 Q8 Q32 Q14 Q38 Q50 Q20 Q44 Q56 Q1 Q25
## 0.077 0.009 0.162 0.001 0.239 0.145 0.038 0.190 0.197 0.237 0.107 0.047 0.107
## Q7 Q31 Q13 Q37 Q49 Q19 Q43 Q55
## 0.043 0.114 0.014 0.099 0.167 0.009 0.040 0.100
inspect(ComplexC, 'r2')
## Q6 Q30 Q54 Q12 Q36 Q60 Q18 Q42 Q24 Q48 Q5 Q29 Q53
## 0.007 0.236 0.062 0.397 0.003 0.497 0.058 0.157 0.362 0.255 0.023 0.111 0.176
## Q11 Q35 Q17 Q41 Q23 Q47 Q59 Q4 Q28 Q52 Q10 Q34 Q58
## 0.137 0.303 0.056 0.047 0.099 0.051 0.078 0.001 0.250 0.392 0.095 0.173 0.174
## Q16 Q40 Q22 Q46 Q3 Q27 Q9 Q33 Q51 Q15 Q39 Q57 Q21
## 0.161 0.268 0.027 0.157 0.124 0.683 0.113 0.221 0.139 0.032 0.022 0.155 0.085
## Q45 Q2 Q26 Q8 Q32 Q14 Q38 Q50 Q20 Q44 Q56 Q1 Q25
## 0.077 0.009 0.166 0.000 0.458 0.147 0.038 0.193 0.229 0.276 0.124 0.048 0.110
## Q7 Q31 Q13 Q37 Q49 Q19 Q43 Q55 F1 F2 F3 F4 F5
## 0.044 0.118 0.014 0.101 0.172 0.010 0.043 0.109 0.974 0.530 0.977 0.543 0.968
## F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18
## 0.568 0.979 0.943 0.596 0.752 0.258 0.974 0.108 0.954 0.946 0.969 0.959 0.515
## F19 F20 F21 F22 F23 F24
## 0.970 0.832 0.958 0.947 0.951 0.900
This explains very little.
describe(HSAG2)
fa.parallel(HSAG2[, 3:62])
## Parallel analysis suggests that the number of factors = 15 and the number of components = 12
HEX <- fa(HSAG2[, 3:62], nfactors = 15)
## Loading required namespace: GPArotation
## Warning in GPFoblq(L, Tmat = Tmat, normalize = normalize, eps = eps, maxit =
## maxit, : convergence not obtained in GPFoblq. 1000 iterations used.
print(HEX$loadings, cutoff = 0.2)
##
## Loadings:
## MR9 MR6 MR2 MR4 MR11 MR7 MR1 MR8 MR3 MR5
## Q1
## Q2 0.500
## Q3
## Q4 0.236
## Q5
## Q6
## Q7 -0.372
## Q8 0.379
## Q9 0.256
## Q10
## Q11
## Q12
## Q13
## Q14 0.394
## Q15 0.412
## Q16
## Q17 -0.426
## Q18 0.493
## Q19 0.230
## Q20 0.263 0.253
## Q21 0.507
## Q22 0.399
## Q23 -0.212
## Q24 0.273
## Q25
## Q26 0.230 -0.254
## Q27 0.275 -0.251
## Q28 0.205
## Q29 0.218
## Q30
## Q31 0.311 0.255
## Q32 0.337
## Q33 0.253
## Q34 0.236 0.220 0.323
## Q35 0.509
## Q36 0.331
## Q37 0.394 -0.240
## Q38 -0.328 0.212
## Q39 -0.237
## Q40
## Q41 0.572
## Q42 0.308
## Q43 0.344
## Q44 -0.364
## Q45 -0.219 -0.281
## Q46 0.271
## Q47 0.426
## Q48 0.485
## Q49 -0.357
## Q50 0.241 0.238
## Q51 0.328 0.249
## Q52 0.653
## Q53 0.480
## Q54 0.206
## Q55 0.492
## Q56 0.255
## Q57 0.307 0.216
## Q58 0.740
## Q59 0.342 0.256
## Q60 0.309 0.206
## MR13 MR14 MR15 MR12 MR10
## Q1
## Q2
## Q3 0.233
## Q4
## Q5 0.213
## Q6 0.481
## Q7
## Q8
## Q9 0.233 -0.236
## Q10 0.420
## Q11 0.380
## Q12 0.290
## Q13 0.255 0.264
## Q14
## Q15
## Q16 0.210 0.202
## Q17
## Q18
## Q19
## Q20
## Q21
## Q22
## Q23 0.230 0.248
## Q24 0.250
## Q25 0.461
## Q26
## Q27
## Q28 0.203 -0.227
## Q29
## Q30 0.236
## Q31
## Q32
## Q33
## Q34
## Q35
## Q36
## Q37
## Q38
## Q39 0.204
## Q40 0.651
## Q41
## Q42
## Q43
## Q44
## Q45
## Q46
## Q47
## Q48
## Q49
## Q50
## Q51
## Q52
## Q53
## Q54 0.202
## Q55
## Q56 0.242
## Q57
## Q58
## Q59 0.209
## Q60
##
## MR9 MR6 MR2 MR4 MR11 MR7 MR1 MR8 MR3 MR5
## SS loadings 1.533 1.122 1.121 1.090 1.049 1.008 0.972 0.937 0.928 0.927
## Proportion Var 0.026 0.019 0.019 0.018 0.017 0.017 0.016 0.016 0.015 0.015
## Cumulative Var 0.026 0.044 0.063 0.081 0.099 0.115 0.132 0.147 0.163 0.178
## MR13 MR14 MR15 MR12 MR10
## SS loadings 0.871 0.851 0.841 0.743 0.679
## Proportion Var 0.015 0.014 0.014 0.012 0.011
## Cumulative Var 0.193 0.207 0.221 0.233 0.245
Cumulatively, these factors explain so little of the total variance that I'm wondering whether this is worth it. In order to avoid cross-loadings, secondary and tertiary factor loadings are first dropped. They are added back if the initial model fails to fit. Items without exploratory loadings >0.2 are dropped. Without them, the latent covariances may be inflated. It is interesting that adding cross-loadings doesn't reduce g loadings for cognitive data even if it tends to remove them for personality data.
HSAG2[, 3:62] <- lapply(HSAG2[, 3:62], ordered)
EMPCGF <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39
F8 =~ Q2 + Q4 + Q8 + Q26 + Q44
F9 =~ Q17 + Q41
F10 =~ Q7 + Q29 + Q55
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11 + Q12
F14 =~ Q3 + Q6 + Q23 + Q55
F15 =~ Q13 + Q28'
EMPCGFI <- cfa(EMPCGF, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -9.756094e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(EMPCGFI, FITM)
## chisq df npar cfi rmsea
## 11586.479 1323.000 390.000 0.781 0.059
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.058 0.060 NA NA 0.754
The model does not fit well. Before jumping to cross-loadings, it may be desirable to check if there are near-zero factor covariances and constrain those. As it turns out, all factors are uncorrelated with F9, which shows a negative residual variance for Q17; correcting this negative residual variance does not fix the issue. Therefore, F9 was dropped. If cross-loadings are added, Q17 and Q41 may receive them.
EMPCGFR <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39
F8 =~ Q2 + Q4 + Q8 + Q26 + Q44
F10 =~ Q7 + Q29 + Q55
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11 + Q12
F14 =~ Q3 + Q6 + Q23 + Q55
F15 =~ Q13 + Q28'
EMPCGFI <- cfa(EMPCGFR, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -8.879626e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(EMPCGFI, FITM)
## chisq df npar cfi rmsea
## 10129.461 1232.000 366.000 0.800 0.057
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.056 0.058 NA NA 0.776
Q8 does not load on F8 and apparently had no cross-loadings whilst F11 and F15 are not related. The likelihood of a general factor is slim.
EMPCGFR <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39
F8 =~ Q2 + Q4 + Q26 + Q44
F10 =~ Q7 + Q29 + Q55
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11 + Q12
F14 =~ Q3 + Q6 + Q23 + Q55
F15 =~ Q13 + Q28
F11 ~~ 0*F15'
EMPCGFI <- cfa(EMPCGFR, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -7.294648e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(EMPCGFI, FITM)
## chisq df npar cfi rmsea
## 9197.276 1182.000 360.000 0.816 0.055
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.054 0.056 NA NA 0.793
F15 induces NPD and has low loadings, so it was dropped.
EMPCGFR <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39
F8 =~ Q2 + Q4 + Q26 + Q44
F10 =~ Q7 + Q29 + Q55
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11 + Q12
F14 =~ Q3 + Q6 + Q23 + Q55'
EMPCGFI <- cfa(EMPCGFR, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -5.219565e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(EMPCGFI, FITM)
## chisq df npar cfi rmsea
## 8404.602 1095.000 338.000 0.820 0.055
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.054 0.056 NA NA 0.799
Just as well, Q12 negliglibly loads on F13, so it was removed.
EMPCGFR <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39
F8 =~ Q2 + Q4 + Q26 + Q44
F10 =~ Q7 + Q29 + Q55
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11
F14 =~ Q3 + Q6 + Q23 + Q55'
EMPCGFI <- cfa(EMPCGFR, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -6.959223e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(EMPCGFI, FITM)
## chisq df npar cfi rmsea
## 8408.050 1096.000 337.000 0.820 0.055
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.054 0.056 NA NA 0.799
At this point, we can combine perfectly-related factors. F7 = F10.
EMPCGFR <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39 + Q7 + Q29 + Q55
F8 =~ Q2 + Q4 + Q26 + Q44
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11
F14 =~ Q3 + Q6 + Q23 + Q55'
EMPCGFI <- cfa(EMPCGFR, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -7.670392e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(EMPCGFI, FITM)
## chisq df npar cfi rmsea
## 8543.377 1108.000 325.000 0.817 0.055
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.054 0.056 NA NA 0.798
There are no more factor covariances greater than 1 and all loadings are significant. This model can be compared to the higher-order model.
EMPHOF <- '
F1 =~ Q46 + Q49 + Q50 + Q51 + Q52 + Q53 + Q54 + Q60
F2 =~ Q56 + Q56 + Q58 + Q59
F3 =~ Q18 + Q19 + Q20 + Q22 + Q27
F4 =~ Q24 + Q42 + Q43 + Q45 + Q47 + Q48
F5 =~ Q33 + Q34 + Q35 + Q36 + Q37
F6 =~ Q9 + Q15 + Q21
F7 =~ Q14 + Q31 + Q32 + Q39 + Q7 + Q29 + Q55
F8 =~ Q2 + Q4 + Q26 + Q44
F11 =~ Q16 + Q40
F12 =~ Q12 + Q25
F13 =~ Q5 + Q11
F14 =~ Q3 + Q6 + Q23 + Q55
GP =~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F11 + F12 + F13 + F14'
EMPHOF <- cfa(EMPHOF, data = HSAG2, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## The variance-covariance matrix of the estimated parameters (vcov)
## does not appear to be positive definite! The smallest eigenvalue
## (= -6.923204e-17) is smaller than zero. This may be a symptom that
## the model is not identified.
fitMeasures(EMPHOF, FITM)
## chisq df npar cfi rmsea
## 9380.039 1162.000 271.000 0.798 0.056
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.055 0.057 NA NA 0.787
So the higher-order model fits worse (and makes Q55's loading insignificant), but this can occur even when the dimensionality is strongly general. So, using absolute values for the loadings, what is the value of \(\omega_h\)? All loadings are coded positively and arrived at through path-tracing rules. The formula used was
\[\omega_h = \frac{(\Sigma\lambda_{gj})^2}{(\Sigma\lambda_{gj})^2 + (\Sigma\lambda_{sj})^2 + (\Sigma\sigma_{\epsilon j})^2}\]
In typical cases where bifactor models are used, the formula may differ a bit (see: Watkins' Omega software).
GP = c(0.336448, 0.371745, 0.357476, 0.350717, 0.453604, 0.370243, 0.204272, 0.440837, 0.29294, 0.372064, 0.218648, 0.2412, 0.11232, 0.43704, 0.18288, 0.30744, 0.399747, 0.339819, 0.19068, 0.229497, 0.203619, 0.345948, 0.50691, 0.388077, 0.405528, 0.06648, 0.311625, 0.331408, 0.166936, 0.308, 0.370815, 0.333645, 0.47082, 0.140715, 0.222135, 0.303555, 0.335415, 0.102024, 0.058424, 0.391528, 0.5014, 0.239674, 0.291828, 0.471534, 0.337246, 0.133532, 0.264052, 0.134589, 0.086478, 0.313026, 0.028014)
SP = c(0.195327552, 0.215819505, 0.207535524, 0.203611533, 0.263343396, 0.214947507, 0.118591728, 0.255931413, 0.30806424, 0.391273344, 0.229936608, 0.161336, 0.0751296, 0.2923312, 0.1223264, 0.2056432, 0.314772293, 0.267583261, 0.15014692, 0.180712543, 0.160335461, 0.272409412, 0.18875779, 0.144508013, 0.151006232, 0.02475512, 0.116039625, 0.333852672, 0.168167424, 0.310272, 0.090828725, 0.081724175, 0.1153243, 0.034467225, 0.054410525, 0.074353825, 0.082157725, 0.028035072, 0.016054272, 0.107587584, 0.1377792, 0.268551036, 0.326988792, 0.258219558, 0.184681302, 0.198966936, 0.393445896, 0.139035299, 0.089334898, 0.323367166, 0.028939474)
UP = 1 - (GP^2 + SP^2)
sum(GP^2) + sum(SP^2) + sum(UP)
## [1] 51
sum(GP^2)/(sum(GP^2) + sum(SP^2) + sum(UP))
## [1] 0.09858544
sum(GP^2 + SP^2)/(sum(GP^2) + sum(SP^2) + sum(UP))
## [1] 0.1412916
There is no general factor at the phenotypic level in this HEXACO sample. There is not even a reliable factor model in the first place and the communalities are tiny. Does the model at least replicate?
EMPCGFC <- cfa(EMPCGFR, data = HSA, std.lv = T, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F, group = "Group")
## Warning in lav_samplestats_from_data(lavdata = lavdata, missing = lavoptions$missing, : lavaan WARNING: number of observations (500) too small to compute Gamma in group: 1
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 1;
## use lavInspect(fit, "cov.lv") to investigate.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 2;
## use lavInspect(fit, "cov.lv") to investigate.
EMPCGFM <- cfa(EMPCGFR, data = HSA, std.lv = F, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F, group = "Group", group.equal = "loadings")
## Warning in lav_samplestats_from_data(lavdata = lavdata, missing = lavoptions$missing, : lavaan WARNING: number of observations (500) too small to compute Gamma in group: 1
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 1;
## use lavInspect(fit, "cov.lv") to investigate.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 2;
## use lavInspect(fit, "cov.lv") to investigate.
EMPCGFS <- cfa(EMPCGFR, data = HSA, std.lv = F, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F, group = "Group", group.equal = c("loadings", "intercepts"))
## Warning in lav_samplestats_from_data(lavdata = lavdata, missing = lavoptions$missing, : lavaan WARNING: number of observations (500) too small to compute Gamma in group: 1
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 1;
## use lavInspect(fit, "cov.lv") to investigate.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 2;
## use lavInspect(fit, "cov.lv") to investigate.
EMPCGFR <- cfa(EMPCGFR, data = HSA, std.lv = F, estimator = "WLSMV", control = list(rel.tol = 1e-4), check.gradient = F, group = "Group", group.equal = c("loadings", "intercepts", "residuals"))
## Warning in lav_samplestats_from_data(lavdata = lavdata, missing = lavoptions$missing, : lavaan WARNING: number of observations (500) too small to compute Gamma in group: 1
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 1;
## use lavInspect(fit, "cov.lv") to investigate.
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite in group 2;
## use lavInspect(fit, "cov.lv") to investigate.
round(cbind("Configural" = fitMeasures(EMPCGFC, FITM),
"Metric" = fitMeasures(EMPCGFM, FITM),
"Strong" = fitMeasures(EMPCGFS, FITM),
"Strict" = fitMeasures(EMPCGFR, FITM)), 3)
## Configural Metric Strong Strict
## chisq 8225.271 8712.799 8887.866 9053.742
## df 2216.000 2255.000 2293.000 2343.000
## npar 434.000 395.000 357.000 307.000
## cfi 0.838 0.826 0.822 0.819
## rmsea 0.045 0.046 0.046 0.046
## rmsea.ci.lower 0.043 0.045 0.045 0.045
## rmsea.ci.upper 0.046 0.047 0.047 0.047
## aic NA NA NA NA
## bic NA NA NA NA
## tli 0.821 0.811 0.810 0.811
At least this terrible model is plausibly invariant.
SM <- lavInspect(EMPCGFI, "cov.lv")
fa.parallel(SM, n.obs = 2240)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 6 and the number of components = 2
Six dimensions?! That's as many as the HEXACO expects.
HFA <- fa(SM, n.obs = 2240, nfactors = 6)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(model): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56839e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56834e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56832e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56831e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56821e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56816e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56813e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56839e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56817e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in pchisq(df = result$dof, ncp = x, q = result$STATISTIC):
## pnchisq(x=7.56839e+10, f=9, theta=7.56812e+10, ..): not converged in 1000000
## iter.
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
print(HFA$loadings, cutoff = 0.2)
##
## Loadings:
## MR6 MR2 MR1 MR5 MR3 MR4
## F1 1.007
## F2 0.403 0.401
## F3 0.746 0.237 -0.336
## F4 0.824 0.218
## F5 0.376 0.233
## F6 1.018
## F7 0.376 0.510 0.264
## F8 -0.205 -0.446 -0.250 -0.299
## F11 0.229 0.678 -0.362
## F12 0.235 0.274 0.361 0.534
## F13 0.994
## F14 1.003
##
## MR6 MR2 MR1 MR5 MR3 MR4
## SS loadings 1.910 1.690 1.685 1.321 1.154 0.616
## Proportion Var 0.159 0.141 0.140 0.110 0.096 0.051
## Cumulative Var 0.159 0.300 0.440 0.551 0.647 0.698
There's no sense in modeling these. They're ridiculous.
The HEXACO battery administered to Arab samples such as this should not be thought of as supporting a "general factor of personality". Adding cross-loadings, though it may improve fit, will weaken a general factor of personality, and will probably not lead to much more communality because the latent covariances were not extreme in the final model, and if the cross-loadings did have a huge effect, they should be further inflated.
I do not know how this result generalizes to other cultures, but from what I have seen, this sample's model fit much worse than the same battery used elsewhere. The factors obtained have no clear identity and the parallel analysis of the latent covariances, though producing an estimate of six correlated factors, did not produce sensible loadings, so I did not attempt it. These data are probably not made for it. Out of my three predictions, I was wrong for the first and basically correct for the latter two, to the degree that a general factor can even be tested on the data when the data is barely described by the achieved factor solution.
First: What happens if we do this without reverse-scoring, as should be done?
HSA <- HSA %>% mutate(
F1 = Q6 + Q30 + Q54,
F2 = Q12 + Q36 + Q60,
F3 = Q18 + Q42,
F4 = Q24 + Q48,
F5 = Q5 + Q29 + Q53,
F6 = Q11 + Q35,
F7 = Q17 + Q41,
F8 = Q23 + Q47 + Q59,
F9 = Q4 + Q28 + Q52,
F10 = Q10 + Q34 + Q58,
F11 = Q16 + Q40,
F12 = Q22 + Q46,
F13 = Q3 + Q27,
F14 = Q9 + Q33 + Q51,
F15 = Q15 + Q39 + Q57,
F16 = Q21 + Q45,
F17 = Q2 + Q26,
F18 = Q8 + Q32,
F19 = Q14 + Q38 + Q50,
F20 = Q20 + Q44 + Q56,
F21 = Q1 + Q25,
F22 = Q7 + Q31,
F23 = Q13 + Q37 + Q49,
F24 = Q19 + Q43 + Q55)
SUMMOD <- '
S1 =~ F1 + F2 + F3 + F4
S2 =~ F5 + F6 + F7 + F8
S3 =~ F9 + F10 + F11 + F12
S4 =~ F13 + F14 + F15 + F16
S5 =~ F17 + F18 + F19 + F20
S6 =~ F21 + F22 + F23 + F24
F15 ~~ 1*F15'
SUMMOD <- cfa(SUMMOD, data = HSA, std.lv = T)
fitMeasures(SUMMOD, FITM)
## chisq df npar cfi rmsea
## 3045.669 238.000 62.000 0.520 0.066
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.064 0.068 275281.728 275648.502 0.443
summary(SUMMOD, stand = T, fit = T); lavInspect(SUMMOD, 'r2')
## lavaan 0.6-7 ended normally after 54 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of free parameters 62
##
## Number of observations 2740
##
## Model Test User Model:
##
## Test statistic 3045.669
## Degrees of freedom 238
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 6120.623
## Degrees of freedom 276
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.520
## Tucker-Lewis Index (TLI) 0.443
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -137578.864
## Loglikelihood unrestricted model (H1) -136056.029
##
## Akaike (AIC) 275281.728
## Bayesian (BIC) 275648.502
## Sample-size adjusted Bayesian (BIC) 275451.508
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.066
## 90 Percent confidence interval - lower 0.064
## 90 Percent confidence interval - upper 0.068
## P-value RMSEA <= 0.05 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.067
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## S1 =~
## F1 0.549 0.058 9.456 0.000 0.549 0.235
## F2 1.774 0.089 19.853 0.000 1.774 0.610
## F3 0.257 0.047 5.477 0.000 0.257 0.136
## F4 0.616 0.046 13.295 0.000 0.616 0.330
## S2 =~
## F5 0.809 0.065 12.427 0.000 0.809 0.351
## F6 0.525 0.051 10.374 0.000 0.525 0.288
## F7 0.806 0.054 14.826 0.000 0.806 0.448
## F8 1.203 0.073 16.560 0.000 1.203 0.558
## S3 =~
## F9 1.792 0.077 23.173 0.000 1.792 0.738
## F10 0.133 0.050 2.655 0.008 0.133 0.063
## F11 0.027 0.040 0.691 0.490 0.027 0.016
## F12 0.716 0.041 17.404 0.000 0.716 0.418
## S4 =~
## F13 0.261 0.039 6.729 0.000 0.261 0.143
## F14 0.135 0.045 3.005 0.003 0.135 0.064
## F15 1.919 0.033 58.218 0.000 1.919 0.887
## F16 0.396 0.037 10.620 0.000 0.396 0.225
## S5 =~
## F17 0.688 0.038 18.006 0.000 0.688 0.427
## F18 0.826 0.039 21.141 0.000 0.826 0.504
## F19 0.334 0.049 6.859 0.000 0.334 0.165
## F20 1.277 0.058 22.105 0.000 1.277 0.530
## S6 =~
## F21 0.671 0.053 12.659 0.000 0.671 0.362
## F22 0.478 0.051 9.430 0.000 0.478 0.263
## F23 1.203 0.074 16.198 0.000 1.203 0.522
## F24 0.958 0.065 14.770 0.000 0.958 0.445
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## S1 ~~
## S2 -0.022 0.041 -0.543 0.587 -0.022 -0.022
## S3 0.608 0.040 15.169 0.000 0.608 0.608
## S4 0.378 0.032 11.879 0.000 0.378 0.378
## S5 0.683 0.040 16.967 0.000 0.683 0.683
## S6 0.014 0.042 0.324 0.746 0.014 0.014
## S2 ~~
## S3 -0.101 0.036 -2.820 0.005 -0.101 -0.101
## S4 0.059 0.031 1.898 0.058 0.059 0.059
## S5 -0.225 0.039 -5.824 0.000 -0.225 -0.225
## S6 -0.004 0.041 -0.110 0.912 -0.004 -0.004
## S3 ~~
## S4 0.200 0.028 7.143 0.000 0.200 0.200
## S5 0.668 0.035 19.030 0.000 0.668 0.668
## S6 0.330 0.037 8.988 0.000 0.330 0.330
## S4 ~~
## S5 0.342 0.029 11.852 0.000 0.342 0.342
## S6 0.058 0.032 1.840 0.066 0.058 0.058
## S5 ~~
## S6 0.226 0.040 5.701 0.000 0.226 0.226
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .F15 1.000 1.000 0.214
## .F1 5.149 0.145 35.466 0.000 5.149 0.945
## .F2 5.316 0.299 17.778 0.000 5.316 0.628
## .F3 3.478 0.095 36.517 0.000 3.478 0.981
## .F4 3.097 0.092 33.709 0.000 3.097 0.891
## .F5 4.665 0.149 31.312 0.000 4.665 0.877
## .F6 3.060 0.092 33.435 0.000 3.060 0.917
## .F7 2.579 0.099 26.084 0.000 2.579 0.799
## .F8 3.207 0.172 18.591 0.000 3.207 0.689
## .F9 2.685 0.249 10.784 0.000 2.685 0.455
## .F10 4.508 0.122 36.936 0.000 4.508 0.996
## .F11 2.817 0.076 37.008 0.000 2.817 1.000
## .F12 2.426 0.076 31.968 0.000 2.426 0.826
## .F13 3.244 0.088 36.812 0.000 3.244 0.979
## .F14 4.369 0.118 36.974 0.000 4.369 0.996
## .F16 2.951 0.081 36.505 0.000 2.951 0.950
## .F17 2.125 0.067 31.937 0.000 2.125 0.818
## .F18 2.007 0.069 29.053 0.000 2.007 0.746
## .F19 3.993 0.110 36.401 0.000 3.993 0.973
## .F20 4.173 0.150 27.766 0.000 4.173 0.719
## .F21 2.998 0.098 30.737 0.000 2.998 0.869
## .F22 3.080 0.091 33.993 0.000 3.080 0.931
## .F23 3.874 0.182 21.265 0.000 3.874 0.728
## .F24 3.725 0.141 26.365 0.000 3.725 0.802
## S1 1.000 1.000 1.000
## S2 1.000 1.000 1.000
## S3 1.000 1.000 1.000
## S4 1.000 1.000 1.000
## S5 1.000 1.000 1.000
## S6 1.000 1.000 1.000
## F15 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12
## 0.786 0.055 0.372 0.019 0.109 0.123 0.083 0.201 0.311 0.545 0.004 0.000 0.174
## F13 F14 F16 F17 F18 F19 F20 F21 F22 F23 F24
## 0.021 0.004 0.050 0.182 0.254 0.027 0.281 0.131 0.069 0.272 0.198
HSA <- HSA %>% mutate(
F1 = Q6 + (-Q30) + Q54,
F2 = (-Q12) + Q36 + (-Q60),
F3 = Q18 + (-Q42),
F4 = (-Q24) + (-Q48),
F5 = Q5 + Q29 + (-Q53),
F6 = Q11 + Q35,
F7 = Q17 + (-Q41),
F8 = Q23 + Q47 + (-Q59),
F9 = Q4 + (-Q28) + (-Q52),
F10 = (-Q10) + Q34 + Q58,
F11 = Q16 + Q40,
F12 = Q22 + (-Q46),
F13 = Q3 + Q27,
F14 = (-Q9) + Q33 + Q51,
F15 = (-Q15) + Q39 + (-Q57),
F16 = (-Q21) + (-Q45),
F17 = Q2 + (-Q26),
F18 = Q8 + (-Q32),
F19 = (-Q14) + Q38 + Q50,
F20 = (-Q20) + (-Q44) + (-Q56),
F21 = (-Q1) + Q25,
F22 = Q7 + (-Q31),
F23 = Q13 + Q37 + Q49,
F24 = (-Q19) + Q43 + (-Q55))
SUMMOD <- '
S1 =~ F1 + F2 + F3 + F4
S2 =~ F5 + F6 + F7 + F8
S3 =~ F9 + F10 + F11 + F12
S4 =~ F13 + F14 + F15 + F16
S5 =~ F17 + F18 + F19 + F20
S6 =~ F21 + F22 + F23 + F24'
SUMMOD <- cfa(SUMMOD, data = HSA, std.lv = T)
## Warning in lav_object_post_check(object): lavaan WARNING: covariance matrix of latent variables
## is not positive definite;
## use lavInspect(fit, "cov.lv") to investigate.
fitMeasures(SUMMOD, FITM)
## chisq df npar cfi rmsea
## 1707.952 237.000 63.000 0.849 0.048
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.045 0.050 266029.739 266402.429 0.825
summary(SUMMOD, stand = T, fit = T); lavInspect(SUMMOD, 'r2')
## lavaan 0.6-7 ended normally after 46 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of free parameters 63
##
## Number of observations 2740
##
## Model Test User Model:
##
## Test statistic 1707.952
## Degrees of freedom 237
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 10045.881
## Degrees of freedom 276
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.849
## Tucker-Lewis Index (TLI) 0.825
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -132951.870
## Loglikelihood unrestricted model (H1) -132097.894
##
## Akaike (AIC) 266029.739
## Bayesian (BIC) 266402.429
## Sample-size adjusted Bayesian (BIC) 266202.258
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.048
## 90 Percent confidence interval - lower 0.045
## 90 Percent confidence interval - upper 0.050
## P-value RMSEA <= 0.05 0.968
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.040
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## S1 =~
## F1 1.031 0.046 22.450 0.000 1.031 0.458
## F2 1.413 0.054 26.280 0.000 1.413 0.532
## F3 0.790 0.035 22.427 0.000 0.790 0.457
## F4 0.883 0.038 23.266 0.000 0.883 0.474
## S2 =~
## F5 1.063 0.047 22.765 0.000 1.063 0.483
## F6 0.804 0.039 20.762 0.000 0.804 0.440
## F7 0.571 0.032 17.578 0.000 0.571 0.373
## F8 0.961 0.045 21.208 0.000 0.961 0.450
## S3 =~
## F9 1.109 0.044 25.159 0.000 1.109 0.504
## F10 1.162 0.044 26.259 0.000 1.162 0.527
## F11 0.552 0.034 16.415 0.000 0.552 0.329
## F12 0.616 0.031 19.882 0.000 0.616 0.398
## S4 =~
## F13 0.493 0.036 13.647 0.000 0.493 0.271
## F14 1.236 0.055 22.504 0.000 1.236 0.531
## F15 0.705 0.043 16.575 0.000 0.705 0.346
## F16 0.026 0.032 0.814 0.416 0.026 0.015
## S5 =~
## F17 0.517 0.029 17.638 0.000 0.517 0.352
## F18 0.579 0.031 18.686 0.000 0.579 0.373
## F19 1.128 0.042 26.645 0.000 1.128 0.532
## F20 1.240 0.048 25.792 0.000 1.240 0.515
## S6 =~
## F21 0.664 0.042 15.919 0.000 0.664 0.373
## F22 0.655 0.041 15.873 0.000 0.655 0.371
## F23 0.083 0.046 1.806 0.071 0.083 0.036
## F24 0.719 0.049 14.699 0.000 0.719 0.334
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## S1 ~~
## S2 0.932 0.028 33.226 0.000 0.932 0.932
## S3 0.961 0.027 35.789 0.000 0.961 0.961
## S4 1.106 0.041 27.081 0.000 1.106 1.106
## S5 0.994 0.026 37.943 0.000 0.994 0.994
## S6 1.071 0.048 22.437 0.000 1.071 1.071
## S2 ~~
## S3 0.988 0.030 33.252 0.000 0.988 0.988
## S4 1.025 0.043 23.845 0.000 1.025 1.025
## S5 0.956 0.030 32.170 0.000 0.956 0.956
## S6 0.987 0.050 19.840 0.000 0.987 0.987
## S3 ~~
## S4 1.259 0.045 27.988 0.000 1.259 1.259
## S5 1.098 0.028 39.395 0.000 1.098 1.098
## S6 0.953 0.049 19.621 0.000 0.953 0.953
## S4 ~~
## S5 1.213 0.044 27.616 0.000 1.213 1.213
## S6 1.177 0.064 18.496 0.000 1.177 1.177
## S5 ~~
## S6 1.111 0.050 22.242 0.000 1.111 1.111
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .F1 4.008 0.117 34.310 0.000 4.008 0.790
## .F2 5.055 0.155 32.559 0.000 5.055 0.717
## .F3 2.359 0.069 34.318 0.000 2.359 0.791
## .F4 2.696 0.079 34.007 0.000 2.696 0.776
## .F5 3.714 0.113 32.744 0.000 3.714 0.767
## .F6 2.689 0.079 33.847 0.000 2.689 0.806
## .F7 2.011 0.057 35.050 0.000 2.011 0.861
## .F8 3.641 0.108 33.631 0.000 3.641 0.798
## .F9 3.603 0.107 33.666 0.000 3.603 0.746
## .F10 3.512 0.106 32.996 0.000 3.512 0.722
## .F11 2.513 0.069 36.273 0.000 2.513 0.892
## .F12 2.020 0.057 35.640 0.000 2.020 0.842
## .F13 3.069 0.084 36.648 0.000 3.069 0.927
## .F14 3.884 0.138 28.108 0.000 3.884 0.718
## .F15 3.651 0.102 35.634 0.000 3.651 0.880
## .F16 3.107 0.084 37.014 0.000 3.107 1.000
## .F17 1.890 0.052 36.138 0.000 1.890 0.876
## .F18 2.076 0.058 35.959 0.000 2.076 0.861
## .F19 3.219 0.098 32.983 0.000 3.219 0.717
## .F20 4.265 0.127 33.512 0.000 4.265 0.735
## .F21 2.733 0.081 33.594 0.000 2.733 0.861
## .F22 2.681 0.080 33.647 0.000 2.681 0.862
## .F23 5.314 0.144 37.009 0.000 5.314 0.999
## .F24 4.113 0.118 34.773 0.000 4.113 0.888
## S1 1.000 1.000 1.000
## S2 1.000 1.000 1.000
## S3 1.000 1.000 1.000
## S4 1.000 1.000 1.000
## S5 1.000 1.000 1.000
## S6 1.000 1.000 1.000
## F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13
## 0.210 0.283 0.209 0.224 0.233 0.194 0.139 0.202 0.254 0.278 0.108 0.158 0.073
## F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24
## 0.282 0.120 0.000 0.124 0.139 0.283 0.265 0.139 0.138 0.001 0.112
The factor covariances are absurd. Like some other datasets from the third world, this may be evidence of a smaller number of dimensions.
estimate.ED(HSA[, 63:86], sample.size = 2740)
## [1] ED estimated from the correlation matrix; no disattenuation; no small-sample correction.
## $n1
## [1] 19.28
##
## $n2
## [1] 13.51
##
## $nInf
## [1] 4.71
##
## $nC
## [1] 23.22
fa.parallel(HSA[, 63:86])
## Parallel analysis suggests that the number of factors = 7 and the number of components = 4
SFA <- fa(HSA[, 63:86], nfactors = 8)
print(SFA$loadings, cutoff = 0.2)
##
## Loadings:
## MR1 MR2 MR8 MR3 MR5 MR4 MR7 MR6
## F1 0.267 0.223
## F2 0.261 0.368
## F3 0.233 0.302 -0.205
## F4 0.331
## F5 0.335 0.232
## F6 0.218 0.274
## F7 0.430
## F8 0.581
## F9 0.601
## F10 0.485
## F11 0.499
## F12 0.281 0.246
## F13 0.772
## F14 0.450
## F15 0.292 0.252 0.210
## F16 0.246
## F17 0.277
## F18 0.403
## F19 0.286
## F20 0.240
## F21 0.382
## F22 0.289
## F23 -0.316 0.201
## F24 0.495
##
## MR1 MR2 MR8 MR3 MR5 MR4 MR7 MR6
## SS loadings 1.187 0.754 0.725 0.691 0.680 0.573 0.480 0.468
## Proportion Var 0.049 0.031 0.030 0.029 0.028 0.024 0.020 0.020
## Cumulative Var 0.049 0.081 0.111 0.140 0.168 0.192 0.212 0.232
Some factors are too badly defined.
SUMMOD <- '
S1 =~ F2 + F3 + F5 + F9 + F12 + F14 + F19 + F23
S2 =~ F7 + F11
S3 =~ F4 + F8
S4 =~ F18 + F22
S5 =~ F1 + F17 + F21
S6 =~ F13 + F15 + F16'
SUMMOD <- cfa(SUMMOD, data = HSA, std.lv = T)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING:
## Could not compute standard errors! The information matrix could
## not be inverted. This may be a symptom that the model is not
## identified.
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
fitMeasures(SUMMOD, FITM)
## chisq df npar cfi rmsea
## 1518.851 155.000 55.000 0.812 0.057
## rmsea.ci.lower rmsea.ci.upper aic bic tli
## 0.054 0.059 220669.418 220994.782 0.770