library(tidyverse)
library(magrittr)
library(kableExtra)
library(haven)
library(visdat)
library(ggplot2)
library(tidyverse)
library(lavaan)
library(psych)
fitstats <- \(x) fitmeasures(x, c("df","rmsea.robust", "cfi.robust", "srmr"))
### function that generates syntax
get_model <- function(vars=1:5, equal.loadings=F,
which.intercepts=1){
basemodel <- paste0("F1=~ NA*x1_",vars[1],"+a*x1_",vars[1],"+", paste0("x1_", vars[-1], collapse="+"), "\n",
"F2=~ NA*x2_",vars[1],"+a*x2_",vars[1],"+", paste0("x2_", vars[-1], collapse="+"), "\n",
"F3=~ NA*x3_",vars[1],"+a*x3_",vars[1],"+", paste0("x3_", vars[-1], collapse="+"), "\n") %>%
paste0("\nF1 ~ 0*1; F2 ~ 1; F3~1; F1~~1*F1")
metricmodel <- paste0("F1=~ NA*x1_",vars[1],"+", paste0(letters[vars],"*x1_", vars , collapse="+"), "\n",
"F2=~NA*x2_",vars[1],"+", paste0(letters[vars],"*x2_", vars , collapse="+"), "\n",
"F3=~NA*x3_",vars[1],"+", paste0(letters[vars],"*x3_", vars , collapse="+"), "\n") %>%
paste0("\nF1 ~ 0*1; F2 ~ 1; F3~1; F1~~1*F1")
errs <-sapply(vars,\(v){
paste0("\nx1_",v,"~~ x2_",v, "+x3_",v,"; x2_",v,"~~ x3_",v)
}) %>% paste0(collapse="")
ints <- sapply(which.intercepts,\(v){
paste0("x1_", vars[v],"+x2_", vars[v], "+x3_",vars[v],"~i", vars[v], "*1\n")
}) %>% paste0(collapse="")
if(equal.loadings)
return(paste(metricmodel,"\n", ints, "\n",errs))
else
return(paste(basemodel,"\n", ints, "\n",errs))
}
#load data
mydata <- readRDS("finaldata_cutoff25_40.rds")
#sc items
sc2items <- strsplit("s2_scr8 + s2_scr9 + s2_scr12 + s2_scr13 + s2_scr15", split=" \\+ ", "fixed=T") %>% unlist
sc3items <- strsplit("s3_scr8 + s3_scr9 + s3_scr12 + s3_scr13 + s3_scr15", split=" \\+ ", "fixed=T") %>% unlist
sc4items <- strsplit("s4_scr8 + s4_scr9 + s4_scr12 + s4_scr13 + s4_scr15", split=" \\+ ", "fixed=T") %>% unlist
dd <- mydata # or alldata?
df <- cbind(select(dd, all_of(sc2items)),
select(dd, all_of(sc3items)),
select(dd, all_of(sc4items)))
df[df==-990] <- NA
colnames(df) <- c(paste0("x1_", 1:5),paste0("x2_", 1:5),paste0("x3_", 1:5))Measurement Invariance of Self Concept Scale
Testing for measurement invariance across groups
Criterion: For sample sizes with adequate power, equal group sizes, and mixed invariance (i.e., some loadings are higher and some lower in the first group), Chen (2007) also suggested a criterion of a -.01 change in CFI, paired with changes in RMSEA of .015 and SRMR of .030 (for metric invariance) or .015 (for scalar or residual invariance)
df$group <- dd$group
# full model all grades
mod <- "F1=~ x1_1+x1_4+x1_5+x1_2+x1_3
F2=~ x2_1+x2_4+x2_5+x3_2+x3_3
F3=~ x3_1+x3_4+x3_5+x3_2+x3_3"
# configural invariance
fit1 <- cfa(mod, data=df, estimator="MLM", group="group")
# weak invariance
fit2 <- cfa(mod, data=df, estimator="MLM", group="group", group.equal="loadings")
# strong invariance
fit3 <- cfa(mod, data=df, estimator="MLM", group="group", group.equal=c("intercepts", "loadings"))
lavTestLRT(fit1, fit2, fit3)
Scaled Chi-Squared Difference Test (method = "satorra.bentler.2001")
lavaan NOTE:
The "Chisq" column contains standard test statistics, not the
robust test that should be reported per model. A robust difference
test is a function of two standard (not robust) statistics.
Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
fit1 120 34162 34602 239.53
fit2 132 34148 34528 250.02 9.208 12 0.685
fit3 142 34177 34508 299.15 53.314 10 6.511e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
data.frame(conf=fitstats(fit1), weak=fitstats(fit2), strong=fitstats(fit3)) %>%
round(3) %>% kable(caption=paste("MI across groups"))| conf | weak | strong | |
|---|---|---|---|
| df | 120.000 | 132.000 | 142.000 |
| rmsea.robust | 0.039 | 0.037 | 0.042 |
| cfi.robust | 0.959 | 0.960 | 0.944 |
| srmr | 0.035 | 0.037 | 0.044 |
df$group <- NULLWe have moderate support for group invariance
Testing for longitudinal invariance
Metric/weak invariance holds, but strong invariance is not present:
## STEP 1 CONFIGURAL
confmod <- get_model()
fit_conf <- cfa(confmod, data = df, estimator="MLM")
## STEP 2 METRIC
metrmod <- get_model(equal.loadings = T)
fit_metr <- cfa(metrmod, data = df, estimator="MLM")
## STEP 3 SCALAR
scalmod <- get_model(equal.loadings=T, which.intercepts = 1:5)
fit_scal <- cfa(scalmod, data = df, estimator="MLM")
data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong=fitstats(fit_scal)) %>%
round(3) %>% kable()| configural | metric | strong | |
|---|---|---|---|
| df | 72.000 | 80.000 | 88.000 |
| rmsea.robust | 0.024 | 0.029 | 0.071 |
| cfi.robust | 0.987 | 0.979 | 0.863 |
| srmr | 0.024 | 0.035 | 0.060 |
Inspect the mean values of each item across time occasions in order to understand the source of invariance. The mean values look similar across grades 2 and 3, but are different in grade 1.
tmp <- data.frame(mean=colMeans(df, na.rm=T))
tmp$grade <- factor(rep(1:3, each=5))
tmp$item <- rep(1:5, 3)
ggplot(tmp, aes(item, mean, fill=grade))+geom_col()+facet_wrap(grade~ .)ggplot(tmp, aes(item, mean, color=grade, group=grade))+geom_point()+geom_line()Let us test for longitudinal invariance across grades 2 and 3
confmod <- "F2=~ NA*x2_1+a*x2_1+x2_2+x2_3+x2_4+x2_5
F3=~ NA*x3_1+a*x3_1+x3_2+x3_3+x3_4+x3_5
F2 ~ 0*1; F3~1
x2_1+x3_1~i1*1
x2_1~~ x3_1
x2_2~~ x3_2
x2_3~~ x3_3
x2_4~~ x3_4
x2_5~~ x3_5"
metrmod <- "F2=~ NA*x2_1+a*x2_1+b*x2_2+c*x2_3+d*x2_4+e*x2_5
F3=~ NA*x3_1+a*x3_1+b*x3_2+c*x3_3+d*x3_4+e*x3_5
F2 ~ 0*1; F3~1
x2_1+x3_1~i1*1
x2_1~~ x3_1
x2_2~~ x3_2
x2_3~~ x3_3
x2_4~~ x3_4
x2_5~~ x3_5"
scalmod <- "F2=~ NA*x2_1+a*x2_1+b*x2_2+c*x2_3+d*x2_4+e*x2_5
F3=~ NA*x3_1+a*x3_1+b*x3_2+c*x3_3+d*x3_4+e*x3_5
F2 ~ 0*1; F3~1
x2_1+x3_1~i1*1
x2_2+x3_2~i2*1
x2_3+x3_3~i3*1
x2_4+x3_4~i4*1
x2_5+x3_5~i5*1
x2_1~~ x3_1
x2_2~~ x3_2
x2_3~~ x3_3
x2_4~~ x3_4
x2_5~~ x3_5"
fit_conf <- cfa(confmod, data = df, estimator="MLM")
fit_metr <- cfa(metrmod, data = df, estimator="MLM")
fit_scal <- cfa(scalmod, data = df, estimator="MLM")
data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong=fitstats(fit_scal)) %>%
round(3) %>% kable()| configural | metric | strong | |
|---|---|---|---|
| df | 28.000 | 32.000 | 36.000 |
| rmsea.robust | 0.039 | 0.037 | 0.048 |
| cfi.robust | 0.980 | 0.980 | 0.962 |
| srmr | 0.025 | 0.029 | 0.034 |
The evidence against strong invariance has been weakened but is still not negligible.
Partial Invariance Model
To specify a partial invariance model we free up the intercepts of items that do not behave similarly across grades. We fix intercepts of item 1 and 4 to be equal across grades. We see that we could equally well have fixed items 2 and 5.
confmod <- get_model()
fit_conf <- cfa(confmod, data = df, estimator="MLM")
metrmod <- get_model(equal.loadings = T)
fit_metr <- cfa(metrmod, data = df, estimator="MLM")
# a model which does not constraint intercepts
scalmod <- get_model(equal.loadings = T, which.intercepts=c(2,5))
fit_scal25 <- cfa(scalmod, data = df, estimator="MLM")
scalmod <- get_model(equal.loadings = T, which.intercepts=c(1,4))
fit_scal14 <- cfa(scalmod, data = df, estimator="MLM")
data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong25=fitstats(fit_scal25),strong14=fitstats(fit_scal14)) %>%
round(3) %>% kable()| configural | metric | strong25 | strong14 | |
|---|---|---|---|---|
| df | 72.000 | 80.000 | 82.000 | 82.000 |
| rmsea.robust | 0.024 | 0.029 | 0.028 | 0.028 |
| cfi.robust | 0.987 | 0.979 | 0.980 | 0.980 |
| srmr | 0.024 | 0.035 | 0.035 | 0.035 |
Full longitudinal invariance for subsets of items
Let us restrict our measurement model for SC to items 1, 4 and 5. Then we have strong invariance and excellent fit.
items <- c(1,4,5)
confmod <- get_model(vars=items)
fit_conf <- cfa(confmod, data = df, estimator="MLM")
metrmod <- get_model(vars=items, equal.loadings = T)
fit_metr <- cfa(metrmod, data = df, estimator="MLM")
scalmod <- get_model(vars=items, equal.loadings = T, which.intercepts=1:2)
fit_scal <- cfa(scalmod, data = df, estimator="MLM")
data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong=fitstats(fit_scal)) %>%
round(3) %>% kable(caption=paste("Only items", paste(items, collapse=" and ")))| configural | metric | strong | |
|---|---|---|---|
| df | 15.000 | 19.000 | 21.000 |
| rmsea.robust | 0.022 | 0.026 | 0.026 |
| cfi.robust | 0.995 | 0.991 | 0.990 |
| srmr | 0.017 | 0.028 | 0.028 |
We have weak/moderate support for strict invariance across groups in the three-item model.
Growth curve model for MI items compared to all 5 items
All five items
df$group <- factor(dd$group, labels=c("Risk Decod", "Risk Compr"))
df$gender <- dd$s0_gend
items <- 1:5
df$sc3 <- rowSums(select(df, paste0("x3_", items)))
df$sc2 <- rowSums(select(df, paste0("x2_", items)))
df$sc1 <- rowSums(select(df, paste0("x1_", items)))
model0 <- ' i =~ 1*sc1 + 1*sc2 + 1*sc3
s =~ 0*sc1 + 1*sc2 + 2*sc3
i ~ gender + group
s ~ gender+group'
fit_growth <- growth(model0, df, estimator="MLM")
fitstats(fit_growth) df rmsea.robust cfi.robust srmr
3.000 0.051 0.988 0.016
summary(fit_growth)lavaan 0.6.17 ended normally after 74 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 12
Used Total
Number of observations 1097 1205
Model Test User Model:
Standard Scaled
Test Statistic 11.353 11.532
Degrees of freedom 3 3
P-value (Chi-square) 0.010 0.009
Scaling correction factor 0.984
Satorra-Bentler correction
Parameter Estimates:
Standard errors Robust.sem
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|)
i =~
sc1 1.000
sc2 1.000
sc3 1.000
s =~
sc1 0.000
sc2 1.000
sc3 2.000
Regressions:
Estimate Std.Err z-value P(>|z|)
i ~
gender -0.094 0.182 -0.515 0.607
group 1.942 0.181 10.746 0.000
s ~
gender 0.263 0.106 2.481 0.013
group -0.288 0.106 -2.715 0.007
Covariances:
Estimate Std.Err z-value P(>|z|)
.i ~~
.s -0.650 0.322 -2.021 0.043
Intercepts:
Estimate Std.Err z-value P(>|z|)
.i 16.464 0.375 43.921 0.000
.s -0.386 0.219 -1.758 0.079
Variances:
Estimate Std.Err z-value P(>|z|)
.sc1 7.083 0.575 12.316 0.000
.sc2 3.933 0.285 13.811 0.000
.sc3 2.973 0.478 6.222 0.000
.i 3.909 0.595 6.572 0.000
.s 0.738 0.257 2.874 0.004
Only items 1,4,5
items <- c(1,4,5)
df$sc3 <- rowSums(select(df, paste0("x3_", items)))
df$sc2 <- rowSums(select(df, paste0("x2_", items)))
df$sc1 <- rowSums(select(df, paste0("x1_", items)))
model0 <- ' i =~ 1*sc1 + 1*sc2 + 1*sc3
s =~ 0*sc1 + 1*sc2 + 2*sc3
i ~ gender + group
s ~ gender+group'
fit_growth <- growth(model0, df, estimator="MLM")
fitstats(fit_growth) df rmsea.robust cfi.robust srmr
3.000 0.051 0.983 0.017
summary(fit_growth)lavaan 0.6.17 ended normally after 63 iterations
Estimator ML
Optimization method NLMINB
Number of model parameters 12
Used Total
Number of observations 1097 1205
Model Test User Model:
Standard Scaled
Test Statistic 11.545 11.724
Degrees of freedom 3 3
P-value (Chi-square) 0.009 0.008
Scaling correction factor 0.985
Satorra-Bentler correction
Parameter Estimates:
Standard errors Robust.sem
Information Expected
Information saturated (h1) model Structured
Latent Variables:
Estimate Std.Err z-value P(>|z|)
i =~
sc1 1.000
sc2 1.000
sc3 1.000
s =~
sc1 0.000
sc2 1.000
sc3 2.000
Regressions:
Estimate Std.Err z-value P(>|z|)
i ~
gender 0.148 0.118 1.257 0.209
group 0.982 0.117 8.397 0.000
s ~
gender 0.136 0.072 1.898 0.058
group -0.122 0.072 -1.705 0.088
Covariances:
Estimate Std.Err z-value P(>|z|)
.i ~~
.s -0.353 0.137 -2.582 0.010
Intercepts:
Estimate Std.Err z-value P(>|z|)
.i 10.322 0.236 43.766 0.000
.s -0.210 0.145 -1.452 0.146
Variances:
Estimate Std.Err z-value P(>|z|)
.sc1 2.952 0.252 11.703 0.000
.sc2 1.929 0.134 14.344 0.000
.sc3 1.398 0.196 7.149 0.000
.i 1.539 0.248 6.210 0.000
.s 0.368 0.110 3.344 0.001