Measurement Invariance of Self Concept Scale

library(tidyverse)
library(magrittr)
library(kableExtra)
library(haven)
library(visdat)
library(ggplot2)
library(tidyverse)
library(lavaan)
library(psych)

fitstats <- \(x) fitmeasures(x, c("df","rmsea.robust", "cfi.robust", "srmr"))

### function that generates syntax

get_model <- function(vars=1:5, equal.loadings=F,
                      which.intercepts=1){
  
  basemodel <- paste0("F1=~ NA*x1_",vars[1],"+a*x1_",vars[1],"+", paste0("x1_", vars[-1], collapse="+"), "\n",
                      "F2=~ NA*x2_",vars[1],"+a*x2_",vars[1],"+", paste0("x2_", vars[-1], collapse="+"), "\n",
                      "F3=~ NA*x3_",vars[1],"+a*x3_",vars[1],"+", paste0("x3_", vars[-1], collapse="+"), "\n") %>% 
    paste0("\nF1 ~ 0*1; F2 ~ 1; F3~1; F1~~1*F1")

  
  metricmodel <- paste0("F1=~ NA*x1_",vars[1],"+", paste0(letters[vars],"*x1_", vars , collapse="+"), "\n",
                        "F2=~NA*x2_",vars[1],"+", paste0(letters[vars],"*x2_", vars , collapse="+"), "\n",
                        "F3=~NA*x3_",vars[1],"+", paste0(letters[vars],"*x3_", vars , collapse="+"), "\n") %>% 
                        paste0("\nF1 ~ 0*1; F2 ~ 1; F3~1; F1~~1*F1")
  
  errs <-sapply(vars,\(v){
    paste0("\nx1_",v,"~~ x2_",v, "+x3_",v,"; x2_",v,"~~ x3_",v)
  }) %>% paste0(collapse="")
  
  ints <- sapply(which.intercepts,\(v){
    paste0("x1_", vars[v],"+x2_", vars[v], "+x3_",vars[v],"~i", vars[v], "*1\n")
  }) %>% paste0(collapse="")

  if(equal.loadings)
    return(paste(metricmodel,"\n", ints, "\n",errs))
  else 
    return(paste(basemodel,"\n", ints, "\n",errs))
}


#load data
mydata <- readRDS("finaldata_cutoff25_40.rds")

#sc items
sc2items <- strsplit("s2_scr8 + s2_scr9 + s2_scr12 + s2_scr13 + s2_scr15", split=" \\+ ", "fixed=T") %>% unlist
sc3items <- strsplit("s3_scr8 + s3_scr9 + s3_scr12 + s3_scr13 + s3_scr15", split=" \\+ ", "fixed=T") %>% unlist
sc4items <- strsplit("s4_scr8 + s4_scr9 + s4_scr12 + s4_scr13 + s4_scr15", split=" \\+ ", "fixed=T") %>% unlist

dd <- mydata # or alldata?

df <- cbind(select(dd, all_of(sc2items)),
            select(dd, all_of(sc3items)),
            select(dd, all_of(sc4items)))
df[df==-990] <- NA
colnames(df) <- c(paste0("x1_", 1:5),paste0("x2_", 1:5),paste0("x3_", 1:5))

Testing for measurement invariance across groups

Criterion: For sample sizes with adequate power, equal group sizes, and mixed invariance (i.e., some loadings are higher and some lower in the first group), Chen (2007) also suggested a criterion of a -.01 change in CFI, paired with changes in RMSEA of .015 and SRMR of .030 (for metric invariance) or .015 (for scalar or residual invariance)

df$group <- dd$group


# full model all grades 
mod <- "F1=~ x1_1+x1_4+x1_5+x1_2+x1_3
F2=~ x2_1+x2_4+x2_5+x3_2+x3_3
F3=~ x3_1+x3_4+x3_5+x3_2+x3_3"

# configural invariance
fit1 <- cfa(mod, data=df, estimator="MLM", group="group")

# weak invariance
fit2 <- cfa(mod, data=df, estimator="MLM", group="group", group.equal="loadings")

# strong invariance
fit3 <- cfa(mod, data=df, estimator="MLM", group="group", group.equal=c("intercepts", "loadings"))

lavTestLRT(fit1, fit2, fit3)


Scaled Chi-Squared Difference Test (method = "satorra.bentler.2001")

lavaan NOTE:
    The "Chisq" column contains standard test statistics, not the
    robust test that should be reported per model. A robust difference
    test is a function of two standard (not robust) statistics.
 
      Df   AIC   BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
fit1 120 34162 34602 239.53                                  
fit2 132 34148 34528 250.02      9.208      12      0.685    
fit3 142 34177 34508 299.15     53.314      10  6.511e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

data.frame(conf=fitstats(fit1), weak=fitstats(fit2), strong=fitstats(fit3)) %>% 
  round(3) %>% kable(caption=paste("MI across groups"))

MI across groups
	conf	weak	strong
df	120.000	132.000	142.000
rmsea.robust	0.039	0.037	0.042
cfi.robust	0.959	0.960	0.944
srmr	0.035	0.037	0.044

df$group <- NULL

We have moderate support for group invariance

Testing for longitudinal invariance

Metric/weak invariance holds, but strong invariance is not present:

## STEP 1 CONFIGURAL
confmod <- get_model()
fit_conf <- cfa(confmod, data = df, estimator="MLM")

## STEP 2 METRIC
metrmod <- get_model(equal.loadings = T)
fit_metr <- cfa(metrmod, data = df, estimator="MLM")



## STEP 3 SCALAR
scalmod <- get_model(equal.loadings=T, which.intercepts = 1:5)
fit_scal <- cfa(scalmod, data = df, estimator="MLM")

data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong=fitstats(fit_scal)) %>% 
  round(3) %>% kable()

	configural	metric	strong
df	72.000	80.000	88.000
rmsea.robust	0.024	0.029	0.071
cfi.robust	0.987	0.979	0.863
srmr	0.024	0.035	0.060

Inspect the mean values of each item across time occasions in order to understand the source of invariance. The mean values look similar across grades 2 and 3, but are different in grade 1.

tmp <- data.frame(mean=colMeans(df, na.rm=T))
tmp$grade <- factor(rep(1:3, each=5))
tmp$item <- rep(1:5, 3)
ggplot(tmp, aes(item, mean, fill=grade))+geom_col()+facet_wrap(grade~ .)

ggplot(tmp, aes(item, mean, color=grade, group=grade))+geom_point()+geom_line()

Let us test for longitudinal invariance across grades 2 and 3

confmod <- "F2=~ NA*x2_1+a*x2_1+x2_2+x2_3+x2_4+x2_5
            F3=~ NA*x3_1+a*x3_1+x3_2+x3_3+x3_4+x3_5
            F2 ~ 0*1;  F3~1 
            x2_1+x3_1~i1*1
            x2_1~~ x3_1
            x2_2~~ x3_2
            x2_3~~ x3_3
            x2_4~~ x3_4
            x2_5~~ x3_5"
metrmod <- "F2=~ NA*x2_1+a*x2_1+b*x2_2+c*x2_3+d*x2_4+e*x2_5
            F3=~ NA*x3_1+a*x3_1+b*x3_2+c*x3_3+d*x3_4+e*x3_5
            F2 ~ 0*1;  F3~1 
            x2_1+x3_1~i1*1
            x2_1~~ x3_1
            x2_2~~ x3_2
            x2_3~~ x3_3
            x2_4~~ x3_4
            x2_5~~ x3_5"
scalmod <- "F2=~ NA*x2_1+a*x2_1+b*x2_2+c*x2_3+d*x2_4+e*x2_5
            F3=~ NA*x3_1+a*x3_1+b*x3_2+c*x3_3+d*x3_4+e*x3_5
            F2 ~ 0*1;  F3~1 
            x2_1+x3_1~i1*1
            x2_2+x3_2~i2*1
            x2_3+x3_3~i3*1
            x2_4+x3_4~i4*1
            x2_5+x3_5~i5*1
            x2_1~~ x3_1
            x2_2~~ x3_2
            x2_3~~ x3_3
            x2_4~~ x3_4
            x2_5~~ x3_5"
fit_conf <- cfa(confmod, data = df, estimator="MLM")
fit_metr <- cfa(metrmod, data = df, estimator="MLM")
fit_scal <- cfa(scalmod, data = df, estimator="MLM")

data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong=fitstats(fit_scal)) %>% 
  round(3) %>% kable()

	configural	metric	strong
df	28.000	32.000	36.000
rmsea.robust	0.039	0.037	0.048
cfi.robust	0.980	0.980	0.962
srmr	0.025	0.029	0.034

The evidence against strong invariance has been weakened but is still not negligible.

Partial Invariance Model

To specify a partial invariance model we free up the intercepts of items that do not behave similarly across grades. We fix intercepts of item 1 and 4 to be equal across grades. We see that we could equally well have fixed items 2 and 5.

confmod <- get_model()
fit_conf <- cfa(confmod, data = df, estimator="MLM")

metrmod <- get_model(equal.loadings = T)
fit_metr <- cfa(metrmod, data = df, estimator="MLM")

# a model which does not constraint intercepts 
scalmod <- get_model(equal.loadings = T, which.intercepts=c(2,5))
fit_scal25 <- cfa(scalmod, data = df, estimator="MLM")
scalmod <- get_model(equal.loadings = T, which.intercepts=c(1,4))
fit_scal14 <- cfa(scalmod, data = df, estimator="MLM")

data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong25=fitstats(fit_scal25),strong14=fitstats(fit_scal14)) %>% 
  round(3) %>% kable()

	configural	metric	strong25	strong14
df	72.000	80.000	82.000	82.000
rmsea.robust	0.024	0.029	0.028	0.028
cfi.robust	0.987	0.979	0.980	0.980
srmr	0.024	0.035	0.035	0.035

Full longitudinal invariance for subsets of items

Let us restrict our measurement model for SC to items 1, 4 and 5. Then we have strong invariance and excellent fit.

items <- c(1,4,5)
confmod <- get_model(vars=items)
fit_conf <- cfa(confmod, data = df, estimator="MLM")

metrmod <- get_model(vars=items, equal.loadings = T)
fit_metr <- cfa(metrmod, data = df, estimator="MLM")

scalmod <- get_model(vars=items, equal.loadings = T, which.intercepts=1:2)
fit_scal <- cfa(scalmod, data = df, estimator="MLM")

data.frame(configural=fitstats(fit_conf), metric=fitstats(fit_metr), strong=fitstats(fit_scal)) %>% 
  round(3) %>% kable(caption=paste("Only items", paste(items, collapse=" and ")))

Only items 1 and 4 and 5
	configural	metric	strong
df	15.000	19.000	21.000
rmsea.robust	0.022	0.026	0.026
cfi.robust	0.995	0.991	0.990
srmr	0.017	0.028	0.028

We have weak/moderate support for strict invariance across groups in the three-item model.

Growth curve model for MI items compared to all 5 items

All five items

df$group <-  factor(dd$group, labels=c("Risk Decod", "Risk Compr"))
df$gender <- dd$s0_gend

items <- 1:5
df$sc3 <- rowSums(select(df, paste0("x3_", items)))
df$sc2 <- rowSums(select(df, paste0("x2_", items)))
df$sc1 <- rowSums(select(df, paste0("x1_", items)))

model0 <-  ' i =~ 1*sc1 + 1*sc2 + 1*sc3
           s =~ 0*sc1 + 1*sc2 + 2*sc3
i ~ gender + group
s ~ gender+group'

fit_growth <- growth(model0, df, estimator="MLM")
fitstats(fit_growth)

          df rmsea.robust   cfi.robust         srmr 
       3.000        0.051        0.988        0.016

summary(fit_growth)

lavaan 0.6.17 ended normally after 74 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        12

                                                  Used       Total
  Number of observations                          1097        1205

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                                11.353      11.532
  Degrees of freedom                                 3           3
  P-value (Chi-square)                           0.010       0.009
  Scaling correction factor                                  0.984
    Satorra-Bentler correction                                    

Parameter Estimates:

  Standard errors                           Robust.sem
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  i =~                                                
    sc1               1.000                           
    sc2               1.000                           
    sc3               1.000                           
  s =~                                                
    sc1               0.000                           
    sc2               1.000                           
    sc3               2.000                           

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)
  i ~                                                 
    gender           -0.094    0.182   -0.515    0.607
    group             1.942    0.181   10.746    0.000
  s ~                                                 
    gender            0.263    0.106    2.481    0.013
    group            -0.288    0.106   -2.715    0.007

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)
 .i ~~                                                
   .s                -0.650    0.322   -2.021    0.043

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)
   .i                16.464    0.375   43.921    0.000
   .s                -0.386    0.219   -1.758    0.079

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .sc1               7.083    0.575   12.316    0.000
   .sc2               3.933    0.285   13.811    0.000
   .sc3               2.973    0.478    6.222    0.000
   .i                 3.909    0.595    6.572    0.000
   .s                 0.738    0.257    2.874    0.004

Only items 1,4,5

items <- c(1,4,5)
df$sc3 <- rowSums(select(df, paste0("x3_", items)))
df$sc2 <- rowSums(select(df, paste0("x2_", items)))
df$sc1 <- rowSums(select(df, paste0("x1_", items)))

model0 <-  ' i =~ 1*sc1 + 1*sc2 + 1*sc3
           s =~ 0*sc1 + 1*sc2 + 2*sc3
i ~ gender + group
s ~ gender+group'

fit_growth <- growth(model0, df, estimator="MLM")
fitstats(fit_growth)

          df rmsea.robust   cfi.robust         srmr 
       3.000        0.051        0.983        0.017

summary(fit_growth)

lavaan 0.6.17 ended normally after 63 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        12

                                                  Used       Total
  Number of observations                          1097        1205

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                                11.545      11.724
  Degrees of freedom                                 3           3
  P-value (Chi-square)                           0.009       0.008
  Scaling correction factor                                  0.985
    Satorra-Bentler correction                                    

Parameter Estimates:

  Standard errors                           Robust.sem
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)
  i =~                                                
    sc1               1.000                           
    sc2               1.000                           
    sc3               1.000                           
  s =~                                                
    sc1               0.000                           
    sc2               1.000                           
    sc3               2.000                           

Regressions:
                   Estimate  Std.Err  z-value  P(>|z|)
  i ~                                                 
    gender            0.148    0.118    1.257    0.209
    group             0.982    0.117    8.397    0.000
  s ~                                                 
    gender            0.136    0.072    1.898    0.058
    group            -0.122    0.072   -1.705    0.088

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)
 .i ~~                                                
   .s                -0.353    0.137   -2.582    0.010

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)
   .i                10.322    0.236   43.766    0.000
   .s                -0.210    0.145   -1.452    0.146

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)
   .sc1               2.952    0.252   11.703    0.000
   .sc2               1.929    0.134   14.344    0.000
   .sc3               1.398    0.196    7.149    0.000
   .i                 1.539    0.248    6.210    0.000
   .s                 0.368    0.110    3.344    0.001