# Packages
library(pacman); p_load(lavaan)
# Fit Metrics
FITM <- c("chisq", "df", "npar", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "srmr", "aic", "bic")
# A function to compute p-values for different steps of model-fitting
stepwise_pvalues <- function(...) {
  models <- list(...)
  model_names <- as.character(substitute(list(...)))[-1L]
  if (length(models) < 2) {
    stop("At least two models are required for comparison.")
  }
  # Pull each model's chi-square and degrees of freedom
  chisq_values <- sapply(models, function(model) lavInspect(model, "fit")["chisq"])
  df_values <- sapply(models, function(model) lavInspect(model, "fit")["df"])
  comparisons <- character()
  chisq_diffs <- numeric()
  df_diffs <- numeric()
  # Compare each model against the immediately preceding one
  for (i in 2:length(models)) {
    comparisons <- c(comparisons, paste0(model_names[i - 1], " vs ", model_names[i]))
    chisq_diffs <- c(chisq_diffs, chisq_values[i] - chisq_values[i - 1])
    df_diffs <- c(df_diffs, df_values[i] - df_values[i - 1])
  }
  # p-value for each chi-square difference test
  p_values <- pchisq(chisq_diffs, df_diffs, lower.tail = FALSE)
  p_values_rounded <- ifelse(p_values < 0.001,
                             formatC(p_values, format = "e", digits = 1),
                             round(p_values, 3))
  data.frame(Comparison = comparisons,
             Chisq_Diff = chisq_diffs,
             DF_Diff = df_diffs,
             P_Value = p_values_rounded,
             row.names = NULL)
}
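These stepwise p-values are the same nested \(\chi^2\) difference tests that lavaan's built-in likelihood ratio test performs; once the models below are fitted, a cross-check (assuming the default ML estimator, for which the simple \(\Delta\chi^2\) is appropriate) is:
lavTestLRT(Configural, Metric, Scalar) # same differences and p-values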
A reviewer has requested evidence that factor models that are equivalent to, but not exactly, the “correct” factor model can recover bias that is present with respect to the correct factor model. A close reading of Meredith (1993) makes it clear that the “correct” model is not required; all that is needed to recover the correct biased parameters is sufficient statistical power and a model that is not an extraordinarily poor fit.
The reviewer has stated that they are aware the “correct” model is not required for item-level data, so I will not be performing any simulations with item-level data, only with the more aggregated types of factor models that they suggest have this requirement.
We’ll start by showing that equivalent models achieve good fit in the absence of bias.
modelSyntax <- '
factor =~ 0.7*P1 + 0.7*P2 + 0.7*P3 + 0.7*P4 + 0.7*P5 + 0.7*P6 + 0.7*P7 + 0.7*P8 + 0.7*P9 + 0.7*P10 + 0.7*P11 + 0.7*P12 + 0.7*P13 + 0.7*P14 + 0.7*P15
factor ~~ 1*factor
P1 ~~ 0.51*P1
P2 ~~ 0.51*P2
P3 ~~ 0.51*P3
P4 ~~ 0.51*P4
P5 ~~ 0.51*P5
P6 ~~ 0.51*P6
P7 ~~ 0.51*P7
P8 ~~ 0.51*P8
P9 ~~ 0.51*P9
P10 ~~ 0.51*P10
P11 ~~ 0.51*P11
P12 ~~ 0.51*P12
P13 ~~ 0.51*P13
P14 ~~ 0.51*P14
P15 ~~ 0.51*P15'
set.seed(1)
dataGroup1 <- simulateData(modelSyntax, sample.nobs = 10000)
dataGroup2 <- simulateData(modelSyntax, sample.nobs = 10000)
dataGroup1$group <- 'Control'
dataGroup2$group <- 'Experimental'
combinedData <- rbind(dataGroup1, dataGroup2)
head(combinedData)
When we fit the correct model, fit is good and the groups’ models are found to be equivalent.
UBC <- '
F =~ P1 + P2 + P3 + P4 + P5 + P6 + P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15'
Configural <- cfa(UBC, combinedData, std.lv = T, group = "group")
Metric <- cfa(UBC, combinedData, std.lv = F, group = "group",
group.equal = "loadings")
Scalar <- cfa(UBC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"))
Strict <- cfa(UBC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals"))
LatentVariances <- cfa(UBC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"))
LatentMeans <- cfa(UBC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "means"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric Scalar Strict Latent Variances
## chisq 198.591 204.797 219.755 235.529 235.534
## df 180.000 194.000 208.000 223.000 224.000
## npar 90.000 76.000 62.000 47.000 46.000
## cfi 1.000 1.000 1.000 1.000 1.000
## rmsea 0.003 0.002 0.002 0.002 0.002
## rmsea.ci.lower 0.000 0.000 0.000 0.000 0.000
## rmsea.ci.upper 0.006 0.005 0.005 0.005 0.005
## srmr 0.004 0.005 0.006 0.006 0.006
## aic 704571.857 704550.063 704537.021 704522.796 704520.800
## bic 705283.171 705150.728 705027.037 704894.260 704884.360
## Means
## chisq 235.544
## df 225.000
## npar 45.000
## cfi 1.000
## rmsea 0.002
## rmsea.ci.lower 0.000
## rmsea.ci.upper 0.005
## srmr 0.006
## aic 704518.810
## bic 704874.467
Judging by the most sensitive index of model fit, the \(\chi^2\) exact fit test, the equivalence of the models is readily apparent. There is one deviation, but at this sample size it is not meaningful.
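Concretely, for each adjacent pair of models, the function computes \(\Delta\chi^2 = \chi^2_{\text{more constrained}} - \chi^2_{\text{less constrained}}\) and refers it to a \(\chi^2\) distribution with \(\Delta df\) degrees of freedom, so \(p = P(\chi^2_{\Delta df} \geq \Delta\chi^2)\).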
stepwise_pvalues(Configural, Metric, Scalar, Strict, LatentVariances, LatentMeans)
Fitting an incorrect but statistically equivalent model to these data leads to similar conclusions.
UBIC <- '
F1 =~ P1 + P2 + P3
F2 =~ P4 + P5 + P6
F3 =~ P7 + P8 + P9
F4 =~ P10 + P11 + P12
F5 =~ P13 + P14 + P15'
Configural <- cfa(UBIC, combinedData, std.lv = T, group = "group")
Metric <- cfa(UBIC, combinedData, std.lv = F, group = "group",
group.equal = "loadings")
Scalar <- cfa(UBIC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"))
Strict <- cfa(UBIC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals"))
LatentVariances <- cfa(UBIC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"))
LatentCovariances <- cfa(UBIC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"))
LatentMeans <- cfa(UBIC, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Latent Covariances" = fitMeasures(LatentCovariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric Scalar Strict Latent Variances
## chisq 173.050 178.087 188.686 210.533 212.224
## df 160.000 170.000 180.000 195.000 200.000
## npar 110.000 100.000 90.000 75.000 70.000
## cfi 1.000 1.000 1.000 1.000 1.000
## rmsea 0.003 0.002 0.002 0.003 0.002
## rmsea.ci.lower 0.000 0.000 0.000 0.000 0.000
## rmsea.ci.upper 0.006 0.005 0.005 0.005 0.005
## srmr 0.004 0.005 0.005 0.005 0.006
## aic 704586.316 704571.353 704561.952 704553.799 704545.490
## bic 705455.700 705361.702 705273.266 705146.560 705098.735
## Latent Covariances Means
## chisq 219.017 223.407
## df 210.000 215.000
## npar 60.000 55.000
## cfi 1.000 1.000
## rmsea 0.002 0.002
## rmsea.ci.lower 0.000 0.000
## rmsea.ci.upper 0.005 0.005
## srmr 0.006 0.006
## aic 704532.283 704526.673
## bic 705006.493 704961.365
stepwise_pvalues(Configural, Metric, Scalar, Strict, LatentVariances, LatentCovariances, LatentMeans)
Finally, if we drop indicators and force some of the group factors to become ill-defined, with only two indicators each, that is still no issue in a model without bias.
UBICD <- '
F1 =~ P1 + P2
F2 =~ P4 + P6
F3 =~ P7 + P8 + P9
F4 =~ P10 + P11
F5 =~ P13 + P14 + P15'
Configural <- cfa(UBICD, combinedData, std.lv = T, group = "group")
Metric <- cfa(UBICD, combinedData, std.lv = F, group = "group",
group.equal = "loadings")
Scalar <- cfa(UBICD, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"))
Strict <- cfa(UBICD, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals"))
LatentVariances <- cfa(UBICD, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"))
LatentCovariances <- cfa(UBICD, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"))
LatentMeans <- cfa(UBICD, combinedData, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Latent Covariances" = fitMeasures(LatentCovariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric Scalar Strict Latent Variances
## chisq 97.095 100.435 109.158 124.212 125.283
## df 88.000 95.000 102.000 114.000 119.000
## npar 92.000 85.000 78.000 66.000 61.000
## cfi 1.000 1.000 1.000 1.000 1.000
## rmsea 0.003 0.002 0.003 0.003 0.002
## rmsea.ci.lower 0.000 0.000 0.000 0.000 0.000
## rmsea.ci.upper 0.007 0.006 0.006 0.006 0.006
## srmr 0.004 0.005 0.005 0.005 0.006
## aic 570881.795 570871.135 570865.858 570856.912 570847.983
## bic 571608.916 571542.932 571482.330 571378.542 571330.095
## Latent Covariances Means
## chisq 137.086 139.963
## df 129.000 134.000
## npar 51.000 46.000
## cfi 1.000 1.000
## rmsea 0.003 0.002
## rmsea.ci.lower 0.000 0.000
## rmsea.ci.upper 0.006 0.005
## srmr 0.006 0.006
## aic 570839.786 570832.663
## bic 571242.864 571196.224
stepwise_pvalues(Configural, Metric, Scalar, Strict, LatentVariances, LatentCovariances, LatentMeans)
Now we will reuse the same code from above, but we’ll make some of the loadings slightly biased and moderately bias two of the intercepts. I’m identifying the model with a reference loading for simplicity’s sake, so I’ll make the biased loadings the non-identifying ones, so that the identification constraint doesn’t hide the bias.
modelGroup1 <- '
factor =~ 0.7*P1 + 0.7*P2 + 0.7*P3 + 0.7*P4 + 0.7*P5 +
0.7*P6 + 0.7*P7 + 0.7*P8 + 0.7*P9 + 0.7*P10 +
0.7*P11 + 0.7*P12 + 0.7*P13 + 0.7*P14 + 0.7*P15
P1 ~ 1*0
P2 ~ 1*0
P3 ~ 1*0
P4 ~ 1*0
P5 ~ 1*0
P6 ~ 1*0
P7 ~ 1*0
P8 ~ 1*0
P9 ~ 1*0
P10 ~ 1*0
P11 ~ 1*0
P12 ~ 1*0
P13 ~ 1*0
P14 ~ 1*0
P15 ~ 1*0'
modelGroup2 <- '
factor =~ 0.7*P1 + 0.9*P2 + 0.8*P3 + 0.7*P4 + 0.6*P5 +
0.7*P6 + 0.7*P7 + 0.7*P8 + 0.7*P9 + 0.7*P10 +
0.7*P11 + 0.7*P12 + 0.7*P13 + 0.7*P14 + 0.7*P15
P1 ~ 1*0
P2 ~ 1*0
P3 ~ 1*0
P4 ~ 0.75*0 # Biased intercept
P5 ~ 0.75*0 # Biased intercept
P6 ~ 1*0
P7 ~ 1*0
P8 ~ 1*0
P9 ~ 1*0
P10 ~ 1*0
P11 ~ 1*0
P12 ~ 1*0
P13 ~ 1*0
P14 ~ 1*0
P15 ~ 1*0'
set.seed(1)
dataGroup1 <- simulateData(modelGroup1, sample.nobs = 10000)
dataGroup2 <- simulateData(modelGroup2, sample.nobs = 10000)
dataGroup1$group <- 'Control'
dataGroup2$group <- 'Experimental'
combinedDataBiased <- rbind(dataGroup1, dataGroup2)
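As a quick sanity check before fitting anything, the induced intercept bias should surface as raw mean differences between the groups, concentrated on P4 and P5:
round(colMeans(subset(combinedDataBiased, group == "Experimental")[, paste0("P", 1:15)]) -
      colMeans(subset(combinedDataBiased, group == "Control")[, paste0("P", 1:15)]), 2)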
With the correct model, bias is observed and can be readily accounted for.
BC <- '
F =~ P1 + P2 + P3 + P4 + P5 + P6 + P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15'
Configural <- cfa(BC, combinedDataBiased, std.lv = T, group = "group")
Metric <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = "loadings")
PartialMetric <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = "loadings",
group.partial = c("F =~ P2", "F =~ P3", "F =~ P5"))
Scalar <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"),
group.partial = c("F =~ P2", "F =~ P3", "F =~ P5"))
PartialScalar <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"),
group.partial = c("F =~ P2", "F =~ P3", "F =~ P5",
"P4 ~ 1", "P5 ~ 1"))
Strict <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = c("F =~ P2", "F =~ P3", "F =~ P5",
"P4 ~ 1", "P5 ~ 1"))
LatentVariances <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"),
group.partial = c("F =~ P2", "F =~ P3", "F =~ P5",
"P4 ~ 1", "P5 ~ 1"))
LatentMeans <- cfa(BC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "means"),
group.partial = c("F =~ P2", "F =~ P3", "F =~ P5",
"P4 ~ 1", "P5 ~ 1"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"PartialMetric" = fitMeasures(PartialMetric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"PartialScalar" = fitMeasures(PartialScalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric PartialMetric Scalar PartialScalar
## chisq 190.219 450.925 203.605 727.067 219.721
## df 180.000 194.000 191.000 205.000 203.000
## npar 90.000 76.000 79.000 65.000 67.000
## cfi 1.000 0.997 1.000 0.994 1.000
## rmsea 0.002 0.012 0.003 0.016 0.003
## rmsea.ci.lower 0.000 0.010 0.000 0.015 0.000
## rmsea.ci.upper 0.005 0.013 0.005 0.017 0.005
## srmr 0.006 0.020 0.007 0.014 0.007
## aic 894708.572 894941.278 894699.958 895195.421 894692.074
## bic 895419.886 895541.943 895324.334 895709.148 895221.608
## Strict Latent Variances Means
## chisq 249.320 249.332 250.317
## df 218.000 219.000 220.000
## npar 52.000 51.000 50.000
## cfi 1.000 1.000 1.000
## rmsea 0.004 0.004 0.004
## rmsea.ci.lower 0.000 0.000 0.000
## rmsea.ci.upper 0.006 0.006 0.006
## srmr 0.008 0.008 0.008
## aic 894691.674 894689.686 894688.671
## bic 895102.655 895092.764 895083.845
stepwise_pvalues(Configural, #Metric,
PartialMetric, #Scalar,
PartialScalar, Strict, LatentVariances, LatentMeans)
With models like these, where the structural model is naturally less constrained, bias in the measurement model can transfer upward into the structural parameters, making it doubly important to constrain those parameters if you choose to use a model like this (one way to check this is sketched after the model comparisons below).
BIC <- '
F1 =~ P1 + P2 + P3
F2 =~ P4 + P5 + P6
F3 =~ P7 + P8 + P9
F4 =~ P10 + P11 + P12
F5 =~ P13 + P14 + P15'
Configural <- cfa(BIC, combinedDataBiased, std.lv = T, group = "group")
Metric <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = "loadings")
PartialMetric <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = "loadings",
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5"))
Scalar <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5"))
PartialScalar <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
Strict <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
LatentVariances <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
LatentCovariances <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
LatentMeans <- cfa(BIC, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"Partial Metric" = fitMeasures(PartialMetric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"Partial Scalar" = fitMeasures(PartialScalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Latent Covariances" = fitMeasures(LatentCovariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric Partial Metric Scalar Partial Scalar
## chisq 171.058 289.198 183.676 350.431 188.740
## df 160.000 170.000 167.000 177.000 175.000
## npar 110.000 100.000 103.000 93.000 95.000
## cfi 1.000 0.999 1.000 0.998 1.000
## rmsea 0.003 0.008 0.003 0.010 0.003
## rmsea.ci.lower 0.000 0.007 0.000 0.008 0.000
## rmsea.ci.upper 0.005 0.010 0.006 0.011 0.005
## srmr 0.005 0.014 0.007 0.010 0.007
## aic 894729.411 894827.551 894728.030 894874.785 894717.093
## bic 895598.795 895617.900 895542.089 895609.809 895467.925
## Strict Latent Variances Latent Covariances Means
## chisq 211.981 213.905 224.246 236.332
## df 190.000 195.000 205.000 210.000
## npar 80.000 75.000 65.000 60.000
## cfi 1.000 1.000 1.000 1.000
## rmsea 0.003 0.003 0.003 0.004
## rmsea.ci.lower 0.000 0.000 0.000 0.000
## rmsea.ci.upper 0.006 0.005 0.005 0.006
## srmr 0.007 0.008 0.008 0.008
## aic 894710.335 894702.259 894692.600 894694.686
## bic 895342.614 895295.021 895206.326 895168.895
stepwise_pvalues(Configural, #Metric,
PartialMetric, #Scalar,
PartialScalar, Strict, LatentVariances, LatentCovariances, LatentMeans)
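To see the upward transfer directly, we can compare the groups’ estimated latent covariance matrices: under the full Scalar model, the constrained-but-biased measurement parameters leak into the structural estimates, while under PartialScalar the groups’ estimates should realign. A quick inspection sketch (using lavaan’s lavInspect; the list is named by the group labels):
round(lavInspect(Scalar, what = "est")$Control$psi, 3)        # latent covariances, group 1
round(lavInspect(Scalar, what = "est")$Experimental$psi, 3)   # distorted by forced-equal biased parameters
round(lavInspect(PartialScalar, what = "est")$Experimental$psi, 3) # closer to the Control estimates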
Now we’ll get into riskier territory, since several of the factors have only two indicators.
BICD <- '
F1 =~ P1 + P2
F2 =~ P4 + P6
F3 =~ P7 + P8 + P9
F4 =~ P10 + P11
F5 =~ P13 + P14 + P15'
Configural <- cfa(BICD, combinedDataBiased, std.lv = T, group = "group")
Metric <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = "loadings")
PartialMetric <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = "loadings",
group.partial = c("F1 =~ P2"))
Scalar <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"),
group.partial = c("F1 =~ P2"))
PartialScalar <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts"),
group.partial = c("F1 =~ P2", "P4 ~ 1"))
Strict <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = c("F1 =~ P2", "P4 ~ 1"))
LatentVariances <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"),
group.partial = c("F1 =~ P2", "P4 ~ 1"))
LatentCovariances <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"),
group.partial = c("F1 =~ P2", "P4 ~ 1"))
LatentMeans <- cfa(BICD, combinedDataBiased, std.lv = F, group = "group",
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"),
group.partial = c("F1 =~ P2", "P4 ~ 1"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"PartialMetric" = fitMeasures(PartialMetric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"Partial Scalar" = fitMeasures(PartialScalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Latent Covariances" = fitMeasures(LatentCovariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric PartialMetric Scalar Partial Scalar
## chisq 79.316 162.583 86.976 191.099 91.284
## df 88.000 95.000 94.000 101.000 100.000
## npar 92.000 85.000 86.000 79.000 80.000
## cfi 1.000 0.999 1.000 0.999 1.000
## rmsea 0.000 0.008 0.000 0.009 0.000
## rmsea.ci.lower 0.000 0.006 0.000 0.007 0.000
## rmsea.ci.upper 0.004 0.011 0.004 0.011 0.004
## srmr 0.004 0.013 0.006 0.009 0.006
## aic 720651.273 720720.539 720646.932 720737.056 720639.240
## bic 721378.393 721392.335 721326.632 721361.431 721271.519
## Strict Latent Variances Latent Covariances Means
## chisq 111.946 114.622 132.338 141.753
## df 112.000 117.000 127.000 132.000
## npar 68.000 63.000 53.000 48.000
## cfi 1.000 1.000 1.000 1.000
## rmsea 0.000 0.000 0.002 0.003
## rmsea.ci.lower 0.000 0.000 0.000 0.000
## rmsea.ci.upper 0.005 0.005 0.005 0.006
## srmr 0.007 0.008 0.008 0.008
## aic 720635.903 720628.579 720626.294 720625.709
## bic 721173.340 721126.499 721045.179 721005.077
stepwise_pvalues(Configural, #Metric,
PartialMetric, #Scalar,
PartialScalar, Strict, LatentVariances, LatentCovariances, LatentMeans)
To illustrate how well this works, let’s fit an incredibly poor model that starts off saturated and is built on the assumption that the latent variables are uncorrelated, with residual covariances across factor indicators instead. Even in this maximally overfitted scenario, bias is still correctly detected and corrected.
BICB <- '
F1 =~ P1 + P2 + P3
F2 =~ P4 + P5 + P6
F3 =~ P7 + P8 + P9
F4 =~ P10 + P11 + P12
F5 =~ P13 + P14 + P15
P1 ~~ P4 + P5 + P6 + P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15
P2 ~~ P4 + P5 + P6 + P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15
P3 ~~ P4 + P5 + P6 + P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15
P4 ~~ P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15
P5 ~~ P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15
P6 ~~ P7 + P8 + P9 + P10 + P11 + P12 + P13 + P14 + P15
P7 ~~ P10 + P11 + P12 + P13 + P14 + P15
P8 ~~ P10 + P11 + P12 + P13 + P14 + P15
P9 ~~ P10 + P11 + P12 + P13 + P14 + P15
P10 ~~ P13 + P14 + P15
P11 ~~ P13 + P14 + P15
P12 ~~ P13 + P14 + P15'
Configural <- cfa(BICB, combinedDataBiased, std.lv = T, group = "group", orthogonal = T)
Metric <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = "loadings")
PartialMetric <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = "loadings",
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5"))
Scalar <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = c("loadings", "intercepts"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5"))
PartialScalar <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = c("loadings", "intercepts"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
Strict <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
LatentVariances <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
LatentCovariances <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
LatentMeans <- cfa(BICB, combinedDataBiased, std.lv = F, group = "group", orthogonal = T,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "lv.covariances", "means"),
group.partial = c("F1 =~ P2", "F1 =~ P3", "F2 =~ P5", "P4 ~ 1", "P5 ~ 1"))
round(cbind("Configural" = fitMeasures(Configural, FITM),
"Metric" = fitMeasures(Metric, FITM),
"Partial Metric" = fitMeasures(PartialMetric, FITM),
"Scalar" = fitMeasures(Scalar, FITM),
"Partial Scalar" = fitMeasures(PartialScalar, FITM),
"Strict" = fitMeasures(Strict, FITM),
"Latent Variances" = fitMeasures(LatentVariances, FITM),
"Latent Covariances" = fitMeasures(LatentCovariances, FITM),
"Means" = fitMeasures(LatentMeans, FITM)), 3)
## Configural Metric Partial Metric Scalar Partial Scalar
## chisq 0.0 64.215 2.555 166.248 7.818
## df 0.0 10.000 7.000 17.000 15.000
## npar 270.0 260.000 263.000 253.000 255.000
## cfi 1.0 0.999 1.000 0.998 1.000
## rmsea 0.0 0.023 0.000 0.030 0.000
## rmsea.ci.lower 0.0 0.018 0.000 0.026 0.000
## rmsea.ci.upper 0.0 0.029 0.004 0.034 0.003
## srmr 0.0 0.007 0.001 0.007 0.002
## aic 894878.4 894922.568 894866.909 895010.601 894856.172
## bic 897012.3 896977.475 896945.526 897010.184 896871.561
## Strict Latent Variances Latent Covariances Means
## chisq 40.636 42.256 42.256 54.375
## df 30.000 35.000 35.000 40.000
## npar 240.000 235.000 235.000 230.000
## cfi 1.000 1.000 1.000 1.000
## rmsea 0.006 0.005 0.005 0.006
## rmsea.ci.lower 0.000 0.000 0.000 0.000
## rmsea.ci.upper 0.010 0.009 0.009 0.010
## srmr 0.004 0.005 0.005 0.005
## aic 894858.990 894850.610 894850.610 894852.729
## bic 896755.827 896707.930 896707.930 896670.531
stepwise_pvalues(Configural, #Metric,
PartialMetric, #Scalar,
PartialScalar, Strict, LatentVariances, LatentCovariances, LatentMeans)
There is no requirement that the model being used be “correct” in order to detect bias, only that it be reasonable. At the same time, many unreasonable models will still clearly show evidence of bias. This position becomes more tenuous with models that have much worse initial fits, samples that are too small to detect a given amount of bias, insensitive model fit indices and overly liberal \(\Delta\) fit criteria, biased parameters used to identify the model, and so on.
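To make the power point concrete, here is a rough sketch I’ll add (an approximation, not part of the analyses above): treating the Scalar-versus-PartialScalar gap from the correct-model analysis, \(\Delta\chi^2 \approx 507\) on 2 df at \(n = 10{,}000\) per group, as the test’s noncentrality, and using the fact that noncentrality scales approximately linearly with sample size, the power to detect that intercept bias at smaller samples is roughly:
power_at_n <- function(n, n0 = 10000, ncp0 = 507, df = 2, alpha = 0.05) {
  crit <- qchisq(1 - alpha, df) # critical value under the null
  pchisq(crit, df, ncp = ncp0 * n / n0, lower.tail = FALSE)
}
round(power_at_n(c(250, 500, 1000, 2500)), 3)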
In realistic scenarios, it is unlikely that major bias will go undetected by careful psychometricians conducting multi-group confirmatory factor analyses. To help ensure bias is detected, item-level analyses may be preferred for their granularity, even though they produce highly similar results while, in some cases, being agnostic with respect to latent structure (e.g., Mantel-Haenszel, logistic regression, or per-item ANOVAs of item by group by subject with sum scores as an independent variable; omnibus bias detection procedures are also clearly agnostic with respect to latent structure). Certain types of item-level analyses do have bias detection problems, but these are often conceptually shared with testing via multi-group confirmatory factor analysis: issues with parameterization (e.g., a 1PL cannot inform about item discrimination), estimation (e.g., a nonparametric estimator may provide different results than a parametric one), and, for some methods, anchor selection (analogous to model identification choices). Each of these difficulties can be addressed, and bias determinations will ultimately converge across all appropriate models, since they are all explanations of the same data.
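For illustration, here is a minimal sketch of the per-item regression logic mentioned above, adapted to these simulated continuous items (difScan is a hypothetical helper I’m defining here, not an established function; I condition on a rest score rather than the sum score so the studied item doesn’t contaminate the conditioning variable). A group main effect suggests uniform bias (intercepts), while a rest-score-by-group interaction suggests non-uniform bias (loadings):
difScan <- function(data, items, group) {
  t(sapply(items, function(it) {
    rest <- rowSums(data[, setdiff(items, it)]) # rest score excluding the studied item
    fit <- lm(data[[it]] ~ rest * factor(data[[group]]))
    coef(summary(fit))[3:4, "Pr(>|t|)"] # p-values for the group and rest-by-group terms
  }))
}
round(difScan(combinedDataBiased, paste0("P", 1:15), "group"), 4)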
Meredith, W. (1993). Measurement invariance, factor analysis and factorial invariance. Psychometrika, 58(4), 525–543. https://doi.org/10.1007/BF02294825
Because network models are statistically equivalent to factor models, the answer is necessarily “yes”: unless the judgment of equivalence is in error, they should recover bias as well. It’s now known that network loadings are generally highly similar to factor loadings, so their equivalence should be apparent. Below, I show that we don’t even need to mimic factor model parameters to reveal some forms of bias: network-centric metrics can be used to indicate bias too!
p_load(bootnet, NetworkComparisonTest)
CDI <- subset(combinedData, group == "Control")
CDII <- subset(combinedData, group == "Experimental")
CDBI <- subset(combinedDataBiased, group == "Control")
CDBII <- subset(combinedDataBiased, group == "Experimental") # I realize the first group is superfluous
CDI$group <- NULL; CDII$group <- NULL; CDBI$group <- NULL; CDBII$group <- NULL
CDINet <- estimateNetwork(data = CDI, default = "EBICglasso")
CDIINet <- estimateNetwork(data = CDII, default = "EBICglasso")
CDBINet <- estimateNetwork(data = CDBI, default = "EBICglasso")
CDBIINet <- estimateNetwork(data = CDBII, default = "EBICglasso")
set.seed(123)
UnbiasedNCT <- NCT(data1 = CDINet, data2 = CDIINet,
it = 1000, paired = F,
test.edges = T, test.centrality = T,
p.adjust.methods = "fdr")
BiasedNCT <- NCT(data1 = CDBINet, data2 = CDBIINet,
it = 1000, paired = F,
test.edges = T, test.centrality = T,
p.adjust.methods = "fdr")
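Beyond the centrality plots, the omnibus tests can be read off the NCT objects directly. I’m assuming the component names documented for NCT results here (nwinv.pval for the network structure invariance test, glstrinv.pval for global strength invariance); adjust if your package version differs:
c(Unbiased_structure = UnbiasedNCT$nwinv.pval, Biased_structure = BiasedNCT$nwinv.pval,
  Unbiased_strength = UnbiasedNCT$glstrinv.pval, Biased_strength = BiasedNCT$glstrinv.pval)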
plot(UnbiasedNCT, what = "centrality")
plot(BiasedNCT, what = "centrality")
I am not currently aware of network-centric metrics (i.e., ones that aren’t just emulating factor model parameters or estimating them from the network model) that can indicate bias in the intercepts, but if anyone knows of one, tell me.
Jamison, Golino, and Christensen (2023) have made it possible to explicitly test metric invariance with exploratory graph analysis (EGA) through permutation testing. In the future, this may become possible with more than two groups and at higher stages, but for now, it can at least be shown that detecting metric non-invariance is possible in EGA.
p_load(EGAnet)
CD <- combinedData; CD$group <- NULL
CDB <- combinedDataBiased; CDB$group <- NULL
invariance(CD, groups = combinedData$group,
iter = 1000, seed = 123,
configural.threshold = .7, configural.type = "parametric",
corr = "auto", model = "glasso", algorithm = "louvain", uni.method = "louvain")
## Testing configural invariance...
##
## Configural invariance was found with 15 variables
## Testing metric invariance...
## Membership Difference p p_BH sig Direction
## P1 1 0.000 0.920 0.973
## P2 1 0.001 0.912 0.973
## P3 1 -0.002 0.711 0.973
## P4 1 0.003 0.482 0.973
## P5 1 -0.007 0.150 0.973
## P6 1 -0.001 0.896 0.973
## P7 1 -0.003 0.540 0.973
## P8 1 -0.003 0.584 0.973
## P9 1 0.009 0.069 0.973 .
## P10 1 -0.002 0.755 0.973
## P11 1 -0.003 0.550 0.973
## P12 1 0.001 0.893 0.973
## P13 1 0.000 0.973 0.973
## P14 1 0.004 0.327 0.973
## P15 1 -0.001 0.905 0.973
## ----
## Signif. code: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 'n.s.' 1
invariance(CDB, groups = combinedDataBiased$group,
iter = 1000, seed = 123,
configural.threshold = .7, configural.type = "parametric",
corr = "auto", model = "glasso", algorithm = "louvain", uni.method = "louvain")
## Testing configural invariance...
##
## Configural invariance was found with 15 variables
## Testing metric invariance...
## Membership Difference p p_BH sig Direction
## P1 1 0.006 0.286 0.477
## P2 1 -0.073 0.001 0.005 *** Control < Experimental
## P3 1 -0.026 0.001 0.005 *** Control < Experimental
## P4 1 0.011 0.072 0.180 .
## P5 1 0.043 0.001 0.005 *** Control > Experimental
## P6 1 0.004 0.579 0.724
## P7 1 0.002 0.735 0.848
## P8 1 0.006 0.354 0.483
## P9 1 0.009 0.143 0.306
## P10 1 0.013 0.026 0.078 * Control > Experimental
## P11 1 0.000 0.956 0.956
## P12 1 -0.007 0.251 0.471
## P13 1 -0.001 0.893 0.956
## P14 1 0.017 0.009 0.034 ** Control > Experimental
## P15 1 0.006 0.329 0.483
## ----
## Signif. code: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 'n.s.' 1
Jamison, L., Golino, H., & Christensen, A. P. (2023). Metric invariance in exploratory graph analysis via permutation testing. PsyArXiv.
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] EGAnet_2.0.6 NetworkComparisonTest_2.2.2
## [3] bootnet_1.6 ggplot2_3.5.1
## [5] lavaan_0.6-17 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8
## [4] shape_1.4.6.1 magrittr_2.0.3 jomo_2.7-6
## [7] nloptr_2.0.3 rmarkdown_2.26 vctrs_0.6.5
## [10] minqa_1.2.6 heplots_1.7.0 base64enc_0.1-3
## [13] progress_1.2.3 htmltools_0.5.8.1 polynom_1.4-1
## [16] plotrix_3.8-4 weights_1.0.4 broom_1.0.5
## [19] Formula_1.2-5 mitml_0.4-5 parallelly_1.37.1
## [22] sass_0.4.9 bslib_0.7.0 htmlwidgets_1.6.4
## [25] plyr_1.8.9 cachem_1.0.8 igraph_2.0.3
## [28] lifecycle_1.0.4 iterators_1.0.14 pkgconfig_2.0.3
## [31] Matrix_1.7-0 R6_2.5.1 fastmap_1.1.1
## [34] future_1.33.2 digest_0.6.35 fdrtool_1.2.17
## [37] GGally_2.2.1 colorspace_2.1-0 Hmisc_5.1-2
## [40] ellipse_0.5.0 progressr_0.14.0 fansi_1.0.6
## [43] nnls_1.5 gdata_3.0.0 abind_1.4-5
## [46] IsingSampler_0.2.3 compiler_4.4.0 proxy_0.4-27
## [49] withr_3.0.0 doParallel_1.0.17 glasso_1.11
## [52] htmlTable_2.4.2 backports_1.4.1 carData_3.0-5
## [55] psych_2.4.3 ggstats_0.6.0 mgm_1.2-14
## [58] highr_0.10 R.utils_2.12.3 pan_1.9
## [61] MASS_7.3-60.2 corpcor_1.6.10 gtools_3.9.5
## [64] tools_4.4.0 pbivnorm_0.6.0 foreign_0.8-86
## [67] future.apply_1.11.2 nnet_7.3-19 R.oo_1.26.0
## [70] glue_1.7.0 quadprog_1.5-8 NetworkToolbox_1.4.2
## [73] nlme_3.1-164 grid_4.4.0 checkmate_2.3.1
## [76] cluster_2.1.6 reshape2_1.4.4 generics_0.1.3
## [79] snow_0.4-4 gtable_0.3.5 R.methodsS3_1.8.2
## [82] class_7.3-22 tidyr_1.3.1 sna_2.7-2
## [85] hms_1.1.3 data.table_1.15.4 car_3.1-2
## [88] utf8_1.2.4 foreach_1.5.2 pillar_1.9.0
## [91] stringr_1.5.1 splines_4.4.0 dplyr_1.1.4
## [94] smacof_2.1-6 networktools_1.5.2 lattice_0.22-6
## [97] survival_3.5-8 tidyselect_1.2.1 pbapply_1.7-2
## [100] knitr_1.46 gridExtra_2.3 IsingFit_0.4
## [103] stats4_4.4.0 xfun_0.43 qgraph_1.9.8
## [106] stringi_1.8.4 statnet.common_4.9.0 yaml_2.3.8
## [109] boot_1.3-30 evaluate_0.23 codetools_0.2-20
## [112] wordcloud_2.6 tibble_3.2.1 cli_3.6.2
## [115] rpart_4.1.23 munsell_0.5.1 jquerylib_0.1.4
## [118] network_1.18.2 candisc_0.9.0 Rcpp_1.0.12
## [121] globals_0.16.3 coda_0.19-4.1 png_0.1-8
## [124] parallel_4.4.0 rgl_1.3.1 prettyunits_1.2.0
## [127] jpeg_0.1-10 listenv_0.9.1 lme4_1.1-35.3
## [130] glmnet_4.1-8 mvtnorm_1.2-4 scales_1.3.0
## [133] e1071_1.7-14 crayon_1.5.2 eigenmodel_1.11
## [136] purrr_1.0.2 rlang_1.1.3 mnormt_2.1.1
## [139] mice_3.16.0