It is possible to automatically fit multi-group confirmatory factor analyses with a given model in lavaan. I’ve developed and deployed a package for this, here: https://github.com/ljlasker/Automatic-MGCFA/.
This allows you test for measurement invariance very easily, just provide a model specification and let it rip.
install.packages("remotes")
remotes::install_github("ljlasker/Automatic-MGCFA")
# Common mgcfa_auto settings (all are package args)
mgcfa_opts <- list(
model_type = "custom",
model = model_syntax,
include_steps = steps_full,
estimator = "ML",
partial_failure_criterion = "delta_cfi",
partial_failure_threshold = 0.01,
partial_auto_search = "always",
partial_search_criterion = "delta_cfi",
partial_search_threshold = 0.01,
partial_search_top_n = 8L,
partial_search_stop_on_accept = TRUE,
partial_search_rank = "closest"
)
To show this off, I’ve simulated a one-factor model with two groups. Simulated without bias, the result is as-expected:
# -----------------------------------------------------------------------------
# Scenario 1: No bias (raw data)
# -----------------------------------------------------------------------------
dat_no_bias <- simulate_two_group_onefactor(
n_per_group = 700L,
intercept_shift_group2 = rep(0, 6),
latent_mean_group2 = 0,
seed = 123
)
fit_no_bias_raw <- do.call(
mgcfa_auto,
c(mgcfa_opts, list(data = dat_no_bias, group = "group"))
)
cat("\nNo-bias simulation (raw data)\n")
##
## No-bias simulation (raw data)
print(fit_no_bias_raw, digits = 3, rounding = "signif", show_freed_parameters = TRUE, verbose = FALSE)
## Mode: raw_data
## Model type: custom
## Steps: configural, metric, scalar, strict, lv.variances, means
##
## step status model_used n_freed cfi rmsea srmr aic bic
## configural ok full 0 1 0 0.00781 21000 21200
## metric ok full 0 1 0 0.01460 21000 21200
## scalar ok full 0 1 0 0.01610 21000 21100
## strict ok full 0 1 0 0.01780 21000 21100
## lv.variances ok full 0 1 0 0.01780 21000 21100
## means ok full 0 1 0 0.01790 21000 21100
Simulated with bias, we can automatically fit a partially invariant model if there are enough parameters that can still be held constant between groups:
# -----------------------------------------------------------------------------
# Scenario 2: Bias at scalar + latent means (raw data)
# -----------------------------------------------------------------------------
dat_bias <- simulate_two_group_onefactor(
n_per_group = 700L,
intercept_shift_group2 = c(0.65, 0.50, 0, 0, 0, 0),
latent_mean_group2 = 0.50,
seed = 101
)
fit_bias_raw <- do.call(
mgcfa_auto,
c(mgcfa_opts, list(data = dat_bias, group = "group"))
)
cat("\nBias simulation (raw data)\n")
##
## Bias simulation (raw data)
print(fit_bias_raw, digits = 3, rounding = "signif", show_freed_parameters = TRUE, verbose = FALSE)
## Mode: raw_data
## Model type: custom
## Steps: configural, metric, scalar, strict, lv.variances, means
##
## step status model_used n_freed cfi rmsea srmr aic
## configural ok full 0 1.000 0.0000 0.00951 21000
## metric ok full 0 1.000 0.0000 0.01680 21000
## scalar recovered_partial partial 2 1.000 0.0000 0.01770 21000
## strict ok full 0 1.000 0.0000 0.01970 20900
## lv.variances ok full 0 1.000 0.0000 0.03700 20900
## means failed full 0 0.979 0.0554 0.09690 21000
## bic
## 21100
## 21100
## 21100
## 21100
## 21100
## 21100
##
## Stopped early at step: means
## Reason: Stopped after `means` because invariance remained unacceptable under `delta_cfi` (threshold = 0.01).
##
## Freed parameters by step
## scalar : x1~1, x2~1
failed_bias_raw <- names(Filter(function(z) isTRUE(z$failed), fit_bias_raw$step_failures))
cat("\nExpected bias pattern check (raw):\n")
##
## Expected bias pattern check (raw):
cat("Failed stages:", paste(failed_bias_raw, collapse = ", "), "\n")
## Failed stages: scalar, means
And using this package, we can quickly demonstrate that results based on summary statistics should be equivalent to those based on raw data, provided enough detail is given and reporting is accurate:
# -----------------------------------------------------------------------------
# Scenario 3: Bias raw vs summary equivalence (summary stats from same raw data)
# -----------------------------------------------------------------------------
sum_bias <- mgcfa_make_summary(
data = dat_bias,
group = "group",
variables = indicators,
matrix_type = "cor",
format = "truncate", # or "round"
cor_digits = 2,
sd_digits = 2,
mean_digits = 2
)
fit_bias_summary <- do.call(
mgcfa_auto,
c(
mgcfa_opts,
list(
sample_cov = sum_bias$sample_cov,
sample_sd = sum_bias$sample_sd,
matrices_are_cor = sum_bias$matrices_are_cor,
sample_mean = sum_bias$sample_mean,
sample_nobs = sum_bias$sample_nobs,
group_labels = sum_bias$group_labels
)
)
)
cat("\nBias simulation (summary statistics)\n")
##
## Bias simulation (summary statistics)
print(fit_bias_summary, digits = 3, rounding = "signif", show_freed_parameters = TRUE, verbose = FALSE)
## Mode: summary_matrices
## Model type: custom
## Steps: configural, metric, scalar, strict, lv.variances, means
##
## step status model_used n_freed cfi rmsea srmr aic
## configural ok full 0 1.000 0.0000 0.00992 21000
## metric ok full 0 1.000 0.0000 0.01640 21000
## scalar recovered_partial partial 2 1.000 0.0000 0.01710 20900
## strict ok full 0 1.000 0.0000 0.01900 20900
## lv.variances ok full 0 1.000 0.0000 0.03670 20900
## means failed full 0 0.978 0.0556 0.09630 21000
## bic
## 21100
## 21100
## 21100
## 21100
## 21100
## 21100
##
## Stopped early at step: means
## Reason: Stopped after `means` because invariance remained unacceptable under `delta_cfi` (threshold = 0.01).
##
## Freed parameters by step
## scalar : x1~1, x2~1
failed_bias_summary <- names(Filter(function(z) isTRUE(z$failed), fit_bias_summary$step_failures))
cat("\nExpected bias pattern check (summary):\n")
##
## Expected bias pattern check (summary):
cat("Failed stages:", paste(failed_bias_summary, collapse = ", "), "\n")
## Failed stages: scalar, means
raw_df <- mgcfa_tidy_fit(
fit_bias_raw,
measures = c("chisq", "df", "cfi", "rmsea", "aic", "bic"),
include_non_partial = FALSE,
add_delta = FALSE,
rounding = "none"
)
sum_df <- mgcfa_tidy_fit(
fit_bias_summary,
measures = c("chisq", "df", "cfi", "rmsea", "aic", "bic"),
include_non_partial = FALSE,
add_delta = FALSE,
rounding = "none"
)
raw_small <- raw_df[, c("step", "measure", "value")]
sum_small <- sum_df[, c("step", "measure", "value")]
names(raw_small)[names(raw_small) == "value"] <- "value_raw"
names(sum_small)[names(sum_small) == "value"] <- "value_summary"
cmp_bias <- merge(raw_small, sum_small, by = c("step", "measure"), all = TRUE, sort = TRUE)
cmp_bias$abs_diff <- abs(cmp_bias$value_raw - cmp_bias$value_summary)
cat("\nRaw vs summary comparison (bias scenario)\n")
##
## Raw vs summary comparison (bias scenario)
print(cmp_bias)
## step measure value_raw value_summary abs_diff
## 1 configural aic 2.096048e+04 2.095818e+04 2.2953168310
## 2 configural bic 2.114927e+04 2.114697e+04 2.2953168310
## 3 configural cfi 1.000000e+00 1.000000e+00 0.0000000000
## 4 configural chisq 1.636713e+01 1.759741e+01 1.2302721254
## 5 configural df 1.800000e+01 1.800000e+01 0.0000000000
## 6 configural rmsea 0.000000e+00 0.000000e+00 0.0000000000
## 7 lv.variances aic 2.094424e+04 2.094167e+04 2.5712453971
## 8 lv.variances bic 2.105437e+04 2.105180e+04 2.5712453971
## 9 lv.variances cfi 1.000000e+00 1.000000e+00 0.0000000000
## 10 lv.variances chisq 3.013663e+01 3.109098e+01 0.9543435593
## 11 lv.variances df 3.300000e+01 3.300000e+01 0.0000000000
## 12 lv.variances rmsea 0.000000e+00 0.000000e+00 0.0000000000
## 13 means aic 2.101913e+04 2.101628e+04 2.8466822578
## 14 means bic 2.112402e+04 2.112117e+04 2.8466822578
## 15 means cfi 9.789478e-01 9.783984e-01 0.0005493142
## 16 means chisq 1.070222e+02 1.077012e+02 0.6789066987
## 17 means df 3.400000e+01 3.400000e+01 0.0000000000
## 18 means rmsea 5.539099e-02 5.564788e-02 0.0002568964
## 19 metric aic 2.095425e+04 2.095151e+04 2.7442451317
## 20 metric bic 2.111683e+04 2.111408e+04 2.7442451317
## 21 metric cfi 1.000000e+00 1.000000e+00 0.0000000000
## 22 metric chisq 2.014592e+01 2.092726e+01 0.7813438248
## 23 metric df 2.300000e+01 2.300000e+01 0.0000000000
## 24 metric rmsea 0.000000e+00 0.000000e+00 0.0000000000
## 25 scalar aic 2.095008e+04 2.094685e+04 3.2315349523
## 26 scalar bic 2.109692e+04 2.109368e+04 3.2315349523
## 27 scalar cfi 1.000000e+00 1.000000e+00 0.0000000000
## 28 scalar chisq 2.196839e+01 2.226244e+01 0.2940540041
## 29 scalar df 2.600000e+01 2.600000e+01 0.0000000000
## 30 scalar rmsea 0.000000e+00 0.000000e+00 0.0000000000
## 31 strict aic 2.094355e+04 2.094098e+04 2.5730077276
## 32 strict bic 2.105893e+04 2.105635e+04 2.5730077276
## 33 strict cfi 1.000000e+00 1.000000e+00 0.0000000000
## 34 strict chisq 2.744418e+01 2.839676e+01 0.9525812289
## 35 strict df 3.200000e+01 3.200000e+01 0.0000000000
## 36 strict rmsea 0.000000e+00 0.000000e+00 0.0000000000
And you can also plot model comparisons by whatever fit metrics you wish among what’s available:
fit_for_plot <- fit_bias_raw # or fit_bias_summary
p_aic <- mgcfa_plot_fit(
fit_for_plot,
measures = "aic",
plot_type = "delta",
include_non_partial = TRUE,
baseline_step = "configural"
)
p_bic <- mgcfa_plot_fit(
fit_for_plot,
measures = "bic",
plot_type = "delta",
include_non_partial = TRUE,
baseline_step = "configural"
)
if (requireNamespace("patchwork", quietly = TRUE)) {
p_aic + p_bic + patchwork::plot_layout(ncol = 2)
} else if (requireNamespace("gridExtra", quietly = TRUE)) {
gridExtra::grid.arrange(p_aic, p_bic, ncol = 2)
} else {
print(p_aic)
print(p_bic)
}
sessionInfo()
## R version 4.4.0 (2024-04-24)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] AutomaticMGCFA_0.1.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 highr_0.10 dplyr_1.1.4
## [5] compiler_4.4.0 tidyselect_1.2.1 parallel_4.4.0 jquerylib_0.1.4
## [9] scales_1.4.0 yaml_2.3.8 fastmap_1.2.0 pbivnorm_0.6.0
## [13] ggplot2_4.0.1 R6_2.6.1 labeling_0.4.3 generics_0.1.3
## [17] patchwork_1.2.0 knitr_1.46 MASS_7.3-60.2 tibble_3.2.1
## [21] bslib_0.7.0 pillar_1.9.0 RColorBrewer_1.1-3 rlang_1.1.7
## [25] utf8_1.2.4 cachem_1.1.0 xfun_0.52 quadprog_1.5-8
## [29] sass_0.4.9 S7_0.2.1 cli_3.6.5 withr_3.0.2
## [33] magrittr_2.0.3 digest_0.6.35 grid_4.4.0 rstudioapi_0.16.0
## [37] lifecycle_1.0.4 lavaan_0.6-17 vctrs_0.6.5 mnormt_2.1.1
## [41] evaluate_1.0.5 glue_1.8.0 farver_2.1.1 stats4_4.4.0
## [45] fansi_1.0.6 rmarkdown_2.29 tools_4.4.0 pkgconfig_2.0.3
## [49] htmltools_0.5.8.1