Background

It is possible to automatically fit multi-group confirmatory factor analyses with a given model in lavaan. I’ve developed and deployed a package for this, here: https://github.com/ljlasker/Automatic-MGCFA/.

This allows you test for measurement invariance very easily, just provide a model specification and let it rip.

Install

install.packages("remotes")
remotes::install_github("ljlasker/Automatic-MGCFA")

Demonstration

# Common mgcfa_auto settings (all are package args)
mgcfa_opts <- list(
  model_type = "custom",
  model = model_syntax,
  include_steps = steps_full,
  estimator = "ML",
  partial_failure_criterion = "delta_cfi",
  partial_failure_threshold = 0.01,
  partial_auto_search = "always",
  partial_search_criterion = "delta_cfi",
  partial_search_threshold = 0.01,
  partial_search_top_n = 8L,
  partial_search_stop_on_accept = TRUE,
  partial_search_rank = "closest"
)

To show this off, I’ve simulated a one-factor model with two groups. Simulated without bias, the result is as-expected:

# -----------------------------------------------------------------------------
# Scenario 1: No bias (raw data)
# -----------------------------------------------------------------------------
dat_no_bias <- simulate_two_group_onefactor(
  n_per_group = 700L,
  intercept_shift_group2 = rep(0, 6),
  latent_mean_group2 = 0,
  seed = 123
)

fit_no_bias_raw <- do.call(
  mgcfa_auto,
  c(mgcfa_opts, list(data = dat_no_bias, group = "group"))
)

cat("\nNo-bias simulation (raw data)\n")

## 
## No-bias simulation (raw data)

print(fit_no_bias_raw, digits = 3, rounding = "signif", show_freed_parameters = TRUE, verbose = FALSE)

## Mode: raw_data 
## Model type: custom 
## Steps: configural, metric, scalar, strict, lv.variances, means 
## 
##          step status model_used n_freed cfi rmsea    srmr   aic   bic
##    configural     ok       full       0   1     0 0.00781 21000 21200
##        metric     ok       full       0   1     0 0.01460 21000 21200
##        scalar     ok       full       0   1     0 0.01610 21000 21100
##        strict     ok       full       0   1     0 0.01780 21000 21100
##  lv.variances     ok       full       0   1     0 0.01780 21000 21100
##         means     ok       full       0   1     0 0.01790 21000 21100

Simulated with bias, we can automatically fit a partially invariant model if there are enough parameters that can still be held constant between groups:

# -----------------------------------------------------------------------------
# Scenario 2: Bias at scalar + latent means (raw data)
# -----------------------------------------------------------------------------
dat_bias <- simulate_two_group_onefactor(
  n_per_group = 700L,
  intercept_shift_group2 = c(0.65, 0.50, 0, 0, 0, 0),
  latent_mean_group2 = 0.50,
  seed = 101
)

fit_bias_raw <- do.call(
  mgcfa_auto,
  c(mgcfa_opts, list(data = dat_bias, group = "group"))
)

cat("\nBias simulation (raw data)\n")

## 
## Bias simulation (raw data)

print(fit_bias_raw, digits = 3, rounding = "signif", show_freed_parameters = TRUE, verbose = FALSE)

## Mode: raw_data 
## Model type: custom 
## Steps: configural, metric, scalar, strict, lv.variances, means 
## 
##          step            status model_used n_freed   cfi  rmsea    srmr   aic
##    configural                ok       full       0 1.000 0.0000 0.00951 21000
##        metric                ok       full       0 1.000 0.0000 0.01680 21000
##        scalar recovered_partial    partial       2 1.000 0.0000 0.01770 21000
##        strict                ok       full       0 1.000 0.0000 0.01970 20900
##  lv.variances                ok       full       0 1.000 0.0000 0.03700 20900
##         means            failed       full       0 0.979 0.0554 0.09690 21000
##    bic
##  21100
##  21100
##  21100
##  21100
##  21100
##  21100
## 
## Stopped early at step: means 
## Reason: Stopped after `means` because invariance remained unacceptable under `delta_cfi` (threshold = 0.01). 
## 
## Freed parameters by step
## scalar : x1~1, x2~1

failed_bias_raw <- names(Filter(function(z) isTRUE(z$failed), fit_bias_raw$step_failures))
cat("\nExpected bias pattern check (raw):\n")

## 
## Expected bias pattern check (raw):

cat("Failed stages:", paste(failed_bias_raw, collapse = ", "), "\n")

## Failed stages: scalar, means

And using this package, we can quickly demonstrate that results based on summary statistics should be equivalent to those based on raw data, provided enough detail is given and reporting is accurate:

# -----------------------------------------------------------------------------
# Scenario 3: Bias raw vs summary equivalence (summary stats from same raw data)
# -----------------------------------------------------------------------------
sum_bias <- mgcfa_make_summary(
  data = dat_bias,
  group = "group",
  variables = indicators,
  matrix_type = "cor",
  format = "truncate",  # or "round"
  cor_digits = 2,
  sd_digits = 2,
  mean_digits = 2
)

fit_bias_summary <- do.call(
  mgcfa_auto,
  c(
    mgcfa_opts,
    list(
      sample_cov = sum_bias$sample_cov,
      sample_sd = sum_bias$sample_sd,
      matrices_are_cor = sum_bias$matrices_are_cor,
      sample_mean = sum_bias$sample_mean,
      sample_nobs = sum_bias$sample_nobs,
      group_labels = sum_bias$group_labels
    )
  )
)

cat("\nBias simulation (summary statistics)\n")

## 
## Bias simulation (summary statistics)

print(fit_bias_summary, digits = 3, rounding = "signif", show_freed_parameters = TRUE, verbose = FALSE)

## Mode: summary_matrices 
## Model type: custom 
## Steps: configural, metric, scalar, strict, lv.variances, means 
## 
##          step            status model_used n_freed   cfi  rmsea    srmr   aic
##    configural                ok       full       0 1.000 0.0000 0.00992 21000
##        metric                ok       full       0 1.000 0.0000 0.01640 21000
##        scalar recovered_partial    partial       2 1.000 0.0000 0.01710 20900
##        strict                ok       full       0 1.000 0.0000 0.01900 20900
##  lv.variances                ok       full       0 1.000 0.0000 0.03670 20900
##         means            failed       full       0 0.978 0.0556 0.09630 21000
##    bic
##  21100
##  21100
##  21100
##  21100
##  21100
##  21100
## 
## Stopped early at step: means 
## Reason: Stopped after `means` because invariance remained unacceptable under `delta_cfi` (threshold = 0.01). 
## 
## Freed parameters by step
## scalar : x1~1, x2~1

failed_bias_summary <- names(Filter(function(z) isTRUE(z$failed), fit_bias_summary$step_failures))
cat("\nExpected bias pattern check (summary):\n")

## 
## Expected bias pattern check (summary):

cat("Failed stages:", paste(failed_bias_summary, collapse = ", "), "\n")

## Failed stages: scalar, means

raw_df <- mgcfa_tidy_fit(
  fit_bias_raw,
  measures = c("chisq", "df", "cfi", "rmsea", "aic", "bic"),
  include_non_partial = FALSE,
  add_delta = FALSE,
  rounding = "none"
)

sum_df <- mgcfa_tidy_fit(
  fit_bias_summary,
  measures = c("chisq", "df", "cfi", "rmsea", "aic", "bic"),
  include_non_partial = FALSE,
  add_delta = FALSE,
  rounding = "none"
)

raw_small <- raw_df[, c("step", "measure", "value")]
sum_small <- sum_df[, c("step", "measure", "value")]
names(raw_small)[names(raw_small) == "value"] <- "value_raw"
names(sum_small)[names(sum_small) == "value"] <- "value_summary"

cmp_bias <- merge(raw_small, sum_small, by = c("step", "measure"), all = TRUE, sort = TRUE)
cmp_bias$abs_diff <- abs(cmp_bias$value_raw - cmp_bias$value_summary)

cat("\nRaw vs summary comparison (bias scenario)\n")

## 
## Raw vs summary comparison (bias scenario)

print(cmp_bias)

##            step measure    value_raw value_summary     abs_diff
## 1    configural     aic 2.096048e+04  2.095818e+04 2.2953168310
## 2    configural     bic 2.114927e+04  2.114697e+04 2.2953168310
## 3    configural     cfi 1.000000e+00  1.000000e+00 0.0000000000
## 4    configural   chisq 1.636713e+01  1.759741e+01 1.2302721254
## 5    configural      df 1.800000e+01  1.800000e+01 0.0000000000
## 6    configural   rmsea 0.000000e+00  0.000000e+00 0.0000000000
## 7  lv.variances     aic 2.094424e+04  2.094167e+04 2.5712453971
## 8  lv.variances     bic 2.105437e+04  2.105180e+04 2.5712453971
## 9  lv.variances     cfi 1.000000e+00  1.000000e+00 0.0000000000
## 10 lv.variances   chisq 3.013663e+01  3.109098e+01 0.9543435593
## 11 lv.variances      df 3.300000e+01  3.300000e+01 0.0000000000
## 12 lv.variances   rmsea 0.000000e+00  0.000000e+00 0.0000000000
## 13        means     aic 2.101913e+04  2.101628e+04 2.8466822578
## 14        means     bic 2.112402e+04  2.112117e+04 2.8466822578
## 15        means     cfi 9.789478e-01  9.783984e-01 0.0005493142
## 16        means   chisq 1.070222e+02  1.077012e+02 0.6789066987
## 17        means      df 3.400000e+01  3.400000e+01 0.0000000000
## 18        means   rmsea 5.539099e-02  5.564788e-02 0.0002568964
## 19       metric     aic 2.095425e+04  2.095151e+04 2.7442451317
## 20       metric     bic 2.111683e+04  2.111408e+04 2.7442451317
## 21       metric     cfi 1.000000e+00  1.000000e+00 0.0000000000
## 22       metric   chisq 2.014592e+01  2.092726e+01 0.7813438248
## 23       metric      df 2.300000e+01  2.300000e+01 0.0000000000
## 24       metric   rmsea 0.000000e+00  0.000000e+00 0.0000000000
## 25       scalar     aic 2.095008e+04  2.094685e+04 3.2315349523
## 26       scalar     bic 2.109692e+04  2.109368e+04 3.2315349523
## 27       scalar     cfi 1.000000e+00  1.000000e+00 0.0000000000
## 28       scalar   chisq 2.196839e+01  2.226244e+01 0.2940540041
## 29       scalar      df 2.600000e+01  2.600000e+01 0.0000000000
## 30       scalar   rmsea 0.000000e+00  0.000000e+00 0.0000000000
## 31       strict     aic 2.094355e+04  2.094098e+04 2.5730077276
## 32       strict     bic 2.105893e+04  2.105635e+04 2.5730077276
## 33       strict     cfi 1.000000e+00  1.000000e+00 0.0000000000
## 34       strict   chisq 2.744418e+01  2.839676e+01 0.9525812289
## 35       strict      df 3.200000e+01  3.200000e+01 0.0000000000
## 36       strict   rmsea 0.000000e+00  0.000000e+00 0.0000000000

And you can also plot model comparisons by whatever fit metrics you wish among what’s available:

fit_for_plot <- fit_bias_raw  # or fit_bias_summary

p_aic <- mgcfa_plot_fit(
  fit_for_plot,
  measures = "aic",
  plot_type = "delta",
  include_non_partial = TRUE,
  baseline_step = "configural"
)

p_bic <- mgcfa_plot_fit(
  fit_for_plot,
  measures = "bic",
  plot_type = "delta",
  include_non_partial = TRUE,
  baseline_step = "configural"
)

if (requireNamespace("patchwork", quietly = TRUE)) {
  p_aic + p_bic + patchwork::plot_layout(ncol = 2)
} else if (requireNamespace("gridExtra", quietly = TRUE)) {
  gridExtra::grid.arrange(p_aic, p_bic, ncol = 2)
} else {
  print(p_aic)
  print(p_bic)
}

Session Info

sessionInfo()

## R version 4.4.0 (2024-04-24)
## Platform: aarch64-apple-darwin20
## Running under: macOS 26.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Chicago
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] AutomaticMGCFA_0.1.0
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       jsonlite_2.0.0     highr_0.10         dplyr_1.1.4       
##  [5] compiler_4.4.0     tidyselect_1.2.1   parallel_4.4.0     jquerylib_0.1.4   
##  [9] scales_1.4.0       yaml_2.3.8         fastmap_1.2.0      pbivnorm_0.6.0    
## [13] ggplot2_4.0.1      R6_2.6.1           labeling_0.4.3     generics_0.1.3    
## [17] patchwork_1.2.0    knitr_1.46         MASS_7.3-60.2      tibble_3.2.1      
## [21] bslib_0.7.0        pillar_1.9.0       RColorBrewer_1.1-3 rlang_1.1.7       
## [25] utf8_1.2.4         cachem_1.1.0       xfun_0.52          quadprog_1.5-8    
## [29] sass_0.4.9         S7_0.2.1           cli_3.6.5          withr_3.0.2       
## [33] magrittr_2.0.3     digest_0.6.35      grid_4.4.0         rstudioapi_0.16.0 
## [37] lifecycle_1.0.4    lavaan_0.6-17      vctrs_0.6.5        mnormt_2.1.1      
## [41] evaluate_1.0.5     glue_1.8.0         farver_2.1.1       stats4_4.4.0      
## [45] fansi_1.0.6        rmarkdown_2.29     tools_4.4.0        pkgconfig_2.0.3   
## [49] htmltools_0.5.8.1

Automatic MGCFA

Background

Install

Demonstration

Session Info