1 Required libraries

knitr::opts_chunk$set(echo = TRUE)
library(lavaan)
library(semTools)
library(psych)
library(dplyr)
library(gt)
library(ggplot2)
library(afcommon)

2 Generate Sample Data

set.seed(123)
n <- 200

# Generate true latent factor
f1 <- rnorm(n)

# Generate indicators with standardized loadings and round to whole numbers (like Likert)
q1 <- round(0.8 * f1 + sqrt(1 - 0.8^2) * rnorm(n) + 4)  # Center at 4 
q2 <- round(0.7 * f1 + sqrt(1 - 0.7^2) * rnorm(n) + 4)  
q3 <- round(0.6 * f1 + sqrt(1 - 0.6^2) * rnorm(n) + 4)  

# Constrain to 1-7 range (like a 7-point Likert scale)
q1 <- pmin(pmax(q1, 1), 7)
q2 <- pmin(pmax(q2, 1), 7)
q3 <- pmin(pmax(q3, 1), 7)

df <- data.frame(q1 = q1, q2 = q2, q3 = q3)

# Add a group variable
df$group <- sample(c("A", "B"), n, replace = TRUE)

var_list <- c("q1", "q2", "q3")

3 Basic CFA Analysis

3.1 Step 1: CFA Analysis

cfa_results <- af_cfa(df = df, var_list = var_list, latent_var_name = "new_factor")

3.2 Step 2: Examine Model Fit

# Display CFA results table
af_gt_cfa_results_tbl(cfa_results$cfa_tbl)
Latent.var   Group   CFI   TLI   RMSEA   SRMR           Cronbach                          Alpha
new_factor   all     1     1     0       4.442187e-09   Questionable reliability (0.68)   0.6753071
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

Interpreting Fit Indices

  • CFI (Comparative Fit Index): Values > 0.90 indicate good fit
  • TLI (Tucker-Lewis Index): Values > 0.90 indicate good fit
  • RMSEA (Root Mean Square Error): Values < 0.08 indicate reasonable fit
  • SRMR (Standardized Root Mean Residual): Values < 0.08 indicate good fit
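
The raw index values can also be pulled directly from the returned lavaan fit, and Cronbach's alpha recomputed from the indicators, if you want numbers rather than the formatted table. A minimal sketch, assuming cfa_results is the object returned by af_cfa above:

# Headline fit indices from the lavaan fit object
lavaan::fitMeasures(cfa_results$fit, c("cfi", "tli", "rmsea", "srmr"))

# Cronbach's alpha for the raw indicators via psych
psych::alpha(df[, var_list])$total$raw_alpha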

3.3 Step 3: Visualize the Model

# Create SEM plot
af_create_sem_plot(cfa_results$fit)
title("Basic SEM Model")

3.4 Step 4: Latent Variable Prediction

updated_df <- df %>% mutate(new_var = cfa_results$latent_predict)

# Create a data frame in long format for plotting
df_plot <- data.frame(
  index = rep(1:length(cfa_results$latent_predict), 4),
  value = c(q1, q2, q3, cfa_results$latent_predict),
  method = factor(rep(c("q1", "q2", "q3", "Latent Prediction"), 
                      each = length(cfa_results$latent_predict)))
)

# Create the combined plot
ggplot(df_plot, aes(x = index, y = value, color = method)) +
  geom_point(alpha = 0.3) +  # Points with some transparency
  geom_smooth(se = FALSE) +  # Smoothed lines without confidence intervals
  theme_minimal() +
  labs(
    x = "Index",
    y = "Factor Score",
    color = "Method"  # Legend title
  )

3.5 Step 5: Compare to alternative prediction methods

3.5.1 Use CFA Loadings

loadings <- lavaan::standardizedSolution(cfa_results$fit) |>
  subset(op == "=~", select = est.std)
loadings_predict <- as.matrix(df[, var_list]) %*% 
  loadings$est.std / sum(loadings$est.std)
c <- cor(cfa_results$latent_predict, loadings_predict)

Correlation between the af_cfa (lavPredict) scores and the loadings-weighted manifest scores: 0.99

The loadings-based calculation (a weighted sum of the indicators) gives nearly the same results as lavPredict's regression method after rescaling (the approach used in af_cfa). The small differences arise because lavPredict uses a more sophisticated approach that accounts for:

  • Measurement error in each indicator
  • The full model-implied covariance structure
  • Different reliability of the indicators
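
If you want lavPredict's unscaled scores for a direct check, they can be requested from lavaan itself. A minimal sketch, assuming the cfa_results and loadings_predict objects from above (af_cfa rescales these scores internally, so the correlation, not the raw values, is the comparable quantity):

# Regression-method factor scores straight from lavaan (before rescaling)
raw_scores <- lavaan::lavPredict(cfa_results$fit, method = "regression")
cor(as.numeric(raw_scores), as.numeric(loadings_predict))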

3.5.2 Use Distance

distance_predict <- af_dist(df, var_list)
c <- cor(cfa_results$latent_predict, distance_predict)

Correlation between the af_cfa (lavPredict) scores and the distance-based scores: 0.98

3.5.3 Visualize Comparisons

# Create a data frame in long format for plotting
df_plot <- data.frame(
  index = rep(1:length(cfa_results$latent_predict), 3),
  value = c(cfa_results$latent_predict, loadings_predict, distance_predict),
  method = factor(rep(c("afcfa", "loading", "distance"), 
                      each = length(cfa_results$latent_predict)))
)

# Create the combined plot
ggplot(df_plot, aes(x = index, y = value, color = method)) +
  geom_point(alpha = 0.3) +  # Points with some transparency
  geom_smooth(se = FALSE) +  # Smoothed lines without confidence intervals
  theme_minimal() +
  labs(
    title = "Compare & Verify af_cfa to other prediction methods",
    x = "Index",
    y = "Factor Score",
    color = "Method"  # Legend title
  )

4 CFA Within Group Analysis

Perform a grouped CFA to support within-group comparisons.

4.1 Step 1: Test Measurement Invariance

Most researchers assess three levels of measurement invariance:

  • Configural invariance: equivalence of the model form
  • Metric invariance: equivalence of the factor loadings
  • Scalar invariance: equivalence of intercepts

A fourth level, strict invariance, implies equivalence of the unique variances (item residuals). In practice, meeting the first three levels of invariance, or achieving partial invariance, is considered sufficient to support appropriate cross-group comparisons of the latent constructs (Van de Schoot et al., 2012).
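
For reference, these nested models can be fit by hand with lavaan's group.equal argument and compared with lavTestLRT. The sketch below illustrates the idea; af_measurement_invariance, used next, wraps a comparison of this kind (its exact internals may differ):

inv_model  <- "new_factor =~ q1 + q2 + q3"
configural <- lavaan::cfa(inv_model, data = df, group = "group")
metric     <- lavaan::cfa(inv_model, data = df, group = "group",
                          group.equal = "loadings")
scalar     <- lavaan::cfa(inv_model, data = df, group = "group",
                          group.equal = c("loadings", "intercepts"))
strict     <- lavaan::cfa(inv_model, data = df, group = "group",
                          group.equal = c("loadings", "intercepts", "residuals"))
lavaan::lavTestLRT(configural, metric, scalar, strict)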

model <- "new_factor =~ q1 + q2 + q3"
result <- af_measurement_invariance(model = model, df = df, group = "group",
                                    var_list = c("q1", "q2", "q3"), latent_var_name = "new_factor")

Assessment of Measurement Invariance

Configural Model Fit:

  • CFI = 1.000, RMSEA = 0.000
  • Good configural fit established

Metric (Loading) Invariance:

  • Chi-square difference p = 0.562
  • ΔCFI = 0.000, ΔRMSEA = 0.000
  • ✓ Metric invariance supported

Scalar (Intercept) Invariance:

  • Chi-square difference p = 0.136
  • ΔCFI = -0.012, ΔRMSEA = 0.053
  • ○ Scalar invariance borderline but acceptable

Strict Invariance:

  • Chi-square difference p = 0.507
  • ΔCFI = 0.007, ΔRMSEA = -0.028
  • ○ Strict invariance acceptable given good absolute fit

OVERALL CONCLUSION: Full measurement invariance supported:

  • Metric invariance achieved confidently
  • Scalar invariance acceptable (borderline but supported by chi-square and absolute fit)
  • Strict invariance supported

You can meaningfully compare means, relationships, and variances across groups, though some caution is warranted for mean comparisons.

Comparison plot of the three invariance models

result$plots$latent_comparison
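
With scalar invariance supported, a quick sanity check on group differences can be run on the predicted factor scores. This is only a rough shortcut, since it treats factor scores as observed data; a formal latent mean comparison would constrain versus free the latent means in the grouped model:

# Rough check of group mean differences using the factor scores from Section 3
t.test(cfa_results$latent_predict ~ df$group)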

4.1.1 Alternative Test

Note: semTools::measurementInvariance() is deprecated; it is shown here for comparison only.

model <- "new_factor =~ q1 + q2 + q3"
measurementInvariance(model = model, data = df, group = "group")

Measurement invariance models:

Model 1 : fit.configural
Model 2 : fit.loadings
Model 3 : fit.intercepts
Model 4 : fit.means


Chi-Squared Difference Test

               Df    AIC    BIC  Chisq Chisq diff    RMSEA Df diff Pr(>Chisq)
fit.configural  0 1596.4 1655.8 0.0000                                       
fit.loadings    2 1593.6 1646.3 1.1512     1.1512 0.000000       2     0.5624
fit.intercepts  4 1593.6 1639.7 5.1370     3.9859 0.099646       2     0.1363
fit.means       5 1591.8 1634.6 5.3298     0.1928 0.000000       1     0.6606


Fit measures:

                 cfi rmsea cfi.delta rmsea.delta
fit.configural 1.000 0.000        NA          NA
fit.loadings   1.000 0.000     0.000       0.000
fit.intercepts 0.988 0.053     0.012       0.053
fit.means      0.996 0.026     0.009       0.028

Claude Interpretation

The dataset has full measurement invariance including mean invariance. The measurement model operates identically across groups. The groups show similar average levels of the construct. All comparisons across groups (relationships, means, etc.) are meaningful.

Configural Invariance (baseline model):

  • Perfect fit with CFI = 1.000 and RMSEA = 0.000
  • 0 degrees of freedom suggests this is a just-identified model
  • This indicates the same factor structure works perfectly across groups

Metric/Loading Invariance:

  • Non-significant chi-square difference (p = 0.562)
  • No change in CFI or RMSEA
  • Strongly supports equal factor loadings across groups
  • The construct is measured identically across groups

Scalar/Intercept Invariance:

  • Non-significant chi-square difference (p = 0.136)
  • Small CFI decrease (0.012) and RMSEA increase (0.053)
  • These changes are at the borderline of traditional cutoffs (ΔCFI < 0.01, ΔRMSEA < 0.05)
  • Suggests mostly equivalent intercepts, though with some minor differences

Mean Invariance:

  • Non-significant chi-square difference (p = 0.661)
  • Fit actually improves (CFI increases, RMSEA decreases)
  • Strongly supports equal latent means across groups
  • Groups have similar average levels of the construct

4.2 Step 2: Run Group CFA analysis

cfa_results <- af_cfa(df = df, var_list = var_list, latent_var_name = "new_factor", group = "group")

4.3 Step 3: Examine Model Fit

# Display CFA results table
af_gt_cfa_results_tbl(cfa_results$cfa_tbl)
Latent.var   Group   CFI   TLI   RMSEA   SRMR           Cronbach                          Alpha
new_factor   all     1     1     0       9.388627e-09   Questionable reliability (0.68)   0.6753071
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

Interpreting Fit Indices

  • CFI (Comparative Fit Index): Values > 0.90 indicate good fit
  • TLI (Tucker-Lewis Index): Values > 0.90 indicate good fit
  • RMSEA (Root Mean Square Error): Values < 0.08 indicate reasonable fit
  • SRMR (Standardized Root Mean Residual): Values < 0.08 indicate good fit

4.4 Step 4: Visualize the Model

# Create SEM plot
af_create_sem_plot(cfa_results$fit)

4.5 Step 5: Latent Variable Prediction

updated_df <- df %>% mutate(new_var = cfa_results$latent_predict)

# Create a data frame in long format for plotting
df_plot <- data.frame(
  index = rep(1:length(cfa_results$latent_predict), 4),
  value = c(q1, q2, q3, cfa_results$latent_predict),
  method = factor(rep(c("q1", "q2", "q3", "Latent (grouped) Prediction"), 
                      each = length(cfa_results$latent_predict)))
)

# Create the combined plot
ggplot(df_plot, aes(x = index, y = value, color = method)) +
  geom_point(alpha = 0.3) +  # Points with some transparency
  geom_smooth(se = FALSE) +  # Smoothed lines without confidence intervals
  theme_minimal() +
  labs(
    x = "Index",
    y = "Factor Score",
    color = "Method"  # Legend title
  )

5 CFA Between Group Analysis

Perform an independent CFA for each group to support between-group comparisons.
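
Conceptually, this amounts to splitting the data by group and fitting a separate lavaan model to each subset. A minimal sketch of that idea; af_cfa_between_group, below, additionally assembles the fit table and factor-score predictions:

# One independent CFA per group, with headline fit indices collected
fits <- lapply(split(df, df$group), function(d)
  lavaan::cfa("new_factor =~ q1 + q2 + q3", data = d))
sapply(fits, lavaan::fitMeasures, fit.measures = c("cfi", "tli", "rmsea", "srmr"))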

# Perform CFA by group
group_cfa <- af_cfa_between_group(df = df, group_var = "group", latent_var = "new_factor", cfa_vars = var_list)

# Display results
af_gt_cfa_results_tbl(group_cfa$cfa_table)
Latent.var   Group   CFI   TLI   RMSEA   SRMR           Cronbach                          Alpha
new_factor   A       1     1     0       1.865890e-08   Questionable reliability (0.67)   0.6738485
new_factor   B       1     1     0       4.066879e-08   Questionable reliability (0.68)   0.6842924
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

updated_df <- group_cfa$updated_df

6 Custom Model Analysis

You can specify custom models using lavaan syntax:

custom_model <- '
custom1 =~ q1 + q2 
custom2 =~ custom1 + q3
'

custom_results <- af_cfa(df = df, var_list = var_list, latent_var_name = "custom2",
                         group_id = "custom", model = custom_model)

# Display CFA results table
af_gt_cfa_results_tbl(custom_results$cfa_tbl)
Latent.var   Group    CFI   TLI   RMSEA   SRMR           Cronbach                          Alpha
custom2      custom   NA    NA    0       2.220022e-09   Questionable reliability (0.68)   0.6753071
CFI (Comparative Fit Index) values above 0.90 indicate good fit.
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit.
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit.
SRMR (Standardized Root Mean Residual) values below 0.08 indicate good fit.

# Create SEM plot
af_create_sem_plot(custom_results$fit)