# Simulate three Likert-style indicators driven by a single latent factor.
set.seed(123)
n <- 200

# Latent factor scores (standard normal)
f1 <- rnorm(n)

# Continuous indicators: loading * factor + residual noise, shifted so the
# responses are centred on 4. The noise draws happen in q1, q2, q3 order.
q1 <- 0.8 * f1 + sqrt(1 - 0.8^2) * rnorm(n) + 4
q2 <- 0.7 * f1 + sqrt(1 - 0.7^2) * rnorm(n) + 4
q3 <- 0.6 * f1 + sqrt(1 - 0.6^2) * rnorm(n) + 4

# Round to whole numbers and clamp to the 1-7 range of a 7-point scale
q1 <- pmax(1, pmin(7, round(q1)))
q2 <- pmax(1, pmin(7, round(q2)))
q3 <- pmax(1, pmin(7, round(q3)))

# Assemble the analysis data set (column names are taken from the objects)
df <- data.frame(q1, q2, q3)

# Randomly assign every respondent to group "A" or "B"
df$group <- sample(c("A", "B"), size = n, replace = TRUE)

# Names of the manifest variables used by the CFA calls
var_list <- c("q1", "q2", "q3")
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
new_factor | all | 1 | 1 | 0 | 4.442187e-09 | Questionable reliability (0.68) | 0.6753071 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Square Residual) values below 0.08 indicate good fit. |
Interpreting Fit Indices
# Attach the latent prediction stored in cfa_results to the data as a new column
updated_df <- df %>% mutate(new_var = cfa_results$latent_predict)
# Create a data frame in long format for plotting: the three observed items
# and the latent prediction are stacked and share one observation index.
# seq_along() is used instead of 1:length() so a zero-length prediction gives
# an empty index rather than c(1, 0).
df_plot <- data.frame(
  index = rep(seq_along(cfa_results$latent_predict), 4),
  value = c(q1, q2, q3, cfa_results$latent_predict),
  method = factor(rep(c("q1", "q2", "q3", "Latent Prediction"),
    each = length(cfa_results$latent_predict)
  ))
)
# Create the combined plot
ggplot(df_plot, aes(x = index, y = value, color = method)) +
  geom_point(alpha = 0.3) + # Points with some transparency
  geom_smooth(se = FALSE) + # Smoothed lines without confidence intervals
  theme_minimal() +
  labs(
    x = "Index",
    y = "Factor Score",
    color = "Method" # Legend title
  )
# Standardized solution of the fitted model, reduced to the measurement
# ("=~") rows and the standardized estimate column.
loadings <- subset(
  lavaan::standardizedSolution(cfa_results$fit),
  op == "=~",
  select = est.std
)

# Loadings-weighted sum of the manifest items, divided by the sum of the
# loadings so the weights add up to one.
loadings_predict <- (as.matrix(df[, var_list]) %*% loadings[["est.std"]]) /
  sum(loadings[["est.std"]])

# Correlation between the stored latent prediction and the loadings-based one.
# NOTE(review): the name `c` shadows base::c for value lookups; it is kept
# because text outside this chunk may read it -- consider renaming.
c <- cor(x = cfa_results$latent_predict, y = loadings_predict)
Correlation between af_cfa lavpredict to loadings x manifests: 0.99
The loadings-based calculation (weighted sum of indicators) gives nearly the same results as lavPredict’s regression method after rescaling (used in af_cfa). The small differences are because lavPredict uses a more sophisticated approach that accounts for:
Correlation between af_cfa lavpredict to distance function: 0.98
# Create a data frame in long format for plotting: the af_cfa prediction, the
# loadings-based prediction and the distance-based prediction are stacked and
# share one observation index.
# seq_along() is used instead of 1:length() so a zero-length prediction gives
# an empty index rather than c(1, 0).
df_plot <- data.frame(
  index = rep(seq_along(cfa_results$latent_predict), 3),
  value = c(cfa_results$latent_predict, loadings_predict, distance_predict),
  method = factor(rep(c("afcfa", "loading", "distance"),
    each = length(cfa_results$latent_predict)
  ))
)
# Create the combined plot
ggplot(df_plot, aes(x = index, y = value, color = method)) +
  geom_point(alpha = 0.3) + # Points with some transparency
  geom_smooth(se = FALSE) + # Smoothed lines without confidence intervals
  theme_minimal() +
  labs(
    title = "Compare & Verify af_cfa to other prediction methods",
    x = "Index",
    y = "Factor Score",
    color = "Method" # Legend title
  )
Perform group CFA to support within comparisons
Most researchers assess three levels of measurement invariance:
A fourth level, strict invariance implies equivalence of the unique variances or item residuals. In practice, meeting the first three levels of invariance, or achieving partial invariance, is considered enough to guarantee appropriate cross-group comparisons of the latent constructs (Van de Schoot et al., 2012).
# One-factor measurement model in lavaan syntax
model <- "new_factor =~ q1 + q2 + q3"

# Assess measurement invariance of that model across the levels of `group`
result <- af_measurement_invariance(
  model = model,
  df = df,
  group = "group",
  var_list = c("q1", "q2", "q3"),
  latent_var_name = "new_factor"
)
Assessment of Measurement Invariance
Configural Model Fit:
Metric (Loading) Invariance:
Scalar (Intercept) Invariance:
Strict Invariance:
OVERALL CONCLUSION: Full measurement invariance supported:
You can meaningfully compare means, relationships, and variances across groups, though some caution is warranted for mean comparisons.
Comparison plot of three invariance models
Soon to be deprecated
# Legacy invariance check (flagged above as soon to be deprecated): compares
# the configural, loadings, intercepts and means models across groups.
model <- "new_factor =~ q1 + q2 + q3"
measurementInvariance(
  model = model,
  data = df,
  group = "group"
)
Measurement invariance models:
Model 1 : fit.configural
Model 2 : fit.loadings
Model 3 : fit.intercepts
Model 4 : fit.means
Chi-Squared Difference Test
Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
fit.configural 0 1596.4 1655.8 0.0000
fit.loadings 2 1593.6 1646.3 1.1512 1.1512 0.000000 2 0.5624
fit.intercepts 4 1593.6 1639.7 5.1370 3.9859 0.099646 2 0.1363
fit.means 5 1591.8 1634.6 5.3298 0.1928 0.000000 1 0.6606
Fit measures:
cfi rmsea cfi.delta rmsea.delta
fit.configural 1.000 0.000 NA NA
fit.loadings 1.000 0.000 0.000 0.000
fit.intercepts 0.988 0.053 0.012 0.053
fit.means 0.996 0.026 0.009 0.028
Claude Interpretation
The dataset has full measurement invariance including mean invariance. The measurement model operates identically across groups. The groups show similar average levels of the construct. All comparisons across groups (relationships, means, etc.) are meaningful.
Scalar/Intercept Invariance:
Mean Invariance:
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
new_factor | all | 1 | 1 | 0 | 9.388627e-09 | Questionable reliability (0.68) | 0.6753071 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Square Residual) values below 0.08 indicate good fit. |
Interpreting Fit Indices
# Attach the (grouped) latent prediction stored in cfa_results as a new column
updated_df <- df %>% mutate(new_var = cfa_results$latent_predict)
# Create a data frame in long format for plotting: the three observed items
# and the grouped latent prediction are stacked and share one observation
# index.
# seq_along() is used instead of 1:length() so a zero-length prediction gives
# an empty index rather than c(1, 0).
df_plot <- data.frame(
  index = rep(seq_along(cfa_results$latent_predict), 4),
  value = c(q1, q2, q3, cfa_results$latent_predict),
  method = factor(rep(c("q1", "q2", "q3", "Latent (grouped) Prediction"),
    each = length(cfa_results$latent_predict)
  ))
)
# Create the combined plot
ggplot(df_plot, aes(x = index, y = value, color = method)) +
  geom_point(alpha = 0.3) + # Points with some transparency
  geom_smooth(se = FALSE) + # Smoothed lines without confidence intervals
  theme_minimal() +
  labs(
    x = "Index",
    y = "Factor Score",
    color = "Method" # Legend title
  )
Perform an independent CFA for each group to support between-group comparisons
# Fit the CFA separately for each level of `group`
group_cfa <- af_cfa_between_group(
  df = df,
  group_var = "group",
  latent_var = "new_factor",
  cfa_vars = var_list
)

# Render the per-group fit and reliability table
af_gt_cfa_results_tbl(group_cfa$cfa_table)
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
new_factor | A | 1 | 1 | 0 | 1.865890e-08 | Questionable reliability (0.67) | 0.6738485 |
new_factor | B | 1 | 1 | 0 | 4.066879e-08 | Questionable reliability (0.68) | 0.6842924 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Square Residual) values below 0.08 indicate good fit. |
You can specify custom models using lavaan syntax:
# Custom model in lavaan syntax: custom1 is measured by q1 and q2, and custom2
# is measured by custom1 together with q3.
custom_model <- "
custom1 =~ q1 + q2
custom2 =~ custom1 + q3
"

# Run af_cfa with the custom model, reporting on custom2
custom_results <- af_cfa(
  df = df,
  var_list = var_list,
  latent_var_name = "custom2",
  group_id = "custom",
  model = custom_model
)

# Display CFA results table
af_gt_cfa_results_tbl(custom_results$cfa_tbl)
Latent.var | Group | CFI | TLI | RMSEA | SRMR | Cronbach | Alpha |
---|---|---|---|---|---|---|---|
custom2 | custom | NA | NA | 0 | 2.220022e-09 | Questionable reliability (0.68) | 0.6753071 |
CFI (Comparative Fit Index) values above 0.90 indicate good fit. | |||||||
TLI (Tucker-Lewis Index) values above 0.90 indicate good fit. | |||||||
RMSEA (Root Mean Square Error of Approximation) values below 0.08 indicate reasonable fit, below 0.05 indicate good fit. | |||||||
SRMR (Standardized Root Mean Square Residual) values below 0.08 indicate good fit. |