# Core
library(tidyverse)
library(knitr)
# SEM
library(lavaan)
library(semPlot)
library(semTools)
# Assumption Checking
library(MVN)
library(psych)
library(moments)
library(naniar)
library(car)
# Visualization
library(ggplot2)
library(corrplot)
# Resolve namespace conflicts
select <- dplyr::select
filter <- dplyr::filterdf_raw <- read.csv("RAP_Dataset_Public_deident.csv",
stringsAsFactors = FALSE)
cat("Rows :", nrow(df_raw), "\n")## Rows : 170
## Columns : 32
## Column Names:
## Female_1, White_1, Cohort_1, Major_d_2, Major_d_3, Major_d_4, Matching_GPA_1, Rank_d_2, Rank_d_3, WVRes_d_1, Legacy_R_1, SATtotEquiv_1, PellGrantRecipient_R, RAP, FMNTa_1, PriorAflMent_1, FMNTi_1r, gendermatch, ethnicitymatch, SomeRes_1, MoreRes_1, RAPxSome, RAPxMore, MentorPsychSocSupport_w1, MentorInstrumentSupport_w1, MentorRoleModelingSupport_w1, sMSAT_1, sMSIM_1, MentorPsychSocSupport_w2, MentorInstrumentSupport_w2, MentorRoleModelingSupport_w2, sMSAT_2
df <- df_raw
sentinel_cols <- c(
"MentorPsychSocSupport_w2",
"MentorInstrumentSupport_w2",
"MentorRoleModelingSupport_w2",
"sMSAT_2"
)
for (col in sentinel_cols) {
df[[col]] <- ifelse(df[[col]] == -99, NA, df[[col]])
}
cat("Missing Data After Recoding -99 to NA:\n")## Missing Data After Recoding -99 to NA:
## Female_1 White_1
## 0 0
## Cohort_1 Major_d_2
## 0 0
## Major_d_3 Major_d_4
## 0 0
## Matching_GPA_1 Rank_d_2
## 0 0
## Rank_d_3 WVRes_d_1
## 0 0
## Legacy_R_1 SATtotEquiv_1
## 0 0
## PellGrantRecipient_R RAP
## 0 0
## FMNTa_1 PriorAflMent_1
## 0 0
## FMNTi_1r gendermatch
## 0 0
## ethnicitymatch SomeRes_1
## 0 0
## MoreRes_1 RAPxSome
## 0 0
## RAPxMore MentorPsychSocSupport_w1
## 0 0
## MentorInstrumentSupport_w1 MentorRoleModelingSupport_w1
## 0 0
## sMSAT_1 sMSIM_1
## 0 0
## MentorPsychSocSupport_w2 MentorInstrumentSupport_w2
## 66 0
## MentorRoleModelingSupport_w2 sMSAT_2
## 66 66
w1_indicators <- c(
"MentorPsychSocSupport_w1",
"MentorInstrumentSupport_w1",
"MentorRoleModelingSupport_w1"
)
w2_indicators <- c(
"MentorPsychSocSupport_w2",
"MentorInstrumentSupport_w2",
"MentorRoleModelingSupport_w2"
)
sem_vars <- c(
w1_indicators, w2_indicators,
"sMSAT_1", "sMSAT_2",
"gendermatch", "ethnicitymatch",
"RAP", "FMNTi_1r"
)
df_sem <- df[, sem_vars]
cat("Rows:", nrow(df_sem), "| Columns:", ncol(df_sem), "\n")## Rows: 170 | Columns: 12
continuous_sem <- c(w1_indicators, w2_indicators, "sMSAT_1", "sMSAT_2")
desc <- psych::describe(df_sem)
knitr::kable(
round(desc[, c("n", "mean", "sd", "min", "max", "skew", "kurtosis")], 3),
caption = "Descriptive Statistics of SEM Variables"
)| n | mean | sd | min | max | skew | kurtosis | |
|---|---|---|---|---|---|---|---|
| MentorPsychSocSupport_w1 | 170 | 5.225 | 1.289 | 1.000 | 7 | -0.496 | -0.240 |
| MentorInstrumentSupport_w1 | 170 | 4.971 | 1.250 | 1.000 | 7 | -0.560 | 0.201 |
| MentorRoleModelingSupport_w1 | 170 | 5.338 | 1.228 | 1.500 | 7 | -0.651 | 0.296 |
| MentorPsychSocSupport_w2 | 104 | 5.322 | 1.331 | 1.000 | 7 | -0.709 | 0.295 |
| MentorInstrumentSupport_w2 | 170 | 5.397 | 1.238 | 1.000 | 7 | -0.742 | 0.615 |
| MentorRoleModelingSupport_w2 | 104 | 5.368 | 1.252 | 1.750 | 7 | -0.393 | -0.398 |
| sMSAT_1 | 170 | 6.027 | 1.048 | 1.333 | 7 | -1.176 | 1.487 |
| sMSAT_2 | 104 | 6.080 | 1.034 | 2.333 | 7 | -1.064 | 0.871 |
| gendermatch | 170 | 0.547 | 0.499 | 0.000 | 1 | -0.187 | -1.976 |
| ethnicitymatch | 170 | 0.688 | 0.465 | 0.000 | 1 | -0.806 | -1.359 |
| RAP | 170 | 0.612 | 0.489 | 0.000 | 1 | -0.455 | -1.804 |
| FMNTi_1r | 170 | 3.332 | 3.488 | 0.000 | 17 | 1.572 | 2.146 |
uni_norm <- data.frame(
Variable = character(),
Skewness = numeric(),
Kurtosis = numeric(),
Status = character(),
stringsAsFactors = FALSE
)
for (v in continuous_sem) {
vals <- na.omit(df_sem[[v]])
sk <- moments::skewness(vals)
ku <- moments::kurtosis(vals) - 3 # excess kurtosis
flag <- ifelse(abs(sk) > 2 | abs(ku) > 7, "NON-NORMAL", "OK")
uni_norm <- rbind(uni_norm,
data.frame(Variable = v,
Skewness = round(sk, 3),
Kurtosis = round(ku, 3),
Status = flag))
}
knitr::kable(uni_norm,
caption = "Univariate Normality Check (|skew| < 2, |kurt| < 7)",
row.names = FALSE)| Variable | Skewness | Kurtosis | Status |
|---|---|---|---|
| MentorPsychSocSupport_w1 | -0.500 | -0.207 | OK |
| MentorInstrumentSupport_w1 | -0.565 | 0.239 | OK |
| MentorRoleModelingSupport_w1 | -0.657 | 0.335 | OK |
| MentorPsychSocSupport_w2 | -0.719 | 0.359 | OK |
| MentorInstrumentSupport_w2 | -0.748 | 0.658 | OK |
| MentorRoleModelingSupport_w2 | -0.399 | -0.347 | OK |
| sMSAT_1 | -1.186 | 1.540 | OK |
| sMSAT_2 | -1.079 | 0.947 | OK |
df_mvn <- na.omit(df_sem[, continuous_sem])
cat("Complete cases for MVN test:", nrow(df_mvn), "\n\n")## Complete cases for MVN test: 104
mvn_result <- tryCatch({
MVN::mvn(data = df_mvn,
mvnTest = "mardia",
univarTest = "SW",
multivariatePlot = "none")
}, error = function(e) {
MVN::mvn(data = df_mvn,
mvn_test = "mardia",
univariate_test = "SW",
multivariate_outlier_method = "none")
})
mvn_norm <- if (!is.null(mvn_result$multivariateNormality)) {
mvn_result$multivariateNormality
} else if (!is.null(mvn_result$multivariate_normality)) {
mvn_result$multivariate_normality
} else {
mvn_result[[1]]
}
print(mvn_norm)## Test Statistic p.value Method MVN
## 1 Mardia Skewness 304.620 <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis 7.234 <0.001 asymptotic ✗ Not normal
##
## Conclusion: If violated, MLR estimator with FIML will be used.
mah_dist <- mahalanobis(df_mvn,
center = colMeans(df_mvn),
cov = cov(df_mvn))
p_mah <- pchisq(mah_dist, df = ncol(df_mvn), lower.tail = FALSE)
n_outliers <- sum(p_mah < 0.001)
cat(sprintf("Outliers (Mahalanobis p < .001): %d out of %d cases\n",
n_outliers, nrow(df_mvn)))## Outliers (Mahalanobis p < .001): 1 out of 104 cases
if (n_outliers > 0) {
cat("Outlier row indices (in complete-case subset):\n")
print(which(p_mah < 0.001))
}## Outlier row indices (in complete-case subset):
## 170
## 104
## → Outliers are retained; MLR estimator handles their influence.
pct_missing <- data.frame(
Variable = names(df_sem),
Missing_n = colSums(is.na(df_sem)),
Missing_pct = round(colMeans(is.na(df_sem)) * 100, 2),
row.names = NULL
)
knitr::kable(pct_missing,
caption = "Missing Data Summary per Variable",
row.names = FALSE)| Variable | Missing_n | Missing_pct |
|---|---|---|
| MentorPsychSocSupport_w1 | 0 | 0.00 |
| MentorInstrumentSupport_w1 | 0 | 0.00 |
| MentorRoleModelingSupport_w1 | 0 | 0.00 |
| MentorPsychSocSupport_w2 | 66 | 38.82 |
| MentorInstrumentSupport_w2 | 0 | 0.00 |
| MentorRoleModelingSupport_w2 | 66 | 38.82 |
| sMSAT_1 | 0 | 0.00 |
| sMSAT_2 | 66 | 38.82 |
| gendermatch | 0 | 0.00 |
| ethnicitymatch | 0 | 0.00 |
| RAP | 0 | 0.00 |
| FMNTi_1r | 0 | 0.00 |
little_result <- naniar::mcar_test(df_sem)
cat(sprintf(
"\nLittle's MCAR Test: chi2(%.0f) = %.3f, p = %.4f\n",
little_result$df,
little_result$statistic,
little_result$p.value
))##
## Little's MCAR Test: chi2(9) = 25.669, p = 0.0023
if (little_result$p.value > 0.05) {
cat("→ Data appear MCAR. FIML or listwise deletion both defensible.\n")
} else {
cat("→ Data NOT MCAR (MAR assumed). FIML estimation in lavaan is required.\n")
}## → Data NOT MCAR (MAR assumed). FIML estimation in lavaan is required.
## → Strategy adopted: Full Information Maximum Likelihood (FIML).
n_total <- nrow(df_sem)
n_complete <- nrow(df_mvn)
n_params <- 3 + 3 + 3 + 1 + 4
ratio <- n_total / n_params
sample_check <- data.frame(
Metric = c("Total N", "Complete N",
"Estimated Free Parameters",
"N:Parameter Ratio",
"Minimum Recommended",
"Status"),
Value = c(n_total, n_complete,
n_params,
round(ratio, 1),
"≥ 10:1",
ifelse(ratio >= 10, "ADEQUATE", "BORDERLINE"))
)
knitr::kable(sample_check,
caption = "Sample Size Adequacy Check",
row.names = FALSE)| Metric | Value |
|---|---|
| Total N | 170 |
| Complete N | 104 |
| Estimated Free Parameters | 14 |
| N:Parameter Ratio | 12.1 |
| Minimum Recommended | ≥ 10:1 |
| Status | ADEQUATE |
cor_w1 <- cor(na.omit(df_sem[, w1_indicators]))
cor_w2 <- cor(na.omit(df_sem[, w2_indicators]))
cat("Wave 1 Indicator Intercorrelations:\n")## Wave 1 Indicator Intercorrelations:
## MentorPsychSocSupport_w1
## MentorPsychSocSupport_w1 1.000
## MentorInstrumentSupport_w1 0.549
## MentorRoleModelingSupport_w1 0.542
## MentorInstrumentSupport_w1
## MentorPsychSocSupport_w1 0.549
## MentorInstrumentSupport_w1 1.000
## MentorRoleModelingSupport_w1 0.498
## MentorRoleModelingSupport_w1
## MentorPsychSocSupport_w1 0.542
## MentorInstrumentSupport_w1 0.498
## MentorRoleModelingSupport_w1 1.000
##
## Wave 2 Indicator Intercorrelations:
## MentorPsychSocSupport_w2
## MentorPsychSocSupport_w2 1.000
## MentorInstrumentSupport_w2 0.608
## MentorRoleModelingSupport_w2 0.591
## MentorInstrumentSupport_w2
## MentorPsychSocSupport_w2 0.608
## MentorInstrumentSupport_w2 1.000
## MentorRoleModelingSupport_w2 0.613
## MentorRoleModelingSupport_w2
## MentorPsychSocSupport_w2 0.591
## MentorInstrumentSupport_w2 0.613
## MentorRoleModelingSupport_w2 1.000
alpha_w1 <- psych::alpha(na.omit(df_sem[, w1_indicators]))
alpha_w2 <- psych::alpha(na.omit(df_sem[, w2_indicators]))
alpha_summary <- data.frame(
Construct = c("MentorSupport_W1", "MentorSupport_W2"),
Cronbach_Alpha = round(c(alpha_w1$total$raw_alpha,
alpha_w2$total$raw_alpha), 3),
Status = ifelse(
c(alpha_w1$total$raw_alpha, alpha_w2$total$raw_alpha) >= 0.80,
"Good (≥ 0.80)",
ifelse(
c(alpha_w1$total$raw_alpha, alpha_w2$total$raw_alpha) >= 0.70,
"Acceptable (≥ 0.70)", "Poor (< 0.70)"
)
)
)
knitr::kable(alpha_summary,
caption = "Cronbach's Alpha — Internal Consistency",
row.names = FALSE)| Construct | Cronbach_Alpha | Status |
|---|---|---|
| MentorSupport_W1 | 0.772 | Acceptable (≥ 0.70) |
| MentorSupport_W2 | 0.814 | Good (≥ 0.80) |
df_all_ind <- na.omit(df_sem[, c(w1_indicators, w2_indicators)])
bartlett_res <- psych::cortest.bartlett(cor(df_all_ind),
n = nrow(df_all_ind))
kmo_res <- psych::KMO(cor(df_all_ind))
kmo_label <- ifelse(kmo_res$MSA >= 0.9, "Marvelous",
ifelse(kmo_res$MSA >= 0.8, "Meritorious",
ifelse(kmo_res$MSA >= 0.7, "Middling",
ifelse(kmo_res$MSA >= 0.6, "Mediocre", "Miserable"))))
bartlett_kmo <- data.frame(
Test = c("Bartlett's Test of Sphericity", "KMO (Overall MSA)"),
Value = c(round(bartlett_res$chisq, 3), round(kmo_res$MSA, 3)),
df_p = c(paste0("df = ", bartlett_res$df,
", p = ", round(bartlett_res$p.value, 6)),
paste0("[", kmo_label, "]")),
Status = c(ifelse(bartlett_res$p.value < 0.05, "PASSED", "FAILED"),
ifelse(kmo_res$MSA >= 0.60, "PASSED", "FAILED"))
)
knitr::kable(bartlett_kmo,
caption = "Bartlett's Test & KMO — Factor Structure Validity",
col.names = c("Test", "Value", "Detail", "Status"),
row.names = FALSE)| Test | Value | Detail | Status |
|---|---|---|---|
| Bartlett’s Test of Sphericity | 270.833 | df = 15, p = 0 | PASSED |
| KMO (Overall MSA) | 0.675 | [Mediocre] | PASSED |
assumption_summary <- data.frame(
No = 1:9,
Assumption = c(
"Univariate Normality",
"Multivariate Normality",
"Multivariate Outliers",
"Missing Data (MCAR/MAR)",
"Sample Size Adequacy",
"Indicator Intercorrelations",
"Internal Consistency (Alpha)",
"Bartlett's Sphericity",
"KMO (MSA)"
),
Test_Method = c(
"Skewness |<2|, Kurtosis |<7|",
"Mardia's Test",
"Mahalanobis Distance (p < .001)",
"Little's MCAR Test",
"N:parameter ratio ≥ 10:1",
"Pearson Correlation Matrix",
"Cronbach's Alpha ≥ .70",
"χ² significance (p < .05)",
"MSA ≥ .60"
),
Decision = c(
"See output above",
"MLR estimator applied if violated",
"Retained; handled by MLR",
"FIML adopted",
"See output above",
"Within acceptable range",
"See output above",
"See output above",
"See output above"
)
)
knitr::kable(assumption_summary,
caption = "Summary Table — SEM Assumption Checking",
row.names = FALSE)| No | Assumption | Test_Method | Decision |
|---|---|---|---|
| 1 | Univariate Normality | Skewness |<2|, Kurtosis |<7| | See output above |
| 2 | Multivariate Normality | Mardia’s Test | MLR estimator applied if violated |
| 3 | Multivariate Outliers | Mahalanobis Distance (p < .001) | Retained; handled by MLR |
| 4 | Missing Data (MCAR/MAR) | Little’s MCAR Test | FIML adopted |
| 5 | Sample Size Adequacy | N:parameter ratio ≥ 10:1 | See output above |
| 6 | Indicator Intercorrelations | Pearson Correlation Matrix | Within acceptable range |
| 7 | Internal Consistency (Alpha) | Cronbach’s Alpha ≥ .70 | See output above |
| 8 | Bartlett’s Sphericity | χ² significance (p < .05) | See output above |
| 9 | KMO (MSA) | MSA ≥ .60 | See output above |
cfa_model <- '
# Latent factor: Wave 1 Mentor Support
MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
MentorInstrumentSupport_w1 +
MentorRoleModelingSupport_w1
# Latent factor: Wave 2 Mentor Support
MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
MentorInstrumentSupport_w2 +
MentorRoleModelingSupport_w2
'
cfa_fit <- lavaan::cfa(
model = cfa_model,
data = df_sem,
estimator = "MLR",
missing = "FIML",
std.lv = TRUE
)cfa_fit_idx <- lavaan::fitmeasures(cfa_fit, c(
"chisq", "df", "pvalue",
"cfi", "tli",
"rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
"srmr", "aic", "bic"
))
cfi_c <- cfa_fit_idx["cfi"]
rmsea_c <- cfa_fit_idx["rmsea"]
srmr_c <- cfa_fit_idx["srmr"]
cfa_fit_table <- data.frame(
Index = c("Chi-Square (χ²)", "df", "p-value",
"CFI", "TLI",
"RMSEA", "RMSEA 90% CI Lower", "RMSEA 90% CI Upper",
"SRMR", "AIC", "BIC"),
Value = round(as.numeric(cfa_fit_idx), 4),
Threshold = c("—", "—", "> .05",
"≥ .95", "≥ .95",
"≤ .06", "—", "—",
"≤ .08", "Lower = better", "Lower = better"),
Status = c("—", "—",
ifelse(cfa_fit_idx["pvalue"] > .05, "Good", "Check"),
ifelse(cfi_c >= .95, "Good", ifelse(cfi_c >= .90, "Acceptable", "Poor")),
ifelse(cfa_fit_idx["tli"] >= .95, "Good",
ifelse(cfa_fit_idx["tli"] >= .90, "Acceptable", "Poor")),
ifelse(rmsea_c <= .06, "Good", ifelse(rmsea_c <= .08, "Acceptable", "Poor")),
"—", "—",
ifelse(srmr_c <= .08, "Good", ifelse(srmr_c <= .10, "Acceptable", "Poor")),
"—", "—")
)
knitr::kable(cfa_fit_table,
caption = "CFA Fit Indices",
row.names = FALSE)| Index | Value | Threshold | Status |
|---|---|---|---|
| Chi-Square (χ²) | 64.4678 | — | — |
| df | 8.0000 | — | — |
| p-value | 0.0000 | > .05 | Check |
| CFI | 0.8530 | ≥ .95 | Poor |
| TLI | 0.7243 | ≥ .95 | Poor |
| RMSEA | 0.2038 | ≤ .06 | Poor |
| RMSEA 90% CI Lower | 0.1593 | — | — |
| RMSEA 90% CI Upper | 0.2513 | — | — |
| SRMR | 0.0580 | ≤ .08 | Good |
| AIC | 2628.0817 | Lower = better | — |
| BIC | 2687.6619 | Lower = better | — |
cfa_params <- lavaan::standardizedsolution(cfa_fit)
cfa_loadings <- cfa_params[cfa_params$op == "=~", ]
cfa_load_tbl <- cfa_loadings[, c("lhs", "rhs", "est.std", "se", "z", "pvalue")]
cfa_load_tbl[, c("est.std", "se", "z", "pvalue")] <-
round(cfa_load_tbl[, c("est.std", "se", "z", "pvalue")], 4)
knitr::kable(
cfa_load_tbl,
caption = "CFA Standardized Factor Loadings",
col.names = c("Latent Factor", "Indicator", "Std. Loading", "SE", "z", "p-value"),
row.names = FALSE
)| Latent Factor | Indicator | Std. Loading | SE | z | p-value |
|---|---|---|---|---|---|
| MentorSupport_W1 | MentorPsychSocSupport_w1 | 0.6801 | 0.0673 | 10.1064 | 0 |
| MentorSupport_W1 | MentorInstrumentSupport_w1 | 0.8309 | 0.0569 | 14.6073 | 0 |
| MentorSupport_W1 | MentorRoleModelingSupport_w1 | 0.6383 | 0.0783 | 8.1576 | 0 |
| MentorSupport_W2 | MentorPsychSocSupport_w2 | 0.7407 | 0.0701 | 10.5610 | 0 |
| MentorSupport_W2 | MentorInstrumentSupport_w2 | 0.9353 | 0.0502 | 18.6485 | 0 |
| MentorSupport_W2 | MentorRoleModelingSupport_w2 | 0.7366 | 0.0728 | 10.1237 | 0 |
##
## Rule: Standardized loadings >= .50 acceptable, >= .70 good.
# AVE and CR helper functions
compute_AVE <- function(fit, factor_name) {
params <- lavaan::standardizedsolution(fit)
loadings <- params[params$op == "=~" &
params$lhs == factor_name, "est.std"]
mean(loadings^2)
}
compute_CR <- function(fit, factor_name) {
params <- lavaan::standardizedsolution(fit)
loadings <- params[params$op == "=~" &
params$lhs == factor_name, "est.std"]
sum(loadings)^2 / (sum(loadings)^2 + sum(1 - loadings^2))
}
ave_w1 <- compute_AVE(cfa_fit, "MentorSupport_W1")
ave_w2 <- compute_AVE(cfa_fit, "MentorSupport_W2")
cr_w1 <- compute_CR(cfa_fit, "MentorSupport_W1")
cr_w2 <- compute_CR(cfa_fit, "MentorSupport_W2")
validity_table <- data.frame(
Construct = c("MentorSupport_W1", "MentorSupport_W2"),
AVE = round(c(ave_w1, ave_w2), 3),
CR = round(c(cr_w1, cr_w2), 3),
AVE_status = ifelse(c(ave_w1, ave_w2) >= .50,
"Convergent Valid (≥ .50)", "Poor (< .50)"),
CR_status = ifelse(c(cr_w1, cr_w2) >= .70,
"Acceptable (≥ .70)", "Poor (< .70)")
)
knitr::kable(validity_table,
caption = "Convergent Validity: AVE and Composite Reliability (CR)",
row.names = FALSE)| Construct | AVE | CR | AVE_status | CR_status |
|---|---|---|---|---|
| MentorSupport_W1 | 0.520 | 0.762 | Convergent Valid (≥ .50) | Acceptable (≥ .70) |
| MentorSupport_W2 | 0.655 | 0.849 | Convergent Valid (≥ .50) | Acceptable (≥ .70) |
factor_corr <- cfa_params[cfa_params$op == "~~" &
cfa_params$lhs != cfa_params$rhs &
cfa_params$lhs %in% c("MentorSupport_W1",
"MentorSupport_W2"), ]
if (nrow(factor_corr) > 0) {
phi <- factor_corr$est.std[1]
cat(sprintf("\nDiscriminant Validity (Fornell-Larcker Criterion)\n"))
cat(sprintf("Latent factor correlation (φ) = %.3f\n", phi))
cat(sprintf("sqrt(AVE_W1) = %.3f | sqrt(AVE_W2) = %.3f\n",
sqrt(ave_w1), sqrt(ave_w2)))
if (abs(phi) < min(sqrt(ave_w1), sqrt(ave_w2))) {
cat("→ Discriminant validity SUPPORTED (|φ| < min(sqrt(AVE))).\n")
} else {
cat("→ Discriminant validity QUESTIONABLE (|φ| ≥ sqrt(AVE)).\n")
}
}##
## Discriminant Validity (Fornell-Larcker Criterion)
## Latent factor correlation (φ) = 0.798
## sqrt(AVE_W1) = 0.721 | sqrt(AVE_W2) = 0.810
## → Discriminant validity QUESTIONABLE (|φ| ≥ sqrt(AVE)).
sem_model <- '
MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
MentorInstrumentSupport_w1 +
MentorRoleModelingSupport_w1
MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
MentorInstrumentSupport_w2 +
MentorRoleModelingSupport_w2
MentorSupport_W2 ~ MentorSupport_W1 + RAP + gendermatch +
ethnicitymatch + FMNTi_1r
sMSAT_2 ~ MentorSupport_W2 + MentorSupport_W1 + sMSAT_1 +
RAP + gendermatch + ethnicitymatch + FMNTi_1r
RAP ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
gendermatch ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
ethnicitymatch ~~ FMNTi_1r + sMSAT_1
FMNTi_1r ~~ sMSAT_1
'
sem_fit <- lavaan::sem(
model = sem_model,
data = df_sem,
estimator = "MLR",
missing = "FIML",
std.lv = TRUE
)
sem_fit_idx <- lavaan::fitmeasures(sem_fit, c(
"chisq", "df", "pvalue",
"cfi", "tli",
"rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
"srmr", "aic", "bic"
))cfi_s <- sem_fit_idx["cfi"]
tli_s <- sem_fit_idx["tli"]
rmsea_s <- sem_fit_idx["rmsea"]
srmr_s <- sem_fit_idx["srmr"]
sem_fit_table <- data.frame(
Index = c("Chi-Square (χ²)", "df", "p-value",
"CFI", "TLI",
"RMSEA", "RMSEA 90% CI Lower", "RMSEA 90% CI Upper",
"SRMR", "AIC", "BIC"),
Value = round(as.numeric(sem_fit_idx), 4),
Threshold = c("—", "—", "> .05",
"≥ .95", "≥ .95",
"≤ .06", "—", "—",
"≤ .08", "Lower = better", "Lower = better"),
Status = c("—", "—",
ifelse(sem_fit_idx["pvalue"] > .05, "Good", "Check"),
ifelse(cfi_s >= .95, "Good", ifelse(cfi_s >= .90, "Acceptable", "Poor")),
ifelse(tli_s >= .95, "Good", ifelse(tli_s >= .90, "Acceptable", "Poor")),
ifelse(rmsea_s <= .06, "Good", ifelse(rmsea_s <= .08, "Acceptable", "Poor")),
"—", "—",
ifelse(srmr_s <= .08, "Good", ifelse(srmr_s <= .10, "Acceptable", "Poor")),
"—", "—")
)
knitr::kable(sem_fit_table,
caption = "SEM Global Fit Indices",
row.names = FALSE)| Index | Value | Threshold | Status |
|---|---|---|---|
| Chi-Square (χ²) | 183.7703 | — | — |
| df | 38.0000 | — | — |
| p-value | 0.0000 | > .05 | Check |
| CFI | 0.7460 | ≥ .95 | Poor |
| TLI | 0.5589 | ≥ .95 | Poor |
| RMSEA | 0.1502 | ≤ .06 | Poor |
| RMSEA 90% CI Lower | 0.1289 | — | — |
| RMSEA 90% CI Upper | 0.1723 | — | — |
| SRMR | 0.1375 | ≤ .08 | Poor |
| AIC | 4981.2943 | Lower = better | — |
| BIC | 5144.3559 | Lower = better | — |
sem_params <- lavaan::standardizedsolution(sem_fit)
struct_paths <- sem_params[sem_params$op == "~", ]
num_cols <- c("est.std", "se", "z", "pvalue", "ci.lower", "ci.upper")
sp_tbl <- struct_paths[, c("lhs", "rhs", num_cols)]
sp_tbl[, num_cols] <- round(sp_tbl[, num_cols], 4)
knitr::kable(
sp_tbl,
caption = "Structural Path Estimates (Standardized)",
col.names = c("Outcome", "Predictor", "Beta (Std.)", "SE",
"z", "p-value", "CI Lower", "CI Upper"),
row.names = FALSE
)| Outcome | Predictor | Beta (Std.) | SE | z | p-value | CI Lower | CI Upper |
|---|---|---|---|---|---|---|---|
| MentorSupport_W2 | MentorSupport_W1 | 0.8119 | 0.0720 | 11.2807 | 0.0000 | 0.6708 | 0.9530 |
| MentorSupport_W2 | RAP | -0.0754 | 0.0629 | -1.1989 | 0.2306 | -0.1988 | 0.0479 |
| MentorSupport_W2 | gendermatch | -0.0695 | 0.0610 | -1.1397 | 0.2544 | -0.1889 | 0.0500 |
| MentorSupport_W2 | ethnicitymatch | 0.0344 | 0.0682 | 0.5043 | 0.6140 | -0.0993 | 0.1680 |
| MentorSupport_W2 | FMNTi_1r | -0.1306 | 0.0501 | -2.6088 | 0.0091 | -0.2287 | -0.0325 |
| sMSAT_2 | MentorSupport_W2 | 1.2800 | 0.2619 | 4.8868 | 0.0000 | 0.7666 | 1.7933 |
| sMSAT_2 | MentorSupport_W1 | -0.6804 | 0.2837 | -2.3987 | 0.0165 | -1.2364 | -0.1245 |
| sMSAT_2 | sMSAT_1 | 0.4051 | 0.0986 | 4.1085 | 0.0000 | 0.2118 | 0.5983 |
| sMSAT_2 | RAP | 0.0494 | 0.0846 | 0.5836 | 0.5595 | -0.1164 | 0.2151 |
| sMSAT_2 | gendermatch | 0.0050 | 0.0795 | 0.0630 | 0.9498 | -0.1509 | 0.1609 |
| sMSAT_2 | ethnicitymatch | -0.0021 | 0.0745 | -0.0285 | 0.9773 | -0.1482 | 0.1439 |
| sMSAT_2 | FMNTi_1r | 0.0403 | 0.0904 | 0.4453 | 0.6561 | -0.1369 | 0.2175 |
all_params <- lavaan::parameterestimates(sem_fit,
standardized = TRUE,
ci = TRUE,
level = 0.95)
struct_unstd <- all_params[all_params$op == "~",
c("lhs", "rhs", "est", "se", "z",
"pvalue", "ci.lower", "ci.upper", "std.all")]
num_cols2 <- c("est", "se", "z", "pvalue", "ci.lower", "ci.upper", "std.all")
su_tbl <- struct_unstd
su_tbl[, num_cols2] <- round(su_tbl[, num_cols2], 4)
knitr::kable(
su_tbl,
caption = "Structural Path Estimates (Unstandardized with Std. Solution)",
col.names = c("Outcome", "Predictor", "B", "SE", "z",
"p-value", "CI Lower", "CI Upper", "Beta (Std.)"),
row.names = FALSE
)| Outcome | Predictor | B | SE | z | p-value | CI Lower | CI Upper | Beta (Std.) |
|---|---|---|---|---|---|---|---|---|
| MentorSupport_W2 | MentorSupport_W1 | 1.4555 | 0.3950 | 3.6844 | 0.0002 | 0.6812 | 2.2298 | 0.8119 |
| MentorSupport_W2 | RAP | -0.2775 | 0.2258 | -1.2290 | 0.2191 | -0.7201 | 0.1651 | -0.0754 |
| MentorSupport_W2 | gendermatch | -0.2502 | 0.2287 | -1.0938 | 0.2740 | -0.6985 | 0.1981 | -0.0695 |
| MentorSupport_W2 | ethnicitymatch | 0.1331 | 0.2688 | 0.4952 | 0.6204 | -0.3937 | 0.6598 | 0.0344 |
| MentorSupport_W2 | FMNTi_1r | -0.0673 | 0.0274 | -2.4589 | 0.0139 | -0.1210 | -0.0137 | -0.1306 |
| sMSAT_2 | MentorSupport_W2 | 0.7509 | 0.1104 | 6.8016 | 0.0000 | 0.5345 | 0.9673 | 1.2800 |
| sMSAT_2 | MentorSupport_W1 | -0.7156 | 0.2910 | -2.4593 | 0.0139 | -1.2860 | -0.1453 | -0.6804 |
| sMSAT_2 | sMSAT_1 | 0.4077 | 0.0845 | 4.8235 | 0.0000 | 0.2420 | 0.5733 | 0.4051 |
| sMSAT_2 | RAP | 0.1065 | 0.1826 | 0.5833 | 0.5597 | -0.2514 | 0.4644 | 0.0494 |
| sMSAT_2 | gendermatch | 0.0106 | 0.1680 | 0.0630 | 0.9498 | -0.3187 | 0.3399 | 0.0050 |
| sMSAT_2 | ethnicitymatch | -0.0048 | 0.1691 | -0.0285 | 0.9772 | -0.3363 | 0.3267 | -0.0021 |
| sMSAT_2 | FMNTi_1r | 0.0122 | 0.0272 | 0.4470 | 0.6549 | -0.0412 | 0.0656 | 0.0403 |
r2_vals <- lavaan::inspect(sem_fit, "r2")
r2_table <- data.frame(
Variable = names(r2_vals),
R_squared = round(as.numeric(r2_vals), 3)
)
knitr::kable(r2_table,
caption = "R² — Variance Explained per Endogenous Variable",
row.names = FALSE)| Variable | R_squared |
|---|---|
| MentorPsychSocSupport_w1 | 0.470 |
| MentorInstrumentSupport_w1 | 0.671 |
| MentorRoleModelingSupport_w1 | 0.425 |
| MentorPsychSocSupport_w2 | 0.630 |
| MentorInstrumentSupport_w2 | 0.808 |
| MentorRoleModelingSupport_w2 | 0.593 |
| sMSAT_2 | 0.802 |
| MentorSupport_W2 | 0.689 |
sem_mediation <- '
MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
MentorInstrumentSupport_w1 +
MentorRoleModelingSupport_w1
MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
MentorInstrumentSupport_w2 +
MentorRoleModelingSupport_w2
MentorSupport_W2 ~ a * MentorSupport_W1 + RAP + gendermatch +
ethnicitymatch + FMNTi_1r
sMSAT_2 ~ b * MentorSupport_W2 + c * MentorSupport_W1 + sMSAT_1 +
RAP + gendermatch + ethnicitymatch + FMNTi_1r
RAP ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
gendermatch ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
ethnicitymatch ~~ FMNTi_1r + sMSAT_1
FMNTi_1r ~~ sMSAT_1
indirect := a * b # W1 → W2 → sMSAT_2
direct := c # W1 → sMSAT_2 (direct)
total := a * b + c # Total effect
'
set.seed(123)
sem_med_fit <- lavaan::sem(
model = sem_mediation,
data = df_sem,
estimator = "ML",
missing = "FIML",
std.lv = TRUE,
se = "bootstrap",
bootstrap = 1000
)med_params <- lavaan::parameterestimates(
sem_med_fit,
ci = TRUE,
level = 0.95,
boot.ci.type = "perc"
)
med_effects <- med_params[
med_params$label %in% c("a", "b", "c", "indirect", "direct", "total"), ]
med_table <- med_effects[, c("label", "est", "se", "z",
"pvalue", "ci.lower", "ci.upper")]
med_table$Interpretation <- c(
"W1 → W2 (autoregressive path)",
"W2 → sMSAT_2 (W2 on outcome)",
"W1 → sMSAT_2 (direct effect)",
"W1 → W2 → sMSAT_2 (indirect/mediated)",
"Direct effect W1 → sMSAT_2",
"Total effect W1 → sMSAT_2"
)
num_cols3 <- c("est", "se", "z", "pvalue", "ci.lower", "ci.upper")
mt_tbl <- med_table[, c("label", num_cols3)]
mt_tbl[, num_cols3] <- round(mt_tbl[, num_cols3], 4)
knitr::kable(
mt_tbl,
caption = "Mediation Effects (Bootstrap 95% CI, 1000 replications)",
col.names = c("Effect", "Estimate", "SE", "z", "p-value",
"CI Lower", "CI Upper"),
row.names = FALSE
)| Effect | Estimate | SE | z | p-value | CI Lower | CI Upper |
|---|---|---|---|---|---|---|
| a | 1.4555 | 5.0274 | 0.2895 | 0.7722 | 0.8939 | 3.7887 |
| b | 0.7509 | 0.7786 | 0.9644 | 0.3348 | 0.4970 | 1.1031 |
| c | -0.7156 | 44.9590 | -0.0159 | 0.9873 | -3.1866 | -0.2597 |
| indirect | 1.0930 | 44.9604 | 0.0243 | 0.9806 | 0.6203 | 3.6789 |
| direct | -0.7156 | 44.9590 | -0.0159 | 0.9873 | -3.1866 | -0.2597 |
| total | 0.3773 | 0.1511 | 2.4971 | 0.0125 | 0.0909 | 0.7134 |
##
## Interpretation guide:
## a = W1 Mentor Support → W2 Mentor Support
## b = W2 Mentor Support → Satisfaction (sMSAT_2)
## c = Direct: W1 → sMSAT_2 (bypassing W2)
## indirect = Mediated path (a × b)
## total = indirect + direct
##
## If indirect significant & direct non-sig → Full mediation
## If both significant → Partial mediation
sem_restricted <- '
MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
MentorInstrumentSupport_w1 +
MentorRoleModelingSupport_w1
MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
MentorInstrumentSupport_w2 +
MentorRoleModelingSupport_w2
MentorSupport_W2 ~ MentorSupport_W1 + RAP + gendermatch +
ethnicitymatch + FMNTi_1r
sMSAT_2 ~ MentorSupport_W2 + sMSAT_1 +
RAP + gendermatch + ethnicitymatch + FMNTi_1r
RAP ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
gendermatch ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
ethnicitymatch ~~ FMNTi_1r + sMSAT_1
FMNTi_1r ~~ sMSAT_1
'
sem_restricted_fit <- lavaan::sem(
model = sem_restricted,
data = df_sem,
estimator = "MLR",
missing = "FIML",
std.lv = TRUE
)
model_comp <- data.frame(
Model = c("Full Model (Partial Mediation)",
"Restricted (Full Mediation)"),
AIC = round(c(AIC(sem_fit), AIC(sem_restricted_fit)), 2),
BIC = round(c(BIC(sem_fit), BIC(sem_restricted_fit)), 2)
)
knitr::kable(model_comp,
caption = "AIC/BIC Model Comparison (Lower = Better Fit)",
row.names = FALSE)| Model | AIC | BIC |
|---|---|---|
| Full Model (Partial Mediation) | 4981.29 | 5144.36 |
| Restricted (Full Mediation) | 4994.84 | 5154.76 |
##
## Chi-Square Difference Test (Satorra-Bentler)
##
## Scaled Chi-Squared Difference Test (method = "satorra.bentler.2001")
##
## lavaan->lavTestLRT():
## lavaan NOTE: The "Chisq" column contains standard test statistics, not the
## robust test that should be reported per model. A robust difference test is
## a function of two standard (not robust) statistics.
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## sem_fit 38 4981.3 5144.4 183.77
## sem_restricted_fit 39 4994.8 5154.8 199.31 26.386 1 2.796e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Significant Δχ² → Full model fits significantly better (partial mediation supported)
## MI > 10 suggests freeing that parameter may improve fit.
## Apply only when theoretically justified — not data-dredging.
mi <- lavaan::modificationIndices(sem_fit,
sort. = TRUE,
maximum.number = 10)
knitr::kable(
mi[, c("lhs", "op", "rhs", "mi", "epc")],
caption = "Top 10 Modification Indices",
col.names = c("LHS", "Op", "RHS", "MI", "EPC"),
row.names = FALSE,
digits = 3
)| LHS | Op | RHS | MI | EPC |
|---|---|---|---|---|
| MentorSupport_W1 | ~ | sMSAT_1 | 61.663 | 0.632 |
| sMSAT_1 | ~ | MentorSupport_W1 | 56.734 | 0.639 |
| MentorSupport_W1 | ~ | sMSAT_2 | 52.391 | 1.433 |
| MentorInstrumentSupport_w1 | MentorInstrumentSupport_w2 | 39.988 | 0.413 | |
| sMSAT_1 | ~ | MentorSupport_W2 | 39.314 | 0.295 |
| sMSAT_1 | ~ | sMSAT_2 | 16.531 | 0.451 |
| MentorRoleModelingSupport_w1 | MentorRoleModelingSupport_w2 | 15.004 | 0.353 | |
| MentorInstrumentSupport_w1 | MentorRoleModelingSupport_w2 | 13.665 | -0.309 | |
| MentorRoleModelingSupport_w1 | sMSAT_1 | 12.362 | 0.270 | |
| MentorSupport_W1 | =~ | MentorInstrumentSupport_w2 | 11.604 | 0.566 |
cor_mat <- cor(na.omit(df_sem[, c(w1_indicators, w2_indicators,
"sMSAT_1", "sMSAT_2")]))
corrplot::corrplot(
cor_mat,
method = "color",
type = "lower",
addCoef.col = "black",
number.cex = 0.75,
tl.cex = 0.8,
col = corrplot::COL2("RdBu", 200),
title = "Correlation Matrix — SEM Indicators",
mar = c(0, 0, 2, 0)
)fit_summary <- data.frame(
Index = c("CFI", "TLI", "RMSEA", "SRMR"),
Value = c(sem_fit_idx["cfi"],
sem_fit_idx["tli"],
sem_fit_idx["rmsea"],
sem_fit_idx["srmr"]),
Threshold = c(0.95, 0.95, 0.06, 0.08),
Direction = c("higher", "higher", "lower", "lower")
)
fit_summary$Pass <- with(fit_summary,
ifelse(Direction == "higher",
Value >= Threshold,
Value <= Threshold))
ggplot(fit_summary, aes(x = Index, y = Value, fill = Pass)) +
geom_col(width = 0.5) +
geom_hline(aes(yintercept = Threshold),
linetype = "dashed", color = "red", linewidth = 0.8) +
scale_fill_manual(values = c("TRUE" = "#4CAF50",
"FALSE" = "#F44336"),
labels = c("TRUE" = "Pass",
"FALSE" = "Fail")) +
facet_wrap(~ Index, scales = "free") +
labs(title = "SEM Fit Indices vs. Thresholds",
subtitle = "Dashed line = cutoff threshold",
y = "Value",
x = "",
fill = "Meets Threshold") +
theme_minimal(base_size = 12)semPlot::semPaths(
object = sem_fit,
what = "std",
whatLabels = "std",
style = "ram",
layout = "tree2",
rotation = 2,
fade = FALSE,
edge.label.cex = 0.8,
node.width = 1.5,
node.height = 1.0,
mar = c(5, 5, 5, 5),
curvePivot = TRUE,
residuals = TRUE,
intercepts = FALSE,
color = list(
lat = "#2E86AB", # Blue — latent variables
man = "#A23B72" # Purple — manifest (observed) variables
)
)
title("SEM Path Diagram — Standardized Estimates\n(RAP Mentorship Dataset)",
cex.main = 1.0)## Model Overview
## Estimator : MLR (Robust Maximum Likelihood)
## Missing data : FIML (Full Information Maximum Likelihood)
## Latent variables: MentorSupport_W1, MentorSupport_W2
## Outcome : sMSAT_2 (mentee satisfaction, Wave 2)
## Covariates : RAP, gendermatch, ethnicitymatch, FMNTi_1r, sMSAT_1
# Key structural paths
std_sol <- lavaan::standardizedsolution(sem_fit)
get_path <- function(lhs, rhs) {
row <- std_sol[std_sol$lhs == lhs &
std_sol$rhs == rhs &
std_sol$op == "~", ]
if (nrow(row) == 0) return(NULL)
row
}
path_w1w2 <- get_path("MentorSupport_W2", "MentorSupport_W1")
path_w2sat <- get_path("sMSAT_2", "MentorSupport_W2")
path_w1sat <- get_path("sMSAT_2", "MentorSupport_W1")
cat("Key Path Coefficients\n")## Key Path Coefficients
if (!is.null(path_w1w2)) {
cat(sprintf("W1 → W2 Mentor Support : β = %.3f, p = %.4f %s\n",
path_w1w2$est.std,
path_w1w2$pvalue,
ifelse(path_w1w2$pvalue < .05, "[Significant]", "")))
}## W1 → W2 Mentor Support : β = 0.812, p = 0.0000 [Significant]
if (!is.null(path_w2sat)) {
cat(sprintf("W2 Support → sMSAT_2 : β = %.3f, p = %.4f %s\n",
path_w2sat$est.std,
path_w2sat$pvalue,
ifelse(path_w2sat$pvalue < .05, "[Significant]", "")))
}## W2 Support → sMSAT_2 : β = 1.280, p = 0.0000 [Significant]
if (!is.null(path_w1sat)) {
cat(sprintf("W1 Support → sMSAT_2 : β = %.3f, p = %.4f %s\n",
path_w1sat$est.std,
path_w1sat$pvalue,
ifelse(path_w1sat$pvalue < .05, "[Significant]", "")))
}## W1 Support → sMSAT_2 : β = -0.680, p = 0.0165 [Significant]
##
## Model R² for sMSAT_2 : 0.802