SETUP

# Core
library(tidyverse)
library(knitr)

# SEM
library(lavaan)      
library(semPlot)      
library(semTools)     

# Assumption Checking
library(MVN)         
library(psych)        
library(moments)     
library(naniar)      
library(car)        

# Visualization
library(ggplot2)
library(corrplot)

# Resolve namespace conflicts
select <- dplyr::select
filter <- dplyr::filter

LOAD DATA & OVERVIEW

Load Data

df_raw <- read.csv("RAP_Dataset_Public_deident.csv",
                   stringsAsFactors = FALSE)

cat("Rows    :", nrow(df_raw), "\n")
## Rows    : 170
cat("Columns :", ncol(df_raw), "\n\n")
## Columns : 32
cat("Column Names:\n")
## Column Names:
cat(paste(names(df_raw), collapse = ", "), "\n")
## Female_1, White_1, Cohort_1, Major_d_2, Major_d_3, Major_d_4, Matching_GPA_1, Rank_d_2, Rank_d_3, WVRes_d_1, Legacy_R_1, SATtotEquiv_1, PellGrantRecipient_R, RAP, FMNTa_1, PriorAflMent_1, FMNTi_1r, gendermatch, ethnicitymatch, SomeRes_1, MoreRes_1, RAPxSome, RAPxMore, MentorPsychSocSupport_w1, MentorInstrumentSupport_w1, MentorRoleModelingSupport_w1, sMSAT_1, sMSIM_1, MentorPsychSocSupport_w2, MentorInstrumentSupport_w2, MentorRoleModelingSupport_w2, sMSAT_2

Data Cleaning & Variable Selection

df <- df_raw

sentinel_cols <- c(
  "MentorPsychSocSupport_w2",
  "MentorInstrumentSupport_w2",
  "MentorRoleModelingSupport_w2",
  "sMSAT_2"
)
for (col in sentinel_cols) {
  df[[col]] <- ifelse(df[[col]] == -99, NA, df[[col]])
}

cat("Missing Data After Recoding -99 to NA:\n")
## Missing Data After Recoding -99 to NA:
print(colSums(is.na(df)))
##                     Female_1                      White_1 
##                            0                            0 
##                     Cohort_1                    Major_d_2 
##                            0                            0 
##                    Major_d_3                    Major_d_4 
##                            0                            0 
##               Matching_GPA_1                     Rank_d_2 
##                            0                            0 
##                     Rank_d_3                    WVRes_d_1 
##                            0                            0 
##                   Legacy_R_1                SATtotEquiv_1 
##                            0                            0 
##         PellGrantRecipient_R                          RAP 
##                            0                            0 
##                      FMNTa_1               PriorAflMent_1 
##                            0                            0 
##                     FMNTi_1r                  gendermatch 
##                            0                            0 
##               ethnicitymatch                    SomeRes_1 
##                            0                            0 
##                    MoreRes_1                     RAPxSome 
##                            0                            0 
##                     RAPxMore     MentorPsychSocSupport_w1 
##                            0                            0 
##   MentorInstrumentSupport_w1 MentorRoleModelingSupport_w1 
##                            0                            0 
##                      sMSAT_1                      sMSIM_1 
##                            0                            0 
##     MentorPsychSocSupport_w2   MentorInstrumentSupport_w2 
##                           66                            0 
## MentorRoleModelingSupport_w2                      sMSAT_2 
##                           66                           66
w1_indicators <- c(
  "MentorPsychSocSupport_w1",
  "MentorInstrumentSupport_w1",
  "MentorRoleModelingSupport_w1"
)

w2_indicators <- c(
  "MentorPsychSocSupport_w2",
  "MentorInstrumentSupport_w2",
  "MentorRoleModelingSupport_w2"
)

sem_vars <- c(
  w1_indicators, w2_indicators,
  "sMSAT_1", "sMSAT_2",
  "gendermatch", "ethnicitymatch",
  "RAP", "FMNTi_1r"
)

df_sem <- df[, sem_vars]

cat("Rows:", nrow(df_sem), "| Columns:", ncol(df_sem), "\n")
## Rows: 170 | Columns: 12

ASSUMPTION CHECKING

3.1 Descriptive Statistics

continuous_sem <- c(w1_indicators, w2_indicators, "sMSAT_1", "sMSAT_2")

desc <- psych::describe(df_sem)

knitr::kable(
  round(desc[, c("n", "mean", "sd", "min", "max", "skew", "kurtosis")], 3),
  caption = "Descriptive Statistics of SEM Variables"
)
Descriptive Statistics of SEM Variables
n mean sd min max skew kurtosis
MentorPsychSocSupport_w1 170 5.225 1.289 1.000 7 -0.496 -0.240
MentorInstrumentSupport_w1 170 4.971 1.250 1.000 7 -0.560 0.201
MentorRoleModelingSupport_w1 170 5.338 1.228 1.500 7 -0.651 0.296
MentorPsychSocSupport_w2 104 5.322 1.331 1.000 7 -0.709 0.295
MentorInstrumentSupport_w2 170 5.397 1.238 1.000 7 -0.742 0.615
MentorRoleModelingSupport_w2 104 5.368 1.252 1.750 7 -0.393 -0.398
sMSAT_1 170 6.027 1.048 1.333 7 -1.176 1.487
sMSAT_2 104 6.080 1.034 2.333 7 -1.064 0.871
gendermatch 170 0.547 0.499 0.000 1 -0.187 -1.976
ethnicitymatch 170 0.688 0.465 0.000 1 -0.806 -1.359
RAP 170 0.612 0.489 0.000 1 -0.455 -1.804
FMNTi_1r 170 3.332 3.488 0.000 17 1.572 2.146

3.2 Univariate Normality

uni_norm <- data.frame(
  Variable = character(),
  Skewness = numeric(),
  Kurtosis = numeric(),
  Status   = character(),
  stringsAsFactors = FALSE
)

for (v in continuous_sem) {
  vals <- na.omit(df_sem[[v]])
  sk   <- moments::skewness(vals)
  ku   <- moments::kurtosis(vals) - 3  # excess kurtosis
  flag <- ifelse(abs(sk) > 2 | abs(ku) > 7, "NON-NORMAL", "OK")
  uni_norm <- rbind(uni_norm,
                    data.frame(Variable = v,
                               Skewness = round(sk, 3),
                               Kurtosis = round(ku, 3),
                               Status   = flag))
}

knitr::kable(uni_norm,
             caption = "Univariate Normality Check (|skew| < 2, |kurt| < 7)",
             row.names = FALSE)
Univariate Normality Check (|skew| < 2, |kurt| < 7)
Variable Skewness Kurtosis Status
MentorPsychSocSupport_w1 -0.500 -0.207 OK
MentorInstrumentSupport_w1 -0.565 0.239 OK
MentorRoleModelingSupport_w1 -0.657 0.335 OK
MentorPsychSocSupport_w2 -0.719 0.359 OK
MentorInstrumentSupport_w2 -0.748 0.658 OK
MentorRoleModelingSupport_w2 -0.399 -0.347 OK
sMSAT_1 -1.186 1.540 OK
sMSAT_2 -1.079 0.947 OK

3.3 Multivariate Normality (Mardia’s Test)

df_mvn <- na.omit(df_sem[, continuous_sem])

cat("Complete cases for MVN test:", nrow(df_mvn), "\n\n")
## Complete cases for MVN test: 104
mvn_result <- tryCatch({
  MVN::mvn(data = df_mvn,
           mvnTest          = "mardia",
           univarTest       = "SW",
           multivariatePlot = "none")
}, error = function(e) {
  MVN::mvn(data        = df_mvn,
           mvn_test    = "mardia",
           univariate_test = "SW",
           multivariate_outlier_method = "none")
})

mvn_norm <- if (!is.null(mvn_result$multivariateNormality)) {
  mvn_result$multivariateNormality
} else if (!is.null(mvn_result$multivariate_normality)) {
  mvn_result$multivariate_normality
} else {
  mvn_result[[1]] 
}
print(mvn_norm)
##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   304.620  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis     7.234  <0.001 asymptotic ✗ Not normal
cat("\nConclusion: If violated, MLR estimator with FIML will be used.\n")
## 
## Conclusion: If violated, MLR estimator with FIML will be used.

3.4 Multivariate Outlier Detection (Mahalanobis Distance)

mah_dist   <- mahalanobis(df_mvn,
                           center = colMeans(df_mvn),
                           cov    = cov(df_mvn))
p_mah      <- pchisq(mah_dist, df = ncol(df_mvn), lower.tail = FALSE)
n_outliers <- sum(p_mah < 0.001)

cat(sprintf("Outliers (Mahalanobis p < .001): %d out of %d cases\n",
            n_outliers, nrow(df_mvn)))
## Outliers (Mahalanobis p < .001): 1 out of 104 cases
if (n_outliers > 0) {
  cat("Outlier row indices (in complete-case subset):\n")
  print(which(p_mah < 0.001))
}
## Outlier row indices (in complete-case subset):
## 170 
## 104
cat("→ Outliers are retained; MLR estimator handles their influence.\n")
## → Outliers are retained; MLR estimator handles their influence.

3.5 Missing Data Analysis

pct_missing <- data.frame(
  Variable       = names(df_sem),
  Missing_n      = colSums(is.na(df_sem)),
  Missing_pct    = round(colMeans(is.na(df_sem)) * 100, 2),
  row.names      = NULL
)

knitr::kable(pct_missing,
             caption = "Missing Data Summary per Variable",
             row.names = FALSE)
Missing Data Summary per Variable
Variable Missing_n Missing_pct
MentorPsychSocSupport_w1 0 0.00
MentorInstrumentSupport_w1 0 0.00
MentorRoleModelingSupport_w1 0 0.00
MentorPsychSocSupport_w2 66 38.82
MentorInstrumentSupport_w2 0 0.00
MentorRoleModelingSupport_w2 66 38.82
sMSAT_1 0 0.00
sMSAT_2 66 38.82
gendermatch 0 0.00
ethnicitymatch 0 0.00
RAP 0 0.00
FMNTi_1r 0 0.00
little_result <- naniar::mcar_test(df_sem)
cat(sprintf(
  "\nLittle's MCAR Test: chi2(%.0f) = %.3f, p = %.4f\n",
  little_result$df,
  little_result$statistic,
  little_result$p.value
))
## 
## Little's MCAR Test: chi2(9) = 25.669, p = 0.0023
if (little_result$p.value > 0.05) {
  cat("→ Data appear MCAR. FIML or listwise deletion both defensible.\n")
} else {
  cat("→ Data NOT MCAR (MAR assumed). FIML estimation in lavaan is required.\n")
}
## → Data NOT MCAR (MAR assumed). FIML estimation in lavaan is required.
cat("→ Strategy adopted: Full Information Maximum Likelihood (FIML).\n")
## → Strategy adopted: Full Information Maximum Likelihood (FIML).

3.6 Sample Size Adequacy

n_total    <- nrow(df_sem)
n_complete <- nrow(df_mvn)

n_params   <- 3 + 3 + 3 + 1 + 4
ratio      <- n_total / n_params

sample_check <- data.frame(
  Metric  = c("Total N", "Complete N",
               "Estimated Free Parameters",
               "N:Parameter Ratio",
               "Minimum Recommended",
               "Status"),
  Value   = c(n_total, n_complete,
               n_params,
               round(ratio, 1),
               "≥ 10:1",
               ifelse(ratio >= 10, "ADEQUATE", "BORDERLINE"))
)

knitr::kable(sample_check,
             caption = "Sample Size Adequacy Check",
             row.names = FALSE)
Sample Size Adequacy Check
Metric Value
Total N 170
Complete N 104
Estimated Free Parameters 14
N:Parameter Ratio 12.1
Minimum Recommended ≥ 10:1
Status ADEQUATE

3.7 Indicator Intercorrelations

cor_w1 <- cor(na.omit(df_sem[, w1_indicators]))
cor_w2 <- cor(na.omit(df_sem[, w2_indicators]))

cat("Wave 1 Indicator Intercorrelations:\n")
## Wave 1 Indicator Intercorrelations:
print(round(cor_w1, 3))
##                              MentorPsychSocSupport_w1
## MentorPsychSocSupport_w1                        1.000
## MentorInstrumentSupport_w1                      0.549
## MentorRoleModelingSupport_w1                    0.542
##                              MentorInstrumentSupport_w1
## MentorPsychSocSupport_w1                          0.549
## MentorInstrumentSupport_w1                        1.000
## MentorRoleModelingSupport_w1                      0.498
##                              MentorRoleModelingSupport_w1
## MentorPsychSocSupport_w1                            0.542
## MentorInstrumentSupport_w1                          0.498
## MentorRoleModelingSupport_w1                        1.000
cat("\nWave 2 Indicator Intercorrelations:\n")
## 
## Wave 2 Indicator Intercorrelations:
print(round(cor_w2, 3))
##                              MentorPsychSocSupport_w2
## MentorPsychSocSupport_w2                        1.000
## MentorInstrumentSupport_w2                      0.608
## MentorRoleModelingSupport_w2                    0.591
##                              MentorInstrumentSupport_w2
## MentorPsychSocSupport_w2                          0.608
## MentorInstrumentSupport_w2                        1.000
## MentorRoleModelingSupport_w2                      0.613
##                              MentorRoleModelingSupport_w2
## MentorPsychSocSupport_w2                            0.591
## MentorInstrumentSupport_w2                          0.613
## MentorRoleModelingSupport_w2                        1.000

3.8 Internal Consistency (Cronbach’s Alpha)

alpha_w1 <- psych::alpha(na.omit(df_sem[, w1_indicators]))
alpha_w2 <- psych::alpha(na.omit(df_sem[, w2_indicators]))

alpha_summary <- data.frame(
  Construct       = c("MentorSupport_W1", "MentorSupport_W2"),
  Cronbach_Alpha  = round(c(alpha_w1$total$raw_alpha,
                             alpha_w2$total$raw_alpha), 3),
  Status          = ifelse(
    c(alpha_w1$total$raw_alpha, alpha_w2$total$raw_alpha) >= 0.80,
    "Good (≥ 0.80)",
    ifelse(
      c(alpha_w1$total$raw_alpha, alpha_w2$total$raw_alpha) >= 0.70,
      "Acceptable (≥ 0.70)", "Poor (< 0.70)"
    )
  )
)

knitr::kable(alpha_summary,
             caption = "Cronbach's Alpha — Internal Consistency",
             row.names = FALSE)
Cronbach’s Alpha — Internal Consistency
Construct Cronbach_Alpha Status
MentorSupport_W1 0.772 Acceptable (≥ 0.70)
MentorSupport_W2 0.814 Good (≥ 0.80)

3.9 Bartlett’s Test & KMO

df_all_ind   <- na.omit(df_sem[, c(w1_indicators, w2_indicators)])
bartlett_res <- psych::cortest.bartlett(cor(df_all_ind),
                                         n = nrow(df_all_ind))
kmo_res      <- psych::KMO(cor(df_all_ind))

kmo_label <- ifelse(kmo_res$MSA >= 0.9, "Marvelous",
             ifelse(kmo_res$MSA >= 0.8, "Meritorious",
             ifelse(kmo_res$MSA >= 0.7, "Middling",
             ifelse(kmo_res$MSA >= 0.6, "Mediocre", "Miserable"))))

bartlett_kmo <- data.frame(
  Test    = c("Bartlett's Test of Sphericity", "KMO (Overall MSA)"),
  Value   = c(round(bartlett_res$chisq, 3), round(kmo_res$MSA, 3)),
  df_p    = c(paste0("df = ", bartlett_res$df,
                     ", p = ", round(bartlett_res$p.value, 6)),
              paste0("[", kmo_label, "]")),
  Status  = c(ifelse(bartlett_res$p.value < 0.05, "PASSED", "FAILED"),
              ifelse(kmo_res$MSA >= 0.60,          "PASSED", "FAILED"))
)

knitr::kable(bartlett_kmo,
             caption = "Bartlett's Test & KMO — Factor Structure Validity",
             col.names = c("Test", "Value", "Detail", "Status"),
             row.names = FALSE)
Bartlett’s Test & KMO — Factor Structure Validity
Test Value Detail Status
Bartlett’s Test of Sphericity 270.833 df = 15, p = 0 PASSED
KMO (Overall MSA) 0.675 [Mediocre] PASSED

Assumption Summary

assumption_summary <- data.frame(
  No         = 1:9,
  Assumption = c(
    "Univariate Normality",
    "Multivariate Normality",
    "Multivariate Outliers",
    "Missing Data (MCAR/MAR)",
    "Sample Size Adequacy",
    "Indicator Intercorrelations",
    "Internal Consistency (Alpha)",
    "Bartlett's Sphericity",
    "KMO (MSA)"
  ),
  Test_Method = c(
    "Skewness |<2|, Kurtosis |<7|",
    "Mardia's Test",
    "Mahalanobis Distance (p < .001)",
    "Little's MCAR Test",
    "N:parameter ratio ≥ 10:1",
    "Pearson Correlation Matrix",
    "Cronbach's Alpha ≥ .70",
    "χ² significance (p < .05)",
    "MSA ≥ .60"
  ),
  Decision = c(
    "See output above",
    "MLR estimator applied if violated",
    "Retained; handled by MLR",
    "FIML adopted",
    "See output above",
    "Within acceptable range",
    "See output above",
    "See output above",
    "See output above"
  )
)

knitr::kable(assumption_summary,
             caption = "Summary Table — SEM Assumption Checking",
             row.names = FALSE)
Summary Table — SEM Assumption Checking
No Assumption Test_Method Decision
1 Univariate Normality Skewness |<2|, Kurtosis |<7| See output above
2 Multivariate Normality Mardia’s Test MLR estimator applied if violated
3 Multivariate Outliers Mahalanobis Distance (p < .001) Retained; handled by MLR
4 Missing Data (MCAR/MAR) Little’s MCAR Test FIML adopted
5 Sample Size Adequacy N:parameter ratio ≥ 10:1 See output above
6 Indicator Intercorrelations Pearson Correlation Matrix Within acceptable range
7 Internal Consistency (Alpha) Cronbach’s Alpha ≥ .70 See output above
8 Bartlett’s Sphericity χ² significance (p < .05) See output above
9 KMO (MSA) MSA ≥ .60 See output above

CONFIRMATORY FACTOR ANALYSIS (CFA)

Model Specification

cfa_model <- '
  # Latent factor: Wave 1 Mentor Support
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  # Latent factor: Wave 2 Mentor Support
  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2
'

cfa_fit <- lavaan::cfa(
  model     = cfa_model,
  data      = df_sem,
  estimator = "MLR",  
  missing   = "FIML",
  std.lv    = TRUE    
)

CFA Fit Indices

cfa_fit_idx <- lavaan::fitmeasures(cfa_fit, c(
  "chisq", "df", "pvalue",
  "cfi", "tli",
  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
  "srmr", "aic", "bic"
))

cfi_c   <- cfa_fit_idx["cfi"]
rmsea_c <- cfa_fit_idx["rmsea"]
srmr_c  <- cfa_fit_idx["srmr"]

cfa_fit_table <- data.frame(
  Index     = c("Chi-Square (χ²)", "df", "p-value",
                 "CFI", "TLI",
                 "RMSEA", "RMSEA 90% CI Lower", "RMSEA 90% CI Upper",
                 "SRMR", "AIC", "BIC"),
  Value     = round(as.numeric(cfa_fit_idx), 4),
  Threshold = c("—", "—", "> .05",
                 "≥ .95", "≥ .95",
                 "≤ .06", "—", "—",
                 "≤ .08", "Lower = better", "Lower = better"),
  Status    = c("—", "—",
                 ifelse(cfa_fit_idx["pvalue"] > .05, "Good", "Check"),
                 ifelse(cfi_c   >= .95, "Good", ifelse(cfi_c   >= .90, "Acceptable", "Poor")),
                 ifelse(cfa_fit_idx["tli"] >= .95, "Good",
                        ifelse(cfa_fit_idx["tli"] >= .90, "Acceptable", "Poor")),
                 ifelse(rmsea_c <= .06, "Good", ifelse(rmsea_c <= .08, "Acceptable", "Poor")),
                 "—", "—",
                 ifelse(srmr_c  <= .08, "Good", ifelse(srmr_c  <= .10, "Acceptable", "Poor")),
                 "—", "—")
)

knitr::kable(cfa_fit_table,
             caption = "CFA Fit Indices",
             row.names = FALSE)
CFA Fit Indices
Index Value Threshold Status
Chi-Square (χ²) 64.4678
df 8.0000
p-value 0.0000 > .05 Check
CFI 0.8530 ≥ .95 Poor
TLI 0.7243 ≥ .95 Poor
RMSEA 0.2038 ≤ .06 Poor
RMSEA 90% CI Lower 0.1593
RMSEA 90% CI Upper 0.2513
SRMR 0.0580 ≤ .08 Good
AIC 2628.0817 Lower = better
BIC 2687.6619 Lower = better

CFA Standardized Loadings

cfa_params   <- lavaan::standardizedsolution(cfa_fit)
cfa_loadings <- cfa_params[cfa_params$op == "=~", ]

cfa_load_tbl <- cfa_loadings[, c("lhs", "rhs", "est.std", "se", "z", "pvalue")]
cfa_load_tbl[, c("est.std", "se", "z", "pvalue")] <-
  round(cfa_load_tbl[, c("est.std", "se", "z", "pvalue")], 4)

knitr::kable(
  cfa_load_tbl,
  caption   = "CFA Standardized Factor Loadings",
  col.names = c("Latent Factor", "Indicator", "Std. Loading", "SE", "z", "p-value"),
  row.names = FALSE
)
CFA Standardized Factor Loadings
Latent Factor Indicator Std. Loading SE z p-value
MentorSupport_W1 MentorPsychSocSupport_w1 0.6801 0.0673 10.1064 0
MentorSupport_W1 MentorInstrumentSupport_w1 0.8309 0.0569 14.6073 0
MentorSupport_W1 MentorRoleModelingSupport_w1 0.6383 0.0783 8.1576 0
MentorSupport_W2 MentorPsychSocSupport_w2 0.7407 0.0701 10.5610 0
MentorSupport_W2 MentorInstrumentSupport_w2 0.9353 0.0502 18.6485 0
MentorSupport_W2 MentorRoleModelingSupport_w2 0.7366 0.0728 10.1237 0
cat("\nRule: Standardized loadings >= .50 acceptable, >= .70 good.\n")
## 
## Rule: Standardized loadings >= .50 acceptable, >= .70 good.

Convergent & Discriminant Validity

# AVE and CR helper functions
compute_AVE <- function(fit, factor_name) {
  params   <- lavaan::standardizedsolution(fit)
  loadings <- params[params$op == "=~" &
                       params$lhs == factor_name, "est.std"]
  mean(loadings^2)
}

compute_CR <- function(fit, factor_name) {
  params   <- lavaan::standardizedsolution(fit)
  loadings <- params[params$op == "=~" &
                       params$lhs == factor_name, "est.std"]
  sum(loadings)^2 / (sum(loadings)^2 + sum(1 - loadings^2))
}

ave_w1 <- compute_AVE(cfa_fit, "MentorSupport_W1")
ave_w2 <- compute_AVE(cfa_fit, "MentorSupport_W2")
cr_w1  <- compute_CR(cfa_fit,  "MentorSupport_W1")
cr_w2  <- compute_CR(cfa_fit,  "MentorSupport_W2")

validity_table <- data.frame(
  Construct = c("MentorSupport_W1", "MentorSupport_W2"),
  AVE       = round(c(ave_w1, ave_w2), 3),
  CR        = round(c(cr_w1,  cr_w2),  3),
  AVE_status = ifelse(c(ave_w1, ave_w2) >= .50,
                       "Convergent Valid (≥ .50)", "Poor (< .50)"),
  CR_status  = ifelse(c(cr_w1, cr_w2) >= .70,
                       "Acceptable (≥ .70)", "Poor (< .70)")
)

knitr::kable(validity_table,
             caption = "Convergent Validity: AVE and Composite Reliability (CR)",
             row.names = FALSE)
Convergent Validity: AVE and Composite Reliability (CR)
Construct AVE CR AVE_status CR_status
MentorSupport_W1 0.520 0.762 Convergent Valid (≥ .50) Acceptable (≥ .70)
MentorSupport_W2 0.655 0.849 Convergent Valid (≥ .50) Acceptable (≥ .70)
factor_corr <- cfa_params[cfa_params$op == "~~" &
                             cfa_params$lhs != cfa_params$rhs &
                             cfa_params$lhs %in% c("MentorSupport_W1",
                                                    "MentorSupport_W2"), ]
if (nrow(factor_corr) > 0) {
  phi <- factor_corr$est.std[1]
  cat(sprintf("\nDiscriminant Validity (Fornell-Larcker Criterion)\n"))
  cat(sprintf("Latent factor correlation (φ)  = %.3f\n", phi))
  cat(sprintf("sqrt(AVE_W1) = %.3f  |  sqrt(AVE_W2) = %.3f\n",
              sqrt(ave_w1), sqrt(ave_w2)))
  if (abs(phi) < min(sqrt(ave_w1), sqrt(ave_w2))) {
    cat("→ Discriminant validity SUPPORTED (|φ| < min(sqrt(AVE))).\n")
  } else {
    cat("→ Discriminant validity QUESTIONABLE (|φ| ≥ sqrt(AVE)).\n")
  }
}
## 
## Discriminant Validity (Fornell-Larcker Criterion)
## Latent factor correlation (φ)  = 0.798
## sqrt(AVE_W1) = 0.721  |  sqrt(AVE_W2) = 0.810
## → Discriminant validity QUESTIONABLE (|φ| ≥ sqrt(AVE)).

FULL SEM MODEL (Structural Model)

Model Specification

sem_model <- '
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2

  MentorSupport_W2 ~ MentorSupport_W1 + RAP + gendermatch +
                     ethnicitymatch + FMNTi_1r

  sMSAT_2 ~ MentorSupport_W2 + MentorSupport_W1 + sMSAT_1 +
             RAP + gendermatch + ethnicitymatch + FMNTi_1r

  RAP            ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
  gendermatch    ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
  ethnicitymatch ~~ FMNTi_1r + sMSAT_1
  FMNTi_1r       ~~ sMSAT_1
'

sem_fit <- lavaan::sem(
  model     = sem_model,
  data      = df_sem,
  estimator = "MLR",
  missing   = "FIML",
  std.lv    = TRUE
)

sem_fit_idx <- lavaan::fitmeasures(sem_fit, c(
  "chisq", "df", "pvalue",
  "cfi", "tli",
  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
  "srmr", "aic", "bic"
))

5.1 Global Fit Indices

cfi_s   <- sem_fit_idx["cfi"]
tli_s   <- sem_fit_idx["tli"]
rmsea_s <- sem_fit_idx["rmsea"]
srmr_s  <- sem_fit_idx["srmr"]

sem_fit_table <- data.frame(
  Index     = c("Chi-Square (χ²)", "df", "p-value",
                 "CFI", "TLI",
                 "RMSEA", "RMSEA 90% CI Lower", "RMSEA 90% CI Upper",
                 "SRMR", "AIC", "BIC"),
  Value     = round(as.numeric(sem_fit_idx), 4),
  Threshold = c("—", "—", "> .05",
                 "≥ .95", "≥ .95",
                 "≤ .06", "—", "—",
                 "≤ .08", "Lower = better", "Lower = better"),
  Status    = c("—", "—",
                 ifelse(sem_fit_idx["pvalue"] > .05, "Good", "Check"),
                 ifelse(cfi_s   >= .95, "Good", ifelse(cfi_s   >= .90, "Acceptable", "Poor")),
                 ifelse(tli_s   >= .95, "Good", ifelse(tli_s   >= .90, "Acceptable", "Poor")),
                 ifelse(rmsea_s <= .06, "Good", ifelse(rmsea_s <= .08, "Acceptable", "Poor")),
                 "—", "—",
                 ifelse(srmr_s  <= .08, "Good", ifelse(srmr_s  <= .10, "Acceptable", "Poor")),
                 "—", "—")
)

knitr::kable(sem_fit_table,
             caption = "SEM Global Fit Indices",
             row.names = FALSE)
SEM Global Fit Indices
Index Value Threshold Status
Chi-Square (χ²) 183.7703
df 38.0000
p-value 0.0000 > .05 Check
CFI 0.7460 ≥ .95 Poor
TLI 0.5589 ≥ .95 Poor
RMSEA 0.1502 ≤ .06 Poor
RMSEA 90% CI Lower 0.1289
RMSEA 90% CI Upper 0.1723
SRMR 0.1375 ≤ .08 Poor
AIC 4981.2943 Lower = better
BIC 5144.3559 Lower = better

5.2 Structural Path Estimates (Standardized)

sem_params   <- lavaan::standardizedsolution(sem_fit)
struct_paths <- sem_params[sem_params$op == "~", ]

num_cols <- c("est.std", "se", "z", "pvalue", "ci.lower", "ci.upper")
sp_tbl   <- struct_paths[, c("lhs", "rhs", num_cols)]
sp_tbl[, num_cols] <- round(sp_tbl[, num_cols], 4)

knitr::kable(
  sp_tbl,
  caption   = "Structural Path Estimates (Standardized)",
  col.names = c("Outcome", "Predictor", "Beta (Std.)", "SE",
                 "z", "p-value", "CI Lower", "CI Upper"),
  row.names = FALSE
)
Structural Path Estimates (Standardized)
Outcome Predictor Beta (Std.) SE z p-value CI Lower CI Upper
MentorSupport_W2 MentorSupport_W1 0.8119 0.0720 11.2807 0.0000 0.6708 0.9530
MentorSupport_W2 RAP -0.0754 0.0629 -1.1989 0.2306 -0.1988 0.0479
MentorSupport_W2 gendermatch -0.0695 0.0610 -1.1397 0.2544 -0.1889 0.0500
MentorSupport_W2 ethnicitymatch 0.0344 0.0682 0.5043 0.6140 -0.0993 0.1680
MentorSupport_W2 FMNTi_1r -0.1306 0.0501 -2.6088 0.0091 -0.2287 -0.0325
sMSAT_2 MentorSupport_W2 1.2800 0.2619 4.8868 0.0000 0.7666 1.7933
sMSAT_2 MentorSupport_W1 -0.6804 0.2837 -2.3987 0.0165 -1.2364 -0.1245
sMSAT_2 sMSAT_1 0.4051 0.0986 4.1085 0.0000 0.2118 0.5983
sMSAT_2 RAP 0.0494 0.0846 0.5836 0.5595 -0.1164 0.2151
sMSAT_2 gendermatch 0.0050 0.0795 0.0630 0.9498 -0.1509 0.1609
sMSAT_2 ethnicitymatch -0.0021 0.0745 -0.0285 0.9773 -0.1482 0.1439
sMSAT_2 FMNTi_1r 0.0403 0.0904 0.4453 0.6561 -0.1369 0.2175

5.3 Unstandardized Solution

all_params  <- lavaan::parameterestimates(sem_fit,
                                           standardized = TRUE,
                                           ci           = TRUE,
                                           level        = 0.95)
struct_unstd <- all_params[all_params$op == "~",
                             c("lhs", "rhs", "est", "se", "z",
                               "pvalue", "ci.lower", "ci.upper", "std.all")]

num_cols2 <- c("est", "se", "z", "pvalue", "ci.lower", "ci.upper", "std.all")
su_tbl    <- struct_unstd
su_tbl[, num_cols2] <- round(su_tbl[, num_cols2], 4)

knitr::kable(
  su_tbl,
  caption   = "Structural Path Estimates (Unstandardized with Std. Solution)",
  col.names = c("Outcome", "Predictor", "B", "SE", "z",
                 "p-value", "CI Lower", "CI Upper", "Beta (Std.)"),
  row.names = FALSE
)
Structural Path Estimates (Unstandardized with Std. Solution)
Outcome Predictor B SE z p-value CI Lower CI Upper Beta (Std.)
MentorSupport_W2 MentorSupport_W1 1.4555 0.3950 3.6844 0.0002 0.6812 2.2298 0.8119
MentorSupport_W2 RAP -0.2775 0.2258 -1.2290 0.2191 -0.7201 0.1651 -0.0754
MentorSupport_W2 gendermatch -0.2502 0.2287 -1.0938 0.2740 -0.6985 0.1981 -0.0695
MentorSupport_W2 ethnicitymatch 0.1331 0.2688 0.4952 0.6204 -0.3937 0.6598 0.0344
MentorSupport_W2 FMNTi_1r -0.0673 0.0274 -2.4589 0.0139 -0.1210 -0.0137 -0.1306
sMSAT_2 MentorSupport_W2 0.7509 0.1104 6.8016 0.0000 0.5345 0.9673 1.2800
sMSAT_2 MentorSupport_W1 -0.7156 0.2910 -2.4593 0.0139 -1.2860 -0.1453 -0.6804
sMSAT_2 sMSAT_1 0.4077 0.0845 4.8235 0.0000 0.2420 0.5733 0.4051
sMSAT_2 RAP 0.1065 0.1826 0.5833 0.5597 -0.2514 0.4644 0.0494
sMSAT_2 gendermatch 0.0106 0.1680 0.0630 0.9498 -0.3187 0.3399 0.0050
sMSAT_2 ethnicitymatch -0.0048 0.1691 -0.0285 0.9772 -0.3363 0.3267 -0.0021
sMSAT_2 FMNTi_1r 0.0122 0.0272 0.4470 0.6549 -0.0412 0.0656 0.0403

5.4 R² (Variance Explained)

r2_vals <- lavaan::inspect(sem_fit, "r2")

r2_table <- data.frame(
  Variable   = names(r2_vals),
  R_squared  = round(as.numeric(r2_vals), 3)
)

knitr::kable(r2_table,
             caption = "R² — Variance Explained per Endogenous Variable",
             row.names = FALSE)
R² — Variance Explained per Endogenous Variable
Variable R_squared
MentorPsychSocSupport_w1 0.470
MentorInstrumentSupport_w1 0.671
MentorRoleModelingSupport_w1 0.425
MentorPsychSocSupport_w2 0.630
MentorInstrumentSupport_w2 0.808
MentorRoleModelingSupport_w2 0.593
sMSAT_2 0.802
MentorSupport_W2 0.689

MEDIATION ANALYSIS

Indirect Effect: MentorSupport_W1 → W2 → sMSAT_2

sem_mediation <- '
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2

  MentorSupport_W2 ~ a * MentorSupport_W1 + RAP + gendermatch +
                         ethnicitymatch + FMNTi_1r

  sMSAT_2 ~ b * MentorSupport_W2 + c * MentorSupport_W1 + sMSAT_1 +
              RAP + gendermatch + ethnicitymatch + FMNTi_1r

  RAP            ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
  gendermatch    ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
  ethnicitymatch ~~ FMNTi_1r + sMSAT_1
  FMNTi_1r       ~~ sMSAT_1

  indirect := a * b         # W1 → W2 → sMSAT_2
  direct   := c             # W1 → sMSAT_2 (direct)
  total    := a * b + c     # Total effect
'

set.seed(123)
sem_med_fit <- lavaan::sem(
  model     = sem_mediation,
  data      = df_sem,
  estimator = "ML",        
  missing   = "FIML",      
  std.lv    = TRUE,
  se        = "bootstrap", 
  bootstrap = 1000         
)

Mediation Results

med_params  <- lavaan::parameterestimates(
  sem_med_fit,
  ci           = TRUE,
  level        = 0.95,
  boot.ci.type = "perc"
)

med_effects <- med_params[
  med_params$label %in% c("a", "b", "c", "indirect", "direct", "total"), ]

med_table <- med_effects[, c("label", "est", "se", "z",
                               "pvalue", "ci.lower", "ci.upper")]
med_table$Interpretation <- c(
  "W1 → W2 (autoregressive path)",
  "W2 → sMSAT_2 (W2 on outcome)",
  "W1 → sMSAT_2 (direct effect)",
  "W1 → W2 → sMSAT_2 (indirect/mediated)",
  "Direct effect W1 → sMSAT_2",
  "Total effect W1 → sMSAT_2"
)

num_cols3 <- c("est", "se", "z", "pvalue", "ci.lower", "ci.upper")
mt_tbl    <- med_table[, c("label", num_cols3)]
mt_tbl[, num_cols3] <- round(mt_tbl[, num_cols3], 4)

knitr::kable(
  mt_tbl,
  caption   = "Mediation Effects (Bootstrap 95% CI, 1000 replications)",
  col.names = c("Effect", "Estimate", "SE", "z", "p-value",
                 "CI Lower", "CI Upper"),
  row.names = FALSE
)
Mediation Effects (Bootstrap 95% CI, 1000 replications)
Effect Estimate SE z p-value CI Lower CI Upper
a 1.4555 5.0274 0.2895 0.7722 0.8939 3.7887
b 0.7509 0.7786 0.9644 0.3348 0.4970 1.1031
c -0.7156 44.9590 -0.0159 0.9873 -3.1866 -0.2597
indirect 1.0930 44.9604 0.0243 0.9806 0.6203 3.6789
direct -0.7156 44.9590 -0.0159 0.9873 -3.1866 -0.2597
total 0.3773 0.1511 2.4971 0.0125 0.0909 0.7134
cat("\nInterpretation guide:\n")
## 
## Interpretation guide:
cat("  a        = W1 Mentor Support → W2 Mentor Support\n")
##   a        = W1 Mentor Support → W2 Mentor Support
cat("  b        = W2 Mentor Support → Satisfaction (sMSAT_2)\n")
##   b        = W2 Mentor Support → Satisfaction (sMSAT_2)
cat("  c        = Direct: W1 → sMSAT_2 (bypassing W2)\n")
##   c        = Direct: W1 → sMSAT_2 (bypassing W2)
cat("  indirect = Mediated path (a × b)\n")
##   indirect = Mediated path (a × b)
cat("  total    = indirect + direct\n")
##   total    = indirect + direct
cat("\nIf indirect significant & direct non-sig → Full mediation\n")
## 
## If indirect significant & direct non-sig → Full mediation
cat("If both significant                       → Partial mediation\n")
## If both significant                       → Partial mediation

MODEL COMPARISON (Partial vs. Full Mediation)

sem_restricted <- '
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2

  MentorSupport_W2 ~ MentorSupport_W1 + RAP + gendermatch +
                     ethnicitymatch + FMNTi_1r

  sMSAT_2 ~ MentorSupport_W2 + sMSAT_1 +
             RAP + gendermatch + ethnicitymatch + FMNTi_1r

  RAP            ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
  gendermatch    ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
  ethnicitymatch ~~ FMNTi_1r + sMSAT_1
  FMNTi_1r       ~~ sMSAT_1
'

sem_restricted_fit <- lavaan::sem(
  model     = sem_restricted,
  data      = df_sem,
  estimator = "MLR",
  missing   = "FIML",
  std.lv    = TRUE
)

model_comp <- data.frame(
  Model   = c("Full Model (Partial Mediation)",
               "Restricted (Full Mediation)"),
  AIC     = round(c(AIC(sem_fit), AIC(sem_restricted_fit)), 2),
  BIC     = round(c(BIC(sem_fit), BIC(sem_restricted_fit)), 2)
)

knitr::kable(model_comp,
             caption = "AIC/BIC Model Comparison (Lower = Better Fit)",
             row.names = FALSE)
AIC/BIC Model Comparison (Lower = Better Fit)
Model AIC BIC
Full Model (Partial Mediation) 4981.29 5144.36
Restricted (Full Mediation) 4994.84 5154.76
cat("\nChi-Square Difference Test (Satorra-Bentler)\n")
## 
## Chi-Square Difference Test (Satorra-Bentler)
lrt_result <- lavaan::lavTestLRT(sem_fit, sem_restricted_fit)
print(lrt_result)
## 
## Scaled Chi-Squared Difference Test (method = "satorra.bentler.2001")
## 
## lavaan->lavTestLRT():  
##    lavaan NOTE: The "Chisq" column contains standard test statistics, not the 
##    robust test that should be reported per model. A robust difference test is 
##    a function of two standard (not robust) statistics.
## 
##                    Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## sem_fit            38 4981.3 5144.4 183.77                                  
## sem_restricted_fit 39 4994.8 5154.8 199.31     26.386       1  2.796e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("Significant Δχ² → Full model fits significantly better (partial mediation supported)\n")
## Significant Δχ² → Full model fits significantly better (partial mediation supported)

MODIFICATION INDICES

cat("MI > 10 suggests freeing that parameter may improve fit.\n")
## MI > 10 suggests freeing that parameter may improve fit.
cat("Apply only when theoretically justified — not data-dredging.\n\n")
## Apply only when theoretically justified — not data-dredging.
mi <- lavaan::modificationIndices(sem_fit,
                                   sort.          = TRUE,
                                   maximum.number = 10)

knitr::kable(
  mi[, c("lhs", "op", "rhs", "mi", "epc")],
  caption = "Top 10 Modification Indices",
  col.names = c("LHS", "Op", "RHS", "MI", "EPC"),
  row.names = FALSE,
  digits    = 3
)
Top 10 Modification Indices
LHS Op RHS MI EPC
MentorSupport_W1 ~ sMSAT_1 61.663 0.632
sMSAT_1 ~ MentorSupport_W1 56.734 0.639
MentorSupport_W1 ~ sMSAT_2 52.391 1.433
MentorInstrumentSupport_w1 MentorInstrumentSupport_w2 39.988 0.413
sMSAT_1 ~ MentorSupport_W2 39.314 0.295
sMSAT_1 ~ sMSAT_2 16.531 0.451
MentorRoleModelingSupport_w1 MentorRoleModelingSupport_w2 15.004 0.353
MentorInstrumentSupport_w1 MentorRoleModelingSupport_w2 13.665 -0.309
MentorRoleModelingSupport_w1 sMSAT_1 12.362 0.270
MentorSupport_W1 =~ MentorInstrumentSupport_w2 11.604 0.566

VISUALIZATIONS

Correlation Matrix Heatmap

cor_mat <- cor(na.omit(df_sem[, c(w1_indicators, w2_indicators,
                                   "sMSAT_1", "sMSAT_2")]))

corrplot::corrplot(
  cor_mat,
  method      = "color",
  type        = "lower",
  addCoef.col = "black",
  number.cex  = 0.75,
  tl.cex      = 0.8,
  col         = corrplot::COL2("RdBu", 200),
  title       = "Correlation Matrix — SEM Indicators",
  mar         = c(0, 0, 2, 0)
)

Fit Index Summary Plot

fit_summary <- data.frame(
  Index     = c("CFI", "TLI", "RMSEA", "SRMR"),
  Value     = c(sem_fit_idx["cfi"],
                sem_fit_idx["tli"],
                sem_fit_idx["rmsea"],
                sem_fit_idx["srmr"]),
  Threshold = c(0.95, 0.95, 0.06, 0.08),
  Direction = c("higher", "higher", "lower", "lower")
)

fit_summary$Pass <- with(fit_summary,
  ifelse(Direction == "higher",
         Value >= Threshold,
         Value <= Threshold))

ggplot(fit_summary, aes(x = Index, y = Value, fill = Pass)) +
  geom_col(width = 0.5) +
  geom_hline(aes(yintercept = Threshold),
             linetype = "dashed", color = "red", linewidth = 0.8) +
  scale_fill_manual(values = c("TRUE"  = "#4CAF50",
                                "FALSE" = "#F44336"),
                    labels  = c("TRUE"  = "Pass",
                                "FALSE" = "Fail")) +
  facet_wrap(~ Index, scales = "free") +
  labs(title    = "SEM Fit Indices vs. Thresholds",
       subtitle = "Dashed line = cutoff threshold",
       y        = "Value",
       x        = "",
       fill     = "Meets Threshold") +
  theme_minimal(base_size = 12)

SEM Path Diagram

semPlot::semPaths(
  object         = sem_fit,
  what           = "std",
  whatLabels     = "std",
  style          = "ram",
  layout         = "tree2",
  rotation       = 2,
  fade           = FALSE,
  edge.label.cex = 0.8,
  node.width     = 1.5,
  node.height    = 1.0,
  mar            = c(5, 5, 5, 5),
  curvePivot     = TRUE,
  residuals      = TRUE,
  intercepts     = FALSE,
  color          = list(
    lat = "#2E86AB",   # Blue  — latent variables
    man = "#A23B72"    # Purple — manifest (observed) variables
  )
)
title("SEM Path Diagram — Standardized Estimates\n(RAP Mentorship Dataset)",
      cex.main = 1.0)


FINAL SUMMARY

cat("Model Overview\n")
## Model Overview
cat("Estimator       : MLR (Robust Maximum Likelihood)\n")
## Estimator       : MLR (Robust Maximum Likelihood)
cat("Missing data    : FIML (Full Information Maximum Likelihood)\n")
## Missing data    : FIML (Full Information Maximum Likelihood)
cat("Latent variables: MentorSupport_W1, MentorSupport_W2\n")
## Latent variables: MentorSupport_W1, MentorSupport_W2
cat("Outcome         : sMSAT_2 (mentee satisfaction, Wave 2)\n")
## Outcome         : sMSAT_2 (mentee satisfaction, Wave 2)
cat("Covariates      : RAP, gendermatch, ethnicitymatch, FMNTi_1r, sMSAT_1\n\n")
## Covariates      : RAP, gendermatch, ethnicitymatch, FMNTi_1r, sMSAT_1
# Key structural paths
std_sol  <- lavaan::standardizedsolution(sem_fit)
get_path <- function(lhs, rhs) {
  row <- std_sol[std_sol$lhs == lhs &
                   std_sol$rhs == rhs &
                   std_sol$op  == "~", ]
  if (nrow(row) == 0) return(NULL)
  row
}

path_w1w2  <- get_path("MentorSupport_W2", "MentorSupport_W1")
path_w2sat <- get_path("sMSAT_2", "MentorSupport_W2")
path_w1sat <- get_path("sMSAT_2", "MentorSupport_W1")

cat("Key Path Coefficients\n")
## Key Path Coefficients
if (!is.null(path_w1w2)) {
  cat(sprintf("W1 → W2 Mentor Support  : β = %.3f, p = %.4f  %s\n",
              path_w1w2$est.std,
              path_w1w2$pvalue,
              ifelse(path_w1w2$pvalue < .05, "[Significant]", "")))
}
## W1 → W2 Mentor Support  : β = 0.812, p = 0.0000  [Significant]
if (!is.null(path_w2sat)) {
  cat(sprintf("W2 Support → sMSAT_2    : β = %.3f, p = %.4f  %s\n",
              path_w2sat$est.std,
              path_w2sat$pvalue,
              ifelse(path_w2sat$pvalue < .05, "[Significant]", "")))
}
## W2 Support → sMSAT_2    : β = 1.280, p = 0.0000  [Significant]
if (!is.null(path_w1sat)) {
  cat(sprintf("W1 Support → sMSAT_2    : β = %.3f, p = %.4f  %s\n",
              path_w1sat$est.std,
              path_w1sat$pvalue,
              ifelse(path_w1sat$pvalue < .05, "[Significant]", "")))
}
## W1 Support → sMSAT_2    : β = -0.680, p = 0.0165  [Significant]
cat(sprintf("\nModel R² for sMSAT_2    : %.3f\n",
            lavaan::inspect(sem_fit, "r2")["sMSAT_2"]))
## 
## Model R² for sMSAT_2    : 0.802