SETUP

# Core
library(tidyverse)
library(knitr)

# SEM
library(lavaan)      
library(semPlot)      
library(semTools)     

# Assumption Checking
library(MVN)         
library(psych)        
library(moments)     
library(naniar)      
library(car)        

# Visualization
library(ggplot2)
library(corrplot)

# Resolve namespace conflicts
select <- dplyr::select
filter <- dplyr::filter

LOAD DATA & OVERVIEW

Load Data

df_raw <- read.csv("RAP_Dataset_Public_deident.csv",
                   stringsAsFactors = FALSE)

cat("Rows    :", nrow(df_raw), "\n")

## Rows    : 170

cat("Columns :", ncol(df_raw), "\n\n")

## Columns : 32

cat("Column Names:\n")

## Column Names:

cat(paste(names(df_raw), collapse = ", "), "\n")

## Female_1, White_1, Cohort_1, Major_d_2, Major_d_3, Major_d_4, Matching_GPA_1, Rank_d_2, Rank_d_3, WVRes_d_1, Legacy_R_1, SATtotEquiv_1, PellGrantRecipient_R, RAP, FMNTa_1, PriorAflMent_1, FMNTi_1r, gendermatch, ethnicitymatch, SomeRes_1, MoreRes_1, RAPxSome, RAPxMore, MentorPsychSocSupport_w1, MentorInstrumentSupport_w1, MentorRoleModelingSupport_w1, sMSAT_1, sMSIM_1, MentorPsychSocSupport_w2, MentorInstrumentSupport_w2, MentorRoleModelingSupport_w2, sMSAT_2

Data Cleaning & Variable Selection

df <- df_raw

sentinel_cols <- c(
  "MentorPsychSocSupport_w2",
  "MentorInstrumentSupport_w2",
  "MentorRoleModelingSupport_w2",
  "sMSAT_2"
)
for (col in sentinel_cols) {
  df[[col]] <- ifelse(df[[col]] == -99, NA, df[[col]])
}

cat("Missing Data After Recoding -99 to NA:\n")

## Missing Data After Recoding -99 to NA:

print(colSums(is.na(df)))

##                     Female_1                      White_1 
##                            0                            0 
##                     Cohort_1                    Major_d_2 
##                            0                            0 
##                    Major_d_3                    Major_d_4 
##                            0                            0 
##               Matching_GPA_1                     Rank_d_2 
##                            0                            0 
##                     Rank_d_3                    WVRes_d_1 
##                            0                            0 
##                   Legacy_R_1                SATtotEquiv_1 
##                            0                            0 
##         PellGrantRecipient_R                          RAP 
##                            0                            0 
##                      FMNTa_1               PriorAflMent_1 
##                            0                            0 
##                     FMNTi_1r                  gendermatch 
##                            0                            0 
##               ethnicitymatch                    SomeRes_1 
##                            0                            0 
##                    MoreRes_1                     RAPxSome 
##                            0                            0 
##                     RAPxMore     MentorPsychSocSupport_w1 
##                            0                            0 
##   MentorInstrumentSupport_w1 MentorRoleModelingSupport_w1 
##                            0                            0 
##                      sMSAT_1                      sMSIM_1 
##                            0                            0 
##     MentorPsychSocSupport_w2   MentorInstrumentSupport_w2 
##                           66                            0 
## MentorRoleModelingSupport_w2                      sMSAT_2 
##                           66                           66

w1_indicators <- c(
  "MentorPsychSocSupport_w1",
  "MentorInstrumentSupport_w1",
  "MentorRoleModelingSupport_w1"
)

w2_indicators <- c(
  "MentorPsychSocSupport_w2",
  "MentorInstrumentSupport_w2",
  "MentorRoleModelingSupport_w2"
)

sem_vars <- c(
  w1_indicators, w2_indicators,
  "sMSAT_1", "sMSAT_2",
  "gendermatch", "ethnicitymatch",
  "RAP", "FMNTi_1r"
)

df_sem <- df[, sem_vars]

cat("Rows:", nrow(df_sem), "| Columns:", ncol(df_sem), "\n")

## Rows: 170 | Columns: 12

ASSUMPTION CHECKING

3.1 Descriptive Statistics

continuous_sem <- c(w1_indicators, w2_indicators, "sMSAT_1", "sMSAT_2")

desc <- psych::describe(df_sem)

knitr::kable(
  round(desc[, c("n", "mean", "sd", "min", "max", "skew", "kurtosis")], 3),
  caption = "Descriptive Statistics of SEM Variables"
)

Descriptive Statistics of SEM Variables
	n	mean	sd	min	max	skew	kurtosis
MentorPsychSocSupport_w1	170	5.225	1.289	1.000	7	-0.496	-0.240
MentorInstrumentSupport_w1	170	4.971	1.250	1.000	7	-0.560	0.201
MentorRoleModelingSupport_w1	170	5.338	1.228	1.500	7	-0.651	0.296
MentorPsychSocSupport_w2	104	5.322	1.331	1.000	7	-0.709	0.295
MentorInstrumentSupport_w2	170	5.397	1.238	1.000	7	-0.742	0.615
MentorRoleModelingSupport_w2	104	5.368	1.252	1.750	7	-0.393	-0.398
sMSAT_1	170	6.027	1.048	1.333	7	-1.176	1.487
sMSAT_2	104	6.080	1.034	2.333	7	-1.064	0.871
gendermatch	170	0.547	0.499	0.000	1	-0.187	-1.976
ethnicitymatch	170	0.688	0.465	0.000	1	-0.806	-1.359
RAP	170	0.612	0.489	0.000	1	-0.455	-1.804
FMNTi_1r	170	3.332	3.488	0.000	17	1.572	2.146

3.2 Univariate Normality

uni_norm <- data.frame(
  Variable = character(),
  Skewness = numeric(),
  Kurtosis = numeric(),
  Status   = character(),
  stringsAsFactors = FALSE
)

for (v in continuous_sem) {
  vals <- na.omit(df_sem[[v]])
  sk   <- moments::skewness(vals)
  ku   <- moments::kurtosis(vals) - 3  # excess kurtosis
  flag <- ifelse(abs(sk) > 2 | abs(ku) > 7, "NON-NORMAL", "OK")
  uni_norm <- rbind(uni_norm,
                    data.frame(Variable = v,
                               Skewness = round(sk, 3),
                               Kurtosis = round(ku, 3),
                               Status   = flag))
}

knitr::kable(uni_norm,
             caption = "Univariate Normality Check (|skew| < 2, |kurt| < 7)",
             row.names = FALSE)

Univariate Normality Check (|skew| < 2, |kurt| < 7)
Variable	Skewness	Kurtosis	Status
MentorPsychSocSupport_w1	-0.500	-0.207	OK
MentorInstrumentSupport_w1	-0.565	0.239	OK
MentorRoleModelingSupport_w1	-0.657	0.335	OK
MentorPsychSocSupport_w2	-0.719	0.359	OK
MentorInstrumentSupport_w2	-0.748	0.658	OK
MentorRoleModelingSupport_w2	-0.399	-0.347	OK
sMSAT_1	-1.186	1.540	OK
sMSAT_2	-1.079	0.947	OK

3.3 Multivariate Normality (Mardia’s Test)

df_mvn <- na.omit(df_sem[, continuous_sem])

cat("Complete cases for MVN test:", nrow(df_mvn), "\n\n")

## Complete cases for MVN test: 104

mvn_result <- tryCatch({
  MVN::mvn(data = df_mvn,
           mvnTest          = "mardia",
           univarTest       = "SW",
           multivariatePlot = "none")
}, error = function(e) {
  MVN::mvn(data        = df_mvn,
           mvn_test    = "mardia",
           univariate_test = "SW",
           multivariate_outlier_method = "none")
})

mvn_norm <- if (!is.null(mvn_result$multivariateNormality)) {
  mvn_result$multivariateNormality
} else if (!is.null(mvn_result$multivariate_normality)) {
  mvn_result$multivariate_normality
} else {
  mvn_result[[1]] 
}
print(mvn_norm)

##              Test Statistic p.value     Method          MVN
## 1 Mardia Skewness   304.620  <0.001 asymptotic ✗ Not normal
## 2 Mardia Kurtosis     7.234  <0.001 asymptotic ✗ Not normal

cat("\nConclusion: If violated, MLR estimator with FIML will be used.\n")

## 
## Conclusion: If violated, MLR estimator with FIML will be used.

3.4 Multivariate Outlier Detection (Mahalanobis Distance)

mah_dist   <- mahalanobis(df_mvn,
                           center = colMeans(df_mvn),
                           cov    = cov(df_mvn))
p_mah      <- pchisq(mah_dist, df = ncol(df_mvn), lower.tail = FALSE)
n_outliers <- sum(p_mah < 0.001)

cat(sprintf("Outliers (Mahalanobis p < .001): %d out of %d cases\n",
            n_outliers, nrow(df_mvn)))

## Outliers (Mahalanobis p < .001): 1 out of 104 cases

if (n_outliers > 0) {
  cat("Outlier row indices (in complete-case subset):\n")
  print(which(p_mah < 0.001))
}

## Outlier row indices (in complete-case subset):
## 170 
## 104

cat("→ Outliers are retained; MLR estimator handles their influence.\n")

## → Outliers are retained; MLR estimator handles their influence.

3.5 Missing Data Analysis

pct_missing <- data.frame(
  Variable       = names(df_sem),
  Missing_n      = colSums(is.na(df_sem)),
  Missing_pct    = round(colMeans(is.na(df_sem)) * 100, 2),
  row.names      = NULL
)

knitr::kable(pct_missing,
             caption = "Missing Data Summary per Variable",
             row.names = FALSE)

Missing Data Summary per Variable
Variable	Missing_n	Missing_pct
MentorPsychSocSupport_w1	0	0.00
MentorInstrumentSupport_w1	0	0.00
MentorRoleModelingSupport_w1	0	0.00
MentorPsychSocSupport_w2	66	38.82
MentorInstrumentSupport_w2	0	0.00
MentorRoleModelingSupport_w2	66	38.82
sMSAT_1	0	0.00
sMSAT_2	66	38.82
gendermatch	0	0.00
ethnicitymatch	0	0.00
RAP	0	0.00
FMNTi_1r	0	0.00

little_result <- naniar::mcar_test(df_sem)
cat(sprintf(
  "\nLittle's MCAR Test: chi2(%.0f) = %.3f, p = %.4f\n",
  little_result$df,
  little_result$statistic,
  little_result$p.value
))

## 
## Little's MCAR Test: chi2(9) = 25.669, p = 0.0023

if (little_result$p.value > 0.05) {
  cat("→ Data appear MCAR. FIML or listwise deletion both defensible.\n")
} else {
  cat("→ Data NOT MCAR (MAR assumed). FIML estimation in lavaan is required.\n")
}

## → Data NOT MCAR (MAR assumed). FIML estimation in lavaan is required.

cat("→ Strategy adopted: Full Information Maximum Likelihood (FIML).\n")

## → Strategy adopted: Full Information Maximum Likelihood (FIML).

3.6 Sample Size Adequacy

n_total    <- nrow(df_sem)
n_complete <- nrow(df_mvn)

n_params   <- 3 + 3 + 3 + 1 + 4
ratio      <- n_total / n_params

sample_check <- data.frame(
  Metric  = c("Total N", "Complete N",
               "Estimated Free Parameters",
               "N:Parameter Ratio",
               "Minimum Recommended",
               "Status"),
  Value   = c(n_total, n_complete,
               n_params,
               round(ratio, 1),
               "≥ 10:1",
               ifelse(ratio >= 10, "ADEQUATE", "BORDERLINE"))
)

knitr::kable(sample_check,
             caption = "Sample Size Adequacy Check",
             row.names = FALSE)

Sample Size Adequacy Check
Metric	Value
Total N	170
Complete N	104
Estimated Free Parameters	14
N:Parameter Ratio	12.1
Minimum Recommended	≥ 10:1
Status	ADEQUATE

3.7 Indicator Intercorrelations

cor_w1 <- cor(na.omit(df_sem[, w1_indicators]))
cor_w2 <- cor(na.omit(df_sem[, w2_indicators]))

cat("Wave 1 Indicator Intercorrelations:\n")

## Wave 1 Indicator Intercorrelations:

print(round(cor_w1, 3))

##                              MentorPsychSocSupport_w1
## MentorPsychSocSupport_w1                        1.000
## MentorInstrumentSupport_w1                      0.549
## MentorRoleModelingSupport_w1                    0.542
##                              MentorInstrumentSupport_w1
## MentorPsychSocSupport_w1                          0.549
## MentorInstrumentSupport_w1                        1.000
## MentorRoleModelingSupport_w1                      0.498
##                              MentorRoleModelingSupport_w1
## MentorPsychSocSupport_w1                            0.542
## MentorInstrumentSupport_w1                          0.498
## MentorRoleModelingSupport_w1                        1.000

cat("\nWave 2 Indicator Intercorrelations:\n")

## 
## Wave 2 Indicator Intercorrelations:

print(round(cor_w2, 3))

##                              MentorPsychSocSupport_w2
## MentorPsychSocSupport_w2                        1.000
## MentorInstrumentSupport_w2                      0.608
## MentorRoleModelingSupport_w2                    0.591
##                              MentorInstrumentSupport_w2
## MentorPsychSocSupport_w2                          0.608
## MentorInstrumentSupport_w2                        1.000
## MentorRoleModelingSupport_w2                      0.613
##                              MentorRoleModelingSupport_w2
## MentorPsychSocSupport_w2                            0.591
## MentorInstrumentSupport_w2                          0.613
## MentorRoleModelingSupport_w2                        1.000

3.8 Internal Consistency (Cronbach’s Alpha)

alpha_w1 <- psych::alpha(na.omit(df_sem[, w1_indicators]))
alpha_w2 <- psych::alpha(na.omit(df_sem[, w2_indicators]))

alpha_summary <- data.frame(
  Construct       = c("MentorSupport_W1", "MentorSupport_W2"),
  Cronbach_Alpha  = round(c(alpha_w1$total$raw_alpha,
                             alpha_w2$total$raw_alpha), 3),
  Status          = ifelse(
    c(alpha_w1$total$raw_alpha, alpha_w2$total$raw_alpha) >= 0.80,
    "Good (≥ 0.80)",
    ifelse(
      c(alpha_w1$total$raw_alpha, alpha_w2$total$raw_alpha) >= 0.70,
      "Acceptable (≥ 0.70)", "Poor (< 0.70)"
    )
  )
)

knitr::kable(alpha_summary,
             caption = "Cronbach's Alpha — Internal Consistency",
             row.names = FALSE)

Cronbach’s Alpha — Internal Consistency
Construct	Cronbach_Alpha	Status
MentorSupport_W1	0.772	Acceptable (≥ 0.70)
MentorSupport_W2	0.814	Good (≥ 0.80)

3.9 Bartlett’s Test & KMO

df_all_ind   <- na.omit(df_sem[, c(w1_indicators, w2_indicators)])
bartlett_res <- psych::cortest.bartlett(cor(df_all_ind),
                                         n = nrow(df_all_ind))
kmo_res      <- psych::KMO(cor(df_all_ind))

kmo_label <- ifelse(kmo_res$MSA >= 0.9, "Marvelous",
             ifelse(kmo_res$MSA >= 0.8, "Meritorious",
             ifelse(kmo_res$MSA >= 0.7, "Middling",
             ifelse(kmo_res$MSA >= 0.6, "Mediocre", "Miserable"))))

bartlett_kmo <- data.frame(
  Test    = c("Bartlett's Test of Sphericity", "KMO (Overall MSA)"),
  Value   = c(round(bartlett_res$chisq, 3), round(kmo_res$MSA, 3)),
  df_p    = c(paste0("df = ", bartlett_res$df,
                     ", p = ", round(bartlett_res$p.value, 6)),
              paste0("[", kmo_label, "]")),
  Status  = c(ifelse(bartlett_res$p.value < 0.05, "PASSED", "FAILED"),
              ifelse(kmo_res$MSA >= 0.60,          "PASSED", "FAILED"))
)

knitr::kable(bartlett_kmo,
             caption = "Bartlett's Test & KMO — Factor Structure Validity",
             col.names = c("Test", "Value", "Detail", "Status"),
             row.names = FALSE)

Bartlett’s Test & KMO — Factor Structure Validity
Test	Value	Detail	Status
Bartlett’s Test of Sphericity	270.833	df = 15, p = 0	PASSED
KMO (Overall MSA)	0.675	[Mediocre]	PASSED

Assumption Summary

assumption_summary <- data.frame(
  No         = 1:9,
  Assumption = c(
    "Univariate Normality",
    "Multivariate Normality",
    "Multivariate Outliers",
    "Missing Data (MCAR/MAR)",
    "Sample Size Adequacy",
    "Indicator Intercorrelations",
    "Internal Consistency (Alpha)",
    "Bartlett's Sphericity",
    "KMO (MSA)"
  ),
  Test_Method = c(
    "Skewness |<2|, Kurtosis |<7|",
    "Mardia's Test",
    "Mahalanobis Distance (p < .001)",
    "Little's MCAR Test",
    "N:parameter ratio ≥ 10:1",
    "Pearson Correlation Matrix",
    "Cronbach's Alpha ≥ .70",
    "χ² significance (p < .05)",
    "MSA ≥ .60"
  ),
  Decision = c(
    "See output above",
    "MLR estimator applied if violated",
    "Retained; handled by MLR",
    "FIML adopted",
    "See output above",
    "Within acceptable range",
    "See output above",
    "See output above",
    "See output above"
  )
)

knitr::kable(assumption_summary,
             caption = "Summary Table — SEM Assumption Checking",
             row.names = FALSE)

Summary Table — SEM Assumption Checking
No	Assumption	Test_Method	Decision
1	Univariate Normality	Skewness \|<2\|, Kurtosis \|<7\|	See output above
2	Multivariate Normality	Mardia’s Test	MLR estimator applied if violated
3	Multivariate Outliers	Mahalanobis Distance (p < .001)	Retained; handled by MLR
4	Missing Data (MCAR/MAR)	Little’s MCAR Test	FIML adopted
5	Sample Size Adequacy	N:parameter ratio ≥ 10:1	See output above
6	Indicator Intercorrelations	Pearson Correlation Matrix	Within acceptable range
7	Internal Consistency (Alpha)	Cronbach’s Alpha ≥ .70	See output above
8	Bartlett’s Sphericity	χ² significance (p < .05)	See output above
9	KMO (MSA)	MSA ≥ .60	See output above

CONFIRMATORY FACTOR ANALYSIS (CFA)

Model Specification

cfa_model <- '
  # Latent factor: Wave 1 Mentor Support
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  # Latent factor: Wave 2 Mentor Support
  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2
'

cfa_fit <- lavaan::cfa(
  model     = cfa_model,
  data      = df_sem,
  estimator = "MLR",  
  missing   = "FIML",
  std.lv    = TRUE    
)

CFA Fit Indices

cfa_fit_idx <- lavaan::fitmeasures(cfa_fit, c(
  "chisq", "df", "pvalue",
  "cfi", "tli",
  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
  "srmr", "aic", "bic"
))

cfi_c   <- cfa_fit_idx["cfi"]
rmsea_c <- cfa_fit_idx["rmsea"]
srmr_c  <- cfa_fit_idx["srmr"]

cfa_fit_table <- data.frame(
  Index     = c("Chi-Square (χ²)", "df", "p-value",
                 "CFI", "TLI",
                 "RMSEA", "RMSEA 90% CI Lower", "RMSEA 90% CI Upper",
                 "SRMR", "AIC", "BIC"),
  Value     = round(as.numeric(cfa_fit_idx), 4),
  Threshold = c("—", "—", "> .05",
                 "≥ .95", "≥ .95",
                 "≤ .06", "—", "—",
                 "≤ .08", "Lower = better", "Lower = better"),
  Status    = c("—", "—",
                 ifelse(cfa_fit_idx["pvalue"] > .05, "Good", "Check"),
                 ifelse(cfi_c   >= .95, "Good", ifelse(cfi_c   >= .90, "Acceptable", "Poor")),
                 ifelse(cfa_fit_idx["tli"] >= .95, "Good",
                        ifelse(cfa_fit_idx["tli"] >= .90, "Acceptable", "Poor")),
                 ifelse(rmsea_c <= .06, "Good", ifelse(rmsea_c <= .08, "Acceptable", "Poor")),
                 "—", "—",
                 ifelse(srmr_c  <= .08, "Good", ifelse(srmr_c  <= .10, "Acceptable", "Poor")),
                 "—", "—")
)

knitr::kable(cfa_fit_table,
             caption = "CFA Fit Indices",
             row.names = FALSE)

CFA Fit Indices
Index	Value	Threshold	Status
Chi-Square (χ²)	64.4678	—	—
df	8.0000	—	—
p-value	0.0000	> .05	Check
CFI	0.8530	≥ .95	Poor
TLI	0.7243	≥ .95	Poor
RMSEA	0.2038	≤ .06	Poor
RMSEA 90% CI Lower	0.1593	—	—
RMSEA 90% CI Upper	0.2513	—	—
SRMR	0.0580	≤ .08	Good
AIC	2628.0817	Lower = better	—
BIC	2687.6619	Lower = better	—

CFA Standardized Loadings

cfa_params   <- lavaan::standardizedsolution(cfa_fit)
cfa_loadings <- cfa_params[cfa_params$op == "=~", ]

cfa_load_tbl <- cfa_loadings[, c("lhs", "rhs", "est.std", "se", "z", "pvalue")]
cfa_load_tbl[, c("est.std", "se", "z", "pvalue")] <-
  round(cfa_load_tbl[, c("est.std", "se", "z", "pvalue")], 4)

knitr::kable(
  cfa_load_tbl,
  caption   = "CFA Standardized Factor Loadings",
  col.names = c("Latent Factor", "Indicator", "Std. Loading", "SE", "z", "p-value"),
  row.names = FALSE
)

CFA Standardized Factor Loadings
Latent Factor	Indicator	Std. Loading	SE	z
MentorSupport_W1	MentorPsychSocSupport_w1	0.6801	0.0673	10.1064
MentorSupport_W1	MentorInstrumentSupport_w1	0.8309	0.0569	14.6073
MentorSupport_W1	MentorRoleModelingSupport_w1	0.6383	0.0783	8.1576
MentorSupport_W2	MentorPsychSocSupport_w2	0.7407	0.0701	10.5610
MentorSupport_W2	MentorInstrumentSupport_w2	0.9353	0.0502	18.6485
MentorSupport_W2	MentorRoleModelingSupport_w2	0.7366	0.0728	10.1237

cat("\nRule: Standardized loadings >= .50 acceptable, >= .70 good.\n")

## 
## Rule: Standardized loadings >= .50 acceptable, >= .70 good.

Convergent & Discriminant Validity

# AVE and CR helper functions
compute_AVE <- function(fit, factor_name) {
  params   <- lavaan::standardizedsolution(fit)
  loadings <- params[params$op == "=~" &
                       params$lhs == factor_name, "est.std"]
  mean(loadings^2)
}

compute_CR <- function(fit, factor_name) {
  params   <- lavaan::standardizedsolution(fit)
  loadings <- params[params$op == "=~" &
                       params$lhs == factor_name, "est.std"]
  sum(loadings)^2 / (sum(loadings)^2 + sum(1 - loadings^2))
}

ave_w1 <- compute_AVE(cfa_fit, "MentorSupport_W1")
ave_w2 <- compute_AVE(cfa_fit, "MentorSupport_W2")
cr_w1  <- compute_CR(cfa_fit,  "MentorSupport_W1")
cr_w2  <- compute_CR(cfa_fit,  "MentorSupport_W2")

validity_table <- data.frame(
  Construct = c("MentorSupport_W1", "MentorSupport_W2"),
  AVE       = round(c(ave_w1, ave_w2), 3),
  CR        = round(c(cr_w1,  cr_w2),  3),
  AVE_status = ifelse(c(ave_w1, ave_w2) >= .50,
                       "Convergent Valid (≥ .50)", "Poor (< .50)"),
  CR_status  = ifelse(c(cr_w1, cr_w2) >= .70,
                       "Acceptable (≥ .70)", "Poor (< .70)")
)

knitr::kable(validity_table,
             caption = "Convergent Validity: AVE and Composite Reliability (CR)",
             row.names = FALSE)

Convergent Validity: AVE and Composite Reliability (CR)
Construct	AVE	CR	AVE_status	CR_status
MentorSupport_W1	0.520	0.762	Convergent Valid (≥ .50)	Acceptable (≥ .70)
MentorSupport_W2	0.655	0.849	Convergent Valid (≥ .50)	Acceptable (≥ .70)

factor_corr <- cfa_params[cfa_params$op == "~~" &
                             cfa_params$lhs != cfa_params$rhs &
                             cfa_params$lhs %in% c("MentorSupport_W1",
                                                    "MentorSupport_W2"), ]
if (nrow(factor_corr) > 0) {
  phi <- factor_corr$est.std[1]
  cat(sprintf("\nDiscriminant Validity (Fornell-Larcker Criterion)\n"))
  cat(sprintf("Latent factor correlation (φ)  = %.3f\n", phi))
  cat(sprintf("sqrt(AVE_W1) = %.3f  |  sqrt(AVE_W2) = %.3f\n",
              sqrt(ave_w1), sqrt(ave_w2)))
  if (abs(phi) < min(sqrt(ave_w1), sqrt(ave_w2))) {
    cat("→ Discriminant validity SUPPORTED (|φ| < min(sqrt(AVE))).\n")
  } else {
    cat("→ Discriminant validity QUESTIONABLE (|φ| ≥ sqrt(AVE)).\n")
  }
}

## 
## Discriminant Validity (Fornell-Larcker Criterion)
## Latent factor correlation (φ)  = 0.798
## sqrt(AVE_W1) = 0.721  |  sqrt(AVE_W2) = 0.810
## → Discriminant validity QUESTIONABLE (|φ| ≥ sqrt(AVE)).

FULL SEM MODEL (Structural Model)

Model Specification

sem_model <- '
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2

  MentorSupport_W2 ~ MentorSupport_W1 + RAP + gendermatch +
                     ethnicitymatch + FMNTi_1r

  sMSAT_2 ~ MentorSupport_W2 + MentorSupport_W1 + sMSAT_1 +
             RAP + gendermatch + ethnicitymatch + FMNTi_1r

  RAP            ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
  gendermatch    ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
  ethnicitymatch ~~ FMNTi_1r + sMSAT_1
  FMNTi_1r       ~~ sMSAT_1
'

sem_fit <- lavaan::sem(
  model     = sem_model,
  data      = df_sem,
  estimator = "MLR",
  missing   = "FIML",
  std.lv    = TRUE
)

sem_fit_idx <- lavaan::fitmeasures(sem_fit, c(
  "chisq", "df", "pvalue",
  "cfi", "tli",
  "rmsea", "rmsea.ci.lower", "rmsea.ci.upper",
  "srmr", "aic", "bic"
))

5.1 Global Fit Indices

cfi_s   <- sem_fit_idx["cfi"]
tli_s   <- sem_fit_idx["tli"]
rmsea_s <- sem_fit_idx["rmsea"]
srmr_s  <- sem_fit_idx["srmr"]

sem_fit_table <- data.frame(
  Index     = c("Chi-Square (χ²)", "df", "p-value",
                 "CFI", "TLI",
                 "RMSEA", "RMSEA 90% CI Lower", "RMSEA 90% CI Upper",
                 "SRMR", "AIC", "BIC"),
  Value     = round(as.numeric(sem_fit_idx), 4),
  Threshold = c("—", "—", "> .05",
                 "≥ .95", "≥ .95",
                 "≤ .06", "—", "—",
                 "≤ .08", "Lower = better", "Lower = better"),
  Status    = c("—", "—",
                 ifelse(sem_fit_idx["pvalue"] > .05, "Good", "Check"),
                 ifelse(cfi_s   >= .95, "Good", ifelse(cfi_s   >= .90, "Acceptable", "Poor")),
                 ifelse(tli_s   >= .95, "Good", ifelse(tli_s   >= .90, "Acceptable", "Poor")),
                 ifelse(rmsea_s <= .06, "Good", ifelse(rmsea_s <= .08, "Acceptable", "Poor")),
                 "—", "—",
                 ifelse(srmr_s  <= .08, "Good", ifelse(srmr_s  <= .10, "Acceptable", "Poor")),
                 "—", "—")
)

knitr::kable(sem_fit_table,
             caption = "SEM Global Fit Indices",
             row.names = FALSE)

SEM Global Fit Indices
Index	Value	Threshold	Status
Chi-Square (χ²)	183.7703	—	—
df	38.0000	—	—
p-value	0.0000	> .05	Check
CFI	0.7460	≥ .95	Poor
TLI	0.5589	≥ .95	Poor
RMSEA	0.1502	≤ .06	Poor
RMSEA 90% CI Lower	0.1289	—	—
RMSEA 90% CI Upper	0.1723	—	—
SRMR	0.1375	≤ .08	Poor
AIC	4981.2943	Lower = better	—
BIC	5144.3559	Lower = better	—

5.2 Structural Path Estimates (Standardized)

sem_params   <- lavaan::standardizedsolution(sem_fit)
struct_paths <- sem_params[sem_params$op == "~", ]

num_cols <- c("est.std", "se", "z", "pvalue", "ci.lower", "ci.upper")
sp_tbl   <- struct_paths[, c("lhs", "rhs", num_cols)]
sp_tbl[, num_cols] <- round(sp_tbl[, num_cols], 4)

knitr::kable(
  sp_tbl,
  caption   = "Structural Path Estimates (Standardized)",
  col.names = c("Outcome", "Predictor", "Beta (Std.)", "SE",
                 "z", "p-value", "CI Lower", "CI Upper"),
  row.names = FALSE
)

Structural Path Estimates (Standardized)
Outcome	Predictor	Beta (Std.)	SE	z	p-value	CI Lower	CI Upper
MentorSupport_W2	MentorSupport_W1	0.8119	0.0720	11.2807	0.0000	0.6708	0.9530
MentorSupport_W2	RAP	-0.0754	0.0629	-1.1989	0.2306	-0.1988	0.0479
MentorSupport_W2	gendermatch	-0.0695	0.0610	-1.1397	0.2544	-0.1889	0.0500
MentorSupport_W2	ethnicitymatch	0.0344	0.0682	0.5043	0.6140	-0.0993	0.1680
MentorSupport_W2	FMNTi_1r	-0.1306	0.0501	-2.6088	0.0091	-0.2287	-0.0325
sMSAT_2	MentorSupport_W2	1.2800	0.2619	4.8868	0.0000	0.7666	1.7933
sMSAT_2	MentorSupport_W1	-0.6804	0.2837	-2.3987	0.0165	-1.2364	-0.1245
sMSAT_2	sMSAT_1	0.4051	0.0986	4.1085	0.0000	0.2118	0.5983
sMSAT_2	RAP	0.0494	0.0846	0.5836	0.5595	-0.1164	0.2151
sMSAT_2	gendermatch	0.0050	0.0795	0.0630	0.9498	-0.1509	0.1609
sMSAT_2	ethnicitymatch	-0.0021	0.0745	-0.0285	0.9773	-0.1482	0.1439
sMSAT_2	FMNTi_1r	0.0403	0.0904	0.4453	0.6561	-0.1369	0.2175

5.3 Unstandardized Solution

all_params  <- lavaan::parameterestimates(sem_fit,
                                           standardized = TRUE,
                                           ci           = TRUE,
                                           level        = 0.95)
struct_unstd <- all_params[all_params$op == "~",
                             c("lhs", "rhs", "est", "se", "z",
                               "pvalue", "ci.lower", "ci.upper", "std.all")]

num_cols2 <- c("est", "se", "z", "pvalue", "ci.lower", "ci.upper", "std.all")
su_tbl    <- struct_unstd
su_tbl[, num_cols2] <- round(su_tbl[, num_cols2], 4)

knitr::kable(
  su_tbl,
  caption   = "Structural Path Estimates (Unstandardized with Std. Solution)",
  col.names = c("Outcome", "Predictor", "B", "SE", "z",
                 "p-value", "CI Lower", "CI Upper", "Beta (Std.)"),
  row.names = FALSE
)

Structural Path Estimates (Unstandardized with Std. Solution)
Outcome	Predictor	B	SE	z	p-value	CI Lower	CI Upper	Beta (Std.)
MentorSupport_W2	MentorSupport_W1	1.4555	0.3950	3.6844	0.0002	0.6812	2.2298	0.8119
MentorSupport_W2	RAP	-0.2775	0.2258	-1.2290	0.2191	-0.7201	0.1651	-0.0754
MentorSupport_W2	gendermatch	-0.2502	0.2287	-1.0938	0.2740	-0.6985	0.1981	-0.0695
MentorSupport_W2	ethnicitymatch	0.1331	0.2688	0.4952	0.6204	-0.3937	0.6598	0.0344
MentorSupport_W2	FMNTi_1r	-0.0673	0.0274	-2.4589	0.0139	-0.1210	-0.0137	-0.1306
sMSAT_2	MentorSupport_W2	0.7509	0.1104	6.8016	0.0000	0.5345	0.9673	1.2800
sMSAT_2	MentorSupport_W1	-0.7156	0.2910	-2.4593	0.0139	-1.2860	-0.1453	-0.6804
sMSAT_2	sMSAT_1	0.4077	0.0845	4.8235	0.0000	0.2420	0.5733	0.4051
sMSAT_2	RAP	0.1065	0.1826	0.5833	0.5597	-0.2514	0.4644	0.0494
sMSAT_2	gendermatch	0.0106	0.1680	0.0630	0.9498	-0.3187	0.3399	0.0050
sMSAT_2	ethnicitymatch	-0.0048	0.1691	-0.0285	0.9772	-0.3363	0.3267	-0.0021
sMSAT_2	FMNTi_1r	0.0122	0.0272	0.4470	0.6549	-0.0412	0.0656	0.0403

5.4 R² (Variance Explained)

r2_vals <- lavaan::inspect(sem_fit, "r2")

r2_table <- data.frame(
  Variable   = names(r2_vals),
  R_squared  = round(as.numeric(r2_vals), 3)
)

knitr::kable(r2_table,
             caption = "R² — Variance Explained per Endogenous Variable",
             row.names = FALSE)

R² — Variance Explained per Endogenous Variable
Variable	R_squared
MentorPsychSocSupport_w1	0.470
MentorInstrumentSupport_w1	0.671
MentorRoleModelingSupport_w1	0.425
MentorPsychSocSupport_w2	0.630
MentorInstrumentSupport_w2	0.808
MentorRoleModelingSupport_w2	0.593
sMSAT_2	0.802
MentorSupport_W2	0.689

MEDIATION ANALYSIS

Indirect Effect: MentorSupport_W1 → W2 → sMSAT_2

sem_mediation <- '
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2

  MentorSupport_W2 ~ a * MentorSupport_W1 + RAP + gendermatch +
                         ethnicitymatch + FMNTi_1r

  sMSAT_2 ~ b * MentorSupport_W2 + c * MentorSupport_W1 + sMSAT_1 +
              RAP + gendermatch + ethnicitymatch + FMNTi_1r

  RAP            ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
  gendermatch    ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
  ethnicitymatch ~~ FMNTi_1r + sMSAT_1
  FMNTi_1r       ~~ sMSAT_1

  indirect := a * b         # W1 → W2 → sMSAT_2
  direct   := c             # W1 → sMSAT_2 (direct)
  total    := a * b + c     # Total effect
'

set.seed(123)
sem_med_fit <- lavaan::sem(
  model     = sem_mediation,
  data      = df_sem,
  estimator = "ML",        
  missing   = "FIML",      
  std.lv    = TRUE,
  se        = "bootstrap", 
  bootstrap = 1000         
)

Mediation Results

med_params  <- lavaan::parameterestimates(
  sem_med_fit,
  ci           = TRUE,
  level        = 0.95,
  boot.ci.type = "perc"
)

med_effects <- med_params[
  med_params$label %in% c("a", "b", "c", "indirect", "direct", "total"), ]

med_table <- med_effects[, c("label", "est", "se", "z",
                               "pvalue", "ci.lower", "ci.upper")]
med_table$Interpretation <- c(
  "W1 → W2 (autoregressive path)",
  "W2 → sMSAT_2 (W2 on outcome)",
  "W1 → sMSAT_2 (direct effect)",
  "W1 → W2 → sMSAT_2 (indirect/mediated)",
  "Direct effect W1 → sMSAT_2",
  "Total effect W1 → sMSAT_2"
)

num_cols3 <- c("est", "se", "z", "pvalue", "ci.lower", "ci.upper")
mt_tbl    <- med_table[, c("label", num_cols3)]
mt_tbl[, num_cols3] <- round(mt_tbl[, num_cols3], 4)

knitr::kable(
  mt_tbl,
  caption   = "Mediation Effects (Bootstrap 95% CI, 1000 replications)",
  col.names = c("Effect", "Estimate", "SE", "z", "p-value",
                 "CI Lower", "CI Upper"),
  row.names = FALSE
)

Mediation Effects (Bootstrap 95% CI, 1000 replications)
Effect	Estimate	SE	z	p-value	CI Lower	CI Upper
a	1.4555	5.0274	0.2895	0.7722	0.8939	3.7887
b	0.7509	0.7786	0.9644	0.3348	0.4970	1.1031
c	-0.7156	44.9590	-0.0159	0.9873	-3.1866	-0.2597
indirect	1.0930	44.9604	0.0243	0.9806	0.6203	3.6789
direct	-0.7156	44.9590	-0.0159	0.9873	-3.1866	-0.2597
total	0.3773	0.1511	2.4971	0.0125	0.0909	0.7134

cat("\nInterpretation guide:\n")

## 
## Interpretation guide:

cat("  a        = W1 Mentor Support → W2 Mentor Support\n")

##   a        = W1 Mentor Support → W2 Mentor Support

cat("  b        = W2 Mentor Support → Satisfaction (sMSAT_2)\n")

##   b        = W2 Mentor Support → Satisfaction (sMSAT_2)

cat("  c        = Direct: W1 → sMSAT_2 (bypassing W2)\n")

##   c        = Direct: W1 → sMSAT_2 (bypassing W2)

cat("  indirect = Mediated path (a × b)\n")

##   indirect = Mediated path (a × b)

cat("  total    = indirect + direct\n")

##   total    = indirect + direct

cat("\nIf indirect significant & direct non-sig → Full mediation\n")

## 
## If indirect significant & direct non-sig → Full mediation

cat("If both significant                       → Partial mediation\n")

## If both significant                       → Partial mediation

MODEL COMPARISON (Partial vs. Full Mediation)

sem_restricted <- '
  MentorSupport_W1 =~ MentorPsychSocSupport_w1 +
                      MentorInstrumentSupport_w1 +
                      MentorRoleModelingSupport_w1

  MentorSupport_W2 =~ MentorPsychSocSupport_w2 +
                      MentorInstrumentSupport_w2 +
                      MentorRoleModelingSupport_w2

  MentorSupport_W2 ~ MentorSupport_W1 + RAP + gendermatch +
                     ethnicitymatch + FMNTi_1r

  sMSAT_2 ~ MentorSupport_W2 + sMSAT_1 +
             RAP + gendermatch + ethnicitymatch + FMNTi_1r

  RAP            ~~ gendermatch + ethnicitymatch + FMNTi_1r + sMSAT_1
  gendermatch    ~~ ethnicitymatch + FMNTi_1r + sMSAT_1
  ethnicitymatch ~~ FMNTi_1r + sMSAT_1
  FMNTi_1r       ~~ sMSAT_1
'

sem_restricted_fit <- lavaan::sem(
  model     = sem_restricted,
  data      = df_sem,
  estimator = "MLR",
  missing   = "FIML",
  std.lv    = TRUE
)

model_comp <- data.frame(
  Model   = c("Full Model (Partial Mediation)",
               "Restricted (Full Mediation)"),
  AIC     = round(c(AIC(sem_fit), AIC(sem_restricted_fit)), 2),
  BIC     = round(c(BIC(sem_fit), BIC(sem_restricted_fit)), 2)
)

knitr::kable(model_comp,
             caption = "AIC/BIC Model Comparison (Lower = Better Fit)",
             row.names = FALSE)

AIC/BIC Model Comparison (Lower = Better Fit)
Model	AIC	BIC
Full Model (Partial Mediation)	4981.29	5144.36
Restricted (Full Mediation)	4994.84	5154.76

cat("\nChi-Square Difference Test (Satorra-Bentler)\n")

## 
## Chi-Square Difference Test (Satorra-Bentler)

lrt_result <- lavaan::lavTestLRT(sem_fit, sem_restricted_fit)
print(lrt_result)

## 
## Scaled Chi-Squared Difference Test (method = "satorra.bentler.2001")
## 
## lavaan->lavTestLRT():  
##    lavaan NOTE: The "Chisq" column contains standard test statistics, not the 
##    robust test that should be reported per model. A robust difference test is 
##    a function of two standard (not robust) statistics.
## 
##                    Df    AIC    BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
## sem_fit            38 4981.3 5144.4 183.77                                  
## sem_restricted_fit 39 4994.8 5154.8 199.31     26.386       1  2.796e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("Significant Δχ² → Full model fits significantly better (partial mediation supported)\n")

## Significant Δχ² → Full model fits significantly better (partial mediation supported)

MODIFICATION INDICES

cat("MI > 10 suggests freeing that parameter may improve fit.\n")

## MI > 10 suggests freeing that parameter may improve fit.

cat("Apply only when theoretically justified — not data-dredging.\n\n")

## Apply only when theoretically justified — not data-dredging.

mi <- lavaan::modificationIndices(sem_fit,
                                   sort.          = TRUE,
                                   maximum.number = 10)

knitr::kable(
  mi[, c("lhs", "op", "rhs", "mi", "epc")],
  caption = "Top 10 Modification Indices",
  col.names = c("LHS", "Op", "RHS", "MI", "EPC"),
  row.names = FALSE,
  digits    = 3
)

Top 10 Modification Indices
LHS	Op	RHS	MI	EPC
MentorSupport_W1	~	sMSAT_1	61.663	0.632
sMSAT_1	~	MentorSupport_W1	56.734	0.639
MentorSupport_W1	~	sMSAT_2	52.391	1.433
MentorInstrumentSupport_w1		MentorInstrumentSupport_w2	39.988	0.413
sMSAT_1	~	MentorSupport_W2	39.314	0.295
sMSAT_1	~	sMSAT_2	16.531	0.451
MentorRoleModelingSupport_w1		MentorRoleModelingSupport_w2	15.004	0.353
MentorInstrumentSupport_w1		MentorRoleModelingSupport_w2	13.665	-0.309
MentorRoleModelingSupport_w1		sMSAT_1	12.362	0.270
MentorSupport_W1	=~	MentorInstrumentSupport_w2	11.604	0.566

VISUALIZATIONS

Correlation Matrix Heatmap

cor_mat <- cor(na.omit(df_sem[, c(w1_indicators, w2_indicators,
                                   "sMSAT_1", "sMSAT_2")]))

corrplot::corrplot(
  cor_mat,
  method      = "color",
  type        = "lower",
  addCoef.col = "black",
  number.cex  = 0.75,
  tl.cex      = 0.8,
  col         = corrplot::COL2("RdBu", 200),
  title       = "Correlation Matrix — SEM Indicators",
  mar         = c(0, 0, 2, 0)
)

Fit Index Summary Plot

fit_summary <- data.frame(
  Index     = c("CFI", "TLI", "RMSEA", "SRMR"),
  Value     = c(sem_fit_idx["cfi"],
                sem_fit_idx["tli"],
                sem_fit_idx["rmsea"],
                sem_fit_idx["srmr"]),
  Threshold = c(0.95, 0.95, 0.06, 0.08),
  Direction = c("higher", "higher", "lower", "lower")
)

fit_summary$Pass <- with(fit_summary,
  ifelse(Direction == "higher",
         Value >= Threshold,
         Value <= Threshold))

ggplot(fit_summary, aes(x = Index, y = Value, fill = Pass)) +
  geom_col(width = 0.5) +
  geom_hline(aes(yintercept = Threshold),
             linetype = "dashed", color = "red", linewidth = 0.8) +
  scale_fill_manual(values = c("TRUE"  = "#4CAF50",
                                "FALSE" = "#F44336"),
                    labels  = c("TRUE"  = "Pass",
                                "FALSE" = "Fail")) +
  facet_wrap(~ Index, scales = "free") +
  labs(title    = "SEM Fit Indices vs. Thresholds",
       subtitle = "Dashed line = cutoff threshold",
       y        = "Value",
       x        = "",
       fill     = "Meets Threshold") +
  theme_minimal(base_size = 12)

SEM Path Diagram

semPlot::semPaths(
  object         = sem_fit,
  what           = "std",
  whatLabels     = "std",
  style          = "ram",
  layout         = "tree2",
  rotation       = 2,
  fade           = FALSE,
  edge.label.cex = 0.8,
  node.width     = 1.5,
  node.height    = 1.0,
  mar            = c(5, 5, 5, 5),
  curvePivot     = TRUE,
  residuals      = TRUE,
  intercepts     = FALSE,
  color          = list(
    lat = "#2E86AB",   # Blue  — latent variables
    man = "#A23B72"    # Purple — manifest (observed) variables
  )
)
title("SEM Path Diagram — Standardized Estimates\n(RAP Mentorship Dataset)",
      cex.main = 1.0)

FINAL SUMMARY

cat("Model Overview\n")

## Model Overview

cat("Estimator       : MLR (Robust Maximum Likelihood)\n")

## Estimator       : MLR (Robust Maximum Likelihood)

cat("Missing data    : FIML (Full Information Maximum Likelihood)\n")

## Missing data    : FIML (Full Information Maximum Likelihood)

cat("Latent variables: MentorSupport_W1, MentorSupport_W2\n")

## Latent variables: MentorSupport_W1, MentorSupport_W2

cat("Outcome         : sMSAT_2 (mentee satisfaction, Wave 2)\n")

## Outcome         : sMSAT_2 (mentee satisfaction, Wave 2)

cat("Covariates      : RAP, gendermatch, ethnicitymatch, FMNTi_1r, sMSAT_1\n\n")

## Covariates      : RAP, gendermatch, ethnicitymatch, FMNTi_1r, sMSAT_1

# Key structural paths
std_sol  <- lavaan::standardizedsolution(sem_fit)
get_path <- function(lhs, rhs) {
  row <- std_sol[std_sol$lhs == lhs &
                   std_sol$rhs == rhs &
                   std_sol$op  == "~", ]
  if (nrow(row) == 0) return(NULL)
  row
}

path_w1w2  <- get_path("MentorSupport_W2", "MentorSupport_W1")
path_w2sat <- get_path("sMSAT_2", "MentorSupport_W2")
path_w1sat <- get_path("sMSAT_2", "MentorSupport_W1")

cat("Key Path Coefficients\n")

## Key Path Coefficients

if (!is.null(path_w1w2)) {
  cat(sprintf("W1 → W2 Mentor Support  : β = %.3f, p = %.4f  %s\n",
              path_w1w2$est.std,
              path_w1w2$pvalue,
              ifelse(path_w1w2$pvalue < .05, "[Significant]", "")))
}

## W1 → W2 Mentor Support  : β = 0.812, p = 0.0000  [Significant]

if (!is.null(path_w2sat)) {
  cat(sprintf("W2 Support → sMSAT_2    : β = %.3f, p = %.4f  %s\n",
              path_w2sat$est.std,
              path_w2sat$pvalue,
              ifelse(path_w2sat$pvalue < .05, "[Significant]", "")))
}

## W2 Support → sMSAT_2    : β = 1.280, p = 0.0000  [Significant]

if (!is.null(path_w1sat)) {
  cat(sprintf("W1 Support → sMSAT_2    : β = %.3f, p = %.4f  %s\n",
              path_w1sat$est.std,
              path_w1sat$pvalue,
              ifelse(path_w1sat$pvalue < .05, "[Significant]", "")))
}

## W1 Support → sMSAT_2    : β = -0.680, p = 0.0165  [Significant]

cat(sprintf("\nModel R² for sMSAT_2    : %.3f\n",
            lavaan::inspect(sem_fit, "r2")["sMSAT_2"]))

## 
## Model R² for sMSAT_2    : 0.802

Modul 5

Implementation of Structural Equation Modeling (SEM) on the RAP Apprenticeship Training and Mentorship Outcomes Dataset

Mylovia Mahesa Ayu (24031554001)

Syafira Najema Putri Anisa (24031554013)

Rizki Piji Fathoni (24031554029)

Class INT24 | Lecturer: Ulfa Siti Nuraini., S.Stat., M.Stat.

SETUP

LOAD DATA & OVERVIEW

Load Data

Data Cleaning & Variable Selection

ASSUMPTION CHECKING

3.1 Descriptive Statistics

3.2 Univariate Normality

3.3 Multivariate Normality (Mardia’s Test)

3.4 Multivariate Outlier Detection (Mahalanobis Distance)

3.5 Missing Data Analysis

3.6 Sample Size Adequacy

3.7 Indicator Intercorrelations

3.8 Internal Consistency (Cronbach’s Alpha)

3.9 Bartlett’s Test & KMO

Assumption Summary

CONFIRMATORY FACTOR ANALYSIS (CFA)

Model Specification

CFA Fit Indices

CFA Standardized Loadings

Convergent & Discriminant Validity

FULL SEM MODEL (Structural Model)

Model Specification

5.1 Global Fit Indices

5.2 Structural Path Estimates (Standardized)

5.3 Unstandardized Solution

5.4 R² (Variance Explained)

MEDIATION ANALYSIS

Indirect Effect: MentorSupport_W1 → W2 → sMSAT_2

Mediation Results

MODEL COMPARISON (Partial vs. Full Mediation)

MODIFICATION INDICES

VISUALIZATIONS

Correlation Matrix Heatmap

Fit Index Summary Plot

SEM Path Diagram

FINAL SUMMARY