library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
library(semPlot)
## Warning in check_dep_version(): ABI version mismatch:
## lme4 was built with Matrix ABI version 1
## Current Matrix ABI version is 0
## Please re-install lme4 from source or restore original 'Matrix' package
data_sem <- read.csv('./data/EEG_RS_lanExp_income_imputated_all_v5.csv') %>%
rename(Age = Age_EEG_yr)
model_2 <- '
# Level 1: Direct paths
CDRAN ~ a * PW + SES + Age
CWR ~ b1 * CDRAN + c1 * PW + SES + Age
CDICT ~ b2 * CDRAN + c2 * PW + SES + Age
EWR ~ b3 * CDRAN + c3 * PW + SES + Age
EDICT ~ b4 * CDRAN + c4 * PW + SES + Age
# Residual covariances to control for shared variance
CWR ~~ CDICT
CWR ~~ EDICT
CWR ~~ EWR
CDICT ~~ EDICT
CDICT ~~ EWR
EDICT ~~ EWR
# Level 2: Between-group model
PW ~ 1 # Random intercept for PW
CDRAN ~ 1
CWR ~ 1
CDICT ~ 1
EDICT ~ 1
EWR ~ 1
# DEFINE INDIRECT AND TOTAL EFFECTS: PW -> CDRAN -> Academic Skills
# Indirect effects (PW -> CDRAN -> Skill)
Indirect_PW_CWR := a * b1
Indirect_PW_CDICT := a * b2
Indirect_PW_EDICT := a * b3
Indirect_PW_EWR := a * b4
# Total effects (Indirect + Direct)
Total_PW_CWR := (a * b1) + c1
Total_PW_CDICT := (a * b2) + c2
Total_PW_EDICT := (a * b3) + c3
Total_PW_EWR := (a * b4) + c4
'
# Fit the model
fit_2 <- sem(model_2, data = data_sem, cluster = 'FID')
## Warning in lav_options_set(opt): observed.information for ALL test statistics
## is set to h1.
## Warning in lav_data_full(data = data, group = group, cluster = cluster, : lavaan WARNING: cluster variable 'FID' contains missing values
## Warning in lav_data_full(data = data, group = group, cluster = cluster, :
## lavaan WARNING: some observed variances are (at least) a factor 1000 times
## larger than others; use varTable(fit) to investigate
## Warning in lav_partable_vnames(FLAT, "ov.x", warn = TRUE): lavaan WARNING:
## model syntax contains variance/covariance/intercept formulas
## involving (an) exogenous variable(s): [PW]; These variables will
## now be treated as random introducing additional free parameters.
## If you wish to treat those variables as fixed, remove these
## formulas from the model syntax. Otherwise, consider adding the
## fixed.x = FALSE option.
# For bootstrap CIs with clustered data, we need to use a different approach
# First, let's get the regular results
summary(fit_2, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6.15 ended normally after 202 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 37
##
## Used Total
## Number of observations 135 202
## Number of clusters [FID] 74
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 0.945 0.651
## Degrees of freedom 2 2
## P-value (Chi-square) 0.623 0.722
## Scaling correction factor 1.452
## Yuan-Bentler correction (Mplus variant)
## Observed information based on H1
##
## Model Test Baseline Model:
##
## Test statistic 600.169 458.233
## Degrees of freedom 27 27
## P-value 0.000 0.000
## Scaling correction factor 1.310
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 1.025 1.042
##
## Robust Comparative Fit Index (CFI) 1.000
## Robust Tucker-Lewis Index (TLI) 1.047
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1734.630 -1734.630
## Scaling correction factor 1.326
## for the MLR correction
## Loglikelihood unrestricted model (H1) -1734.157 -1734.157
## Scaling correction factor 1.332
## for the MLR correction
##
## Akaike (AIC) 3543.260 3543.260
## Bayesian (BIC) 3650.755 3650.755
## Sample-size adjusted Bayesian (SABIC) 3533.712 3533.712
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.137 0.093
## P-value H_0: RMSEA <= 0.050 0.711 0.866
## P-value H_0: RMSEA >= 0.080 0.191 0.071
##
## Robust RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.147
## P-value H_0: Robust RMSEA <= 0.050 0.772
## P-value H_0: Robust RMSEA >= 0.080 0.168
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.023 0.023
##
## Parameter Estimates:
##
## Standard errors Robust.cluster
## Information Observed
## Observed information based on Hessian
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN ~
## PW (a) 7.100 2.481 2.862 0.004 7.100 0.214
## SES 0.131 0.051 2.568 0.010 0.131 0.217
## Age 0.460 0.083 5.543 0.000 0.460 0.402
## CWR ~
## CDRAN (b1) 15.480 2.905 5.329 0.000 15.480 0.441
## PW (c1) 129.800 71.187 1.823 0.068 129.800 0.111
## SES -0.921 1.916 -0.481 0.631 -0.921 -0.043
## Age 14.364 3.416 4.204 0.000 14.364 0.358
## CDICT ~
## CDRAN (b2) 2.941 0.924 3.183 0.001 2.941 0.352
## PW (c2) 41.652 19.218 2.167 0.030 41.652 0.150
## SES 0.236 0.506 0.467 0.641 0.236 0.047
## Age 1.568 0.953 1.645 0.100 1.568 0.164
## EWR ~
## CDRAN (b3) 5.047 1.295 3.899 0.000 5.047 0.293
## PW (c3) 55.338 32.448 1.705 0.088 55.338 0.097
## SES 4.739 0.810 5.853 0.000 4.739 0.457
## Age 5.250 1.565 3.354 0.001 5.250 0.267
## EDICT ~
## CDRAN (b4) 2.497 0.655 3.811 0.000 2.497 0.310
## PW (c4) 34.730 15.568 2.231 0.026 34.730 0.130
## SES 2.082 0.370 5.627 0.000 2.082 0.428
## Age 2.312 0.708 3.266 0.001 2.312 0.251
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CWR ~~
## .CDICT 98.305 17.975 5.469 0.000 98.305 0.640
## .EDICT 33.049 13.502 2.448 0.014 33.049 0.281
## .EWR 77.710 25.961 2.993 0.003 77.710 0.315
## .CDICT ~~
## .EDICT 6.412 3.078 2.083 0.037 6.412 0.193
## .EWR 11.254 7.278 1.546 0.122 11.254 0.161
## .EWR ~~
## .EDICT 42.993 6.086 7.064 0.000 42.993 0.805
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## PW 0.090 0.003 32.649 0.000 0.090 3.237
## .CDRAN -1.178 0.729 -1.616 0.106 -1.178 -1.283
## .CWR -97.076 25.753 -3.770 0.000 -97.076 -3.011
## .CDICT 1.112 7.280 0.153 0.879 1.112 0.145
## .EDICT -21.817 5.352 -4.077 0.000 -21.817 -2.948
## .EWR -43.917 13.122 -3.347 0.001 -43.917 -2.781
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN 0.612 0.079 7.745 0.000 0.612 0.726
## .CWR 542.135 75.674 7.164 0.000 542.135 0.522
## .CDICT 43.549 5.649 7.710 0.000 43.549 0.742
## .EWR 112.104 13.882 8.076 0.000 112.104 0.449
## .EDICT 25.468 3.391 7.511 0.000 25.468 0.465
## PW 0.001 0.000 7.228 0.000 0.001 1.000
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Indirct_PW_CWR 109.913 42.051 2.614 0.009 109.913 0.094
## Indrc_PW_CDICT 20.882 8.690 2.403 0.016 20.882 0.075
## Indrc_PW_EDICT 35.838 15.433 2.322 0.020 35.838 0.063
## Indirct_PW_EWR 17.728 8.029 2.208 0.027 17.728 0.066
## Total_PW_CWR 239.714 77.410 3.097 0.002 239.714 0.206
## Total_PW_CDICT 62.534 19.906 3.141 0.002 62.534 0.226
## Total_PW_EDICT 91.176 35.047 2.602 0.009 91.176 0.160
## Total_PW_EWR 52.458 16.515 3.176 0.001 52.458 0.196
# ============================================================================
# Bootstrap confidence intervals for indirect and total effects
# ============================================================================
# For bootstrap confidence intervals with clustered data, we'll use bootstrapLavaan
set.seed(123) # For reproducibility
bootstrap_results <- bootstrapLavaan(fit_2, R = 5000, type = "ordinary")
# The bootstrap object does not include defined parameters (Indirect_/Total_).
# We reconstruct indirect and total effects from labeled path coefficients.
needed_coeffs <- c("a","b1","b2","b3","b4","c1","c2","c3","c4")
has_all <- all(needed_coeffs %in% colnames(bootstrap_results))
bootstrap_ci_effects <- data.frame()
if (has_all) {
# Compute effects for each bootstrap sample
indirect_total_matrix <- data.frame(
Indirect_PW_CWR = bootstrap_results[,"a"] * bootstrap_results[,"b1"],
Indirect_PW_CDICT= bootstrap_results[,"a"] * bootstrap_results[,"b2"],
Indirect_PW_EDICT= bootstrap_results[,"a"] * bootstrap_results[,"b3"],
Indirect_PW_EWR = bootstrap_results[,"a"] * bootstrap_results[,"b4"],
Total_PW_CWR = bootstrap_results[,"a"] * bootstrap_results[,"b1"] + bootstrap_results[,"c1"],
Total_PW_CDICT = bootstrap_results[,"a"] * bootstrap_results[,"b2"] + bootstrap_results[,"c2"],
Total_PW_EDICT = bootstrap_results[,"a"] * bootstrap_results[,"b3"] + bootstrap_results[,"c3"],
Total_PW_EWR = bootstrap_results[,"a"] * bootstrap_results[,"b4"] + bootstrap_results[,"c4"]
)
# Derive percentile CIs
for (effect in colnames(indirect_total_matrix)) {
boot_values <- indirect_total_matrix[[effect]]
ci_lower <- quantile(boot_values, 0.025, na.rm = TRUE)
ci_upper <- quantile(boot_values, 0.975, na.rm = TRUE)
bootstrap_ci_effects <- rbind(
bootstrap_ci_effects,
data.frame(label = effect,
ci.lower.boot = ci_lower,
ci.upper.boot = ci_upper,
stringsAsFactors = FALSE)
)
}
} else {
message("Bootstrap coefficients for indirect/total effects could not be reconstructed: missing labels ",
paste(setdiff(needed_coeffs, colnames(bootstrap_results)), collapse = ", "))
}
# ============================================================================
# Extract results for Word document
# ============================================================================
# Extract parameter estimates
param_estimates <- parameterEstimates(fit_2, standardized = TRUE) %>%
mutate(
# Add significance stars
significance = case_when(
pvalue < 0.001 ~ "***",
pvalue < 0.01 ~ "**",
pvalue < 0.05 ~ "*",
TRUE ~ ""
),
# Create significance indicator
significant = pvalue < 0.05,
# Format path description
path = paste(lhs, op, rhs),
# Round values
across(c(est, se, z, pvalue, ci.lower, ci.upper, std.all), ~round(., 3))
)
# Merge bootstrap CIs for indirect and total effects
param_estimates <- param_estimates %>%
left_join(bootstrap_ci_effects, by = "label") %>%
mutate(
# Use bootstrap CIs for indirect/total effects, otherwise use regular CIs
ci.lower.final = ifelse(!is.na(ci.lower.boot), round(ci.lower.boot, 3), ci.lower),
ci.upper.final = ifelse(!is.na(ci.upper.boot), round(ci.upper.boot, 3), ci.upper),
# Create CI string
ci_95 = paste0("[", ci.lower.final, ", ", ci.upper.final, "]"),
# Mark bootstrap CIs
ci_type = ifelse(!is.na(ci.lower.boot), "Bootstrap", "Regular")
)
# Separate regression paths and covariances
regression_paths <- param_estimates %>%
filter(op == "~")
# Rename and select columns using base R
regression_paths <- regression_paths[, c("lhs", "rhs", "est", "std.all", "se", "z", "pvalue",
"ci.lower.final", "ci.upper.final", "ci_95", "ci_type",
"significance", "significant")]
names(regression_paths) <- c("outcome", "predictor", "est", "std.all", "se", "z", "pvalue",
"ci.lower", "ci.upper", "ci_95", "ci_type", "significance", "significant")
covariances <- param_estimates %>%
filter(op == "~~", lhs != rhs)
# Rename and select columns using base R
covariances <- covariances[, c("lhs", "rhs", "est", "std.all", "se", "z", "pvalue",
"ci.lower.final", "ci.upper.final", "ci_95", "ci_type",
"significance", "significant")]
names(covariances) <- c("var1", "var2", "est", "std.all", "se", "z", "pvalue",
"ci.lower", "ci.upper", "ci_95", "ci_type", "significance", "significant")
# Extract defined parameters (indirect and total effects)
defined_effects <- param_estimates %>%
filter(op == ":=") %>%
select(label, est, std.all, se, z, pvalue, ci.lower.final, ci.upper.final,
ci_95, ci_type, significance, significant)
# Extract fit measures
fit_measures <- fitMeasures(fit_2)
fit_df <- data.frame(
Measure = c("Chi-Square", "df", "p-value", "CFI", "TLI", "RMSEA", "RMSEA 90% CI Lower",
"RMSEA 90% CI Upper", "SRMR", "AIC", "BIC"),
Value = c(
round(fit_measures["chisq"], 3),
fit_measures["df"],
round(fit_measures["pvalue"], 3),
round(fit_measures["cfi"], 3),
round(fit_measures["tli"], 3),
round(fit_measures["rmsea"], 3),
round(fit_measures["rmsea.ci.lower"], 3),
round(fit_measures["rmsea.ci.upper"], 3),
round(fit_measures["srmr"], 3),
round(fit_measures["aic"], 2),
round(fit_measures["bic"], 2)
)
)
# Display key results with bootstrap CIs
cat("\n=== INDIRECT EFFECTS (with Bootstrap 95% CIs) ===\n")
##
## === INDIRECT EFFECTS (with Bootstrap 95% CIs) ===
indirect_effects <- defined_effects %>% filter(grepl("Indirect_", label))
for(i in 1:nrow(indirect_effects)) {
cat(sprintf("%s: β = %.3f, %s CI %s, p = %.3f%s\n",
indirect_effects$label[i],
indirect_effects$est[i],
indirect_effects$ci_type[i],
indirect_effects$ci_95[i],
indirect_effects$pvalue[i],
indirect_effects$significance[i]))
}
## Indirect_PW_CWR: β = 109.913, Bootstrap CI [30.882, 192.513], p = 0.009**
## Indirect_PW_CDICT: β = 20.882, Bootstrap CI [5.755, 37.545], p = 0.016*
## Indirect_PW_EDICT: β = 35.838, Bootstrap CI [9.135, 68.273], p = 0.020*
## Indirect_PW_EWR: β = 17.728, Bootstrap CI [4.473, 34.76], p = 0.027*
cat("\n=== TOTAL EFFECTS (with Bootstrap 95% CIs) ===\n")
##
## === TOTAL EFFECTS (with Bootstrap 95% CIs) ===
total_effects <- defined_effects %>% filter(grepl("Total_", label))
for(i in 1:nrow(total_effects)) {
cat(sprintf("%s: β = %.3f, %s CI %s, p = %.3f%s\n",
total_effects$label[i],
total_effects$est[i],
total_effects$ci_type[i],
total_effects$ci_95[i],
total_effects$pvalue[i],
total_effects$significance[i]))
}
## Total_PW_CWR: β = 239.714, Bootstrap CI [75.671, 392.338], p = 0.002**
## Total_PW_CDICT: β = 62.534, Bootstrap CI [20.758, 106.939], p = 0.002**
## Total_PW_EDICT: β = 91.176, Bootstrap CI [23.941, 149.343], p = 0.009**
## Total_PW_EWR: β = 52.458, Bootstrap CI [21.478, 79.777], p = 0.001**
# Calculate R-squared values for each outcome variable
# Extract parameter estimates
params <- parameterEstimates(fit_2, standardized = TRUE)
# Function to calculate R-squared from lavaan model
calculate_r_squared <- function(fit, outcome_vars) {
# Get the inspect function results
r_squared_values <- inspect(fit, "r2")
# Create a data frame with R-squared values
if(is.null(r_squared_values)) {
# If inspect doesn't work, calculate manually from residual variances
# Get standardized solution
std_solution <- standardizedSolution(fit)
# Extract residual variances (these are 1 - R²)
residual_vars <- std_solution %>%
filter(op == "~~", lhs == rhs, lhs %in% outcome_vars) %>%
select(lhs, est.std) %>%
rename(variable = lhs, residual_var = est.std) %>%
mutate(r_squared = 1 - residual_var)
return(residual_vars)
} else {
# If inspect works, use those values
r2_df <- data.frame(
variable = names(r_squared_values),
r_squared = as.numeric(r_squared_values)
)
return(r2_df)
}
}
# Define outcome variables
outcome_vars <- c("CDRAN", "CWR", "CDICT", "EWR", "EDICT")
# Calculate R-squared values
r_squared_results <- calculate_r_squared(fit_2, outcome_vars)
# Display results
cat("\n=== R-SQUARED VALUES (Proportion of Variance Explained) ===\n")
##
## === R-SQUARED VALUES (Proportion of Variance Explained) ===
print(r_squared_results)
## variable r_squared
## 1 CDRAN 0.2736476
## 2 CWR 0.4783052
## 3 CDICT 0.2581940
## 4 EWR 0.5505111
## 5 EDICT 0.5349744