library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.3.3
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
library(semPlot)
## Warning: package 'semPlot' was built under R version 4.3.3
data_sem <- read.csv('./data/EEG_RS_lanExp_income_imputated_all_v5.csv') %>%
rename(Age = Age_EEG_yr)
model <- '
# Level 1: Direct paths
CDRAN ~ PW
CWR ~ CDRAN
CDICT ~ CDRAN
ARITH ~ CDRAN
EWR ~ CDRAN
# Residual covariances to control for shared variance
CWR ~~ CDICT
CWR ~~ ARITH
CWR ~~ EWR
CDICT ~~ ARITH
CDICT ~~ EWR
ARITH ~~ EWR
# Level 2: Between-group model
PW ~ 1 # Random intercept for PW
# Control variables
CDRAN ~ SES + Age
'
# Fit the model
fit <- sem(model, data = data_sem, cluster = 'FID')
## Warning: lavaan->lav_data_full():
## cluster variable 'FID' contains missing values
## Warning: lavaan->lav_data_full():
## some observed variances are (at least) a factor 1000 times larger than
## others; use varTable(fit) to investigate
# Summarize the results
summary(fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-18 ended normally after 134 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 25
##
## Used Total
## Number of observations 136 202
## Number of clusters [FID] 74
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 117.755 82.677
## Degrees of freedom 14 14
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.424
## Yuan-Bentler correction (Mplus variant)
##
## Model Test Baseline Model:
##
## Test statistic 473.588 349.789
## Degrees of freedom 27 27
## P-value 0.000 0.000
## Scaling correction factor 1.354
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.768 0.787
## Tucker-Lewis Index (TLI) 0.552 0.590
##
## Robust Comparative Fit Index (CFI) 0.776
## Robust Tucker-Lewis Index (TLI) 0.568
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1826.002 -1826.002
## Scaling correction factor 1.338
## for the MLR correction
## Loglikelihood unrestricted model (H1) -1767.125 -1767.125
## Scaling correction factor 1.369
## for the MLR correction
##
## Akaike (AIC) 3702.004 3702.004
## Bayesian (BIC) 3774.821 3774.821
## Sample-size adjusted Bayesian (SABIC) 3695.734 3695.734
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.233 0.190
## 90 Percent confidence interval - lower 0.196 0.158
## 90 Percent confidence interval - upper 0.273 0.224
## P-value H_0: RMSEA <= 0.050 0.000 0.000
## P-value H_0: RMSEA >= 0.080 1.000 1.000
##
## Robust RMSEA 0.227
## 90 Percent confidence interval - lower 0.181
## 90 Percent confidence interval - upper 0.275
## P-value H_0: Robust RMSEA <= 0.050 0.000
## P-value H_0: Robust RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.108 0.108
##
## Parameter Estimates:
##
## Standard errors Robust.cluster
## Information Observed
## Observed information based on Hessian
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN ~
## PW 7.109 2.465 2.884 0.004 7.109 0.215
## CWR ~
## CDRAN 20.942 2.625 7.979 0.000 20.942 0.600
## CDICT ~
## CDRAN 3.816 0.811 4.708 0.000 3.816 0.460
## ARITH ~
## CDRAN 2.861 0.431 6.638 0.000 2.861 0.494
## EWR ~
## CDRAN 9.275 1.232 7.531 0.000 9.275 0.543
## CDRAN ~
## SES 0.131 0.051 2.576 0.010 0.131 0.218
## Age 0.459 0.083 5.557 0.000 0.459 0.403
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CWR ~~
## .CDICT 111.631 20.874 5.348 0.000 111.631 0.645
## .ARITH 50.018 12.758 3.920 0.000 50.018 0.423
## .EWR 105.341 39.441 2.671 0.008 105.341 0.313
## .CDICT ~~
## .ARITH 9.833 3.082 3.191 0.001 9.833 0.315
## .EWR 17.648 8.636 2.043 0.041 17.648 0.199
## .ARITH ~~
## .EWR 29.184 6.695 4.359 0.000 29.184 0.481
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## PW 0.090 0.003 32.920 0.000 0.090 3.242
## .CDRAN -1.176 0.728 -1.615 0.106 -1.176 -1.283
## .CWR 15.638 9.639 1.622 0.105 15.638 0.489
## .CDICT 14.968 2.913 5.138 0.000 14.968 1.967
## .ARITH -0.149 1.307 -0.114 0.909 -0.149 -0.028
## .EWR -9.666 4.202 -2.300 0.021 -9.666 -0.617
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN 0.607 0.079 7.728 0.000 0.607 0.723
## .CWR 656.170 84.955 7.724 0.000 656.170 0.640
## .CDICT 45.655 6.028 7.573 0.000 45.655 0.789
## .ARITH 21.294 3.101 6.867 0.000 21.294 0.756
## .EWR 172.995 19.701 8.781 0.000 172.995 0.705
## PW 0.001 0.000 7.286 0.000 0.001 1.000
# Plot the path model with manual layout
semPaths(fit,
what = "est", # Display parameter estimates
whatLabels = "std", # Label paths with standardized estimates
style = "lisrel", # Use LISREL style for the plot
layout = "tree2", # Use manual layout
edge.label.cex = 1, # Size of edge labels
sizeMan = 10, # Size of manifest variables
sizeLat = 10, # Size of latent variables
nCharNodes = 0, # Do not abbreviate node names
rotation = 2, # Rotate the plot (2 = vertical)
intercepts = FALSE, # Do not show intercepts
residuals = TRUE, # Show residuals
curve = 1.5, # Add curvature to paths
mar = c(5, 5, 5, 5), # Margins for the plot
edge.label.color = "black",
fade = F,
)

model_2 <- '
# Level 1: Direct paths
CDRAN ~ PW + SES + Age
CWR ~ PW + SES + Age
CDICT ~ PW + SES + Age
EWR ~ PW + SES + Age
ARITH ~ PW + SES + Age
CWR ~ CDRAN
CDICT ~ CDRAN
ARITH ~ CDRAN
EWR ~ CDRAN
# Residual covariances to control for shared variance
CWR ~~ CDICT
CWR ~~ ARITH
CWR ~~ EWR
CDICT ~~ ARITH
CDICT ~~ EWR
ARITH ~~ EWR
# Level 2: Between-group model
PW ~ 1 # Random intercept for PW
CDRAN ~ 1
CWR ~ 1
CDICT ~ 1
ARITH ~ 1
EWR ~ 1
'
# Fit the model
fit_2 <- sem(model_2, data = data_sem, cluster = 'FID')
## Warning: lavaan->lav_data_full():
## cluster variable 'FID' contains missing values
## Warning: lavaan->lav_data_full():
## some observed variances are (at least) a factor 1000 times larger than
## others; use varTable(fit) to investigate
# Summarize the results
summary(fit_2, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-18 ended normally after 182 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 37
##
## Used Total
## Number of observations 136 202
## Number of clusters [FID] 74
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 1.252 0.863
## Degrees of freedom 2 2
## P-value (Chi-square) 0.535 0.649
## Scaling correction factor 1.450
## Yuan-Bentler correction (Mplus variant)
##
## Model Test Baseline Model:
##
## Test statistic 473.588 349.789
## Degrees of freedom 27 27
## P-value 0.000 0.000
## Scaling correction factor 1.354
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000 1.000
## Tucker-Lewis Index (TLI) 1.023 1.048
##
## Robust Comparative Fit Index (CFI) 1.000
## Robust Tucker-Lewis Index (TLI) 1.051
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1767.751 -1767.751
## Scaling correction factor 1.364
## for the MLR correction
## Loglikelihood unrestricted model (H1) -1767.125 -1767.125
## Scaling correction factor 1.369
## for the MLR correction
##
## Akaike (AIC) 3609.501 3609.501
## Bayesian (BIC) 3717.270 3717.270
## Sample-size adjusted Bayesian (SABIC) 3600.222 3600.222
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000 0.000
## 90 Percent confidence interval - lower 0.000 0.000
## 90 Percent confidence interval - upper 0.148 0.106
## P-value H_0: RMSEA <= 0.050 0.636 0.811
## P-value H_0: RMSEA >= 0.080 0.247 0.104
##
## Robust RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.160
## P-value H_0: Robust RMSEA <= 0.050 0.709
## P-value H_0: Robust RMSEA >= 0.080 0.216
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.026 0.026
##
## Parameter Estimates:
##
## Standard errors Robust.cluster
## Information Observed
## Observed information based on Hessian
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN ~
## PW 7.109 2.465 2.884 0.004 7.109 0.215
## SES 0.131 0.051 2.576 0.010 0.131 0.218
## Age 0.459 0.083 5.557 0.000 0.459 0.403
## CWR ~
## PW 138.376 70.292 1.969 0.049 138.376 0.119
## SES -1.076 1.925 -0.559 0.576 -1.076 -0.051
## Age 14.041 3.431 4.093 0.000 14.041 0.351
## CDICT ~
## PW 43.116 19.156 2.251 0.024 43.116 0.156
## SES 0.210 0.504 0.416 0.677 0.210 0.042
## Age 1.513 0.948 1.596 0.110 1.513 0.159
## EWR ~
## PW 56.533 32.154 1.758 0.079 56.533 0.099
## SES 4.717 0.811 5.815 0.000 4.717 0.455
## Age 5.205 1.562 3.333 0.001 5.205 0.265
## ARITH ~
## PW 21.177 13.053 1.622 0.105 21.177 0.110
## SES 0.719 0.315 2.280 0.023 0.719 0.205
## Age 3.315 0.506 6.548 0.000 3.315 0.499
## CWR ~
## CDRAN 15.493 2.898 5.347 0.000 15.493 0.441
## CDICT ~
## CDRAN 2.943 0.924 3.184 0.001 2.943 0.353
## ARITH ~
## CDRAN 1.220 0.394 3.095 0.002 1.220 0.209
## EWR ~
## CDRAN 5.049 1.294 3.901 0.000 5.049 0.293
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CWR ~~
## .CDICT 98.909 18.071 5.473 0.000 98.909 0.642
## .ARITH 27.375 8.897 3.077 0.002 27.375 0.308
## .EWR 78.220 25.760 3.037 0.002 78.220 0.317
## .CDICT ~~
## .ARITH 6.856 2.465 2.782 0.005 6.856 0.273
## .EWR 11.356 7.218 1.573 0.116 11.356 0.163
## .EWR ~~
## .ARITH 13.019 4.120 3.160 0.002 13.019 0.324
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## PW 0.090 0.003 32.920 0.000 0.090 3.242
## .CDRAN -1.176 0.728 -1.615 0.106 -1.176 -1.283
## .CWR -94.992 25.872 -3.672 0.000 -94.992 -2.950
## .CDICT 1.468 7.246 0.203 0.839 1.468 0.192
## .ARITH -24.016 3.873 -6.201 0.000 -24.016 -4.493
## .EWR -43.626 13.097 -3.331 0.001 -43.626 -2.765
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN 0.607 0.079 7.728 0.000 0.607 0.723
## .CWR 545.915 76.502 7.136 0.000 545.915 0.527
## .CDICT 43.455 5.664 7.673 0.000 43.455 0.743
## .EWR 111.430 13.803 8.073 0.000 111.430 0.448
## .ARITH 14.489 2.008 7.217 0.000 14.489 0.507
## PW 0.001 0.000 7.286 0.000 0.001 1.000
# lavNames(fit_2, type = "ov") # [1] "CDRAN" "CWR" "CDICT" "EWR" "ARITH" "PW" "SES" "Age"
# Define node positions (x, y coordinates)
positions <- matrix(c(
# x y Node
0, 0, # CDRAN
1, 5, # CWR
1, 2.5, # CDICT
1, -2.5, # EWR
1, -5, # ARITH
-1, 0, # PW
-1, 2.5, # SES
-1, 5 # Age
), ncol = 2, byrow = TRUE)
rownames(positions) <- c("CDRAN", "CWR", "CDICT", "EWR", "ARITH", "PW", "SES", "Age")
# Assign colors (1 color per node, in the order of rownames(positions))
node_colors <- c(
rep("#F0B27A", 6), # First 3 nodes (PW, SES, Age)
rep("#7FB3D5", 2) # Next 5 nodes (CDRAN, CWR, CDICT, ARITH, EWR)
)
# Plot the path model with manual layout
semPaths(fit_2,
what = "est", # Display parameter estimates
whatLabels = "std", # Label paths with standardized estimates
style = "lisrel", # Use LISREL style for the plot
layout = positions,
edge.label.cex = 0.9, # Size of edge labels
sizeMan = 10, # Size of manifest variables
sizeLat = 10, # Size of latent variables
nCharNodes = 0, # Do not abbreviate node names
rotation = 1, # Rotate the plot (2 = vertical)
intercepts = FALSE, # Do not show intercepts
residuals = TRUE, # Show residuals
curve = 2, # Add curvature to paths
curvePivot = TRUE, # Pivot curves at midpoint
curveAdjacent = TRUE, # Curve adjacent edges
mar = c(3, 5, 3, 5), # Margins for the plot
edge.color = "#404040",# Darker edges
color = node_colors,
edge.label.color = "black",
fade = F,
asize = 3
)

# model_3 <- '
# # Level 1: Direct paths
# CDRAN ~ PW
# CWR ~ PW
# CDICT ~ PW
# EWR ~ PW
# ARITH ~ PW
# CWR ~ CDRAN
# CDICT ~ CDRAN
# ARITH ~ CDRAN
# EWR ~ CDRAN
#
# # Residual covariances to control for shared variance
# CWR ~~ CDICT
# CWR ~~ ARITH
# CWR ~~ EWR
# CDICT ~~ ARITH
# CDICT ~~ EWR
# ARITH ~~ EWR
#
# # Level 2: Between-group model
# PW ~ 1 # Random intercept for PW
#
# # Control variables
# CDRAN ~ SES + Age
# PW ~~ SES + Age
# '
#
# # Fit the model
# fit_3 <- sem(model_3, data = data_sem, cluster = 'FID')
#
# # Summarize the results
# summary(fit_3, standardized = TRUE, fit.measures = TRUE)