library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.3.3
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
library(semPlot)
## Warning: package 'semPlot' was built under R version 4.3.3
data_sem <- read.csv('./data/EEG_RS_lanExp_income_imputated_all_v5.csv') %>%
  rename(Age = Age_EEG_yr)
model <- '
  # Level 1: Direct paths
  CDRAN ~ PW
  CWR ~ CDRAN
  CDICT ~ CDRAN
  ARITH ~ CDRAN
  EWR ~ CDRAN
  
  # Residual covariances to control for shared variance
  CWR ~~ CDICT
  CWR ~~ ARITH
  CWR ~~ EWR
  CDICT ~~ ARITH
  CDICT ~~ EWR
  ARITH ~~ EWR
  
  # Level 2: Between-group model
  PW ~ 1  # Random intercept for PW
  
  # Control variables
  CDRAN ~ SES + Age
'

# Fit the model
fit <- sem(model, data = data_sem, cluster = 'FID')
## Warning: lavaan->lav_data_full():  
##    cluster variable 'FID' contains missing values
## Warning: lavaan->lav_data_full():  
##    some observed variances are (at least) a factor 1000 times larger than 
##    others; use varTable(fit) to investigate
# Summarize the results
summary(fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-18 ended normally after 134 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        25
## 
##                                                   Used       Total
##   Number of observations                           136         202
##   Number of clusters [FID]                          74            
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               117.755      82.677
##   Degrees of freedom                                14          14
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  1.424
##     Yuan-Bentler correction (Mplus variant)                       
## 
## Model Test Baseline Model:
## 
##   Test statistic                               473.588     349.789
##   Degrees of freedom                                27          27
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.354
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.768       0.787
##   Tucker-Lewis Index (TLI)                       0.552       0.590
##                                                                   
##   Robust Comparative Fit Index (CFI)                         0.776
##   Robust Tucker-Lewis Index (TLI)                            0.568
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1826.002   -1826.002
##   Scaling correction factor                                  1.338
##       for the MLR correction                                      
##   Loglikelihood unrestricted model (H1)      -1767.125   -1767.125
##   Scaling correction factor                                  1.369
##       for the MLR correction                                      
##                                                                   
##   Akaike (AIC)                                3702.004    3702.004
##   Bayesian (BIC)                              3774.821    3774.821
##   Sample-size adjusted Bayesian (SABIC)       3695.734    3695.734
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.233       0.190
##   90 Percent confidence interval - lower         0.196       0.158
##   90 Percent confidence interval - upper         0.273       0.224
##   P-value H_0: RMSEA <= 0.050                    0.000       0.000
##   P-value H_0: RMSEA >= 0.080                    1.000       1.000
##                                                                   
##   Robust RMSEA                                               0.227
##   90 Percent confidence interval - lower                     0.181
##   90 Percent confidence interval - upper                     0.275
##   P-value H_0: Robust RMSEA <= 0.050                         0.000
##   P-value H_0: Robust RMSEA >= 0.080                         1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.108       0.108
## 
## Parameter Estimates:
## 
##   Standard errors                        Robust.cluster
##   Information                                  Observed
##   Observed information based on                 Hessian
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   CDRAN ~                                                               
##     PW                7.109    2.465    2.884    0.004    7.109    0.215
##   CWR ~                                                                 
##     CDRAN            20.942    2.625    7.979    0.000   20.942    0.600
##   CDICT ~                                                               
##     CDRAN             3.816    0.811    4.708    0.000    3.816    0.460
##   ARITH ~                                                               
##     CDRAN             2.861    0.431    6.638    0.000    2.861    0.494
##   EWR ~                                                                 
##     CDRAN             9.275    1.232    7.531    0.000    9.275    0.543
##   CDRAN ~                                                               
##     SES               0.131    0.051    2.576    0.010    0.131    0.218
##     Age               0.459    0.083    5.557    0.000    0.459    0.403
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .CWR ~~                                                                
##    .CDICT           111.631   20.874    5.348    0.000  111.631    0.645
##    .ARITH            50.018   12.758    3.920    0.000   50.018    0.423
##    .EWR             105.341   39.441    2.671    0.008  105.341    0.313
##  .CDICT ~~                                                              
##    .ARITH             9.833    3.082    3.191    0.001    9.833    0.315
##    .EWR              17.648    8.636    2.043    0.041   17.648    0.199
##  .ARITH ~~                                                              
##    .EWR              29.184    6.695    4.359    0.000   29.184    0.481
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     PW                0.090    0.003   32.920    0.000    0.090    3.242
##    .CDRAN            -1.176    0.728   -1.615    0.106   -1.176   -1.283
##    .CWR              15.638    9.639    1.622    0.105   15.638    0.489
##    .CDICT            14.968    2.913    5.138    0.000   14.968    1.967
##    .ARITH            -0.149    1.307   -0.114    0.909   -0.149   -0.028
##    .EWR              -9.666    4.202   -2.300    0.021   -9.666   -0.617
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .CDRAN             0.607    0.079    7.728    0.000    0.607    0.723
##    .CWR             656.170   84.955    7.724    0.000  656.170    0.640
##    .CDICT            45.655    6.028    7.573    0.000   45.655    0.789
##    .ARITH            21.294    3.101    6.867    0.000   21.294    0.756
##    .EWR             172.995   19.701    8.781    0.000  172.995    0.705
##     PW                0.001    0.000    7.286    0.000    0.001    1.000
# Plot the path model with manual layout
semPaths(fit, 
         what = "est",          # Display parameter estimates
         whatLabels = "std",    # Label paths with standardized estimates
         style = "lisrel",      # Use LISREL style for the plot
         layout = "tree2",     # Use manual layout
         edge.label.cex = 1,    # Size of edge labels
         sizeMan = 10,          # Size of manifest variables
         sizeLat = 10,          # Size of latent variables
         nCharNodes = 0,        # Do not abbreviate node names
         rotation = 2,          # Rotate the plot (2 = vertical)
         intercepts = FALSE,    # Do not show intercepts
         residuals = TRUE,      # Show residuals
         curve = 1.5,           # Add curvature to paths
         mar = c(5, 5, 5, 5),   # Margins for the plot
         edge.label.color = "black",
         fade = F,
         )

model_2 <- '
  # Level 1: Direct paths
  CDRAN ~ PW + SES + Age
  CWR ~ PW + SES + Age
  CDICT ~ PW + SES + Age
  EWR ~ PW + SES + Age
  ARITH ~ PW + SES + Age
  CWR ~ CDRAN
  CDICT ~ CDRAN
  ARITH ~ CDRAN
  EWR ~ CDRAN

  # Residual covariances to control for shared variance
  CWR ~~ CDICT
  CWR ~~ ARITH
  CWR ~~ EWR
  CDICT ~~ ARITH
  CDICT ~~ EWR
  ARITH ~~ EWR

  # Level 2: Between-group model
  PW ~ 1  # Random intercept for PW
  CDRAN ~ 1
  CWR ~ 1
  CDICT ~ 1
  ARITH ~ 1
  EWR ~ 1
'

# Fit the model
fit_2 <- sem(model_2, data = data_sem, cluster = 'FID')
## Warning: lavaan->lav_data_full():  
##    cluster variable 'FID' contains missing values
## Warning: lavaan->lav_data_full():  
##    some observed variances are (at least) a factor 1000 times larger than 
##    others; use varTable(fit) to investigate
# Summarize the results
summary(fit_2, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-18 ended normally after 182 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        37
## 
##                                                   Used       Total
##   Number of observations                           136         202
##   Number of clusters [FID]                          74            
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                                 1.252       0.863
##   Degrees of freedom                                 2           2
##   P-value (Chi-square)                           0.535       0.649
##   Scaling correction factor                                  1.450
##     Yuan-Bentler correction (Mplus variant)                       
## 
## Model Test Baseline Model:
## 
##   Test statistic                               473.588     349.789
##   Degrees of freedom                                27          27
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.354
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000       1.000
##   Tucker-Lewis Index (TLI)                       1.023       1.048
##                                                                   
##   Robust Comparative Fit Index (CFI)                         1.000
##   Robust Tucker-Lewis Index (TLI)                            1.051
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1767.751   -1767.751
##   Scaling correction factor                                  1.364
##       for the MLR correction                                      
##   Loglikelihood unrestricted model (H1)      -1767.125   -1767.125
##   Scaling correction factor                                  1.369
##       for the MLR correction                                      
##                                                                   
##   Akaike (AIC)                                3609.501    3609.501
##   Bayesian (BIC)                              3717.270    3717.270
##   Sample-size adjusted Bayesian (SABIC)       3600.222    3600.222
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000       0.000
##   90 Percent confidence interval - lower         0.000       0.000
##   90 Percent confidence interval - upper         0.148       0.106
##   P-value H_0: RMSEA <= 0.050                    0.636       0.811
##   P-value H_0: RMSEA >= 0.080                    0.247       0.104
##                                                                   
##   Robust RMSEA                                               0.000
##   90 Percent confidence interval - lower                     0.000
##   90 Percent confidence interval - upper                     0.160
##   P-value H_0: Robust RMSEA <= 0.050                         0.709
##   P-value H_0: Robust RMSEA >= 0.080                         0.216
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.026       0.026
## 
## Parameter Estimates:
## 
##   Standard errors                        Robust.cluster
##   Information                                  Observed
##   Observed information based on                 Hessian
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   CDRAN ~                                                               
##     PW                7.109    2.465    2.884    0.004    7.109    0.215
##     SES               0.131    0.051    2.576    0.010    0.131    0.218
##     Age               0.459    0.083    5.557    0.000    0.459    0.403
##   CWR ~                                                                 
##     PW              138.376   70.292    1.969    0.049  138.376    0.119
##     SES              -1.076    1.925   -0.559    0.576   -1.076   -0.051
##     Age              14.041    3.431    4.093    0.000   14.041    0.351
##   CDICT ~                                                               
##     PW               43.116   19.156    2.251    0.024   43.116    0.156
##     SES               0.210    0.504    0.416    0.677    0.210    0.042
##     Age               1.513    0.948    1.596    0.110    1.513    0.159
##   EWR ~                                                                 
##     PW               56.533   32.154    1.758    0.079   56.533    0.099
##     SES               4.717    0.811    5.815    0.000    4.717    0.455
##     Age               5.205    1.562    3.333    0.001    5.205    0.265
##   ARITH ~                                                               
##     PW               21.177   13.053    1.622    0.105   21.177    0.110
##     SES               0.719    0.315    2.280    0.023    0.719    0.205
##     Age               3.315    0.506    6.548    0.000    3.315    0.499
##   CWR ~                                                                 
##     CDRAN            15.493    2.898    5.347    0.000   15.493    0.441
##   CDICT ~                                                               
##     CDRAN             2.943    0.924    3.184    0.001    2.943    0.353
##   ARITH ~                                                               
##     CDRAN             1.220    0.394    3.095    0.002    1.220    0.209
##   EWR ~                                                                 
##     CDRAN             5.049    1.294    3.901    0.000    5.049    0.293
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .CWR ~~                                                                
##    .CDICT            98.909   18.071    5.473    0.000   98.909    0.642
##    .ARITH            27.375    8.897    3.077    0.002   27.375    0.308
##    .EWR              78.220   25.760    3.037    0.002   78.220    0.317
##  .CDICT ~~                                                              
##    .ARITH             6.856    2.465    2.782    0.005    6.856    0.273
##    .EWR              11.356    7.218    1.573    0.116   11.356    0.163
##  .EWR ~~                                                                
##    .ARITH            13.019    4.120    3.160    0.002   13.019    0.324
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     PW                0.090    0.003   32.920    0.000    0.090    3.242
##    .CDRAN            -1.176    0.728   -1.615    0.106   -1.176   -1.283
##    .CWR             -94.992   25.872   -3.672    0.000  -94.992   -2.950
##    .CDICT             1.468    7.246    0.203    0.839    1.468    0.192
##    .ARITH           -24.016    3.873   -6.201    0.000  -24.016   -4.493
##    .EWR             -43.626   13.097   -3.331    0.001  -43.626   -2.765
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .CDRAN             0.607    0.079    7.728    0.000    0.607    0.723
##    .CWR             545.915   76.502    7.136    0.000  545.915    0.527
##    .CDICT            43.455    5.664    7.673    0.000   43.455    0.743
##    .EWR             111.430   13.803    8.073    0.000  111.430    0.448
##    .ARITH            14.489    2.008    7.217    0.000   14.489    0.507
##     PW                0.001    0.000    7.286    0.000    0.001    1.000
# lavNames(fit_2, type = "ov") # [1] "CDRAN" "CWR"   "CDICT" "EWR"   "ARITH" "PW"    "SES"   "Age"  

# Define node positions (x, y coordinates)
positions <- matrix(c(
  # x    y      Node
    0,   0,     # CDRAN
    1,   5,     # CWR 
    1,   2.5,     # CDICT
    1,  -2.5,      # EWR
    1,  -5,     # ARITH
   -1,   0,     # PW 
   -1,  2.5,     # SES
   -1,  5      # Age
), ncol = 2, byrow = TRUE)

rownames(positions) <- c("CDRAN", "CWR", "CDICT", "EWR", "ARITH", "PW", "SES", "Age")

# Assign colors (1 color per node, in the order of rownames(positions))
node_colors <- c(
  rep("#F0B27A", 6),  # First 3 nodes (PW, SES, Age)
  rep("#7FB3D5", 2)   # Next 5 nodes (CDRAN, CWR, CDICT, ARITH, EWR)
)

# Plot the path model with manual layout
semPaths(fit_2, 
         what = "est",          # Display parameter estimates
         whatLabels = "std",    # Label paths with standardized estimates
         style = "lisrel",      # Use LISREL style for the plot
         layout = positions,
         edge.label.cex = 0.9,    # Size of edge labels
         sizeMan = 10,          # Size of manifest variables
         sizeLat = 10,          # Size of latent variables
         nCharNodes = 0,        # Do not abbreviate node names
         rotation = 1,          # Rotate the plot (2 = vertical)
         intercepts = FALSE,    # Do not show intercepts
         residuals = TRUE,      # Show residuals
         curve = 2,           # Add curvature to paths
         curvePivot = TRUE,     # Pivot curves at midpoint
         curveAdjacent = TRUE,  # Curve adjacent edges
         mar = c(3, 5, 3, 5),   # Margins for the plot
         edge.color = "#404040",# Darker edges
         color = node_colors,
         edge.label.color = "black",
         fade = F,
         asize = 3
)

# model_3 <- '
#   # Level 1: Direct paths
#   CDRAN ~ PW
#   CWR ~ PW
#   CDICT ~ PW
#   EWR ~ PW
#   ARITH ~ PW
#   CWR ~ CDRAN
#   CDICT ~ CDRAN
#   ARITH ~ CDRAN
#   EWR ~ CDRAN
# 
#   # Residual covariances to control for shared variance
#   CWR ~~ CDICT
#   CWR ~~ ARITH
#   CWR ~~ EWR
#   CDICT ~~ ARITH
#   CDICT ~~ EWR
#   ARITH ~~ EWR
# 
#   # Level 2: Between-group model
#   PW ~ 1  # Random intercept for PW
#   
#   # Control variables
#   CDRAN ~ SES + Age
#   PW ~~ SES + Age
# '
# 
# # Fit the model
# fit_3 <- sem(model_3, data = data_sem, cluster = 'FID')
# 
# # Summarize the results
# summary(fit_3, standardized = TRUE, fit.measures = TRUE)