library(bruceR)
## Warning: package 'bruceR' was built under R version 4.3.3
##
## bruceR (v2024.6)
## Broadly Useful Convenient and Efficient R functions
##
## Packages also loaded:
## ✔ data.table ✔ emmeans
## ✔ dplyr ✔ lmerTest
## ✔ tidyr ✔ effectsize
## ✔ stringr ✔ performance
## ✔ ggplot2 ✔ interactions
##
## Main functions of `bruceR`:
## cc() Describe() TTEST()
## add() Freq() MANOVA()
## .mean() Corr() EMMEANS()
## set.wd() Alpha() PROCESS()
## import() EFA() model_summary()
## print_table() CFA() lavaan_summary()
##
## For full functionality, please install all dependencies:
## install.packages("bruceR", dep=TRUE)
##
## Online documentation:
## https://psychbruce.github.io/bruceR
##
## To use this package in publications, please cite:
## Bao, H.-W.-S. (2024). bruceR: Broadly useful convenient and efficient R functions (Version 2024.6) [Computer software]. https://CRAN.R-project.org/package=bruceR
set.wd()
## ✔ Set working directory to "C:/Users/psyuser/Desktop/ARWA article"
set.seed(6)
# rm(list = ls())
library(tidyverse)
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ Matrix::expand() masks tidyr::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ data.table::first() masks dplyr::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ data.table::last() masks dplyr::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ Matrix::pack() masks tidyr::pack()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ Matrix::unpack() masks tidyr::unpack()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(tidyverse)
library(lmerTest)
library(conflicted)
data_income <- read.csv("./data/EEG_RS_lanExp_income_imputated_all_v5.csv")
# Center the variables
data_income <- data_income %>%
dplyr::mutate(
PW_c = as.numeric(scale(PW, center = TRUE, scale = FALSE)),
Age_EEG_yr_c = as.numeric(scale(Age_EEG_yr, center = TRUE, scale = FALSE)),
SES_c = as.numeric(scale(SES, center = TRUE, scale = FALSE)),
CDRAN_c = as.numeric(scale(CDRAN, center = TRUE, scale = FALSE))
# ARITH_c = scale(ARITH, center = TRUE, scale = FALSE)
)
# keep only the columns data for the analysis
data_income <- data_income %>%
select(CWR, CDICT, ARITH, EWR, EDICT, CDRAN, PW_c, Age_EEG_yr_c, SES_c, CDRAN_c, FID, EEGID, Age_EEG_yr, PW, CF, exponent, offset)
# %>%
# # exclude NaN value
# drop_na()
write.csv(data_income, file = "./data/model_data_180_v5.csv", row.names = F)
demo_info <- read.csv("./data/assessment-resting-state-v8-200-drop-duplicated.csv")
# left join data_income with demo_info by selected column age gender
demo_data_summary <- data_income %>%
left_join(demo_info %>% select(EEGID, Gender), by = "EEGID", suffix = c("", "")) %>%
summarise(count = n(),
mean_age = mean(Age_EEG_yr, na.rm = TRUE),
gender_count = list(table(Gender))
)
knitr::kable(demo_data_summary)
# Fit separate models for each response variable
model_CWR <- lmerTest::lmer(CWR ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID), data = data_income)
model_CDICT <- lmerTest::lmer(CDICT ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID), data = data_income)
model_ARITH <- lmerTest::lmer(ARITH ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID), data = data_income)
model_EWR <- lmerTest::lmer(EWR ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID), data = data_income)
model_EDICT <- lmerTest::lmer(EDICT ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID), data = data_income)
# Print the model summaries
summary(model_CWR)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: CWR ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1625.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.67123 -0.45267 0.00596 0.39871 2.42828
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 577.4 24.03
## Residual 192.8 13.89
## Number of obs: 181, groups: FID, 101
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 75.3981 2.6220 97.4668 28.756 < 2e-16 ***
## PW_c 201.1653 65.0612 149.0165 3.092 0.00237 **
## Age_EEG_yr_c 21.3751 2.6198 103.9251 8.159 8.3e-13 ***
## SES_c 0.6042 1.7509 97.6286 0.345 0.73077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c A_EEG_
## PW_c 0.011
## Ag_EEG_yr_c -0.005 0.035
## SES_c 0.042 0.056 0.051
summary(model_CDICT)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: CDICT ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1196.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3214 -0.4872 0.1039 0.5035 2.0731
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 36.91 6.075
## Residual 19.37 4.401
## Number of obs: 183, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 25.3377 0.6895 95.8007 36.747 < 2e-16 ***
## PW_c 50.1023 19.1489 167.1021 2.616 0.0097 **
## Age_EEG_yr_c 3.8014 0.6954 100.6957 5.466 3.35e-07 ***
## SES_c 0.5019 0.4618 94.8883 1.087 0.2799
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c A_EEG_
## PW_c 0.020
## Ag_EEG_yr_c -0.005 0.033
## SES_c 0.035 0.068 0.052
summary(model_ARITH)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: ARITH ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 944.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7908 -0.4376 -0.0549 0.3863 3.8665
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 9.003 3.000
## Residual 4.727 2.174
## Number of obs: 183, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.8875 0.3406 99.7933 23.160 < 2e-16 ***
## PW_c 24.0772 9.4588 168.0732 2.545 0.01181 *
## Age_EEG_yr_c 3.9716 0.3435 104.6348 11.563 < 2e-16 ***
## SES_c 0.7089 0.2281 98.8872 3.108 0.00246 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c A_EEG_
## PW_c 0.020
## Ag_EEG_yr_c -0.005 0.033
## SES_c 0.035 0.068 0.052
summary(model_EWR)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EWR ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1333.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3093 -0.4255 -0.0133 0.3923 3.0278
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 91.11 9.545
## Residual 37.49 6.123
## Number of obs: 183, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 19.2808 1.0556 97.9781 18.264 < 2e-16 ***
## PW_c 60.6910 27.6733 159.8471 2.193 0.0297 *
## Age_EEG_yr_c 7.3629 1.0620 103.6222 6.933 3.63e-10 ***
## SES_c 5.0626 0.7073 97.6992 7.158 1.53e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c A_EEG_
## PW_c 0.021
## Ag_EEG_yr_c -0.002 0.032
## SES_c 0.036 0.065 0.053
summary(model_EDICT)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EDICT ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1065.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3924 -0.4309 -0.1053 0.5054 2.7162
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 17.834 4.223
## Residual 9.802 3.131
## Number of obs: 182, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.5983 0.4824 98.9083 15.751 < 2e-16 ***
## PW_c 31.2985 13.6090 168.5731 2.300 0.0227 *
## Age_EEG_yr_c 3.5538 0.4866 103.5935 7.304 5.96e-11 ***
## SES_c 2.0869 0.3231 97.9724 6.459 4.07e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c A_EEG_
## PW_c 0.025
## Ag_EEG_yr_c -0.007 0.028
## SES_c 0.033 0.063 0.054
# Load lavaan package
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.3.3
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
# Define the mediation model
mediation_CWR <- '
# Mediator model
CDRAN_c ~ a * PW_c + Age_EEG_yr_c + SES_c
# Outcome model
CWR ~ b * CDRAN_c + c * PW_c + Age_EEG_yr_c + SES_c
# Indirect effect (a * b)
indirect := a * b
# Total effect (c + a * b)
total := c + (a * b)
'
# Fit the model
fit <- sem(mediation_CWR, data = data_income)
## Warning: lavaan->lav_data_full():
## some observed variances are (at least) a factor 1000 times larger than
## others; use varTable(fit) to investigate
# Summarize the results
summary(fit, standardized = TRUE)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 9
##
## Used Total
## Number of observations 136 202
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN_c ~
## PW_c (a) 7.109 2.424 2.932 0.003 7.109 0.217
## Ag_EEG_yr_ 0.459 0.084 5.476 0.000 0.459 0.407
## SES_c 0.131 0.044 2.957 0.003 0.131 0.220
## CWR ~
## CDRAN_c (b) 15.493 2.571 6.027 0.000 15.493 0.441
## PW_c (c) 138.376 74.941 1.846 0.065 138.376 0.120
## Ag_EEG_yr_ 14.041 2.778 5.054 0.000 14.041 0.354
## SES_c -1.076 1.369 -0.786 0.432 -1.076 -0.051
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN_c 0.607 0.074 8.246 0.000 0.607 0.738
## .CWR 545.915 66.202 8.246 0.000 545.915 0.537
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## indirect 110.136 41.769 2.637 0.008 110.136 0.096
## total 248.512 81.810 3.038 0.002 248.512 0.216
# Define the mediation model
mediation_CDICT <- '
# Mediator model
CDRAN_c ~ a * PW_c + Age_EEG_yr_c + SES_c
# Outcome model
CDICT ~ b * CDRAN_c + c * PW_c + Age_EEG_yr_c + SES_c
# Indirect effect (a * b)
indirect := a * b
# Total effect (c + a * b)
total := c + (a * b)
'
# Fit the model
fit <- sem(mediation_CDICT, data = data_income)
# Summarize the results
summary(fit, standardized = TRUE)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 9
##
## Used Total
## Number of observations 138 202
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN_c ~
## PW_c (a) 6.984 2.369 2.949 0.003 6.984 0.216
## Ag_EEG_yr_ 0.457 0.083 5.513 0.000 0.457 0.406
## SES_c 0.132 0.044 3.012 0.003 0.132 0.222
## CDICT ~
## CDRAN_c (b) 2.896 0.735 3.942 0.000 2.896 0.338
## PW_c (c) 51.709 21.074 2.454 0.014 51.709 0.187
## Ag_EEG_yr_ 1.710 0.790 2.166 0.030 1.710 0.177
## SES_c 0.145 0.390 0.373 0.709 0.145 0.029
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN_c 0.599 0.072 8.307 0.000 0.599 0.737
## .CDICT 44.607 5.370 8.307 0.000 44.607 0.746
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## indirect 20.226 8.566 2.361 0.018 20.226 0.073
## total 71.935 21.560 3.336 0.001 71.935 0.260
# Define the mediation model
mediation_EWR <- '
# Mediator model
CDRAN_c ~ a * PW_c + Age_EEG_yr_c + SES_c
# Outcome model
EWR ~ b * CDRAN_c + c * PW_c + Age_EEG_yr_c + SES_c
# Indirect effect (a * b)
indirect := a * b
# Total effect (c + a * b)
total := c + (a * b)
'
# Fit the model
fit <- sem(mediation_EWR, data = data_income)
# Summarize the results
summary(fit, standardized = TRUE)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 9
##
## Used Total
## Number of observations 138 202
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN_c ~
## PW_c (a) 6.984 2.369 2.949 0.003 6.984 0.216
## Ag_EEG_yr_ 0.457 0.083 5.513 0.000 0.457 0.406
## SES_c 0.132 0.044 3.012 0.003 0.132 0.222
## EWR ~
## CDRAN_c (b) 5.139 1.191 4.316 0.000 5.139 0.295
## PW_c (c) 38.193 34.159 1.118 0.264 38.193 0.068
## Ag_EEG_yr_ 4.796 1.280 3.748 0.000 4.796 0.245
## SES_c 4.853 0.632 7.672 0.000 4.853 0.469
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN_c 0.599 0.072 8.307 0.000 0.599 0.737
## .EWR 117.194 14.108 8.307 0.000 117.194 0.475
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## indirect 35.890 14.741 2.435 0.015 35.890 0.064
## total 74.083 35.296 2.099 0.036 74.083 0.132
# Define the mediation model
mediation_EDICT <- '
# Mediator model
CDRAN_c ~ a * PW_c + Age_EEG_yr_c + SES_c
# Outcome model
EDICT ~ b * CDRAN_c + c * PW_c + Age_EEG_yr_c + SES_c
# Indirect effect (a * b)
indirect := a * b
# Total effect (c + a * b)
total := c + (a * b)
'
# Fit the model
fit <- sem(mediation_EDICT, data = data_income)
# Summarize the results
summary(fit, standardized = TRUE)
## lavaan 0.6-18 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 9
##
## Used Total
## Number of observations 137 202
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## CDRAN_c ~
## PW_c (a) 6.976 2.386 2.923 0.003 6.976 0.215
## Ag_EEG_yr_ 0.457 0.083 5.474 0.000 0.457 0.404
## SES_c 0.132 0.044 2.995 0.003 0.132 0.221
## EDICT ~
## CDRAN_c (b) 2.503 0.552 4.539 0.000 2.503 0.311
## PW_c (c) 34.122 15.877 2.149 0.032 34.122 0.131
## Ag_EEG_yr_ 2.295 0.595 3.858 0.000 2.295 0.252
## SES_c 2.087 0.294 7.101 0.000 2.087 0.435
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .CDRAN_c 0.603 0.073 8.276 0.000 0.603 0.738
## .EDICT 25.146 3.038 8.276 0.000 25.146 0.475
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## indirect 17.461 7.105 2.458 0.014 17.461 0.067
## total 51.583 16.521 3.122 0.002 51.583 0.198
# # Mediator model: CDRAN_c as a function of PW_c
# mediator_model <- lm(CDRAN_c ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID), data = data_income)
# # Outcome model: XXX a function of PW_c and SES_c
# outcome_CWR <- lmerTest::lmer(CWR ~ PW_c + CDRAN_c + SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
# outcome_CDICT <- lmerTest::lmer(CDICT ~ PW_c + CDRAN_c + SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
# outcome_ARITH <- lmerTest::lmer(ARITH ~ PW_c + CDRAN_c + SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
# outcome_EWR <- lmerTest::lmer(EWR ~ PW_c + CDRAN_c + SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
# outcome_EDICT <- lmerTest::lmer(EDICT ~ PW_c + CDRAN_c + SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
#
# # Perform mediation analysis
# mediate_CWR <- mediate(
# model.m = mediator_model, # Mediator model
# model.y = outcome_CWR, # Outcome model
# treat = "PW_c", # Predictor variable
# mediator = "CDRAN_c", # Mediator variable
# boot = TRUE, # Use bootstrapping for confidence intervals
# sims = 1000 # Number of bootstrap simulations
# )
#
# # Summarize the results
# summary(mediate_CWR)
#
# # Perform mediation analysis
# mediate_CDICT <- mediate(
# model.m = mediator_model, # Mediator model
# model.y = outcome_CDICT, # Outcome model
# treat = "PW_c", # Predictor variable
# mediator = "CDRAN_c", # Mediator variable
# boot = F, # Use bootstrapping for confidence intervals
# sims = 1000 # Number of bootstrap simulations
# )
#
# # Summarize the results
# summary(mediate_CDICT)
#
# # Perform mediation analysis
# mediate_ARITH <- mediate(
# model.m = mediator_model, # Mediator model
# model.y = outcome_ARITH, # Outcome model
# treat = "PW_c", # Predictor variable
# mediator = "CDRAN_c", # Mediator variable
# boot = F, # Use bootstrapping for confidence intervals
# sims = 1000 # Number of bootstrap simulations
# )
#
# # Summarize the results
# summary(mediate_ARITH)
#
# # Perform mediation analysis
# mediate_EWR <- mediate(
# model.m = mediator_model, # Mediator model
# model.y = outcome_EWR, # Outcome model
# treat = "PW_c", # Predictor variable
# mediator = "CDRAN_c", # Mediator variable
# boot = F, # Use bootstrapping for confidence intervals
# sims = 1000 # Number of bootstrap simulations
# )
#
# # Summarize the results
# summary(mediate_EWR)
#
# # Perform mediation analysis
# mediate_EDICT <- mediate(
# model.m = mediator_model, # Mediator model
# model.y = outcome_EDICT, # Outcome model
# treat = "PW_c", # Predictor variable
# mediator = "CDRAN_c", # Mediator variable
# boot = F, # Use bootstrapping for confidence intervals
# sims = 1000 # Number of bootstrap simulations
# )
#
# # Summarize the results
# summary(mediate_EDICT)
# Fit the model with PW as predictor, SES as moderator, and Age_EEG_yr as covariate
moderate_CWR_SES <- lmerTest::lmer(CWR ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
moderate_CDICT_SES <- lmerTest::lmer(CDICT ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
moderate_ARITH_SES <- lmerTest::lmer(ARITH ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
moderate_EWR_SES <- lmerTest::lmer(EWR ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
moderate_EDICT_SES <- lmerTest::lmer(EDICT ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID), data = data_income)
# Print the model summary
summary(moderate_CWR_SES)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: CWR ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1615.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.63654 -0.45293 0.02704 0.38155 2.41868
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 587.4 24.24
## Residual 191.3 13.83
## Number of obs: 181, groups: FID, 101
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 75.3091 2.6427 95.9331 28.497 < 2e-16 ***
## PW_c 192.4478 66.4999 154.2573 2.894 0.00436 **
## SES_c 0.7626 1.7826 99.2972 0.428 0.66971
## Age_EEG_yr_c 21.4366 2.6377 102.4821 8.127 1.05e-12 ***
## PW_c:SES_c -27.4350 45.5669 136.6754 -0.602 0.54812
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c SES_c A_EEG_
## PW_c 0.021
## SES_c 0.034 0.023
## Ag_EEG_yr_c -0.006 0.028 0.055
## PW_c:SES_c 0.051 0.209 -0.150 -0.033
summary(moderate_CDICT_SES)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: CDICT ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1189
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.27929 -0.47065 0.05754 0.50528 2.00731
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 37.26 6.104
## Residual 19.31 4.395
## Number of obs: 183, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 25.3009 0.6933 95.4681 36.491 < 2e-16 ***
## PW_c 47.4916 19.4171 171.0016 2.446 0.0155 *
## SES_c 0.5622 0.4698 98.1405 1.197 0.2343
## Age_EEG_yr_c 3.8247 0.6983 100.2972 5.477 3.21e-07 ***
## PW_c:SES_c -10.5681 13.5500 155.3846 -0.780 0.4366
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c SES_c A_EEG_
## PW_c 0.031
## SES_c 0.024 0.039
## Ag_EEG_yr_c -0.008 0.025 0.059
## PW_c:SES_c 0.067 0.163 -0.165 -0.044
summary(moderate_ARITH_SES)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: ARITH ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 938.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7646 -0.4276 -0.0359 0.3866 3.8318
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 8.999 3.000
## Residual 4.770 2.184
## Number of obs: 183, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.8946 0.3416 99.9052 23.108 < 2e-16 ***
## PW_c 24.5736 9.6128 172.0946 2.556 0.01144 *
## SES_c 0.6976 0.2315 102.5250 3.013 0.00326 **
## Age_EEG_yr_c 3.9667 0.3442 104.6136 11.526 < 2e-16 ***
## PW_c:SES_c 2.0137 6.7132 158.0059 0.300 0.76460
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c SES_c A_EEG_
## PW_c 0.031
## SES_c 0.024 0.040
## Ag_EEG_yr_c -0.008 0.025 0.059
## PW_c:SES_c 0.067 0.161 -0.166 -0.044
summary(moderate_EWR_SES)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EWR ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1325.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2809 -0.4226 -0.0159 0.3915 3.0008
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 92.84 9.635
## Residual 37.11 6.092
## Number of obs: 183, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 19.2320 1.0648 96.7680 18.062 < 2e-16 ***
## PW_c 56.6085 28.1326 164.3695 2.012 0.0458 *
## SES_c 5.1420 0.7205 99.7030 7.137 1.57e-10 ***
## Age_EEG_yr_c 7.3888 1.0696 102.4256 6.908 4.26e-10 ***
## PW_c:SES_c -14.2028 19.4599 146.8086 -0.730 0.4666
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c SES_c A_EEG_
## PW_c 0.032
## SES_c 0.026 0.035
## Ag_EEG_yr_c -0.005 0.024 0.059
## PW_c:SES_c 0.063 0.184 -0.154 -0.040
summary(moderate_EDICT_SES)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: EDICT ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## Data: data_income
##
## REML criterion at convergence: 1059.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3922 -0.4334 -0.1127 0.4956 2.7039
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID (Intercept) 17.801 4.219
## Residual 9.907 3.148
## Number of obs: 182, groups: FID, 102
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.6036 0.4836 98.0362 15.722 < 2e-16 ***
## PW_c 31.7582 13.7976 171.6855 2.302 0.0226 *
## SES_c 2.0779 0.3277 100.5440 6.340 6.58e-09 ***
## Age_EEG_yr_c 3.5507 0.4873 102.6078 7.286 6.76e-11 ***
## PW_c:SES_c 1.6687 9.6785 158.2115 0.172 0.8633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) PW_c SES_c A_EEG_
## PW_c 0.034
## SES_c 0.022 0.038
## Ag_EEG_yr_c -0.010 0.022 0.060
## PW_c:SES_c 0.063 0.144 -0.164 -0.041
# Perform the likelihood ratio test
comparison_CWR_SES <- anova(model_CWR, moderate_CWR_SES)
## refitting model(s) with ML (instead of REML)
# Print the comparison results
print(comparison_CWR_SES)
## Data: data_income
## Models:
## model_CWR: CWR ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## moderate_CWR_SES: CWR ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_CWR 6 1658.1 1677.3 -823.06 1646.1
## moderate_CWR_SES 7 1659.8 1682.2 -822.89 1645.8 0.3336 1 0.5636
# Perform the likelihood ratio test
comparison_CDICT_SES <- anova(model_CDICT, moderate_CDICT_SES)
## refitting model(s) with ML (instead of REML)
# Print the comparison results
print(comparison_CDICT_SES)
## Data: data_income
## Models:
## model_CDICT: CDICT ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## moderate_CDICT_SES: CDICT ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_CDICT 6 1218.8 1238.1 -603.43 1206.8
## moderate_CDICT_SES 7 1220.2 1242.7 -603.12 1206.2 0.6058 1 0.4364
# Perform the likelihood ratio test
comparison_ARITH_SES <- anova(model_ARITH, moderate_ARITH_SES)
## refitting model(s) with ML (instead of REML)
# Print the comparison results
print(comparison_ARITH_SES)
## Data: data_income
## Models:
## model_ARITH: ARITH ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## moderate_ARITH_SES: ARITH ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_ARITH 6 960.71 979.96 -474.35 948.71
## moderate_ARITH_SES 7 962.61 985.08 -474.31 948.61 0.0957 1 0.7571
# Perform the likelihood ratio test
comparison_EWR_SES <- anova(model_EWR, moderate_EWR_SES)
## refitting model(s) with ML (instead of REML)
# Print the comparison results
print(comparison_EWR_SES)
## Data: data_income
## Models:
## model_EWR: EWR ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## moderate_EWR_SES: EWR ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_EWR 6 1359.1 1378.4 -673.57 1347.1
## moderate_EWR_SES 7 1360.6 1383.1 -673.32 1346.6 0.5022 1 0.4785
# Perform the likelihood ratio test
comparison_EDICT_SES <- anova(model_EDICT, moderate_EDICT_SES)
## refitting model(s) with ML (instead of REML)
# Print the comparison results
print(comparison_EDICT_SES)
## Data: data_income
## Models:
## model_EDICT: EDICT ~ PW_c + Age_EEG_yr_c + SES_c + (1 | FID)
## moderate_EDICT_SES: EDICT ~ PW_c * SES_c + Age_EEG_yr_c + (1 | FID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_EDICT 6 1085 1104.2 -536.50 1073
## moderate_EDICT_SES 7 1087 1109.4 -536.48 1073 0.036 1 0.8494
{r get CF, exponent, offset data} # data <- read.csv("./data/EEG_RS_lanExp.csv") # # # Select the relevant columns and drop rows with NA values # data_selected <- data %>% # mutate(Age_EEG_yr = Age_EEG / 12) %>% # convert Age_EEG from months to years # # keep unique EEGID values # distinct(EEGID, .keep_all = TRUE) %>% # dplyr::select(CWR, CDICT, ARITH, EWR, EDICT, CF, PW, offset, exponent, CMOI, EMOI, FatherEducation, MotherEducation, Income, Age_EEG_yr, FID, EEGID) # # # Replace the original columns with the imputed data # data_selected$CMOI <- completed_data$CMOI # data_selected$EMOI <- completed_data$EMOI # data_selected$FatherEducation <- completed_data$FatherEducation # data_selected$MotherEducation <- completed_data$MotherEducation # data_selected$Income <- completed_data$Income # # # Select the relevant columns and drop rows with NA values # education_income_data <- data_selected %>% # select(EEGID, FatherEducation, MotherEducation, Income) %>% # drop_na() # # # Merge the PCA scores back into the original data # data_income <- data_selected %>% # left_join(pca_scores, by = "EEGID") # # # output the data_income to csv # write.csv(data_income, file = "./data/EEG_RS_lanExp_income_imputated_all_v3.csv", row.names = FALSE) #
# read CRF dataset
data_parent_raw <- read_excel("./data/CRF_ADATA_20230620.xlsx")
## Warning: Expecting logical in BQM1490 / R1490C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1491 / R1491C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1492 / R1492C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1493 / R1493C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1498 / R1498C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1499 / R1499C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1500 / R1500C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1501 / R1501C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1733 / R1733C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
## Warning: Expecting logical in BQM1734 / R1734C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
## Warning: Expecting logical in BQM1735 / R1735C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
data_parent_s <- data_parent_raw %>%
dplyr::select(SubjectID, FID, EEGID,
MSWR, MCDICT, MARITH,
FSWR, FCDICT, FARITH, LearningProblem, LP1, LP2, LP3, LP4, LP5) %>%
dplyr::filter(!(rowSums(is.na(select(., MSWR, MCDICT, MARITH, FSWR, FCDICT, FARITH))) == 6)) %>%
dplyr::mutate(
PSWR = case_when(
is.na(MSWR) & is.na(FSWR) ~ NA_real_, # If both are NA, PSWR is NA
is.na(MSWR) ~ FSWR, # If MSWR is NA, PSWR is FSWR
is.na(FSWR) ~ MSWR, # If FSWR is NA, PSWR is MSWR
TRUE ~ (MSWR + FSWR) / 2 # If both have values, PSWR is the average
),
PCDICT = case_when(
is.na(MCDICT) & is.na(FCDICT) ~ NA_real_,
is.na(MCDICT) ~ FCDICT,
is.na(FCDICT) ~ MCDICT,
TRUE ~ (MCDICT + FCDICT) / 2
),
PARITH = case_when(
is.na(MARITH) & is.na(FARITH) ~ NA_real_,
is.na(MARITH) ~ FARITH,
is.na(FARITH) ~ MARITH,
TRUE ~ (MARITH + FARITH) / 2
)
) %>%
dplyr::mutate(
PSWR_z = as.numeric(scale(PSWR)), # Calculate z-score for PSWR
PCDICT_z = as.numeric(scale(PCDICT)), # Calculate z-score for PCDICT
PARITH_z = as.numeric(scale(PARITH)) # Calculate z-score for PARITH
)
# Summarize the mean and SD for PSWR, PCDICT, and PARITH
summary_stats <- data_parent_s %>%
dplyr::summarize(
PSWR_mean = mean(PSWR, na.rm = TRUE),
PSWR_sd = sd(PSWR, na.rm = TRUE),
PCDICT_mean = mean(PCDICT, na.rm = TRUE),
PCDICT_sd = sd(PCDICT, na.rm = TRUE),
PARITH_mean = mean(PARITH, na.rm = TRUE),
PARITH_sd = sd(PARITH, na.rm = TRUE)
)
# Create a subset where at least one of the z-scores is lower than -1
risk_data <- data_parent_s %>%
dplyr::filter(
PSWR_z < -1 | PCDICT_z < -1 | PARITH_z < -1
) %>%
dplyr::select(FID, EEGID, PSWR_z, PCDICT_z, PARITH_z)
{r select risk child and do N-P test} # # read data from the tidy data # data_child <- read.csv("./data/model_data_180_v3.csv") # # data_child_c <- data_child %>% # dplyr::mutate( # CF_z = as.numeric(scale(CF)), # PW_z = as.numeric(scale(PW)), # exponent_z = as.numeric(scale(exponent)), # offset_z = as.numeric(scale(offset)) # ) # # # Function to perform regression and extract residuals # regress_and_add_residuals <- function(data, dependent_var, independent_var) { # # Perform the regression # model <- lm(reformulate(independent_var, response = dependent_var), data = data) # # # Extract the residuals and add them as a new column # residuals_name <- paste0(dependent_var, "_r") # # data <- data %>% # dplyr::mutate(!!residuals_name := residuals(model)) # # return(data) # } # # # List of dependent variables # dependent_vars <- c("CF_z", "PW_z") # # # Apply the function to each dependent variable # for (var in dependent_vars) { # data_child_c <- regress_and_add_residuals(data_child_c, var, "Age_EEG_yr") # } # # # Select risk child from child_data where EEGID and FID match with risk_data # risk_child <- data_child_c %>% # dplyr::inner_join(risk_data, by = c("EEGID", "FID")) # # # List of variables to plot # variables <- c("CF_z_r", "PW_z_r") # , "exponent_z_r", "offset_z_r" # # # exclude risk_child data from original dataset # whitney_data <- data_child_c %>% # dplyr::anti_join(risk_child, by = c("EEGID", "FID")) # # # Prepare combined data # combined_data <- bind_rows( # whitney_data %>% # select(EEGID, FID, CF_z_r, PW_z_r) %>% # pivot_longer(cols = c(CF_z_r, PW_z_r), # names_to = "Variable", # values_to = "Value") %>% # mutate(Group = "Normal"), # # risk_child %>% # select(EEGID, FID, CF_z_r, PW_z_r) %>% # pivot_longer(cols = c(CF_z_r, PW_z_r), # names_to = "Variable", # values_to = "Value") %>% # mutate(Group = "Risk") # ) # # # Create the combined plot # p1 <- ggplot(combined_data, aes(x = Value)) + # # Density plots # geom_density(aes(fill = Group), alpha = 0.2, adjust = 1.5) + # # # Faceting # facet_grid(rows = vars(Group), # cols = vars(Variable), # scales = "free_y", # switch = "y") + # # # Color scale # scale_fill_manual(values = c("Data" = "#ee9d5c", "Risk" = "#ff3500")) + # # # Theme adjustments # theme_bw() + # theme( # strip.placement = "outside", # strip.background = element_blank(), # panel.spacing = unit(0.5, "lines"), # axis.text.x = element_text(size = 12), # axis.title = element_text(size = 14), # plot.title = element_text(size = 16, face = "bold"), # legend.position = "none" # ) + # # # Labels # labs(title = "Combined Distribution of Variables", # x = "Z-score Values", # y = "Density") # # p1 # # # Function to perform the Mann-Whitney U test and return the results # perform_mann_whitney_test <- function(var, data1, data2) { # test_result <- wilcox.test(data1[[var]], data2[[var]], alternative = "two.sided", exact = FALSE) # return(test_result) # } # # # Apply the function to each variable in the list # test_results <- lapply(variables, function(var) { # perform_mann_whitney_test(var, whitney_data, risk_child) # }) # # # Print the test results for each variable # for (i in seq_along(variables)) { # cat("Mann-Whitney U test results for", variables[i], ":\n") # print(test_results[[i]]) # cat("\n") # } #
# read data from the tidy data
data_child <- read.csv("./data/model_data_180_v5.csv") %>%
drop_na()
data_child_c <- data_child %>%
dplyr::mutate(
CF_z = as.numeric(scale(CF)),
PW_z = as.numeric(scale(PW)),
exponent_z = as.numeric(scale(exponent)),
CDRAN_z = as.numeric(scale(CDRAN))
)
# Function to perform regression and extract residuals
regress_and_add_residuals <- function(data, dependent_var, independent_var) {
# Perform the regression
model <- lm(reformulate(independent_var, response = dependent_var), data = data)
# Extract the residuals and add them as a new column
residuals_name <- paste0(dependent_var, "_r")
data <- data %>%
dplyr::mutate(!!residuals_name := residuals(model))
return(data)
}
# List of dependent variables
dependent_vars <- c("CDRAN_z", "CF_z", "PW_z")
# Apply the function to each dependent variable
for (var in dependent_vars) {
data_child_c <- regress_and_add_residuals(data_child_c, var, "Age_EEG_yr")
}
# Select risk child from child_data where EEGID and FID match with risk_data
risk_child <- data_child_c %>%
dplyr::inner_join(risk_data, by = c("EEGID", "FID"))
# List of variables to plot
variables <- c("CDRAN_z_r", "CF_z_r", "PW_z_r") # , "exponent_z_r", "offset_z_r"
# exclude risk_child data from original dataset
whitney_data <- data_child_c %>%
dplyr::anti_join(risk_child, by = c("EEGID", "FID"))
# Prepare combined data
combined_data <- bind_rows(
whitney_data %>%
select(EEGID, FID, CF_z_r, PW_z_r, CDRAN_z_r) %>%
pivot_longer(cols = c(CF_z_r, PW_z_r, CDRAN_z_r),
names_to = "Variable",
values_to = "Value") %>%
mutate(Group = "Normal"),
risk_child %>%
select(EEGID, FID, CF_z_r, PW_z_r, CDRAN_z_r) %>%
pivot_longer(cols = c(CF_z_r, PW_z_r, CDRAN_z_r),
names_to = "Variable",
values_to = "Value") %>%
mutate(Group = "Risk")
)
# Create the combined plot
p1 <- ggplot(combined_data, aes(x = Value)) +
# Density plots
geom_density(aes(fill = Group), alpha = 0.2, adjust = 1.5) +
# Faceting
facet_grid(rows = vars(Group),
cols = vars(Variable),
scales = "free_y",
switch = "y") +
# Color scale
scale_fill_manual(values = c("Data" = "#ee9d5c", "Risk" = "#ff3500")) +
# Theme adjustments
theme_bw() +
theme(
strip.placement = "outside",
strip.background = element_blank(),
panel.spacing = unit(0.5, "lines"),
axis.text.x = element_text(size = 12),
axis.title = element_text(size = 14),
plot.title = element_text(size = 16, face = "bold"),
legend.position = "none"
) +
# Labels
labs(title = "Combined Distribution of Variables",
x = "Z-score Values",
y = "Density")
p1

# Function to perform the Mann-Whitney U test and return the results
perform_mann_whitney_test <- function(var, data1, data2) {
test_result <- wilcox.test(data1[[var]], data2[[var]], alternative = "two.sided", exact = FALSE)
return(test_result)
}
# Apply the function to each variable in the list
test_results <- lapply(variables, function(var) {
perform_mann_whitney_test(var, whitney_data, risk_child)
})
# Print the test results for each variable
for (i in seq_along(variables)) {
cat("Mann-Whitney U test results for", variables[i], ":\n")
print(test_results[[i]])
cat("\n")
}
## Mann-Whitney U test results for CDRAN_z_r :
##
## Wilcoxon rank sum test with continuity correction
##
## data: data1[[var]] and data2[[var]]
## W = 647, p-value = 0.8225
## alternative hypothesis: true location shift is not equal to 0
##
##
## Mann-Whitney U test results for CF_z_r :
##
## Wilcoxon rank sum test with continuity correction
##
## data: data1[[var]] and data2[[var]]
## W = 416, p-value = 0.08489
## alternative hypothesis: true location shift is not equal to 0
##
##
## Mann-Whitney U test results for PW_z_r :
##
## Wilcoxon rank sum test with continuity correction
##
## data: data1[[var]] and data2[[var]]
## W = 841, p-value = 0.06192
## alternative hypothesis: true location shift is not equal to 0