library(bruceR)
## Warning: package 'bruceR' was built under R version 4.3.3
## 
## bruceR (v2024.6)
## Broadly Useful Convenient and Efficient R functions
## 
## Packages also loaded:
## ✔ data.table ✔ emmeans
## ✔ dplyr      ✔ lmerTest
## ✔ tidyr      ✔ effectsize
## ✔ stringr    ✔ performance
## ✔ ggplot2    ✔ interactions
## 
## Main functions of `bruceR`:
## cc()             Describe()  TTEST()
## add()            Freq()      MANOVA()
## .mean()          Corr()      EMMEANS()
## set.wd()         Alpha()     PROCESS()
## import()         EFA()       model_summary()
## print_table()    CFA()       lavaan_summary()
## 
## For full functionality, please install all dependencies:
## install.packages("bruceR", dep=TRUE)
## 
## Online documentation:
## https://psychbruce.github.io/bruceR
## 
## To use this package in publications, please cite:
## Bao, H.-W.-S. (2024). bruceR: Broadly useful convenient and efficient R functions (Version 2024.6) [Computer software]. https://CRAN.R-project.org/package=bruceR
set.wd()
## ✔ Set working directory to "C:/Users/psyuser/Desktop/ARWA article"
set.seed(6)
rm(list = ls())
library(tidyverse)
## Warning: package 'readr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ Matrix::expand()      masks tidyr::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ data.table::first()   masks dplyr::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ data.table::last()    masks dplyr::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ Matrix::pack()        masks tidyr::pack()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ lubridate::second()   masks data.table::second()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ Matrix::unpack()      masks tidyr::unpack()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(mice)
## Warning: package 'mice' was built under R version 4.3.3
## Registered S3 methods overwritten by 'broom':
##   method            from  
##   tidy.glht         jtools
##   tidy.summary.glht jtools
## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:bruceR':
## 
##     cc
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(naniar)
## Warning: package 'naniar' was built under R version 4.3.3
library(broom)
## Warning: package 'broom' was built under R version 4.3.3
library(readxl)

data <- read.csv("./data/EEG_RS_lanExp.csv")

# Select the relevant columns and drop rows with NA values
data_selected <- data %>% 
  # select(EEGID, FatherEducation, MotherEducation, Income) %>% 
  mutate(Age_EEG_yr = Age_EEG / 12) %>% # convert Age_EEG from months to years
  # keep unique EEGID values
  distinct(EEGID, .keep_all = TRUE) %>% 
  # dplyr::select(CWR, CDICT, ARITH, EWR, EDICT, PW, CMOI, EMOI, FatherEducation, MotherEducation, Income, Age_EEG_yr, FID, EEGID)
  dplyr::select(CWR, CDICT, ARITH, EWR, EDICT, CF, PW, offset, exponent, CMOI, EMOI, FatherEducation, MotherEducation, Income, Age_EEG_yr, FID, EEGID)

# Visualize the missing data pattern
vis_miss(data_selected)

# Analyze the pattern of missing data
missing_pattern <- naniar::miss_var_summary(data_selected)
print(missing_pattern)
## # A tibble: 17 × 3
##    variable        n_miss pct_miss
##    <chr>            <int>    <num>
##  1 CMOI                39    19.3 
##  2 EMOI                39    19.3 
##  3 CF                  17     8.42
##  4 PW                  16     7.92
##  5 offset              16     7.92
##  6 exponent            16     7.92
##  7 FatherEducation     13     6.44
##  8 Income               9     4.46
##  9 MotherEducation      7     3.47
## 10 CWR                  5     2.48
## 11 EDICT                4     1.98
## 12 CDICT                3     1.49
## 13 ARITH                3     1.49
## 14 EWR                  3     1.49
## 15 Age_EEG_yr           3     1.49
## 16 FID                  3     1.49
## 17 EEGID                0     0
# Test if the missing data is random using Little's MCAR test
mcar_test <- naniar::mcar_test(data_selected[, c("CMOI", "EMOI", 
                                               "FatherEducation", 
                                               "MotherEducation", "Income")])
print(mcar_test)
## # A tibble: 1 × 4
##   statistic    df  p.value missing.patterns
##       <dbl> <dbl>    <dbl>            <int>
## 1      45.3    17 0.000221                9
# If the missing data is random, perform imputation using the mice package
if (mcar_test$p.value > 0.05) {
  print("The missing data is at random.")
  imputed_data <- mice(data_selected[, c("CMOI", "EMOI", 
                                         "FatherEducation", "MotherEducation", "Income")], 
                       method = "norm.predict", m = 5, maxit = 50, seed = 500)
  completed_data <- complete(imputed_data, 1)
  
  # Replace the original columns with the imputed data
  data_selected$CMOI <- completed_data$CMOI
  data_selected$EMOI <- completed_data$EMOI
  data_selected$FatherEducation <- completed_data$FatherEducation
  data_selected$MotherEducation <- completed_data$MotherEducation
  data_selected$Income <- completed_data$Income
} else {
  print("The missing data is not completely at random.")
}
## [1] "The missing data is not completely at random."
# Check if CMOI and EMOI are normally distributed
shapiro_test_cmo <- shapiro.test(data_selected$CMOI)
shapiro_test_emo <- shapiro.test(data_selected$EMOI)
print(shapiro_test_cmo)
## 
##  Shapiro-Wilk normality test
## 
## data:  data_selected$CMOI
## W = 0.73803, p-value = 9.87e-16
print(shapiro_test_emo)
## 
##  Shapiro-Wilk normality test
## 
## data:  data_selected$EMOI
## W = 0.70746, p-value < 2.2e-16
# Analyze the pattern of missing data
missing_pattern <- naniar::miss_var_summary(data_selected)
print(missing_pattern)
## # A tibble: 17 × 3
##    variable        n_miss pct_miss
##    <chr>            <int>    <num>
##  1 CMOI                39    19.3 
##  2 EMOI                39    19.3 
##  3 CF                  17     8.42
##  4 PW                  16     7.92
##  5 offset              16     7.92
##  6 exponent            16     7.92
##  7 FatherEducation     13     6.44
##  8 Income               9     4.46
##  9 MotherEducation      7     3.47
## 10 CWR                  5     2.48
## 11 EDICT                4     1.98
## 12 CDICT                3     1.49
## 13 ARITH                3     1.49
## 14 EWR                  3     1.49
## 15 Age_EEG_yr           3     1.49
## 16 FID                  3     1.49
## 17 EEGID                0     0
# Print the results of the normality tests
if (shapiro_test_cmo$p.value < 0.05) {
  print("CMOI is not normally distributed.")
} else {
  print("CMOI is normally distributed.")
}
## [1] "CMOI is not normally distributed."
if (shapiro_test_emo$p.value < 0.05) {
  print("EMOI is not normally distributed.")
} else {
  print("EMOI is normally distributed.")
}
## [1] "EMOI is not normally distributed."
# Choose imputation method based on normality test results
imputation_method <- ifelse(shapiro_test_cmo$p.value < 0.05 | shapiro_test_emo$p.value < 0.05, "pmm", "norm.predict")

# Perform imputation using the mice package
imputed_data <- mice(data_selected[, c("CMOI", "EMOI", 
                                     "FatherEducation", "MotherEducation", "Income")], method = imputation_method, m = 5, maxit = 50, seed = 500)
## 
##  iter imp variable
##   1   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   1   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   1   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   1   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   1   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   2   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   2   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   2   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   2   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   2   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   3   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   3   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   3   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   3   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   3   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   4   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   4   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   4   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   4   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   4   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   5   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   5   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   5   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   5   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   5   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   6   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   6   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   6   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   6   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   6   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   7   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   7   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   7   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   7   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   7   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   8   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   8   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   8   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   8   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   8   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   9   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   9   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   9   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   9   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   9   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   10   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   10   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   10   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   10   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   10   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   11   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   11   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   11   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   11   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   11   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   12   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   12   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   12   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   12   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   12   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   13   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   13   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   13   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   13   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   13   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   14   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   14   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   14   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   14   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   14   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   15   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   15   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   15   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   15   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   15   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   16   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   16   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   16   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   16   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   16   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   17   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   17   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   17   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   17   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   17   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   18   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   18   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   18   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   18   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   18   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   19   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   19   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   19   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   19   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   19   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   20   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   20   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   20   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   20   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   20   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   21   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   21   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   21   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   21   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   21   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   22   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   22   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   22   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   22   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   22   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   23   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   23   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   23   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   23   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   23   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   24   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   24   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   24   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   24   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   24   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   25   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   25   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   25   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   25   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   25   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   26   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   26   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   26   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   26   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   26   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   27   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   27   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   27   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   27   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   27   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   28   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   28   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   28   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   28   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   28   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   29   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   29   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   29   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   29   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   29   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   30   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   30   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   30   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   30   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   30   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   31   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   31   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   31   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   31   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   31   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   32   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   32   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   32   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   32   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   32   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   33   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   33   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   33   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   33   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   33   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   34   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   34   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   34   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   34   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   34   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   35   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   35   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   35   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   35   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   35   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   36   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   36   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   36   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   36   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   36   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   37   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   37   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   37   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   37   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   37   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   38   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   38   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   38   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   38   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   38   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   39   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   39   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   39   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   39   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   39   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   40   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   40   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   40   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   40   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   40   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   41   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   41   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   41   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   41   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   41   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   42   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   42   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   42   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   42   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   42   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   43   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   43   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   43   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   43   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   43   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   44   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   44   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   44   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   44   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   44   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   45   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   45   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   45   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   45   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   45   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   46   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   46   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   46   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   46   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   46   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   47   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   47   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   47   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   47   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   47   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   48   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   48   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   48   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   48   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   48   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   49   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   49   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   49   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   49   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   49   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   50   1  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   50   2  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   50   3  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   50   4  CMOI  EMOI  FatherEducation  MotherEducation  Income
##   50   5  CMOI  EMOI  FatherEducation  MotherEducation  Income
completed_data <- complete(imputed_data, 1)

# Replace the original columns with the imputed data
data_selected$CMOI <- completed_data$CMOI
data_selected$EMOI <- completed_data$EMOI
data_selected$FatherEducation <- completed_data$FatherEducation
data_selected$MotherEducation <- completed_data$MotherEducation
data_selected$Income <- completed_data$Income

# Select the relevant columns and drop rows with NA values
education_income_data <- data_selected %>% 
  select(EEGID, FatherEducation, MotherEducation, Income) %>% 
  drop_na()


# Perform PCA
pca_result <- prcomp(education_income_data %>% select(-EEGID), scale. = TRUE)

# Extract the first principal component and add SubjectID
pca_scores <- education_income_data %>%
  select(EEGID) %>%
  mutate(SES = -pca_result$x[, 1]) # add minus so that higher scores indicate higher SES

# Merge the PCA scores back into the original data
data_income <- data_selected %>%
  left_join(pca_scores, by = "EEGID")

# output the data_income to csv
# write.csv(data_income, file = "./data/EEG_RS_lanExp_income_imputated_all_v2.csv", row.names = FALSE)
write.csv(data_income, file = "./data/EEG_RS_lanExp_income_imputated_all_v3.csv", row.names = FALSE)
# read CRF dataset
data_parent_raw <- read_excel("./data/CRF_ADATA_20230620.xlsx")
## Warning: Expecting logical in BQM1490 / R1490C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1491 / R1491C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1492 / R1492C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1493 / R1493C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1498 / R1498C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1499 / R1499C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1500 / R1500C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1501 / R1501C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1733 / R1733C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
## Warning: Expecting logical in BQM1734 / R1734C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
## Warning: Expecting logical in BQM1735 / R1735C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
data_parent_s <- data_parent_raw %>% 
  dplyr::select(SubjectID, FID, EEGID,
                MSWR, MCDICT, MARITH, 
                FSWR, FCDICT, FARITH, LearningProblem, LP1, LP2, LP3, LP4, LP5) %>%
  dplyr::filter(!(rowSums(is.na(select(., MSWR, MCDICT, MARITH, FSWR, FCDICT, FARITH))) == 6)) %>%
  dplyr::mutate(
    PSWR = case_when(
      is.na(MSWR) & is.na(FSWR) ~ NA_real_,  # If both are NA, PSWR is NA
      is.na(MSWR) ~ FSWR,                   # If MSWR is NA, PSWR is FSWR
      is.na(FSWR) ~ MSWR,                   # If FSWR is NA, PSWR is MSWR
      TRUE ~ (MSWR + FSWR) / 2              # If both have values, PSWR is the average
    ),
    PCDICT = case_when(
      is.na(MCDICT) & is.na(FCDICT) ~ NA_real_, 
      is.na(MCDICT) ~ FCDICT,                 
      is.na(FCDICT) ~ MCDICT,                  
      TRUE ~ (MCDICT + FCDICT) / 2              
    ), 
    PARITH = case_when(
      is.na(MARITH) & is.na(FARITH) ~ NA_real_, 
      is.na(MARITH) ~ FARITH,                 
      is.na(FARITH) ~ MARITH,                  
      TRUE ~ (MARITH + FARITH) / 2
    )
  ) %>%
  dplyr::mutate(
    PSWR_z = as.numeric(scale(PSWR)),  # Calculate z-score for PSWR
    PCDICT_z = as.numeric(scale(PCDICT)),  # Calculate z-score for PCDICT
    PARITH_z = as.numeric(scale(PARITH))  # Calculate z-score for PARITH
  )

# Summarize the mean and SD for PSWR, PCDICT, and PARITH
summary_stats <- data_parent_s %>%
  dplyr::summarize(
    PSWR_mean = mean(PSWR, na.rm = TRUE),
    PSWR_sd = sd(PSWR, na.rm = TRUE),
    PCDICT_mean = mean(PCDICT, na.rm = TRUE),
    PCDICT_sd = sd(PCDICT, na.rm = TRUE),
    PARITH_mean = mean(PARITH, na.rm = TRUE),
    PARITH_sd = sd(PARITH, na.rm = TRUE)
  )


# Create a subset where at least one of the z-scores is lower than -1
risk_data <- data_parent_s %>%
  dplyr::filter(
    PSWR_z < -1 | PCDICT_z < -1 | PARITH_z < -1
  ) %>%
  dplyr::select(FID, EEGID, PSWR_z, PCDICT_z, PARITH_z)
# read data from the tidy data
data_child <- read.csv("./data/model_data_180_v3.csv")

data_child_c <- data_child %>%
  dplyr::mutate(
    CF_z = as.numeric(scale(CF)),
    PW_z = as.numeric(scale(PW)),
    exponent_z = as.numeric(scale(exponent)),
    offset_z = as.numeric(scale(offset))
  )

# Function to perform regression and extract residuals
regress_and_add_residuals <- function(data, dependent_var, independent_var) {
  # Perform the regression
  model <- lm(reformulate(independent_var, response = dependent_var), data = data)
  
  # Extract the residuals and add them as a new column
  residuals_name <- paste0(dependent_var, "_r")
  
  data <- data %>%
    dplyr::mutate(!!residuals_name := residuals(model))
  
  return(data)
}

# List of dependent variables
dependent_vars <- c("CF_z", "PW_z", "exponent_z", "offset_z")

# Apply the function to each dependent variable
for (var in dependent_vars) {
  data_child_c <- regress_and_add_residuals(data_child_c, var, "Age_EEG_yr")
}

# Select risk child from child_data where EEGID and FID match with risk_data
risk_child <- data_child_c %>%
  dplyr::inner_join(risk_data, by = c("EEGID", "FID"))

# Function to create the plot for a given variable
create_plot <- function(data, risk_data, variable) {
  ggplot(data, aes(x = !!sym(variable))) +
    geom_histogram(binwidth = 0.2, fill = 'white', color = "black", alpha = 0.7) +
    geom_density(alpha = 0.2, fill = 'white') +
    geom_point(data = risk_data, aes(y = 3), color = 'red', size = 2, 
               position = position_jitter(width = 0, height = 1)) +
    labs(title = paste("Distribution of", variable), x = variable, y = "Frequency") +
    theme_bw() + 
    theme(
      axis.text.x = element_text(size = 20),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none", 
      strip.text = element_text(size = 20), 
      plot.title = element_text(size = 24, hjust = 0.5), 
      axis.title = element_text(size = 20) 
    )
}

# List of variables to plot
variables <- c("CF_z_r", "PW_z_r", "exponent_z_r", "offset_z_r")

# exclude risk_child data from original dataset
whitney_data <- data_child_c %>%
  dplyr::anti_join(risk_child, by = c("EEGID", "FID"))

# Create and print plots for each variable
eeg_plots <- lapply(variables, function(var) {
  create_plot(whitney_data, risk_child, var)
})

# Print the plots
eeg_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Function to perform the Mann-Whitney U test and return the results
perform_mann_whitney_test <- function(var, data1, data2) {
  test_result <- wilcox.test(data1[[var]], data2[[var]], alternative = "two.sided", exact = FALSE)
  return(test_result)
}



# Apply the function to each variable in the list
test_results <- lapply(variables, function(var) {
  perform_mann_whitney_test(var, whitney_data, risk_child)
})

# Print the test results for each variable
for (i in seq_along(variables)) {
  cat("Mann-Whitney U test results for", variables[i], ":\n")
  print(test_results[[i]])
  cat("\n")
}
## Mann-Whitney U test results for CF_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 529, p-value = 0.04753
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## Mann-Whitney U test results for PW_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 1171, p-value = 0.04092
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## Mann-Whitney U test results for exponent_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 770, p-value = 0.6398
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## Mann-Whitney U test results for offset_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 820, p-value = 0.8777
## alternative hypothesis: true location shift is not equal to 0
# read CRF dataset
data_parent_raw <- read_excel("./data/CRF_ADATA_20230620.xlsx")
## Warning: Expecting logical in BQM1490 / R1490C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1491 / R1491C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1492 / R1492C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1493 / R1493C1807: got '曾有懶眼需遮蓋眼睛'
## Warning: Expecting logical in BQM1498 / R1498C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1499 / R1499C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1500 / R1500C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1501 / R1501C1807: got
## '兩眼有白內障已動手術換晶片'
## Warning: Expecting logical in BQM1733 / R1733C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
## Warning: Expecting logical in BQM1734 / R1734C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
## Warning: Expecting logical in BQM1735 / R1735C1807: got
## '右眼有外斜視問題但幸運地未影響他的立體感建立閱讀距離視力正常'
data_parent_s_m <- data_parent_raw %>% 
  dplyr::select(SubjectID, FID, EEGID,
                PQ_C41, PQ_C42, PQ_C43,
                PQ_C44, PQ_C45,
                LearningProblem, LP1, LP2, LP3, LP4, LP5) %>% 
  dplyr::mutate(PMV = rowSums(select(., PQ_C41, PQ_C42, PQ_C43, PQ_C44, PQ_C45), na.rm = TRUE)) %>%
  dplyr::filter(!is.na(EEGID) & EEGID != "") %>% 
  dplyr::mutate(
    PMV_z = as.numeric(scale(PMV))) # Calculate z-score for PMV
   # Filter out rows where EEGID is empty or NA

# Create a subset where at least one of the z-scores is lower than -1
risk_data_m <- data_parent_s_m %>%
  dplyr::filter(
    PMV_z < -1
  ) %>%
  dplyr::select(FID, EEGID, PMV_z)
# read data from the tidy data
data_child <- read.csv("./data/model_data_180_v3.csv")

data_child_c <- data_child %>%
  dplyr::mutate(
    CF_z = as.numeric(scale(CF)),
    PW_z = as.numeric(scale(PW)),
    exponent_z = as.numeric(scale(exponent)),
    offset_z = as.numeric(scale(offset))
  )

# Function to perform regression and extract residuals
regress_and_add_residuals <- function(data, dependent_var, independent_var) {
  # Perform the regression
  model <- lm(reformulate(independent_var, response = dependent_var), data = data)
  
  # Extract the residuals and add them as a new column
  residuals_name <- paste0(dependent_var, "_r")
  
  data <- data %>%
    dplyr::mutate(!!residuals_name := residuals(model))
  
  return(data)
}

# List of dependent variables
dependent_vars <- c("CF_z", "PW_z", "exponent_z", "offset_z")

# Apply the function to each dependent variable
for (var in dependent_vars) {
  data_child_c <- regress_and_add_residuals(data_child_c, var, "Age_EEG_yr")
}

# Select risk child from child_data where EEGID and FID match with risk_data
risk_child_m <- data_child_c %>%
  dplyr::inner_join(risk_data_m, by = c("EEGID", "FID"))

# Function to create the plot for a given variable
create_plot <- function(data, risk_data, variable) {
  ggplot(data, aes(x = !!sym(variable))) +
    geom_histogram(binwidth = 0.2, fill = 'white', color = "black", alpha = 0.7) +
    geom_density(alpha = 0.2, fill = 'white') +
    geom_point(data = risk_data, aes(y = 3), color = 'red', size = 2, 
               position = position_jitter(width = 0, height = 1)) +
    labs(title = paste("Distribution of", variable), x = variable, y = "Frequency") +
    theme_bw() + 
    theme(
      axis.text.x = element_text(size = 20),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      legend.position = "none", 
      strip.text = element_text(size = 20), 
      plot.title = element_text(size = 24, hjust = 0.5), 
      axis.title = element_text(size = 20) 
    )
}

# List of variables to plot
variables <- c("CF_z_r", "PW_z_r", "exponent_z_r", "offset_z_r")

# exclude risk_child data from original dataset
whitney_data_m <- data_child_c %>%
  dplyr::anti_join(risk_child_m, by = c("EEGID", "FID"))

# Create and print plots for each variable
eeg_plots <- lapply(variables, function(var) {
  create_plot(whitney_data_m, risk_child_m, var)
})

# Print the plots
eeg_plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

# Function to perform the Mann-Whitney U test and return the results
perform_mann_whitney_test <- function(var, data1, data2) {
  test_result <- wilcox.test(data1[[var]], data2[[var]], alternative = "two.sided", exact = FALSE)
  return(test_result)
}



# Apply the function to each variable in the list
test_results <- lapply(variables, function(var) {
  perform_mann_whitney_test(var, whitney_data_m, risk_child_m)
})

# Print the test results for each variable
for (i in seq_along(variables)) {
  cat("Mann-Whitney U test results for", variables[i], ":\n")
  print(test_results[[i]])
  cat("\n")
}
## Mann-Whitney U test results for CF_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 1001, p-value = 0.6666
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## Mann-Whitney U test results for PW_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 1108, p-value = 0.8741
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## Mann-Whitney U test results for exponent_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 953, p-value = 0.4855
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## Mann-Whitney U test results for offset_z_r :
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  data1[[var]] and data2[[var]]
## W = 942, p-value = 0.448
## alternative hypothesis: true location shift is not equal to 0