import libraries & data

library(tidyverse)
library(dplyr)
library(sjlabelled)
library(ggplot2)
library(ggpubr)
library(Hmisc)
library(sjmisc)
library(caTools)
library(glmnet)
library(MASS)
library(stargazer)
library(effects)
library(corrr)
library(graphlayouts)
library(lavaan)
library(knitr)
library(reshape2)

# load data & get functions

# load("~/data/abcd_clean.Rda") # usually run. 

# source("prediction-functions.R") # usually run

Notes on imported data & functions

abcd_clean.Rda is the dataset resulting from cleaning script, publically available at https://rpubs.com/dooleyr/clean_abcd

prediction-functions.R is where the prediction scripts are stored (i.e. the cross-validation process, the elastic net algorithm, etc). See rpubs file by same name. The main functions are:

linearEN(data.frame, outer_folds, cv_folds): linear elastic net which predicts continuous/linear outcome. Column 2 in data.frame must be outcomes, ID numbers in col1 and potential predictors in remaining columns. Cross-validation is applied, missing data is mean-imputed, and an elastic net procedure finds alpha, lamda, and coefficients that minimize error between actual and predicted outcome.

balanced.logisticEN(data.frame, outer_folds, cv_folds): same as above but for a binary outcome (therefore using logistic link function). The script includes a balancing procedure such that the number of controls (without significant attention problems) is reduced to match the number of cases (children with significant attention problems)

collate.linear(model, outcome): pulls out key outcomes from elastic net results, useful for comparing group-stratified results. Outcomes returned include: sample size, intercept, average R-squared (and it’s SD, 95% CIs), number of predictors with non-zero betas, and number of total predictors entered into the model.

Clean data

# data needs to be purely numeric and all factors need to be dummy coded for the elastic net function

# remove all subjects without outcome data (cbcl attention problems)
abcd <- abcd[!is.na(abcd$syn_attention_r),]


# make categorical vars into numerical
#abcd$plannedpreg_n <- as.numeric(abcd$plannedpreg)-1
abcd$multibirth <- as.numeric(abcd$multibirth)   # 0=non-twin, 1=twin/other
abcd$csection <- as.numeric(abcd$csection)-1
abcd$smoked_n <- as.numeric(abcd$smokedpreg)-1 # 0=no smoking, 1=smoked
abcd$meds_n <- as.numeric(abcd$medspreg)-1
abcd$drugs_n <- as.numeric(abcd$drugspreg)-1
abcd$drank_n <- as.numeric(abcd$drankpreg)-1
abcd$vitamins_n <- as.numeric(abcd$devhx_10_prenatvitamins)
abcd$vitamins_n[abcd$vitamins_n==1] <- NA #missing
abcd$vitamins_n[abcd$vitamins_n==2] <- 1
abcd$vitamins_n[abcd$vitamins_n==3] <- 0 # taking vits should be the ref (majority case)
abcd$partner_n <- as.numeric(abcd$resps_partner)-1
# 'Yes I have a partner' is the reference (majority); "no im a single parent' is 1

#make sure 1 corresponds to the minority/risk class
abcd$preg_marijuana <- as.numeric(abcd$preg_marijuana)-1

#abcd$sex <- as.numeric(abcd$sex)-1 # females are 0, males are 1
abcd$sex <- as.numeric(abcd$sex)-1.5 # females are -0.5, males are 0.5

# dummy code ethnicity
abcd$eth_black <- ifelse(abcd$race_ethnicity== "Black", 1, 0)
abcd$eth_hispanic <- ifelse(abcd$race_ethnicity== "Hispanic", 1, 0)
abcd$eth_asian <- ifelse(abcd$race_ethnicity== "Asian", 1, 0)
abcd$eth_other <- ifelse(abcd$race_ethnicity== "Other", 1, 0)
#abcd$eth_white <- ifelse(abcd$race_ethnicity== "White", 1, 0)


# create new ids for each subject
abcd$myid <- c(1:length(abcd$src_subject_id))
id_tbl <- data.frame(abcd$myid, as.factor(abcd$src_subject_id))
names(id_tbl) <- c("my.ids", "orig.ids")

# reverse code parent education, income & bw, such that lower is higher
abcd$lower.income <- (scale(abcd$income.n, center=T, scale=F))*-1
abcd$lower.edu <- (scale(abcd$hsd_ed5_n, center=T, scale=F))*-1
abcd$lower.bw <- (scale(abcd$bw_kgs, center=T, scale=F))*-1 # lower BWs have higher values

Create smaller data frame

# select outcome and all predictors/stratifiers you're interested in
abcd_m <- subset(abcd, select=c(myid, syn_attention_r, syn_attention_t,
                                sex,  
                                Rship2child,
                                partner_n, lower.income, lower.edu ,
                                parent_mh_n, asr_scr_attention_r,

                                eth_black, eth_other, 
                                eth_hispanic, eth_asian,
                            
                                dadold, dadyoung, mumold, mumyoung,
                                multibirth,
                                gest_anyOG_n, gest_nausea, gest_bleed, 
                                #gest_preeclampsia, #merged with other
                                #gest_rubella,      #removed, too few
                                #gest_proteinuria   #merged with other
                                gest_gallbladder, gest_anemia, 
                                gest_uti, gest_diabetes, gest_hibloodpres,
                                gest_placenta, 
                                gest_accident,  gest_other, preeclamp_protein,
                                
                                #gest_ndocvisits, 11% missing data
                               
                                lower.bw, prem_wks, 
                                birthcomp_any_n, 
                               birthcomp_blue, birthcomp_slowheart, 
                               birthcomp_nobreathe, birthcomp_rhincomp,
                                #birthcomp_convuls, #removed, too few cases
                                #birthcomp_blood,   #removed, too few cases
                                birthcomp_jaundice, birthcomp_oxygen,
                                csection, incubator_days,
                                smoked_n, drank_n, meds_n, vitamins_n,
                                drugs_n,  preg_marijuana, drug_notweed)) 


# rename vars 
abcd_m<- abcd_m%>% 
  dplyr::rename(
  cbclatt =  syn_attention_r, # note I'm using the raw outcome here, not centred
  parent.att = asr_scr_attention_r,
  exp.smoking = smoked_n,
  exp.meds = meds_n,
  exp.drugs =drugs_n,
  exp.marij = preg_marijuana, 
  exp.otherdrug = drug_notweed, 
  exp.alcohol=drank_n,
  exp.novits=vitamins_n,
  Black=eth_black,
  Hispanic=eth_hispanic,
  Asian=eth_asian,
  OtherEthnic = eth_other,
  SE.singleparent = partner_n,
  gest.totalcomps = gest_anyOG_n,
  gest.rh=birthcomp_rhincomp,
  gest.nausea = gest_nausea, 
  gest.bleed = gest_bleed, 
  gest.gall = gest_gallbladder, 
  gest.preclamp.protein = preeclamp_protein,
  gest.anemia=gest_anemia, 
  gest.uti=gest_uti, 
  gest.diab =gest_diabetes, 
  gest.hiBP = gest_hibloodpres, 
  gest.placenta = gest_placenta, 
  gest.accident = gest_accident, 
  gest.other = gest_other,
  deliv.blue=birthcomp_blue, 
  deliv.heart=birthcomp_slowheart, 
  deliv.breathe=birthcomp_nobreathe,
  deliv.jaund = birthcomp_jaundice, 
  deliv.oxyg = birthcomp_oxygen,
  deliv.totalcomps = birthcomp_any_n,
  nicu_days = incubator_days,
  male.sex = sex,
  parent.any.n = parent_mh_n
)

# creata a dataframe with only data plausibly available at birth and cbcl attention problems as continuous outcome (remove social stratifiers like family income etc.)
abcd_birth <- subset(abcd_m, select=-c(SE.singleparent, lower.income, lower.edu, 
                                  parent.any.n, parent.att,
                                  Rship2child, syn_attention_t))

# explore missingness on predictors
A<-data.frame(colSums(is.na(abcd_birth))) 
A$var <- colnames(abcd_birth)
colnames(A) <- "NAs"
A$perc <- round((A$NAs/length(abcd_birth$myid))*100, 1)
rownames(A) <- c()
print(A[order(-A$NAs),])

##    NAs                    NA perc
## 39 852            exp.novits  8.5
## 38 800              exp.meds  8.0
## 35 616             nicu_days  6.2
## 37 582           exp.alcohol  5.8
## 8  554                dadold  5.6
## 9  554              dadyoung  5.6
## 25 446              lower.bw  4.5
## 18 407              gest.uti  4.1
## 20 351             gest.hiBP  3.5
## 31 342               gest.rh  3.4
## 14 340           gest.nausea  3.4
## 17 335           gest.anemia  3.4
## 41 329             exp.marij  3.3
## 19 324             gest.diab  3.2
## 21 313         gest.placenta  3.1
## 23 303            gest.other  3.0
## 24 298 gest.preclamp.protein  3.0
## 16 296             gest.gall  3.0
## 29 285           deliv.heart  2.9
## 15 284            gest.bleed  2.8
## 22 280         gest.accident  2.8
## 28 268            deliv.blue  2.7
## 36 253           exp.smoking  2.5
## 30 252         deliv.breathe  2.5
## 33 248            deliv.oxyg  2.5
## 32 243           deliv.jaund  2.4
## 42 233         exp.otherdrug  2.3
## 13 229       gest.totalcomps  2.3
## 10 223                mumold  2.2
## 11 223              mumyoung  2.2
## 40 153             exp.drugs  1.5
## 26 143              prem_wks  1.4
## 27 140      deliv.totalcomps  1.4
## 34 128              csection  1.3
## 4   14                 Black  0.1
## 5   14           OtherEthnic  0.1
## 6   14              Hispanic  0.1
## 7   14                 Asian  0.1
## 1    0                  myid  0.0
## 2    0               cbclatt  0.0
## 3    0              male.sex  0.0
## 12   0            multibirth  0.0

Set elastic net parameters

set.seed(100)
cvs = 5 # cross-validation folds
runs= 20 # outer fold (i.e. number of times to repeat the whole process)

Run elastic-net prediction

The first model predicts CBCL attention problems in the full sample (n=9975)

#birth.mod <- linearEN(abcd_birth, runs, cvs)
# not run here as takes long time, results extracted from saved versions
print(t(birth.mod[["summary"]]))

##                          [,1]
## sample_n              9975.00
## num.runs               100.00
## num.inputs              40.00
## num.meetingthreshfreq   17.00
## intercept                2.50
## mean.rsq                 8.13
## sd.rsq                   1.49
## rsq.CIlo                 5.61
## rsq.CIhi                11.47
## CVmean.ofrsqs            8.00
## CVrsq.CIlo               7.42
## CVrsq.CIhi               8.13
## error.mean               2.65
## error.CIlo               0.08
## error.CIhi               8.86

Bs <- birth.mod[["Bs"]] # what to plot

# plot betas of predictors with selection frequency of 95% or greater
thresh = 95 
ggplot(data=Bs[Bs$chosenpc>=thresh,])+
   geom_col(aes(x=reorder(predictor, mean.B), y= mean.B), width=.90, fill="steelblue", alpha=0.9)+
   geom_linerange(aes(x=reorder(predictor, mean.B) , ymin=cilo, ymax=cihi), alpha=0.5, colour="steelblue4")+
   geom_text(aes(x=reorder(predictor, mean.B), y=0, label=predictor, hjust=sign), 
             vjust= 0.5, size=4)+
   labs(x="", y="mean B", title = "Full sample", subtitle = paste0("Mean R-squared = ", round(as.numeric(birth.mod["Rsq"]), digits=2), "%" ) )+
   theme_minimal()+
   coord_flip()+
   theme_classic()+
   theme(legend.position="none",
         plot.title = element_text(hjust=0.5, face="bold"), 
         plot.subtitle = element_text(hjust=0.5),
         axis.text.y = element_blank(),
         axis.ticks.y = element_blank(),
         axis.line.y = element_blank())

birth.mod$log

##     fold testn   test.rsq     lambda alpha
## 1      1   998 0.07451778 0.05366970  1.00
## 2      2   998 0.07045908 0.06310880  0.70
## 3      3   998 0.06529149 0.59802818  0.05
## 4      4   998 0.07798338 0.04942438  0.85
## 5      5   998 0.07157084 0.30108346  0.05
## 6      1   998 0.09599504 0.08251380  0.65
## 7      2   998 0.08033155 0.15089333  0.30
## 8      3   998 0.09211290 0.21855819  0.15
## 9      4   998 0.07719784 0.07597276  0.40
## 10     5   998 0.09674485 0.04592400  0.85
## 11     1   998 0.07810995 0.42353107  0.10
## 12     2   998 0.06114653 0.08596928  0.55
## 13     3   998 0.07253902 0.07520072  0.90
## 14     4   998 0.06845408 0.18600381  0.25
## 15     5   998 0.06703420 0.12121355  0.50
## 16     1   998 0.07349912 0.09526872  0.50
## 17     2   998 0.09200230 0.09766030  0.50
## 18     3   998 0.08079097 0.06156387  0.90
## 19     4   998 0.08999672 0.07828760  1.00
## 20     5   998 0.09388049 0.13817023  0.30
## 21     1   998 0.08586147 0.63740841  0.00
## 22     2   998 0.08829863 0.09538691  0.35
## 23     3   998 0.09397584 0.14966273  0.35
## 24     4   998 0.07074978 0.16105898  0.25
## 25     5   998 0.06815829 0.15462271  0.30
## 26     1   998 0.08486783 0.09763146  0.50
## 27     2   998 0.07081832 0.13515153  0.50
## 28     3   998 0.06108455 0.15961634  0.30
## 29     4   998 0.10476708 0.08630526  0.60
## 30     5   998 0.07708194 0.07874574  0.65
## 31     1   998 0.08300347 0.10902407  0.40
## 32     2   998 0.11057171 0.15300948  0.25
## 33     3   998 0.08212875 0.71471305  0.05
## 34     4   998 0.08602233 0.51128185  0.05
## 35     5   998 0.08299525 0.05212823  0.90
## 36     1   998 0.09361915 0.06789750  1.00
## 37     2   998 0.08170665 0.14422571  0.35
## 38     3   998 0.08737463 0.47020222  0.10
## 39     4   998 0.06675979 0.26548941  0.10
## 40     5   998 0.09188067 0.06650868  0.70
## 41     1   998 0.07674255 0.09353754  0.65
## 42     2   998 0.09791383 0.29004364  0.15
## 43     3   998 0.05090476 0.07581506  0.90
## 44     4   998 0.06809078 0.05111142  1.00
## 45     5   998 0.07361099 0.45263749  0.10
## 46     1   998 0.06647678 0.05209534  0.60
## 47     2   998 0.09021461 0.07880137  0.85
## 48     3   998 0.11669223 0.28525835  0.10
## 49     4   998 0.08680493 0.03925091  0.80
## 50     5   998 0.08536036 0.07740672  0.65
## 51     1   998 0.09092127 0.20070208  0.20
## 52     2   998 0.08306688 0.08414151  0.70
## 53     3   998 0.06936714 0.10743869  0.45
## 54     4   998 0.08931791 0.06457365  0.80
## 55     5   998 0.06846488 0.22220100  0.20
## 56     1   998 0.07329901 0.18033638  0.25
## 57     2   998 0.07306502 0.07518083  0.75
## 58     3   998 0.09597284 0.09362783  0.45
## 59     4   998 0.03432075 0.29669812  0.15
## 60     5   998 0.07846881 0.06367871  0.95
## 61     1   998 0.11700141 0.09599021  0.40
## 62     2   998 0.05588034 0.25901369  0.20
## 63     3   998 0.08809989 0.61071090  0.05
## 64     4   998 0.10811138 0.18290755  0.25
## 65     5   998 0.05838455 0.15381270  0.35
## 66     1   998 0.06862963 0.14779353  0.30
## 67     2   998 0.09397013 0.08820225  0.40
## 68     3   998 0.08503381 0.04786076  0.85
## 69     4   998 0.09067965 0.07583798  0.75
## 70     5   998 0.08493265 0.34381602  0.10
## 71     1   998 0.06577384 0.36867284  0.10
## 72     2   998 0.08434462 0.31687504  0.10
## 73     3   998 0.09295776 0.18688833  0.35
## 74     4   998 0.09017474 0.10120115  0.50
## 75     5   998 0.05642633 0.10255173  0.55
## 76     1   998 0.07587606 0.05723823  0.85
## 77     2   998 0.12147207 0.03867801  0.55
## 78     3   998 0.07755944 0.14085686  0.40
## 79     4   998 0.09180454 0.10147340  0.55
## 80     5   998 0.06975185 0.11135238  0.50
## 81     1   998 0.08390861 0.84330160  0.00
## 82     2   998 0.08721012 0.09262751  0.70
## 83     3   998 0.06361714 0.25441908  0.15
## 84     4   998 0.08514719 0.08516273  0.70
## 85     5   998 0.07691442 0.10326369  0.45
## 86     1   998 0.05866072 0.05159019  0.75
## 87     2   998 0.08143027 0.68665697  0.05
## 88     3   998 0.06063849 0.08645227  0.50
## 89     4   998 0.10148278 0.30619729  0.15
## 90     5   998 0.05899148 0.08351576  0.75
## 91     1   998 0.11241430 0.07112994  0.85
## 92     2   998 0.09435041 0.09443641  0.50
## 93     3   998 0.09039005 0.11710101  0.40
## 94     4   998 0.07820359 0.47534216  0.05
## 95     5   998 0.07509208 0.05777879  0.90
## 96     1   998 0.07052303 0.38036459  0.05
## 97     2   998 0.09093007 0.07727950  0.85
## 98     3   998 0.08306394 0.04887156  1.00
## 99     4   998 0.08425388 0.08599949  0.60
## 100    5   998 0.09756001 0.41320983  0.05

Stratified predictive modelling

The following models predict attention problems in children, split by sex of the child, family income, race/ethnicity of the child, parent attention problems & parent psychiatric history

A. Split by sex

# run the prediction algorithm in boys & girls separately
boys_birth <- abcd_birth[abcd_birth$male.sex==0.5,]
boys_birth <- subset(boys_birth, select= -c(male.sex))
girls_birth <- abcd_birth[abcd_birth$male.sex== -0.5,]
girls_birth <- subset(girls_birth, select= -c(male.sex))

# *not run here as takes long time, results extracted from saved versions
# malebirth.mod <- linearEN(boys_birth, runs, cvs)
# femalebirth.mod <- linearEN(girls_birth, runs, cvs)

# comparison table
rbind(collate_linear(malebirth.mod,"males"),
      collate_linear(femalebirth.mod, "females"))

##     group    N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1   males 5255      3.08     5.28   1.51       2.0     7.92             12
## 2 females 4720      2.11     4.82   1.72       1.4     8.14              8
##   num_total
## 1        39
## 2        39

# B's, CIs & selection frequencies to plot
maleBs <- malebirth.mod[["Bs"]] 
femaleBs <- femalebirth.mod[["Bs"]] 

# example of saved table for each model
round(head(maleBs[c("chosenpc", "mean.B", "cilo", "cihi")]), digits=2)

##             chosenpc mean.B  cilo  cihi
## Black             81   0.11  0.01  0.22
## OtherEthnic      100   0.31  0.15  0.51
## Hispanic          19   0.05  0.02  0.08
## Asian             99  -0.37 -0.70 -0.08
## dadold            17   0.00  0.00  0.01
## dadyoung          92   0.12  0.01  0.26

B. Split by race/ethnicity

# select subsamples
# note there were 14 cases where race was unknown so these rows need to be deleted
# too few "asian" so not run as a separate model
white_birth <- abcd_birth[(abcd_birth$Black==0 & abcd_birth$Asian==0 & abcd_birth$OtherEthnic==0 & abcd_birth$Hispanic==0),]
white_birth <- subset(white_birth, select=-c(Black, Asian, Hispanic, OtherEthnic))
white_birth <- white_birth[!is.na(white_birth$cbclatt),]

hispanic_birth <- abcd_birth[abcd_birth$Hispanic==1,]
hispanic_birth <- subset(hispanic_birth, select=-c(Black, Asian, Hispanic, OtherEthnic))
hispanic_birth <- hispanic_birth[!is.na(hispanic_birth$cbclatt),]

black_birth <- abcd_birth[abcd_birth$Black==1,]
black_birth <- subset(black_birth, select=-c(Black, Asian, Hispanic, OtherEthnic))
black_birth <- black_birth[!is.na(black_birth$cbclatt),]

# run elastic net (don't actually run here, takes too long, results loaded up)
# whitebirth.mod <- linearEN(white_birth, runs, cvs)
# hispanicbirth.mod <- linearEN(hispanic_birth, runs, cvs)
# blackbirth.mod <- linearEN(black_birth, runs, cvs)

# comparison table
rbind(collate_linear(whitebirth.mod,"white"),
      collate_linear(hispanicbirth.mod, "hisp"),
      collate_linear(blackbirth.mod, "black"))

##   group    N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1 white 5061      2.50     6.43   1.72      3.06     9.59             10
## 2  hisp 2121      2.83     3.61   2.23     -1.85     6.94              4
## 3 black 1508      2.84     7.10   4.47     -2.90    14.22              9
##   num_total
## 1        36
## 2        36
## 3        36

# collect Bs
b1 <- whitebirth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("white", names(b1), sep=".")

b2<- blackbirth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("black", names(b2), sep=".")

b3 <- hispanicbirth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hisp", names(b3), sep=".")

# which variables were robust predictors of ADHD Sx for which groups? (robust ~ selection frequency >=95%)
race.Bs <- cbind(b1, b2, b3)
race.Bs <- race.Bs[race.Bs$white.chosenpc>=95 | race.Bs$hisp.chosenpc>=95 | race.Bs$black.chosenpc>=95,]

C. Split by income bands

Income Groups
low.income 1-6
mid.income 7 & 8
hi.income 9 & 10

original income levels
1. <$5,000; LOW
2. $5,000-11,999; LOW
3. $12,000-15,999; LOW
4. $16,000-24,999; LOW
5. $25,000-34,999; LOW
6. $35,000-49,999; LOW
7. $50,000-74,999; MID
8. $75,000-99,999; MID
9. $100,000-199,999; HIGH
10. $200,000+ HIGH

…but note that the numbering below in the script refers to cut-points on a standardised and reverse-coded scale of income levels (1.15 and 1.50 are cut-points corresponding to groupings above)

# select subsamples 
inc1_birth <- abcd_m[abcd_m$lower.income > 1.15,]
inc1_birth <- subset(inc1_birth, select = -c(SE.singleparent,lower.income, lower.edu,    parent.any.n, parent.att, syn_attention_t  ))
inc1_birth <- inc1_birth[!is.na(inc1_birth$cbclatt),]

inc2_birth <- abcd_m[(abcd_m$lower.income < 1.15) & (abcd_m$lower.income > -1.5),]
inc2_birth <- subset(inc2_birth, select = -c(SE.singleparent,lower.income, lower.edu,    parent.any.n, parent.att, syn_attention_t  ))
inc2_birth <- inc2_birth[!is.na(inc2_birth$cbclatt),]


inc3_birth <- abcd_m[abcd_m$lower.income < -1.5,]
inc3_birth <- subset(inc3_birth, select = -c(SE.singleparent,lower.income, lower.edu,    parent.any.n, parent.att, syn_attention_t  ))
inc3_birth <- inc3_birth[!is.na(inc3_birth$cbclatt),]


# run elastic net
# inc1birth.mod <- linearEN(inc1_birth, runs, cvs)
# inc2birth.mod <- linearEN(inc2_birth, runs, cvs)
# inc3birth.mod <- linearEN(inc3_birth, runs, cvs)

# comparison table
rbind(collate_linear(inc1birth.mod,"LOW"),
      collate_linear(inc2birth.mod, "MID"),
      collate_linear(inc3birth.mod, "HIGH"))

##   group    N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1   LOW 2780      2.88     9.61   2.91      3.85    14.83             12
## 2   MID 2567      2.80     4.09   1.89      0.64     8.07              6
## 3  HIGH 3757      2.52     2.90   1.21      0.23     5.05              5
##   num_total
## 1        40
## 2        40
## 3        40

# collect Bs
b1 <- inc1birth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("low", names(b1), sep=".")

b2 <- inc2birth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("mid", names(b2), sep=".")

b3 <- inc3birth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hi", names(b3), sep=".")

inc.Bs <- cbind(b1, b2, b3)
inc.Bs <- inc.Bs[inc.Bs$low.chosenpc>94 | inc.Bs$mid.chosenpc>94 | inc.Bs$hi.chosenpc>94,]

D. Split by parent attention problems

# select subsample
hiADHD_birth <- abcd_m[abcd_m$parent.att >= 8,] # 80th percentile
hiADHD_birth <- subset(hiADHD_birth, select = -c(SE.singleparent,lower.income, lower.edu,    parent.any.n, parent.att, syn_attention_t  ))
hiADHD_birth <- hiADHD_birth[!is.na(hiADHD_birth$cbclatt),]

midADHD_birth <- abcd_m[(abcd_m$parent.att >= 4 & abcd_m$parent.att < 8),] # 50th-80th percentile
midADHD_birth <- subset(midADHD_birth, select = -c(SE.singleparent,lower.income, lower.edu,    parent.any.n, parent.att, syn_attention_t  ))
midADHD_birth <- midADHD_birth[!is.na(midADHD_birth$cbclatt),]

loADHD_birth <- abcd_m[abcd_m$parent.att < 4,] # less than ave
loADHD_birth <- subset(loADHD_birth, select = -c(SE.singleparent,lower.income, lower.edu,    parent.any.n, parent.att, syn_attention_t  ))
loADHD_birth <- loADHD_birth[!is.na(loADHD_birth$cbclatt),]

# run elastic net
# hiADHDbirth.mod <- linearEN(hiADHD_birth, runs, cvs)
# midADHDbirth.mod <- linearEN(midADHD_birth, runs, cvs)
# loADHDbirth.mod <- linearEN(loADHD_birth, runs, cvs)

rbind(collate_linear(hiADHDbirth.mod,"High"),
      collate_linear(midADHDbirth.mod, "Mid"),
      collate_linear(loADHDbirth.mod, "Low"))

##   group    N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1  High 2160      4.75     5.32   2.48      0.25     9.69              6
## 2   Mid 2891      3.00     4.30   1.92      0.35     7.62              7
## 3   Low 4921      1.80     3.75   1.53      0.32     6.43              7
##   num_total
## 1        40
## 2        40
## 3        40

# merge B-summary tables 
b1 <- loADHDbirth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("low", names(b1), sep=".")

b2 <- midADHDbirth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("mid", names(b2), sep=".")

b3 <- hiADHDbirth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hi", names(b3), sep=".")

adhd.Bs <- cbind(b1, b2, b3)
adhd.Bs <- adhd.Bs[adhd.Bs$low.chosenpc>=95 | adhd.Bs$mid.chosenpc>=95 | adhd.Bs$hi.chosenpc>=95,]

E. Split by parent psych history

# select subsamples 
noMHx_birth <- abcd_m[abcd_m$parent.any.n==0,]
noMHx_birth <- subset(noMHx_birth, select = -c(SE.singleparent, lower.income, lower.edu,   parent.any.n, parent.att, syn_attention_t ))
noMHx_birth <- noMHx_birth[!is.na(noMHx_birth$cbclatt),]

medMHx_birth <- abcd_m[(abcd_m$parent.any.n == 1) | (abcd_m$parent.any.n == 2),]
medMHx_birth <- subset(medMHx_birth, select = -c(SE.singleparent,lower.income, lower.edu,   parent.any.n, parent.att, syn_attention_t ))
medMHx_birth <- medMHx_birth[!is.na(medMHx_birth$cbclatt),]

hiMHx_birth <- abcd_m[(abcd_m$parent.any.n >= 3),]
hiMHx_birth <- subset(hiMHx_birth, select = -c(SE.singleparent,lower.income, lower.edu,   parent.any.n, parent.att, syn_attention_t  ))
hiMHx_birth <- hiMHx_birth[!is.na(hiMHx_birth$cbclatt),]


# run elastic net
# hiMHxbirth.mod <- linearEN(hiMHx_birth, runs, cvs)
# medMHxbirth.mod <- linearEN(medMHx_birth, runs, cvs)
# noMHxbirth.mod <- linearEN(noMHx_birth, runs, cvs)

rbind(collate_linear(hiMHxbirth.mod,"High"),
      collate_linear(medMHxbirth.mod, "Medium"),
      collate_linear(noMHxbirth.mod, "None"))

##    group    N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1   High 2551      3.77     5.98   2.41      1.79     9.98              8
## 2 Medium 2717      2.86     4.96   2.03      1.74     9.11              8
## 3   None 4597      2.15     2.88   1.34     -0.01     5.00              6
##   num_total
## 1        40
## 2        40
## 3        40

# merge B-summary tables 
b1 <- noMHxbirth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("noMH", names(b1), sep=".")

b2 <- medMHxbirth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("medMH", names(b2), sep=".")

b3 <- hiMHxbirth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hiMH", names(b3), sep=".")

parMH.Bs <- cbind(b1, b2, b3)
parMH.Bs <- parMH.Bs[parMH.Bs$noMH.chosenpc>=95 | parMH.Bs$medMH.chosenpc>=95 | parMH.Bs$hiMH.chosenpc>=95,]

Compare output from stratified results

# save: N, intercept, R-sq, R lower, R higher, # vars
grandsummary <-
   rbind(
      collate_linear(birth.mod, "all participants"),
      #   collate_linear(mumsonly.mod, "SA: mother only"),
      #   collate_linear(noNA.mod, "SA: full data only"),
      collate_linear(femalebirth.mod, "females"),
      collate_linear(malebirth.mod, "males"),
      collate_linear(hispanicbirth.mod, "hispanic"),
       collate_linear(whitebirth.mod, "white"),
      collate_linear(blackbirth.mod, "black"),
      collate_linear(inc3birth.mod, "high income"),
      collate_linear(inc2birth.mod, "middle income"),
      collate_linear(inc1birth.mod, "low income"),      
      collate_linear(noMHxbirth.mod, "no parent Hx"),
      collate_linear(medMHxbirth.mod, "average parent Hx"),
      collate_linear(hiMHxbirth.mod, " strong parent Hx"),
      collate_linear(loADHDbirth.mod, "low parent ADHD"),
      collate_linear(midADHDbirth.mod, "mod parent ADHD"),
      collate_linear(hiADHDbirth.mod, "high parent ADHD")  )

grandsummary$strat <- c("No Stratification","Sex", "Sex", "Race/Ethnicity", "Race/Ethnicity", "Race/Ethnicity", "Income", "Income", "Income", "Parent Psychiatric History", "Parent Psychiatric History", "Parent Psychiatric History", "Parent ADHD Symptoms","Parent ADHD Symptoms","Parent ADHD Symptoms")
grandsummary$strat.n <- c(1, 2,3, 4,5,6, 7,8,9 ,10,11,12, 13,14,15)

grandsummary$strat <- factor(grandsummary$strat, levels = c("No Stratification", "Sex",  "Race/Ethnicity", "Income",  "Parent Psychiatric History","Parent ADHD Symptoms"))

groupcols <- c("steelblue", "firebrick2", "lightpink2","lightblue", "olivedrab",  "darkolivegreen3")

ggplot(grandsummary, aes(x=reorder(group,strat.n), y=Rsq_mean, fill=strat))+
   geom_bar(stat="identity", alpha=.8)+
   geom_errorbar(aes(x=reorder(group,strat.n), ymin=Rsq_mean-Rsq_sd, ymax=Rsq_mean+Rsq_sd,
                     colour=strat), width=0.1)+
   #geom_text(stat="identity", aes(x=reorder(group,strat.n), label=Rsq_mean, 
   #                               colour = strat),vjust=-0.3, size=3])+
   scale_fill_manual(values=groupcols)+
   scale_colour_manual(values=groupcols)+
   labs(x="")+
   scale_y_continuous(name="Mean R-squared", breaks=c(0,2,4,6,8,10,12))+
   theme_classic()+
   theme(axis.text.x=element_text(angle=60, hjust=1, vjust=1),
         legend.title = element_blank())

# compare R-sq from male and female models
t.test(malebirth.mod[["log"]][["test.rsq"]], femalebirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  malebirth.mod[["log"]][["test.rsq"]] and femalebirth.mod[["log"]][["test.rsq"]]
## t = 2.0185, df = 194.74, p-value = 0.04491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0001057203 0.0091145856
## sample estimates:
##  mean of x  mean of y 
## 0.05280289 0.04819274

# race
d <- rbind((data.frame(Rsq=whitebirth.mod[["log"]][["test.rsq"]], group="white")),
           (data.frame(Rsq=blackbirth.mod[["log"]][["test.rsq"]], group="black")),
           (data.frame(Rsq=hispanicbirth.mod[["log"]][["test.rsq"]], group="hispanic")))
summary(aov(Rsq ~ group, data=d))

##              Df  Sum Sq Mean Sq F value   Pr(>F)    
## group         2 0.06871 0.03435   36.93 4.74e-15 ***
## Residuals   297 0.27627 0.00093                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

      # group post-hocs
      t.test(whitebirth.mod[["log"]][["test.rsq"]], blackbirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  whitebirth.mod[["log"]][["test.rsq"]] and blackbirth.mod[["log"]][["test.rsq"]]
## t = -1.3883, df = 127.68, p-value = 0.1675
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.016122976  0.002827008
## sample estimates:
##  mean of x  mean of y 
## 0.06432814 0.07097612

      t.test(whitebirth.mod[["log"]][["test.rsq"]], hispanicbirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  whitebirth.mod[["log"]][["test.rsq"]] and hispanicbirth.mod[["log"]][["test.rsq"]]
## t = 10.033, df = 185.94, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02270241 0.03381501
## sample estimates:
##  mean of x  mean of y 
## 0.06432814 0.03606943

      t.test(blackbirth.mod[["log"]][["test.rsq"]], hispanicbirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  blackbirth.mod[["log"]][["test.rsq"]] and hispanicbirth.mod[["log"]][["test.rsq"]]
## t = 6.9884, df = 145.45, p-value = 9.28e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.02503460 0.04477878
## sample estimates:
##  mean of x  mean of y 
## 0.07097612 0.03606943

# income
d <- rbind((data.frame(Rsq=inc1birth.mod[["log"]][["test.rsq"]], group="low")),
           (data.frame(Rsq=inc2birth.mod[["log"]][["test.rsq"]], group="middle")),
           (data.frame(Rsq=inc3birth.mod[["log"]][["test.rsq"]], group="high")))
summary(aov(Rsq ~ group, data=d))

##              Df Sum Sq Mean Sq F value Pr(>F)    
## group         2 0.2562 0.12808   285.1 <2e-16 ***
## Residuals   297 0.1334 0.00045                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       # post-hocs
       t.test(inc2birth.mod[["log"]][["test.rsq"]], inc3birth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  inc2birth.mod[["log"]][["test.rsq"]] and inc3birth.mod[["log"]][["test.rsq"]]
## t = 5.2899, df = 168.4, p-value = 3.765e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.007438335 0.016295757
## sample estimates:
##  mean of x  mean of y 
## 0.04087335 0.02900630

       t.test(inc1birth.mod[["log"]][["test.rsq"]], inc2birth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  inc1birth.mod[["log"]][["test.rsq"]] and inc2birth.mod[["log"]][["test.rsq"]]
## t = 15.922, df = 170.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.04835266 0.06203899
## sample estimates:
##  mean of x  mean of y 
## 0.09606917 0.04087335

# parent psychopathology
d <- rbind((data.frame(Rsq=hiMHxbirth.mod[["log"]][["test.rsq"]], group="strong Hx")),
           (data.frame(Rsq=medMHxbirth.mod[["log"]][["test.rsq"]], group="average Hx")),
           (data.frame(Rsq=noMHxbirth.mod[["log"]][["test.rsq"]], group="no Hx")))
summary(aov(Rsq ~ group, data=d))

##              Df  Sum Sq Mean Sq F value Pr(>F)    
## group         2 0.04985 0.02492   63.86 <2e-16 ***
## Residuals   297 0.11592 0.00039                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

t.test(noMHxbirth.mod[["log"]][["test.rsq"]], medMHxbirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  noMHxbirth.mod[["log"]][["test.rsq"]] and medMHxbirth.mod[["log"]][["test.rsq"]]
## t = -8.5587, df = 171.77, p-value = 6.193e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02561462 -0.01601396
## sample estimates:
##  mean of x  mean of y 
## 0.02880501 0.04961929

t.test(medMHxbirth.mod[["log"]][["test.rsq"]], hiMHxbirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  medMHxbirth.mod[["log"]][["test.rsq"]] and hiMHxbirth.mod[["log"]][["test.rsq"]]
## t = -3.2264, df = 192.45, p-value = 0.001473
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.016363669 -0.003947178
## sample estimates:
##  mean of x  mean of y 
## 0.04961929 0.05977471

# parent ADHD sx
d <- rbind((data.frame(Rsq=hiADHDbirth.mod[["log"]][["test.rsq"]], group="HIGH")),
           (data.frame(Rsq=midADHDbirth.mod[["log"]][["test.rsq"]], group="MID")),
           (data.frame(Rsq=loADHDbirth.mod[["log"]][["test.rsq"]], group="LOW")))
summary(aov(Rsq ~ group, data=d))

##              Df  Sum Sq  Mean Sq F value   Pr(>F)    
## group         2 0.01266 0.006328    15.6 3.63e-07 ***
## Residuals   297 0.12052 0.000406                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# post-hocs for each level of parent attention problems
t.test(loADHDbirth.mod[["log"]][["test.rsq"]],
       midADHDbirth.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  loADHDbirth.mod[["log"]][["test.rsq"]] and midADHDbirth.mod[["log"]][["test.rsq"]]
## t = -2.2232, df = 188.62, p-value = 0.02739
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0102825403 -0.0006141471
## sample estimates:
##  mean of x  mean of y 
## 0.03753519 0.04298354

t.test(midADHDbirth.mod[["log"]][["test.rsq"]],
       hiADHDbirth.mod[["log"]][["test.rsq"]])   # t=-3.26, p=.001

## 
##  Welch Two Sample t-test
## 
## data:  midADHDbirth.mod[["log"]][["test.rsq"]] and hiADHDbirth.mod[["log"]][["test.rsq"]]
## t = -3.2583, df = 186.03, p-value = 0.001332
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.016409884 -0.004032688
## sample estimates:
##  mean of x  mean of y 
## 0.04298354 0.05320482

t.test(loADHDbirth.mod[["log"]][["test.rsq"]],
       hiADHDbirth.mod[["log"]][["test.rsq"]])  # t = -5.37, p <.001

## 
##  Welch Two Sample t-test
## 
## data:  loADHDbirth.mod[["log"]][["test.rsq"]] and hiADHDbirth.mod[["log"]][["test.rsq"]]
## t = -5.3741, df = 164.52, p-value = 2.596e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02142673 -0.00991253
## sample estimates:
##  mean of x  mean of y 
## 0.03753519 0.05320482

Sensitivity Tests

SENS #1. What if mean-imputation of missing values was strongly influencing results (though note, all imputed variables had less than 10% missing data).

Run full-sample analysis with no missing data (complete cases only)

noNA_birth <- abcd_birth[complete.cases(abcd_birth),]

#noNA.mod <- linearEN(noNA_birth, runs, cvs)

noNABs <- noNA.mod[["Bs"]] # what to plot?

# plot prenatal predictors
thresh = 95 # needs to be 99 for lambdamin but lower for 1se
ggplot(data=noNABs[noNABs$chosenpc>=thresh,])+
   geom_col(aes(x=reorder(predictor, mean.B), y= mean.B), width=.90, fill="steelblue", alpha=0.9)+
   geom_linerange(aes(x=reorder(predictor, mean.B) , ymin=cilo, ymax=cihi), alpha=0.5, colour="steelblue4")+
   geom_text(aes(x=reorder(predictor, mean.B), y=0, label=predictor, hjust=sign), 
             vjust= 0.5, size=4)+
   labs(x="", y="mean B", title = "Full sample", subtitle = paste0("Mean R-squared = ", round(as.numeric(noNA.mod["Rsq"]), digits=2), "%" ) )+
   theme_minimal()+
   coord_flip()+
   theme_classic()+
   theme(legend.position="none",
         plot.title = element_text(hjust=0.5, face="bold"), 
         plot.subtitle = element_text(hjust=0.5),
         axis.text.y = element_blank(),
         axis.ticks.y = element_blank(),
         axis.line.y = element_blank())

Observe differences across the complete-case sample and the sample used in main analysis (missing vals imputed). Look at outcome (CBCL attention problems) and each predictor, using chi-squared/t-tests as appropriate.

# chi-square tests
chisq.test(rbind(table(abcd_birth$male.sex), table(noNA_birth$male.sex)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$male.sex), table(noNA_birth$male.sex))
## X-squared = 0.031509, df = 1, p-value = 0.8591

chisq.test(rbind(table(abcd_birth$Black), table(noNA_birth$Black)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$Black), table(noNA_birth$Black))
## X-squared = 11.537, df = 1, p-value = 0.0006823

chisq.test(rbind(table(abcd_birth$OtherEthnic), table(noNA_birth$OtherEthnic)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$OtherEthnic), table(noNA_birth$OtherEthnic))
## X-squared = 0.83432, df = 1, p-value = 0.361

chisq.test(rbind(table(abcd_birth$Hispanic), table(noNA_birth$Hispanic)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$Hispanic), table(noNA_birth$Hispanic))
## X-squared = 0.3471, df = 1, p-value = 0.5558

chisq.test(rbind(table(abcd_birth$Asian), table(noNA_birth$Asian)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$Asian), table(noNA_birth$Asian))
## X-squared = 2.1944, df = 1, p-value = 0.1385

chisq.test(rbind(table(abcd_birth$multibirth), table(noNA_birth$multibirth)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$multibirth), table(noNA_birth$multibirth))
## X-squared = 0.45282, df = 1, p-value = 0.501

chisq.test(rbind(table(abcd_birth$csection), table(noNA_birth$csection)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$csection), table(noNA_birth$csection))
## X-squared = 0.095388, df = 1, p-value = 0.7574

chisq.test(rbind(table(abcd_birth$exp.smoking), table(noNA_birth$exp.smoking)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$exp.smoking), table(noNA_birth$exp.smoking))
## X-squared = 11.329, df = 1, p-value = 0.0007629

chisq.test(rbind(table(abcd_birth$exp.alcohol), table(noNA_birth$exp.alcohol)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$exp.alcohol), table(noNA_birth$exp.alcohol))
## X-squared = 0.17039, df = 1, p-value = 0.6798

chisq.test(rbind(table(abcd_birth$exp.drugs), table(noNA_birth$exp.drugs)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$exp.drugs), table(noNA_birth$exp.drugs))
## X-squared = 8.3809, df = 1, p-value = 0.003792

chisq.test(rbind(table(abcd_birth$exp.meds), table(noNA_birth$exp.meds)))

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  rbind(table(abcd_birth$exp.meds), table(noNA_birth$exp.meds))
## X-squared = 0, df = 1, p-value = 1

# two-sample t-tests
t.test(abcd_birth$cbclatt, noNA_birth$cbclatt)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$cbclatt and noNA_birth$cbclatt
## t = 3.7055, df = 15649, p-value = 0.0002117
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.09402692 0.30521350
## sample estimates:
## mean of x mean of y 
##  3.073584  2.873964

t.test(abcd_birth$deliv.totalcomps, noNA_birth$deliv.totalcomps)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$deliv.totalcomps and noNA_birth$deliv.totalcomps
## t = 0.61584, df = 15499, p-value = 0.538
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01543542  0.02957794
## sample estimates:
## mean of x mean of y 
## 0.3700051 0.3629338

t.test(abcd_birth$gest.totalcomps, noNA_birth$gest.totalcomps)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$gest.totalcomps and noNA_birth$gest.totalcomps
## t = 1.2747, df = 15574, p-value = 0.2024
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01084540  0.05118121
## sample estimates:
## mean of x mean of y 
## 0.6673507 0.6471828

t.test(abcd_birth$prem_wks, noNA_birth$prem_wks)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$prem_wks and noNA_birth$prem_wks
## t = 1.6468, df = 15774, p-value = 0.09962
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.009229511  0.106248069
## sample estimates:
## mean of x mean of y 
## 0.7218267 0.6733174

t.test(abcd_birth$lower.bw, noNA_birth$lower.bw)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$lower.bw and noNA_birth$lower.bw
## t = 2.5635, df = 15557, p-value = 0.01037
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.005862904 0.043957085
## sample estimates:
##     mean of x     mean of y 
## -1.520191e-16 -2.490999e-02

t.test(abcd_birth$nicu_days, noNA_birth$nicu_days)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$nicu_days and noNA_birth$nicu_days
## t = 0.89758, df = 15704, p-value = 0.3694
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.06824719  0.18355232
## sample estimates:
## mean of x mean of y 
## 0.8830003 0.8253478

t.test(abcd_birth$mumold, noNA_birth$mumold)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$mumold and noNA_birth$mumold
## t = 0.67046, df = 15580, p-value = 0.5026
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03395888  0.06926786
## sample estimates:
## mean of x mean of y 
## 0.6175913 0.5999368

t.test(abcd_birth$mumyoung, noNA_birth$mumyoung)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$mumyoung and noNA_birth$mumyoung
## t = 2.4604, df = 16143, p-value = 0.01389
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.005013307 0.044293613
## sample estimates:
## mean of x mean of y 
## 0.1588392 0.1341858

t.test(abcd_birth$dadold, noNA_birth$dadold)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$dadold and noNA_birth$dadold
## t = 0.68255, df = 15528, p-value = 0.4949
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.06605541  0.13663588
## sample estimates:
## mean of x mean of y 
##  1.505573  1.470282

t.test(abcd_birth$dadyoung, noNA_birth$dadyoung)

## 
##  Welch Two Sample t-test
## 
## data:  abcd_birth$dadyoung and noNA_birth$dadyoung
## t = 0.88171, df = 15741, p-value = 0.3779
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007413722  0.019536690
## sample estimates:
##  mean of x  mean of y 
## 0.07589428 0.06983279

SENS #2. ~85% of respondants were the biological mother. Are results being swayed by non-biological mothers reporting on: obstetric complications, substance-use during pregnancy, etc.?

abcd_mum <- abcd_m[abcd_m$Rship2child=="BioMom",]
mumsonly <- subset(abcd_mum, select=-c(SE.singleparent, lower.income, lower.edu, 
                                  parent.any.n, parent.att,
                                  Rship2child, syn_attention_t)) 

#mumsonly.mod <- linearEN(mumsonly, runs, cvs)

mumsonly.Bs <- mumsonly.mod[["Bs"]] # what to plot?

thresh = 95 
ggplot(data=mumsonly.Bs[mumsonly.Bs$chosenpc>=thresh,])+
   geom_col(aes(x=reorder(predictor, mean.B), y= mean.B), width=.90, fill="steelblue", alpha=0.9)+
   geom_linerange(aes(x=reorder(predictor, mean.B) , ymin=cilo, ymax=cihi), alpha=0.5, colour="steelblue4")+
   geom_text(aes(x=reorder(predictor, mean.B), y=0, label=predictor, hjust=sign), 
             vjust= 0.5, size=4)+
   labs(x="", y="mean B", title = "Bio mum's Only", subtitle = paste0("Mean R-squared = ", round(as.numeric(mumsonly.mod["Rsq"]), digits=2), "%" ) )+
   theme_minimal()+
   coord_flip()+
   theme_classic()+
   theme(legend.position="none",
         plot.title = element_text(hjust=0.5, face="bold"), 
         plot.subtitle = element_text(hjust=0.5),
         axis.text.y = element_blank(),
         axis.ticks.y = element_blank(),
         axis.line.y = element_blank())

SENS #3. It may be clinically useful to see how well we can predict clinically-significant attention problems, rather than increasing scores on the attention problem scale.

Choosing a cut-off (based on literature):
* For a mixed group of clinic-referred and general community children, Lampert et al. (2004) reported an optimum cutoff (raw score of 9)
* clinic-referred sample (best cutoff for ADHD = 9; AUC = 0.78; sensitivity = 75%; specificity = 68%)
* T >= 65 A CUT-OFF FOR “DEVIANT” (ACHENBACH ET AL., 2003)
* T > 62 WAS OPTIMAL CUT-OFF SCORE TO DISTINGUIGH ADHD FROM NO ADHD (GOMEZ ET AL., 2019)
* Eiraldi et al. (2000), also for a clinic-referred sample, reported sensitivity and specificity values of 78% and 89%, respectively, at a cutoff score of T = 65.
* Tehrani-Doost et al. (2011) found a lower optimal cut-off for predicting ADHD: raw scores of 6.5-7.5

# compare t-score and raw score cut off
ggplot(data=abcd)+
   geom_smooth(aes(x=syn_attention_r, y = syn_attention_t), formula=y~poly(x,2), method="lm")+
   scale_x_continuous(name="CBCL attention problems", 
                      breaks=c(0:20))+
   geom_hline(yintercept= 65,linetype="dashed")+
   geom_hline(yintercept= 63.8,linetype="dotted", colour="blue")+
   geom_vline(xintercept= 9.6,linetype="dashed")+
   geom_vline(xintercept= 9,linetype="dotted", colour="blue")

# view cut-off of 9 imposed on the continous distibution of scores
ggplot(data=abcd_m)+
   geom_histogram(aes(x=cbclatt), binwidth = 1, fill="purple", colour="black")+
   geom_vline(xintercept=9)+
   theme_minimal()

abcd_bin <- abcd_birth
# dichotomize raw score (use cut-off of 9 on the raw scale)
abcd_bin$cbcl.bin <- ifelse(abcd_bin$cbclatt>=9, 1, 0)

table(abcd_bin$cbcl.bin) %>%
   prop.table() # 907 kids (~9%)

## 
##          0          1 
## 0.90907268 0.09092732

# place this binary outcome var in column 2 and remove continuous var
abcd_bin <- move_columns(abcd_bin, cbcl.bin, .after = "myid")
abcd_bin <- subset(abcd_bin, select= -c(cbclatt))

# run elastic net (note the reduction of CV folds from 5 to 4; but increase in number of outer folds)
#logistic.mod <- balanced.logisticEN(abcd_bin, 25, 4) 

Bs <- logistic.mod[["Bs"]] # Bs, selection frequency, etc
t(logistic.mod[["summary"]])

##                          [,1]
## sample_n              9975.00
## num.runs               100.00
## num.inputs              40.00
## num.meetingthreshfreq    5.00
## intercept               -0.25
## mean.auc                67.39
## sd.auc                   1.16
## aucCIlo                 65.37
## aucCIhi                 69.62
## mean.sensit              0.60
## mean.specif              0.63

# there were 227 children in each test-set, ~50% were "cases" (n between 100 and 120)
# saved: true positve rate & true negative rate
# calculate: fnr, fpr, ppv, npv

# also observe the alpha value for each run, it may influence discrimination
a<-as.data.frame(logistic.mod[["Bs"]])
b<-names(a)
b<-b[str_detect(b,"run")]
c<-NA
for (x in 1:100){
c[x]<-str_sub(b[x], 2, (str_locate(b[x], "r")[1]-1) )
}
   
log.hx <- data.frame(true.pos= logistic.mod[["log"]][["test_tpr"]], 
                 true.neg= logistic.mod[["log"]][["test_tnr"]],
                 n.cases= logistic.mod[["log"]][["testn_cases"]],
                 n.controls= logistic.mod[["log"]][["testn"]]-
                                logistic.mod[["log"]][["testn_cases"]],
                 alpha= c)

log.hx$true.pos.n <- log.hx$true.pos*log.hx$n.cases
log.hx$true.neg.n <- log.hx$true.neg*log.hx$n.controls
log.hx$false.pos.n <- log.hx$n.controls-log.hx$true.neg.n
log.hx$false.neg.n <- log.hx$n.cases-log.hx$true.pos.n

log.hx$ppv <- round((log.hx$true.pos.n / (log.hx$true.pos.n + log.hx$false.pos.n)),2)
log.hx$npv <- round((log.hx$true.neg.n / (log.hx$true.neg.n + log.hx$false.neg.n)),2)
# ppv = % of individuals testing positive that really are positive
# npv = % of individuals testing negative are truly negative
mean(log.hx$ppv, na.rm=T)

## [1] 0.6307071

mean(log.hx$npv, na.rm=T)

## [1] 0.6123469

print(log.hx)

##       true.pos  true.neg n.cases n.controls alpha true.pos.n true.neg.n
## 1   0.13392857 0.9478261     112        115     0         15        109
## 2   0.73076923 0.6341463     104        123  0.75         76         78
## 3   0.63414634 0.7115385     123        104   0.1         78         74
## 4   0.50434783 0.7053571     115        112  0.45         58         79
## 5   0.60000000 0.6078431     125        102   0.8         75         62
## 6   0.59504132 0.6226415     121        106   0.6         72         66
## 7   0.64102564 0.6363636     117        110  0.35         75         70
## 8   0.65137615 0.6694915     109        118  0.75         71         79
## 9   0.52380952 0.7821782     126        101  0.75         66         79
## 10  0.74358974 0.4545455     117        110   0.8         87         50
## 11  0.52678571 0.6434783     112        115   0.4         59         74
## 12  0.77669903 0.4838710     103        124  0.25         80         60
## 13  0.66666667 0.4913793     111        116  0.45         74         57
## 14  0.57600000 0.7156863     125        102  0.35         72         73
## 15  0.59821429 0.5913043     112        115  0.45         67         68
## 16  0.69444444 0.5378151     108        119  0.25         75         64
## 17  0.61467890 0.5508475     109        118  0.45         67         65
## 18  0.77868852 0.5142857     122        105  0.65         95         54
## 19  0.72580645 0.4174757     124        103  0.75         90         43
## 20  0.52542373 0.7339450     118        109  0.55         62         80
## 21  0.66666667 0.5918367     129         98   0.2         86         58
## 22  0.60169492 0.7155963     118        109  0.85         71         78
## 23  0.70588235 0.4629630     119        108  0.85         84         50
## 24  0.70434783 0.5267857     115        112   0.6         81         59
## 25  0.72357724 0.4230769     123        104  0.35         89         44
## 26  0.64150943 0.4545455     106        121  0.65         68         55
## 27  0.59090909 0.6324786     110        117  0.15         65         74
## 28  0.51239669 0.6415094     121        106  0.35         62         68
## 29  0.54838710 0.6893204     124        103  0.35         68         71
## 30  0.60714286 0.6260870     112        115  0.05         68         72
## 31  0.50877193 0.6017699     114        113   0.2         58         68
## 32  0.58407080 0.6842105     113        114   0.6         66         78
## 33  0.47826087 0.7589286     115        112  0.05         55         85
## 34  0.52475248 0.7777778     101        126  0.15         53         98
## 35  0.78632479 0.4363636     117        110   0.5         92         48
## 36  0.61016949 0.6146789     118        109   0.7         72         67
## 37  0.49074074 0.6890756     108        119  0.35         53         82
## 38  0.62790698 0.7346939     129         98   0.4         81         72
## 39  0.50877193 0.7433628     114        113  0.95         58         84
## 40  1.00000000 0.0000000     104        123     0        104          0
## 41  0.79824561 0.4247788     114        113  0.15         91         48
## 42  0.51351351 0.6637931     111        116  0.05         57         77
## 43  0.50000000 0.7433628     114        113  0.15         57         84
## 44  0.63716814 0.6842105     113        114  0.35         72         78
## 45  0.58536585 0.6923077     123        104  0.15         72         72
## 46  0.64406780 0.5871560     118        109  0.15         76         64
## 47  0.68571429 0.6229508     105        122  0.15         72         76
## 48  0.52631579 0.6725664     114        113  0.45         60         76
## 49  0.48305085 0.7706422     118        109   0.9         57         84
## 50  0.57281553 0.7016129     103        124   0.7         59         87
## 51  0.53571429 0.6434783     112        115   0.8         60         74
## 52  0.78181818 0.3675214     110        117  0.25         86         43
## 53  0.53719008 0.7547170     121        106  0.25         65         80
## 54  0.67289720 0.6333333     107        120   0.1         72         76
## 55  0.54545455 0.7547170     121        106  0.05         66         80
## 56  0.46428571 0.8173913     112        115  0.05         52         94
## 57  0.65686275 0.6240000     102        125  0.45         67         78
## 58  0.70175439 0.4690265     114        113  0.25         80         53
## 59  0.56756757 0.5689655     111        116     1         63         66
## 60  0.62931034 0.5855856     116        111  0.35         73         65
## 61  0.55752212 0.6666667     113        114  0.15         63         76
## 62  0.49557522 0.8157895     113        114  0.15         56         93
## 63  0.61467890 0.6779661     109        118  0.25         67         80
## 64  0.66363636 0.6153846     110        117   0.1         73         72
## 65  0.60194175 0.6935484     103        124  0.55         62         86
## 66  0.62962963 0.6134454     108        119  0.65         68         73
## 67  0.68269231 0.6260163     104        123   0.4         71         77
## 68  0.33050847 0.7889908     118        109  0.05         39         86
## 69  0.00000000 1.0000000     119        108     0          0        108
## 70  0.33333333 0.8224299     120        107     0         40         88
## 71  0.50961538 0.7154472     104        123   0.5         53         88
## 72  1.00000000 0.0000000     112        115     0        112          0
## 73  0.72566372 0.4561404     113        114  0.25         82         52
## 74  0.40875912 0.8000000     137         90   0.2         56         72
## 75  0.68103448 0.6036036     116        111  0.15         79         67
## 76  0.70535714 0.5130435     112        115   0.3         79         59
## 77  0.57391304 0.5535714     115        112   0.2         66         62
## 78  0.59322034 0.6788991     118        109   0.2         70         74
## 79  0.73000000 0.4803150     100        127  0.75         73         61
## 80  0.53174603 0.8019802     126        101  0.15         67         81
## 81  0.55963303 0.7203390     109        118   0.9         61         85
## 82  0.57500000 0.6168224     120        107  0.75         69         66
## 83  0.62903226 0.7572816     124        103   0.1         78         78
## 84  0.60869565 0.6607143     115        112   0.6         70         74
## 85  0.67272727 0.5641026     110        117   0.2         74         66
## 86  0.08403361 0.9722222     119        108  0.05         10        105
## 87  0.67567568 0.6896552     111        116  0.75         75         80
## 88  0.60784314 0.5760000     102        125  0.55         62         72
## 89  0.78991597 0.3703704     119        108   0.6         94         40
## 90  0.65137615 0.6694915     109        118  0.45         71         79
## 91  0.56896552 0.7027027     116        111     1         66         78
## 92  0.75652174 0.3660714     115        112  0.05         87         41
## 93  0.68421053 0.5681818      95        132  0.65         65         75
## 94  0.50833333 0.7383178     120        107  0.95         61         79
## 95  0.51327434 0.7192982     113        114   0.5         58         82
## 96  0.76991150 0.4298246     113        114   0.9         87         49
## 97  0.47058824 0.7592593     119        108   0.2         56         82
## 98  0.58771930 0.7168142     114        113   0.5         67         81
## 99  0.56880734 0.6610169     109        118   0.4         62         78
## 100 0.55963303 0.7288136     109        118   0.1         61         86
##     false.pos.n false.neg.n  ppv  npv
## 1             6          97 0.71 0.53
## 2            45          28 0.63 0.74
## 3            30          45 0.72 0.62
## 4            33          57 0.64 0.58
## 5            40          50 0.65 0.55
## 6            40          49 0.64 0.57
## 7            40          42 0.65 0.62
## 8            39          38 0.65 0.68
## 9            22          60 0.75 0.57
## 10           60          30 0.59 0.62
## 11           41          53 0.59 0.58
## 12           64          23 0.56 0.72
## 13           59          37 0.56 0.61
## 14           29          53 0.71 0.58
## 15           47          45 0.59 0.60
## 16           55          33 0.58 0.66
## 17           53          42 0.56 0.61
## 18           51          27 0.65 0.67
## 19           60          34 0.60 0.56
## 20           29          56 0.68 0.59
## 21           40          43 0.68 0.57
## 22           31          47 0.70 0.62
## 23           58          35 0.59 0.59
## 24           53          34 0.60 0.63
## 25           60          34 0.60 0.56
## 26           66          38 0.51 0.59
## 27           43          45 0.60 0.62
## 28           38          59 0.62 0.54
## 29           32          56 0.68 0.56
## 30           43          44 0.61 0.62
## 31           45          56 0.56 0.55
## 32           36          47 0.65 0.62
## 33           27          60 0.67 0.59
## 34           28          48 0.65 0.67
## 35           62          25 0.60 0.66
## 36           42          46 0.63 0.59
## 37           37          55 0.59 0.60
## 38           26          48 0.76 0.60
## 39           29          56 0.67 0.60
## 40          123           0 0.46  NaN
## 41           65          23 0.58 0.68
## 42           39          54 0.59 0.59
## 43           29          57 0.66 0.60
## 44           36          41 0.67 0.66
## 45           32          51 0.69 0.59
## 46           45          42 0.63 0.60
## 47           46          33 0.61 0.70
## 48           37          54 0.62 0.58
## 49           25          61 0.70 0.58
## 50           37          44 0.61 0.66
## 51           41          52 0.59 0.59
## 52           74          24 0.54 0.64
## 53           26          56 0.71 0.59
## 54           44          35 0.62 0.68
## 55           26          55 0.72 0.59
## 56           21          60 0.71 0.61
## 57           47          35 0.59 0.69
## 58           60          34 0.57 0.61
## 59           50          48 0.56 0.58
## 60           46          43 0.61 0.60
## 61           38          50 0.62 0.60
## 62           21          57 0.73 0.62
## 63           38          42 0.64 0.66
## 64           45          37 0.62 0.66
## 65           38          41 0.62 0.68
## 66           46          40 0.60 0.65
## 67           46          33 0.61 0.70
## 68           23          79 0.63 0.52
## 69            0         119  NaN 0.48
## 70           19          80 0.68 0.52
## 71           35          51 0.60 0.63
## 72          115           0 0.49  NaN
## 73           62          31 0.57 0.63
## 74           18          81 0.76 0.47
## 75           44          37 0.64 0.64
## 76           56          33 0.59 0.64
## 77           50          49 0.57 0.56
## 78           35          48 0.67 0.61
## 79           66          27 0.53 0.69
## 80           20          59 0.77 0.58
## 81           33          48 0.65 0.64
## 82           41          51 0.63 0.56
## 83           25          46 0.76 0.63
## 84           38          45 0.65 0.62
## 85           51          36 0.59 0.65
## 86            3         109 0.77 0.49
## 87           36          36 0.68 0.69
## 88           53          40 0.54 0.64
## 89           68          25 0.58 0.62
## 90           39          38 0.65 0.68
## 91           33          50 0.67 0.61
## 92           71          28 0.55 0.59
## 93           57          30 0.53 0.71
## 94           28          59 0.69 0.57
## 95           32          55 0.64 0.60
## 96           65          26 0.57 0.65
## 97           26          63 0.68 0.57
## 98           32          47 0.68 0.63
## 99           40          47 0.61 0.62
## 100          32          48 0.66 0.64

# ppv = true.pos.n / all those testing pos

Compare sensitivity test results with primary result

Compare R2 from original model and those from sensitivity tests

# RESTRICTED (SENS #1) VS ORIGINAL SAMPLE
t.test(birth.mod[["log"]][["test.rsq"]],noNA.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  birth.mod[["log"]][["test.rsq"]] and noNA.mod[["log"]][["test.rsq"]]
## t = 6.9427, df = 196.58, p-value = 5.471e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01008317 0.01808433
## sample estimates:
##  mean of x  mean of y 
## 0.08132108 0.06723732

# BIOLOGICAL MOTHERS (SENS #2) ONLY VS ORIGINAL SAMPLE
t.test(birth.mod[["log"]][["test.rsq"]],mumsonly.mod[["log"]][["test.rsq"]])

## 
##  Welch Two Sample t-test
## 
## data:  birth.mod[["log"]][["test.rsq"]] and mumsonly.mod[["log"]][["test.rsq"]]
## t = 0.19391, df = 196.67, p-value = 0.8464
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003611422  0.004399086
## sample estimates:
##  mean of x  mean of y 
## 0.08132108 0.08092725

Other descriptives

Heatmap of all inter-correlations

abcd4cor <- subset(abcd_birth, select=-c(myid, cbclatt))

cormat <- round(cor(abcd4cor, use="pairwise.complete.obs"), digits=3)

lower <- cormat
lower[lower.tri(cormat)] <- NA

meltedcormat <- melt(lower, na.rm=T)

ggplot(data=meltedcormat, aes(Var2, Var1, fill=value))+
  geom_tile(color="white") +
  scale_fill_gradient2(low="blue", high="red", mid="white",
                       midpoint=0, limit = c(-.7,.7), space="Lab",
                       name="Pearson's r") +
  theme_minimal()+
  theme(axis.text.x = element_text(angle=60, 
                                   vjust=1 ,  hjust=1),
        axis.title.x=element_blank(),
        axis.title.y=element_blank())+
  coord_fixed()+
  coord_flip()+
  geom_text(aes(Var2, Var1, label=round(value,2)), color="black", size=1)

CBCL attention probs reliability

Calculate reliability of CBCL attention problem scale
1 score <- 10 items

# import AP items
# ap = read.csv("DownloadedData/ABCDbeh1/CBCLattitems.csv") # usually run

ap <- ap[, c("src_subject_id","eventname", "Att1","Att2","Att3","Att4","Att5","Att6","Att7","Att8","Att9","Att10")]

ap <- ap[ap$eventname=="baseline_year_1_arm_1",]
ap <- ap[c(2:length(ap$Att1)),] # remove var descr
ap[ap==""] <- NA
ap <- droplevels(ap)
ap$Att1 <- as.numeric(ap$Att1)-1
ap$Att2 <- as.numeric(ap$Att2)-1
ap$Att3 <- as.numeric(ap$Att3)-1
ap$Att4 <- as.numeric(ap$Att4)-1
ap$Att5 <- as.numeric(ap$Att5)-1
ap$Att6 <- as.numeric(ap$Att6)-1
ap$Att7 <- as.numeric(ap$Att7)-1
ap$Att8 <- as.numeric(ap$Att8)-1
ap$Att9 <- as.numeric(ap$Att9)-1
ap$Att10 <- as.numeric(ap$Att10)-1

# limit sample to those included in your analysis (only 1 sibling was allowed from each hsd in my analysis sample)
toremove <- setdiff(as.character(ap$src_subject_id), as.character(abcd$src_subject_id))

ap2 <- c()
for (x in 1:length(ap$src_subject_id)){
   if (!str_contains(toremove, as.character(ap$src_subject_id[x]))) {
      ap2 <- rbind(ap2, ap[x,])
   }
}

# calculate alpha & omega reliability estimates
library(psych)
omega(ap2[,3:length(ap2)], nfactors=1) # alpha=.86, omega total=.86

## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar)
## Alpha:                 0.86 
## G.6:                   0.86 
## Omega Hierarchical:    0.86 
## Omega H asymptotic:    1 
## Omega Total            0.86 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##          g  F1*   h2   u2 p2
## Att1  0.48      0.23 0.77  1
## Att2  0.66      0.44 0.56  1
## Att3  0.83      0.70 0.30  1
## Att4  0.69      0.48 0.52  1
## Att5  0.45      0.20 0.80  1
## Att6  0.53      0.29 0.71  1
## Att7  0.64      0.41 0.59  1
## Att8  0.54      0.30 0.70  1
## Att9  0.83      0.68 0.32  1
## Att10 0.50      0.25 0.75  1
## 
## With eigenvalues of:
##   g F1* 
##   4   0 
## 
## general/max  3.572921e+16   max/min =   1
## mean percent general =  1    with sd =  0 and cv of  0 
## Explained Common Variance of the general factor =  1 
## 
## The degrees of freedom are 35  and the fit is  0.33 
## The number of observations was  9974  with Chi Square =  3300.31  with prob <  0
## The root mean square of the residuals is  0.06 
## The df corrected root mean square of the residuals is  0.06
## RMSEA index =  0.097  and the 10 % confidence intervals are  0.094 0.1
## BIC =  2978.04
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 35  and the fit is  0.33 
## The number of observations was  9974  with Chi Square =  3300.31  with prob <  0
## The root mean square of the residuals is  0.06 
## The df corrected root mean square of the residuals is  0.06 
## 
## RMSEA index =  0.097  and the 10 % confidence intervals are  0.094 0.1
## BIC =  2978.04 
## 
## Measures of factor score adequacy             
##                                                  g F1*
## Correlation of scores with factors            0.94   0
## Multiple R square of scores with factors      0.89   0
## Minimum correlation of factor score estimates 0.79  -1
## 
##  Total, General and Subset omega for each subset
##                                                  g  F1*
## Omega total for total scores and subscales    0.86 0.86
## Omega general for total scores and subscales  0.86 0.86
## Omega group for total scores and subscales    0.00 0.00

alpha(ap2[,3:length(ap2)]) # alpha=.86

## 
## Reliability analysis   
## Call: alpha(x = ap2[, 3:length(ap2)])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.86      0.86    0.86      0.38 6.1 0.0019 0.31 0.35     0.35
## 
##  lower alpha upper     95% confidence boundaries
## 0.86 0.86 0.87 
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## Att1       0.86      0.86    0.86      0.40 5.9   0.0019 0.0152  0.37
## Att2       0.84      0.84    0.85      0.37 5.3   0.0021 0.0151  0.34
## Att3       0.83      0.83    0.83      0.35 4.9   0.0024 0.0095  0.33
## Att4       0.84      0.84    0.84      0.37 5.3   0.0021 0.0127  0.35
## Att5       0.86      0.86    0.86      0.40 6.0   0.0019 0.0147  0.37
## Att6       0.85      0.85    0.85      0.39 5.7   0.0019 0.0160  0.35
## Att7       0.85      0.84    0.85      0.38 5.4   0.0021 0.0151  0.34
## Att8       0.85      0.85    0.85      0.39 5.7   0.0020 0.0160  0.35
## Att9       0.83      0.83    0.83      0.35 4.9   0.0024 0.0099  0.33
## Att10      0.86      0.85    0.85      0.39 5.8   0.0019 0.0158  0.35
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop  mean   sd
## Att1  9974  0.55  0.56  0.48   0.45 0.248 0.49
## Att2  9974  0.72  0.70  0.66   0.62 0.475 0.61
## Att3  9974  0.84  0.81  0.82   0.77 0.484 0.65
## Att4  9974  0.74  0.71  0.68   0.64 0.396 0.62
## Att5  9974  0.48  0.55  0.48   0.42 0.072 0.29
## Att6  9974  0.60  0.61  0.55   0.49 0.296 0.53
## Att7  9974  0.69  0.68  0.63   0.60 0.299 0.55
## Att8  9974  0.59  0.61  0.54   0.51 0.163 0.42
## Att9  9974  0.84  0.81  0.81   0.76 0.539 0.68
## Att10 9974  0.53  0.60  0.53   0.46 0.102 0.33
## 
## Non missing response frequency for each item
##          0    1    2 miss
## Att1  0.78 0.20 0.03    0
## Att2  0.58 0.36 0.06    0
## Att3  0.60 0.31 0.09    0
## Att4  0.68 0.25 0.07    0
## Att5  0.94 0.06 0.01    0
## Att6  0.74 0.22 0.04    0
## Att7  0.75 0.21 0.04    0
## Att8  0.86 0.12 0.02    0
## Att9  0.57 0.33 0.11    0
## Att10 0.91 0.08 0.01    0

# repeat using a different package
library(ltm)
cronbach.alpha(ap2[,3:length(ap2)], CI=TRUE)[["alpha"]] #.86

## [1] 0.8614551

Predicting age 9 ADHD-like symptoms from birth in the ABCD cohort

Niamh Dooley

12/22/2022