library(tidyverse)
library(dplyr)
library(sjlabelled)
library(ggplot2)
library(ggpubr)
library(Hmisc)
library(sjmisc)
library(caTools)
library(glmnet)
library(MASS)
library(stargazer)
library(effects)
library(corrr)
library(graphlayouts)
library(lavaan)
library(knitr)
library(reshape2)
# load data & get functions
# load("~/data/abcd_clean.Rda") # usually run.
# source("prediction-functions.R") # usually run
Notes on imported data & functions
abcd_clean.Rda is the dataset resulting from cleaning script, publically available at https://rpubs.com/dooleyr/clean_abcd
prediction-functions.R is where the prediction scripts are stored (i.e. the cross-validation process, the elastic net algorithm, etc). See rpubs file by same name. The main functions are:
linearEN(data.frame, outer_folds, cv_folds): linear elastic net which predicts continuous/linear outcome. Column 2 in data.frame must be outcomes, ID numbers in col1 and potential predictors in remaining columns. Cross-validation is applied, missing data is mean-imputed, and an elastic net procedure finds alpha, lamda, and coefficients that minimize error between actual and predicted outcome.
balanced.logisticEN(data.frame, outer_folds, cv_folds): same as above but for a binary outcome (therefore using logistic link function). The script includes a balancing procedure such that the number of controls (without significant attention problems) is reduced to match the number of cases (children with significant attention problems)
collate.linear(model, outcome): pulls out key outcomes from elastic net results, useful for comparing group-stratified results. Outcomes returned include: sample size, intercept, average R-squared (and it’s SD, 95% CIs), number of predictors with non-zero betas, and number of total predictors entered into the model.
# data needs to be purely numeric and all factors need to be dummy coded for the elastic net function
# remove all subjects without outcome data (cbcl attention problems)
abcd <- abcd[!is.na(abcd$syn_attention_r),]
# make categorical vars into numerical
#abcd$plannedpreg_n <- as.numeric(abcd$plannedpreg)-1
abcd$multibirth <- as.numeric(abcd$multibirth) # 0=non-twin, 1=twin/other
abcd$csection <- as.numeric(abcd$csection)-1
abcd$smoked_n <- as.numeric(abcd$smokedpreg)-1 # 0=no smoking, 1=smoked
abcd$meds_n <- as.numeric(abcd$medspreg)-1
abcd$drugs_n <- as.numeric(abcd$drugspreg)-1
abcd$drank_n <- as.numeric(abcd$drankpreg)-1
abcd$vitamins_n <- as.numeric(abcd$devhx_10_prenatvitamins)
abcd$vitamins_n[abcd$vitamins_n==1] <- NA #missing
abcd$vitamins_n[abcd$vitamins_n==2] <- 1
abcd$vitamins_n[abcd$vitamins_n==3] <- 0 # taking vits should be the ref (majority case)
abcd$partner_n <- as.numeric(abcd$resps_partner)-1
# 'Yes I have a partner' is the reference (majority); "no im a single parent' is 1
#make sure 1 corresponds to the minority/risk class
abcd$preg_marijuana <- as.numeric(abcd$preg_marijuana)-1
#abcd$sex <- as.numeric(abcd$sex)-1 # females are 0, males are 1
abcd$sex <- as.numeric(abcd$sex)-1.5 # females are -0.5, males are 0.5
# dummy code ethnicity
abcd$eth_black <- ifelse(abcd$race_ethnicity== "Black", 1, 0)
abcd$eth_hispanic <- ifelse(abcd$race_ethnicity== "Hispanic", 1, 0)
abcd$eth_asian <- ifelse(abcd$race_ethnicity== "Asian", 1, 0)
abcd$eth_other <- ifelse(abcd$race_ethnicity== "Other", 1, 0)
#abcd$eth_white <- ifelse(abcd$race_ethnicity== "White", 1, 0)
# create new ids for each subject
abcd$myid <- c(1:length(abcd$src_subject_id))
id_tbl <- data.frame(abcd$myid, as.factor(abcd$src_subject_id))
names(id_tbl) <- c("my.ids", "orig.ids")
# reverse code parent education, income & bw, such that lower is higher
abcd$lower.income <- (scale(abcd$income.n, center=T, scale=F))*-1
abcd$lower.edu <- (scale(abcd$hsd_ed5_n, center=T, scale=F))*-1
abcd$lower.bw <- (scale(abcd$bw_kgs, center=T, scale=F))*-1 # lower BWs have higher values
# select outcome and all predictors/stratifiers you're interested in
abcd_m <- subset(abcd, select=c(myid, syn_attention_r, syn_attention_t,
sex,
Rship2child,
partner_n, lower.income, lower.edu ,
parent_mh_n, asr_scr_attention_r,
eth_black, eth_other,
eth_hispanic, eth_asian,
dadold, dadyoung, mumold, mumyoung,
multibirth,
gest_anyOG_n, gest_nausea, gest_bleed,
#gest_preeclampsia, #merged with other
#gest_rubella, #removed, too few
#gest_proteinuria #merged with other
gest_gallbladder, gest_anemia,
gest_uti, gest_diabetes, gest_hibloodpres,
gest_placenta,
gest_accident, gest_other, preeclamp_protein,
#gest_ndocvisits, 11% missing data
lower.bw, prem_wks,
birthcomp_any_n,
birthcomp_blue, birthcomp_slowheart,
birthcomp_nobreathe, birthcomp_rhincomp,
#birthcomp_convuls, #removed, too few cases
#birthcomp_blood, #removed, too few cases
birthcomp_jaundice, birthcomp_oxygen,
csection, incubator_days,
smoked_n, drank_n, meds_n, vitamins_n,
drugs_n, preg_marijuana, drug_notweed))
# rename vars
abcd_m<- abcd_m%>%
dplyr::rename(
cbclatt = syn_attention_r, # note I'm using the raw outcome here, not centred
parent.att = asr_scr_attention_r,
exp.smoking = smoked_n,
exp.meds = meds_n,
exp.drugs =drugs_n,
exp.marij = preg_marijuana,
exp.otherdrug = drug_notweed,
exp.alcohol=drank_n,
exp.novits=vitamins_n,
Black=eth_black,
Hispanic=eth_hispanic,
Asian=eth_asian,
OtherEthnic = eth_other,
SE.singleparent = partner_n,
gest.totalcomps = gest_anyOG_n,
gest.rh=birthcomp_rhincomp,
gest.nausea = gest_nausea,
gest.bleed = gest_bleed,
gest.gall = gest_gallbladder,
gest.preclamp.protein = preeclamp_protein,
gest.anemia=gest_anemia,
gest.uti=gest_uti,
gest.diab =gest_diabetes,
gest.hiBP = gest_hibloodpres,
gest.placenta = gest_placenta,
gest.accident = gest_accident,
gest.other = gest_other,
deliv.blue=birthcomp_blue,
deliv.heart=birthcomp_slowheart,
deliv.breathe=birthcomp_nobreathe,
deliv.jaund = birthcomp_jaundice,
deliv.oxyg = birthcomp_oxygen,
deliv.totalcomps = birthcomp_any_n,
nicu_days = incubator_days,
male.sex = sex,
parent.any.n = parent_mh_n
)
# creata a dataframe with only data plausibly available at birth and cbcl attention problems as continuous outcome (remove social stratifiers like family income etc.)
abcd_birth <- subset(abcd_m, select=-c(SE.singleparent, lower.income, lower.edu,
parent.any.n, parent.att,
Rship2child, syn_attention_t))
# explore missingness on predictors
A<-data.frame(colSums(is.na(abcd_birth)))
A$var <- colnames(abcd_birth)
colnames(A) <- "NAs"
A$perc <- round((A$NAs/length(abcd_birth$myid))*100, 1)
rownames(A) <- c()
print(A[order(-A$NAs),])
## NAs NA perc
## 39 852 exp.novits 8.5
## 38 800 exp.meds 8.0
## 35 616 nicu_days 6.2
## 37 582 exp.alcohol 5.8
## 8 554 dadold 5.6
## 9 554 dadyoung 5.6
## 25 446 lower.bw 4.5
## 18 407 gest.uti 4.1
## 20 351 gest.hiBP 3.5
## 31 342 gest.rh 3.4
## 14 340 gest.nausea 3.4
## 17 335 gest.anemia 3.4
## 41 329 exp.marij 3.3
## 19 324 gest.diab 3.2
## 21 313 gest.placenta 3.1
## 23 303 gest.other 3.0
## 24 298 gest.preclamp.protein 3.0
## 16 296 gest.gall 3.0
## 29 285 deliv.heart 2.9
## 15 284 gest.bleed 2.8
## 22 280 gest.accident 2.8
## 28 268 deliv.blue 2.7
## 36 253 exp.smoking 2.5
## 30 252 deliv.breathe 2.5
## 33 248 deliv.oxyg 2.5
## 32 243 deliv.jaund 2.4
## 42 233 exp.otherdrug 2.3
## 13 229 gest.totalcomps 2.3
## 10 223 mumold 2.2
## 11 223 mumyoung 2.2
## 40 153 exp.drugs 1.5
## 26 143 prem_wks 1.4
## 27 140 deliv.totalcomps 1.4
## 34 128 csection 1.3
## 4 14 Black 0.1
## 5 14 OtherEthnic 0.1
## 6 14 Hispanic 0.1
## 7 14 Asian 0.1
## 1 0 myid 0.0
## 2 0 cbclatt 0.0
## 3 0 male.sex 0.0
## 12 0 multibirth 0.0
set.seed(100)
cvs = 5 # cross-validation folds
runs= 20 # outer fold (i.e. number of times to repeat the whole process)
The first model predicts CBCL attention problems in the full sample (n=9975)
#birth.mod <- linearEN(abcd_birth, runs, cvs)
# not run here as takes long time, results extracted from saved versions
print(t(birth.mod[["summary"]]))
## [,1]
## sample_n 9975.00
## num.runs 100.00
## num.inputs 40.00
## num.meetingthreshfreq 17.00
## intercept 2.50
## mean.rsq 8.13
## sd.rsq 1.49
## rsq.CIlo 5.61
## rsq.CIhi 11.47
## CVmean.ofrsqs 8.00
## CVrsq.CIlo 7.42
## CVrsq.CIhi 8.13
## error.mean 2.65
## error.CIlo 0.08
## error.CIhi 8.86
Bs <- birth.mod[["Bs"]] # what to plot
# plot betas of predictors with selection frequency of 95% or greater
thresh = 95
ggplot(data=Bs[Bs$chosenpc>=thresh,])+
geom_col(aes(x=reorder(predictor, mean.B), y= mean.B), width=.90, fill="steelblue", alpha=0.9)+
geom_linerange(aes(x=reorder(predictor, mean.B) , ymin=cilo, ymax=cihi), alpha=0.5, colour="steelblue4")+
geom_text(aes(x=reorder(predictor, mean.B), y=0, label=predictor, hjust=sign),
vjust= 0.5, size=4)+
labs(x="", y="mean B", title = "Full sample", subtitle = paste0("Mean R-squared = ", round(as.numeric(birth.mod["Rsq"]), digits=2), "%" ) )+
theme_minimal()+
coord_flip()+
theme_classic()+
theme(legend.position="none",
plot.title = element_text(hjust=0.5, face="bold"),
plot.subtitle = element_text(hjust=0.5),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank())
birth.mod$log
## fold testn test.rsq lambda alpha
## 1 1 998 0.07451778 0.05366970 1.00
## 2 2 998 0.07045908 0.06310880 0.70
## 3 3 998 0.06529149 0.59802818 0.05
## 4 4 998 0.07798338 0.04942438 0.85
## 5 5 998 0.07157084 0.30108346 0.05
## 6 1 998 0.09599504 0.08251380 0.65
## 7 2 998 0.08033155 0.15089333 0.30
## 8 3 998 0.09211290 0.21855819 0.15
## 9 4 998 0.07719784 0.07597276 0.40
## 10 5 998 0.09674485 0.04592400 0.85
## 11 1 998 0.07810995 0.42353107 0.10
## 12 2 998 0.06114653 0.08596928 0.55
## 13 3 998 0.07253902 0.07520072 0.90
## 14 4 998 0.06845408 0.18600381 0.25
## 15 5 998 0.06703420 0.12121355 0.50
## 16 1 998 0.07349912 0.09526872 0.50
## 17 2 998 0.09200230 0.09766030 0.50
## 18 3 998 0.08079097 0.06156387 0.90
## 19 4 998 0.08999672 0.07828760 1.00
## 20 5 998 0.09388049 0.13817023 0.30
## 21 1 998 0.08586147 0.63740841 0.00
## 22 2 998 0.08829863 0.09538691 0.35
## 23 3 998 0.09397584 0.14966273 0.35
## 24 4 998 0.07074978 0.16105898 0.25
## 25 5 998 0.06815829 0.15462271 0.30
## 26 1 998 0.08486783 0.09763146 0.50
## 27 2 998 0.07081832 0.13515153 0.50
## 28 3 998 0.06108455 0.15961634 0.30
## 29 4 998 0.10476708 0.08630526 0.60
## 30 5 998 0.07708194 0.07874574 0.65
## 31 1 998 0.08300347 0.10902407 0.40
## 32 2 998 0.11057171 0.15300948 0.25
## 33 3 998 0.08212875 0.71471305 0.05
## 34 4 998 0.08602233 0.51128185 0.05
## 35 5 998 0.08299525 0.05212823 0.90
## 36 1 998 0.09361915 0.06789750 1.00
## 37 2 998 0.08170665 0.14422571 0.35
## 38 3 998 0.08737463 0.47020222 0.10
## 39 4 998 0.06675979 0.26548941 0.10
## 40 5 998 0.09188067 0.06650868 0.70
## 41 1 998 0.07674255 0.09353754 0.65
## 42 2 998 0.09791383 0.29004364 0.15
## 43 3 998 0.05090476 0.07581506 0.90
## 44 4 998 0.06809078 0.05111142 1.00
## 45 5 998 0.07361099 0.45263749 0.10
## 46 1 998 0.06647678 0.05209534 0.60
## 47 2 998 0.09021461 0.07880137 0.85
## 48 3 998 0.11669223 0.28525835 0.10
## 49 4 998 0.08680493 0.03925091 0.80
## 50 5 998 0.08536036 0.07740672 0.65
## 51 1 998 0.09092127 0.20070208 0.20
## 52 2 998 0.08306688 0.08414151 0.70
## 53 3 998 0.06936714 0.10743869 0.45
## 54 4 998 0.08931791 0.06457365 0.80
## 55 5 998 0.06846488 0.22220100 0.20
## 56 1 998 0.07329901 0.18033638 0.25
## 57 2 998 0.07306502 0.07518083 0.75
## 58 3 998 0.09597284 0.09362783 0.45
## 59 4 998 0.03432075 0.29669812 0.15
## 60 5 998 0.07846881 0.06367871 0.95
## 61 1 998 0.11700141 0.09599021 0.40
## 62 2 998 0.05588034 0.25901369 0.20
## 63 3 998 0.08809989 0.61071090 0.05
## 64 4 998 0.10811138 0.18290755 0.25
## 65 5 998 0.05838455 0.15381270 0.35
## 66 1 998 0.06862963 0.14779353 0.30
## 67 2 998 0.09397013 0.08820225 0.40
## 68 3 998 0.08503381 0.04786076 0.85
## 69 4 998 0.09067965 0.07583798 0.75
## 70 5 998 0.08493265 0.34381602 0.10
## 71 1 998 0.06577384 0.36867284 0.10
## 72 2 998 0.08434462 0.31687504 0.10
## 73 3 998 0.09295776 0.18688833 0.35
## 74 4 998 0.09017474 0.10120115 0.50
## 75 5 998 0.05642633 0.10255173 0.55
## 76 1 998 0.07587606 0.05723823 0.85
## 77 2 998 0.12147207 0.03867801 0.55
## 78 3 998 0.07755944 0.14085686 0.40
## 79 4 998 0.09180454 0.10147340 0.55
## 80 5 998 0.06975185 0.11135238 0.50
## 81 1 998 0.08390861 0.84330160 0.00
## 82 2 998 0.08721012 0.09262751 0.70
## 83 3 998 0.06361714 0.25441908 0.15
## 84 4 998 0.08514719 0.08516273 0.70
## 85 5 998 0.07691442 0.10326369 0.45
## 86 1 998 0.05866072 0.05159019 0.75
## 87 2 998 0.08143027 0.68665697 0.05
## 88 3 998 0.06063849 0.08645227 0.50
## 89 4 998 0.10148278 0.30619729 0.15
## 90 5 998 0.05899148 0.08351576 0.75
## 91 1 998 0.11241430 0.07112994 0.85
## 92 2 998 0.09435041 0.09443641 0.50
## 93 3 998 0.09039005 0.11710101 0.40
## 94 4 998 0.07820359 0.47534216 0.05
## 95 5 998 0.07509208 0.05777879 0.90
## 96 1 998 0.07052303 0.38036459 0.05
## 97 2 998 0.09093007 0.07727950 0.85
## 98 3 998 0.08306394 0.04887156 1.00
## 99 4 998 0.08425388 0.08599949 0.60
## 100 5 998 0.09756001 0.41320983 0.05
The following models predict attention problems in children, split by sex of the child, family income, race/ethnicity of the child, parent attention problems & parent psychiatric history
# run the prediction algorithm in boys & girls separately
boys_birth <- abcd_birth[abcd_birth$male.sex==0.5,]
boys_birth <- subset(boys_birth, select= -c(male.sex))
girls_birth <- abcd_birth[abcd_birth$male.sex== -0.5,]
girls_birth <- subset(girls_birth, select= -c(male.sex))
# *not run here as takes long time, results extracted from saved versions
# malebirth.mod <- linearEN(boys_birth, runs, cvs)
# femalebirth.mod <- linearEN(girls_birth, runs, cvs)
# comparison table
rbind(collate_linear(malebirth.mod,"males"),
collate_linear(femalebirth.mod, "females"))
## group N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1 males 5255 3.08 5.28 1.51 2.0 7.92 12
## 2 females 4720 2.11 4.82 1.72 1.4 8.14 8
## num_total
## 1 39
## 2 39
# B's, CIs & selection frequencies to plot
maleBs <- malebirth.mod[["Bs"]]
femaleBs <- femalebirth.mod[["Bs"]]
# example of saved table for each model
round(head(maleBs[c("chosenpc", "mean.B", "cilo", "cihi")]), digits=2)
## chosenpc mean.B cilo cihi
## Black 81 0.11 0.01 0.22
## OtherEthnic 100 0.31 0.15 0.51
## Hispanic 19 0.05 0.02 0.08
## Asian 99 -0.37 -0.70 -0.08
## dadold 17 0.00 0.00 0.01
## dadyoung 92 0.12 0.01 0.26
# select subsamples
# note there were 14 cases where race was unknown so these rows need to be deleted
# too few "asian" so not run as a separate model
white_birth <- abcd_birth[(abcd_birth$Black==0 & abcd_birth$Asian==0 & abcd_birth$OtherEthnic==0 & abcd_birth$Hispanic==0),]
white_birth <- subset(white_birth, select=-c(Black, Asian, Hispanic, OtherEthnic))
white_birth <- white_birth[!is.na(white_birth$cbclatt),]
hispanic_birth <- abcd_birth[abcd_birth$Hispanic==1,]
hispanic_birth <- subset(hispanic_birth, select=-c(Black, Asian, Hispanic, OtherEthnic))
hispanic_birth <- hispanic_birth[!is.na(hispanic_birth$cbclatt),]
black_birth <- abcd_birth[abcd_birth$Black==1,]
black_birth <- subset(black_birth, select=-c(Black, Asian, Hispanic, OtherEthnic))
black_birth <- black_birth[!is.na(black_birth$cbclatt),]
# run elastic net (don't actually run here, takes too long, results loaded up)
# whitebirth.mod <- linearEN(white_birth, runs, cvs)
# hispanicbirth.mod <- linearEN(hispanic_birth, runs, cvs)
# blackbirth.mod <- linearEN(black_birth, runs, cvs)
# comparison table
rbind(collate_linear(whitebirth.mod,"white"),
collate_linear(hispanicbirth.mod, "hisp"),
collate_linear(blackbirth.mod, "black"))
## group N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1 white 5061 2.50 6.43 1.72 3.06 9.59 10
## 2 hisp 2121 2.83 3.61 2.23 -1.85 6.94 4
## 3 black 1508 2.84 7.10 4.47 -2.90 14.22 9
## num_total
## 1 36
## 2 36
## 3 36
# collect Bs
b1 <- whitebirth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("white", names(b1), sep=".")
b2<- blackbirth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("black", names(b2), sep=".")
b3 <- hispanicbirth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hisp", names(b3), sep=".")
# which variables were robust predictors of ADHD Sx for which groups? (robust ~ selection frequency >=95%)
race.Bs <- cbind(b1, b2, b3)
race.Bs <- race.Bs[race.Bs$white.chosenpc>=95 | race.Bs$hisp.chosenpc>=95 | race.Bs$black.chosenpc>=95,]
Income Groups
low.income 1-6
mid.income 7 & 8
hi.income 9 & 10
original income levels
1. <$5,000; LOW
2. $5,000-11,999; LOW
3. $12,000-15,999; LOW
4. $16,000-24,999; LOW
5. $25,000-34,999; LOW
6. $35,000-49,999; LOW
7. $50,000-74,999; MID
8. $75,000-99,999; MID
9. $100,000-199,999; HIGH
10. $200,000+ HIGH
…but note that the numbering below in the script refers to cut-points on a standardised and reverse-coded scale of income levels (1.15 and 1.50 are cut-points corresponding to groupings above)
# select subsamples
inc1_birth <- abcd_m[abcd_m$lower.income > 1.15,]
inc1_birth <- subset(inc1_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
inc1_birth <- inc1_birth[!is.na(inc1_birth$cbclatt),]
inc2_birth <- abcd_m[(abcd_m$lower.income < 1.15) & (abcd_m$lower.income > -1.5),]
inc2_birth <- subset(inc2_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
inc2_birth <- inc2_birth[!is.na(inc2_birth$cbclatt),]
inc3_birth <- abcd_m[abcd_m$lower.income < -1.5,]
inc3_birth <- subset(inc3_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
inc3_birth <- inc3_birth[!is.na(inc3_birth$cbclatt),]
# run elastic net
# inc1birth.mod <- linearEN(inc1_birth, runs, cvs)
# inc2birth.mod <- linearEN(inc2_birth, runs, cvs)
# inc3birth.mod <- linearEN(inc3_birth, runs, cvs)
# comparison table
rbind(collate_linear(inc1birth.mod,"LOW"),
collate_linear(inc2birth.mod, "MID"),
collate_linear(inc3birth.mod, "HIGH"))
## group N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1 LOW 2780 2.88 9.61 2.91 3.85 14.83 12
## 2 MID 2567 2.80 4.09 1.89 0.64 8.07 6
## 3 HIGH 3757 2.52 2.90 1.21 0.23 5.05 5
## num_total
## 1 40
## 2 40
## 3 40
# collect Bs
b1 <- inc1birth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("low", names(b1), sep=".")
b2 <- inc2birth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("mid", names(b2), sep=".")
b3 <- inc3birth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hi", names(b3), sep=".")
inc.Bs <- cbind(b1, b2, b3)
inc.Bs <- inc.Bs[inc.Bs$low.chosenpc>94 | inc.Bs$mid.chosenpc>94 | inc.Bs$hi.chosenpc>94,]
# select subsample
hiADHD_birth <- abcd_m[abcd_m$parent.att >= 8,] # 80th percentile
hiADHD_birth <- subset(hiADHD_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
hiADHD_birth <- hiADHD_birth[!is.na(hiADHD_birth$cbclatt),]
midADHD_birth <- abcd_m[(abcd_m$parent.att >= 4 & abcd_m$parent.att < 8),] # 50th-80th percentile
midADHD_birth <- subset(midADHD_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
midADHD_birth <- midADHD_birth[!is.na(midADHD_birth$cbclatt),]
loADHD_birth <- abcd_m[abcd_m$parent.att < 4,] # less than ave
loADHD_birth <- subset(loADHD_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
loADHD_birth <- loADHD_birth[!is.na(loADHD_birth$cbclatt),]
# run elastic net
# hiADHDbirth.mod <- linearEN(hiADHD_birth, runs, cvs)
# midADHDbirth.mod <- linearEN(midADHD_birth, runs, cvs)
# loADHDbirth.mod <- linearEN(loADHD_birth, runs, cvs)
rbind(collate_linear(hiADHDbirth.mod,"High"),
collate_linear(midADHDbirth.mod, "Mid"),
collate_linear(loADHDbirth.mod, "Low"))
## group N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1 High 2160 4.75 5.32 2.48 0.25 9.69 6
## 2 Mid 2891 3.00 4.30 1.92 0.35 7.62 7
## 3 Low 4921 1.80 3.75 1.53 0.32 6.43 7
## num_total
## 1 40
## 2 40
## 3 40
# merge B-summary tables
b1 <- loADHDbirth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("low", names(b1), sep=".")
b2 <- midADHDbirth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("mid", names(b2), sep=".")
b3 <- hiADHDbirth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hi", names(b3), sep=".")
adhd.Bs <- cbind(b1, b2, b3)
adhd.Bs <- adhd.Bs[adhd.Bs$low.chosenpc>=95 | adhd.Bs$mid.chosenpc>=95 | adhd.Bs$hi.chosenpc>=95,]
# select subsamples
noMHx_birth <- abcd_m[abcd_m$parent.any.n==0,]
noMHx_birth <- subset(noMHx_birth, select = -c(SE.singleparent, lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
noMHx_birth <- noMHx_birth[!is.na(noMHx_birth$cbclatt),]
medMHx_birth <- abcd_m[(abcd_m$parent.any.n == 1) | (abcd_m$parent.any.n == 2),]
medMHx_birth <- subset(medMHx_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
medMHx_birth <- medMHx_birth[!is.na(medMHx_birth$cbclatt),]
hiMHx_birth <- abcd_m[(abcd_m$parent.any.n >= 3),]
hiMHx_birth <- subset(hiMHx_birth, select = -c(SE.singleparent,lower.income, lower.edu, parent.any.n, parent.att, syn_attention_t ))
hiMHx_birth <- hiMHx_birth[!is.na(hiMHx_birth$cbclatt),]
# run elastic net
# hiMHxbirth.mod <- linearEN(hiMHx_birth, runs, cvs)
# medMHxbirth.mod <- linearEN(medMHx_birth, runs, cvs)
# noMHxbirth.mod <- linearEN(noMHx_birth, runs, cvs)
rbind(collate_linear(hiMHxbirth.mod,"High"),
collate_linear(medMHxbirth.mod, "Medium"),
collate_linear(noMHxbirth.mod, "None"))
## group N intercept Rsq_mean Rsq_sd Rsq_lowCI Rsq_hiCI num_predictors
## 1 High 2551 3.77 5.98 2.41 1.79 9.98 8
## 2 Medium 2717 2.86 4.96 2.03 1.74 9.11 8
## 3 None 4597 2.15 2.88 1.34 -0.01 5.00 6
## num_total
## 1 40
## 2 40
## 3 40
# merge B-summary tables
b1 <- noMHxbirth.mod$Bs[,c(3,4,9)]
names(b1) <- paste("noMH", names(b1), sep=".")
b2 <- medMHxbirth.mod$Bs[,c(3,4,9)]
names(b2) <- paste("medMH", names(b2), sep=".")
b3 <- hiMHxbirth.mod$Bs[,c(3,4,9)]
names(b3) <- paste("hiMH", names(b3), sep=".")
parMH.Bs <- cbind(b1, b2, b3)
parMH.Bs <- parMH.Bs[parMH.Bs$noMH.chosenpc>=95 | parMH.Bs$medMH.chosenpc>=95 | parMH.Bs$hiMH.chosenpc>=95,]
# save: N, intercept, R-sq, R lower, R higher, # vars
grandsummary <-
rbind(
collate_linear(birth.mod, "all participants"),
# collate_linear(mumsonly.mod, "SA: mother only"),
# collate_linear(noNA.mod, "SA: full data only"),
collate_linear(femalebirth.mod, "females"),
collate_linear(malebirth.mod, "males"),
collate_linear(hispanicbirth.mod, "hispanic"),
collate_linear(whitebirth.mod, "white"),
collate_linear(blackbirth.mod, "black"),
collate_linear(inc3birth.mod, "high income"),
collate_linear(inc2birth.mod, "middle income"),
collate_linear(inc1birth.mod, "low income"),
collate_linear(noMHxbirth.mod, "no parent Hx"),
collate_linear(medMHxbirth.mod, "average parent Hx"),
collate_linear(hiMHxbirth.mod, " strong parent Hx"),
collate_linear(loADHDbirth.mod, "low parent ADHD"),
collate_linear(midADHDbirth.mod, "mod parent ADHD"),
collate_linear(hiADHDbirth.mod, "high parent ADHD") )
grandsummary$strat <- c("No Stratification","Sex", "Sex", "Race/Ethnicity", "Race/Ethnicity", "Race/Ethnicity", "Income", "Income", "Income", "Parent Psychiatric History", "Parent Psychiatric History", "Parent Psychiatric History", "Parent ADHD Symptoms","Parent ADHD Symptoms","Parent ADHD Symptoms")
grandsummary$strat.n <- c(1, 2,3, 4,5,6, 7,8,9 ,10,11,12, 13,14,15)
grandsummary$strat <- factor(grandsummary$strat, levels = c("No Stratification", "Sex", "Race/Ethnicity", "Income", "Parent Psychiatric History","Parent ADHD Symptoms"))
groupcols <- c("steelblue", "firebrick2", "lightpink2","lightblue", "olivedrab", "darkolivegreen3")
ggplot(grandsummary, aes(x=reorder(group,strat.n), y=Rsq_mean, fill=strat))+
geom_bar(stat="identity", alpha=.8)+
geom_errorbar(aes(x=reorder(group,strat.n), ymin=Rsq_mean-Rsq_sd, ymax=Rsq_mean+Rsq_sd,
colour=strat), width=0.1)+
#geom_text(stat="identity", aes(x=reorder(group,strat.n), label=Rsq_mean,
# colour = strat),vjust=-0.3, size=3])+
scale_fill_manual(values=groupcols)+
scale_colour_manual(values=groupcols)+
labs(x="")+
scale_y_continuous(name="Mean R-squared", breaks=c(0,2,4,6,8,10,12))+
theme_classic()+
theme(axis.text.x=element_text(angle=60, hjust=1, vjust=1),
legend.title = element_blank())
# compare R-sq from male and female models
t.test(malebirth.mod[["log"]][["test.rsq"]], femalebirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: malebirth.mod[["log"]][["test.rsq"]] and femalebirth.mod[["log"]][["test.rsq"]]
## t = 2.0185, df = 194.74, p-value = 0.04491
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0001057203 0.0091145856
## sample estimates:
## mean of x mean of y
## 0.05280289 0.04819274
# race
d <- rbind((data.frame(Rsq=whitebirth.mod[["log"]][["test.rsq"]], group="white")),
(data.frame(Rsq=blackbirth.mod[["log"]][["test.rsq"]], group="black")),
(data.frame(Rsq=hispanicbirth.mod[["log"]][["test.rsq"]], group="hispanic")))
summary(aov(Rsq ~ group, data=d))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 0.06871 0.03435 36.93 4.74e-15 ***
## Residuals 297 0.27627 0.00093
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# group post-hocs
t.test(whitebirth.mod[["log"]][["test.rsq"]], blackbirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: whitebirth.mod[["log"]][["test.rsq"]] and blackbirth.mod[["log"]][["test.rsq"]]
## t = -1.3883, df = 127.68, p-value = 0.1675
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.016122976 0.002827008
## sample estimates:
## mean of x mean of y
## 0.06432814 0.07097612
t.test(whitebirth.mod[["log"]][["test.rsq"]], hispanicbirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: whitebirth.mod[["log"]][["test.rsq"]] and hispanicbirth.mod[["log"]][["test.rsq"]]
## t = 10.033, df = 185.94, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02270241 0.03381501
## sample estimates:
## mean of x mean of y
## 0.06432814 0.03606943
t.test(blackbirth.mod[["log"]][["test.rsq"]], hispanicbirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: blackbirth.mod[["log"]][["test.rsq"]] and hispanicbirth.mod[["log"]][["test.rsq"]]
## t = 6.9884, df = 145.45, p-value = 9.28e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02503460 0.04477878
## sample estimates:
## mean of x mean of y
## 0.07097612 0.03606943
# income
d <- rbind((data.frame(Rsq=inc1birth.mod[["log"]][["test.rsq"]], group="low")),
(data.frame(Rsq=inc2birth.mod[["log"]][["test.rsq"]], group="middle")),
(data.frame(Rsq=inc3birth.mod[["log"]][["test.rsq"]], group="high")))
summary(aov(Rsq ~ group, data=d))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 0.2562 0.12808 285.1 <2e-16 ***
## Residuals 297 0.1334 0.00045
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# post-hocs
t.test(inc2birth.mod[["log"]][["test.rsq"]], inc3birth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: inc2birth.mod[["log"]][["test.rsq"]] and inc3birth.mod[["log"]][["test.rsq"]]
## t = 5.2899, df = 168.4, p-value = 3.765e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.007438335 0.016295757
## sample estimates:
## mean of x mean of y
## 0.04087335 0.02900630
t.test(inc1birth.mod[["log"]][["test.rsq"]], inc2birth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: inc1birth.mod[["log"]][["test.rsq"]] and inc2birth.mod[["log"]][["test.rsq"]]
## t = 15.922, df = 170.02, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.04835266 0.06203899
## sample estimates:
## mean of x mean of y
## 0.09606917 0.04087335
# parent psychopathology
d <- rbind((data.frame(Rsq=hiMHxbirth.mod[["log"]][["test.rsq"]], group="strong Hx")),
(data.frame(Rsq=medMHxbirth.mod[["log"]][["test.rsq"]], group="average Hx")),
(data.frame(Rsq=noMHxbirth.mod[["log"]][["test.rsq"]], group="no Hx")))
summary(aov(Rsq ~ group, data=d))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 0.04985 0.02492 63.86 <2e-16 ***
## Residuals 297 0.11592 0.00039
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
t.test(noMHxbirth.mod[["log"]][["test.rsq"]], medMHxbirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: noMHxbirth.mod[["log"]][["test.rsq"]] and medMHxbirth.mod[["log"]][["test.rsq"]]
## t = -8.5587, df = 171.77, p-value = 6.193e-15
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02561462 -0.01601396
## sample estimates:
## mean of x mean of y
## 0.02880501 0.04961929
t.test(medMHxbirth.mod[["log"]][["test.rsq"]], hiMHxbirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: medMHxbirth.mod[["log"]][["test.rsq"]] and hiMHxbirth.mod[["log"]][["test.rsq"]]
## t = -3.2264, df = 192.45, p-value = 0.001473
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.016363669 -0.003947178
## sample estimates:
## mean of x mean of y
## 0.04961929 0.05977471
# parent ADHD sx
d <- rbind((data.frame(Rsq=hiADHDbirth.mod[["log"]][["test.rsq"]], group="HIGH")),
(data.frame(Rsq=midADHDbirth.mod[["log"]][["test.rsq"]], group="MID")),
(data.frame(Rsq=loADHDbirth.mod[["log"]][["test.rsq"]], group="LOW")))
summary(aov(Rsq ~ group, data=d))
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 0.01266 0.006328 15.6 3.63e-07 ***
## Residuals 297 0.12052 0.000406
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# post-hocs for each level of parent attention problems
t.test(loADHDbirth.mod[["log"]][["test.rsq"]],
midADHDbirth.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: loADHDbirth.mod[["log"]][["test.rsq"]] and midADHDbirth.mod[["log"]][["test.rsq"]]
## t = -2.2232, df = 188.62, p-value = 0.02739
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0102825403 -0.0006141471
## sample estimates:
## mean of x mean of y
## 0.03753519 0.04298354
t.test(midADHDbirth.mod[["log"]][["test.rsq"]],
hiADHDbirth.mod[["log"]][["test.rsq"]]) # t=-3.26, p=.001
##
## Welch Two Sample t-test
##
## data: midADHDbirth.mod[["log"]][["test.rsq"]] and hiADHDbirth.mod[["log"]][["test.rsq"]]
## t = -3.2583, df = 186.03, p-value = 0.001332
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.016409884 -0.004032688
## sample estimates:
## mean of x mean of y
## 0.04298354 0.05320482
t.test(loADHDbirth.mod[["log"]][["test.rsq"]],
hiADHDbirth.mod[["log"]][["test.rsq"]]) # t = -5.37, p <.001
##
## Welch Two Sample t-test
##
## data: loADHDbirth.mod[["log"]][["test.rsq"]] and hiADHDbirth.mod[["log"]][["test.rsq"]]
## t = -5.3741, df = 164.52, p-value = 2.596e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02142673 -0.00991253
## sample estimates:
## mean of x mean of y
## 0.03753519 0.05320482
SENS #1. What if mean-imputation of missing values was strongly influencing results (though note, all imputed variables had less than 10% missing data).
Run full-sample analysis with no missing data (complete cases only)
noNA_birth <- abcd_birth[complete.cases(abcd_birth),]
#noNA.mod <- linearEN(noNA_birth, runs, cvs)
noNABs <- noNA.mod[["Bs"]] # what to plot?
# plot prenatal predictors
thresh = 95 # needs to be 99 for lambdamin but lower for 1se
ggplot(data=noNABs[noNABs$chosenpc>=thresh,])+
geom_col(aes(x=reorder(predictor, mean.B), y= mean.B), width=.90, fill="steelblue", alpha=0.9)+
geom_linerange(aes(x=reorder(predictor, mean.B) , ymin=cilo, ymax=cihi), alpha=0.5, colour="steelblue4")+
geom_text(aes(x=reorder(predictor, mean.B), y=0, label=predictor, hjust=sign),
vjust= 0.5, size=4)+
labs(x="", y="mean B", title = "Full sample", subtitle = paste0("Mean R-squared = ", round(as.numeric(noNA.mod["Rsq"]), digits=2), "%" ) )+
theme_minimal()+
coord_flip()+
theme_classic()+
theme(legend.position="none",
plot.title = element_text(hjust=0.5, face="bold"),
plot.subtitle = element_text(hjust=0.5),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank())
Observe differences across the complete-case sample and the sample used in main analysis (missing vals imputed). Look at outcome (CBCL attention problems) and each predictor, using chi-squared/t-tests as appropriate.
# chi-square tests
chisq.test(rbind(table(abcd_birth$male.sex), table(noNA_birth$male.sex)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$male.sex), table(noNA_birth$male.sex))
## X-squared = 0.031509, df = 1, p-value = 0.8591
chisq.test(rbind(table(abcd_birth$Black), table(noNA_birth$Black)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$Black), table(noNA_birth$Black))
## X-squared = 11.537, df = 1, p-value = 0.0006823
chisq.test(rbind(table(abcd_birth$OtherEthnic), table(noNA_birth$OtherEthnic)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$OtherEthnic), table(noNA_birth$OtherEthnic))
## X-squared = 0.83432, df = 1, p-value = 0.361
chisq.test(rbind(table(abcd_birth$Hispanic), table(noNA_birth$Hispanic)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$Hispanic), table(noNA_birth$Hispanic))
## X-squared = 0.3471, df = 1, p-value = 0.5558
chisq.test(rbind(table(abcd_birth$Asian), table(noNA_birth$Asian)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$Asian), table(noNA_birth$Asian))
## X-squared = 2.1944, df = 1, p-value = 0.1385
chisq.test(rbind(table(abcd_birth$multibirth), table(noNA_birth$multibirth)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$multibirth), table(noNA_birth$multibirth))
## X-squared = 0.45282, df = 1, p-value = 0.501
chisq.test(rbind(table(abcd_birth$csection), table(noNA_birth$csection)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$csection), table(noNA_birth$csection))
## X-squared = 0.095388, df = 1, p-value = 0.7574
chisq.test(rbind(table(abcd_birth$exp.smoking), table(noNA_birth$exp.smoking)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$exp.smoking), table(noNA_birth$exp.smoking))
## X-squared = 11.329, df = 1, p-value = 0.0007629
chisq.test(rbind(table(abcd_birth$exp.alcohol), table(noNA_birth$exp.alcohol)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$exp.alcohol), table(noNA_birth$exp.alcohol))
## X-squared = 0.17039, df = 1, p-value = 0.6798
chisq.test(rbind(table(abcd_birth$exp.drugs), table(noNA_birth$exp.drugs)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$exp.drugs), table(noNA_birth$exp.drugs))
## X-squared = 8.3809, df = 1, p-value = 0.003792
chisq.test(rbind(table(abcd_birth$exp.meds), table(noNA_birth$exp.meds)))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: rbind(table(abcd_birth$exp.meds), table(noNA_birth$exp.meds))
## X-squared = 0, df = 1, p-value = 1
# two-sample t-tests
t.test(abcd_birth$cbclatt, noNA_birth$cbclatt)
##
## Welch Two Sample t-test
##
## data: abcd_birth$cbclatt and noNA_birth$cbclatt
## t = 3.7055, df = 15649, p-value = 0.0002117
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.09402692 0.30521350
## sample estimates:
## mean of x mean of y
## 3.073584 2.873964
t.test(abcd_birth$deliv.totalcomps, noNA_birth$deliv.totalcomps)
##
## Welch Two Sample t-test
##
## data: abcd_birth$deliv.totalcomps and noNA_birth$deliv.totalcomps
## t = 0.61584, df = 15499, p-value = 0.538
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01543542 0.02957794
## sample estimates:
## mean of x mean of y
## 0.3700051 0.3629338
t.test(abcd_birth$gest.totalcomps, noNA_birth$gest.totalcomps)
##
## Welch Two Sample t-test
##
## data: abcd_birth$gest.totalcomps and noNA_birth$gest.totalcomps
## t = 1.2747, df = 15574, p-value = 0.2024
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01084540 0.05118121
## sample estimates:
## mean of x mean of y
## 0.6673507 0.6471828
t.test(abcd_birth$prem_wks, noNA_birth$prem_wks)
##
## Welch Two Sample t-test
##
## data: abcd_birth$prem_wks and noNA_birth$prem_wks
## t = 1.6468, df = 15774, p-value = 0.09962
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.009229511 0.106248069
## sample estimates:
## mean of x mean of y
## 0.7218267 0.6733174
t.test(abcd_birth$lower.bw, noNA_birth$lower.bw)
##
## Welch Two Sample t-test
##
## data: abcd_birth$lower.bw and noNA_birth$lower.bw
## t = 2.5635, df = 15557, p-value = 0.01037
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.005862904 0.043957085
## sample estimates:
## mean of x mean of y
## -1.520191e-16 -2.490999e-02
t.test(abcd_birth$nicu_days, noNA_birth$nicu_days)
##
## Welch Two Sample t-test
##
## data: abcd_birth$nicu_days and noNA_birth$nicu_days
## t = 0.89758, df = 15704, p-value = 0.3694
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06824719 0.18355232
## sample estimates:
## mean of x mean of y
## 0.8830003 0.8253478
t.test(abcd_birth$mumold, noNA_birth$mumold)
##
## Welch Two Sample t-test
##
## data: abcd_birth$mumold and noNA_birth$mumold
## t = 0.67046, df = 15580, p-value = 0.5026
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03395888 0.06926786
## sample estimates:
## mean of x mean of y
## 0.6175913 0.5999368
t.test(abcd_birth$mumyoung, noNA_birth$mumyoung)
##
## Welch Two Sample t-test
##
## data: abcd_birth$mumyoung and noNA_birth$mumyoung
## t = 2.4604, df = 16143, p-value = 0.01389
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.005013307 0.044293613
## sample estimates:
## mean of x mean of y
## 0.1588392 0.1341858
t.test(abcd_birth$dadold, noNA_birth$dadold)
##
## Welch Two Sample t-test
##
## data: abcd_birth$dadold and noNA_birth$dadold
## t = 0.68255, df = 15528, p-value = 0.4949
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06605541 0.13663588
## sample estimates:
## mean of x mean of y
## 1.505573 1.470282
t.test(abcd_birth$dadyoung, noNA_birth$dadyoung)
##
## Welch Two Sample t-test
##
## data: abcd_birth$dadyoung and noNA_birth$dadyoung
## t = 0.88171, df = 15741, p-value = 0.3779
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007413722 0.019536690
## sample estimates:
## mean of x mean of y
## 0.07589428 0.06983279
SENS #2. ~85% of respondants were the biological mother. Are results being swayed by non-biological mothers reporting on: obstetric complications, substance-use during pregnancy, etc.?
abcd_mum <- abcd_m[abcd_m$Rship2child=="BioMom",]
mumsonly <- subset(abcd_mum, select=-c(SE.singleparent, lower.income, lower.edu,
parent.any.n, parent.att,
Rship2child, syn_attention_t))
#mumsonly.mod <- linearEN(mumsonly, runs, cvs)
mumsonly.Bs <- mumsonly.mod[["Bs"]] # what to plot?
thresh = 95
ggplot(data=mumsonly.Bs[mumsonly.Bs$chosenpc>=thresh,])+
geom_col(aes(x=reorder(predictor, mean.B), y= mean.B), width=.90, fill="steelblue", alpha=0.9)+
geom_linerange(aes(x=reorder(predictor, mean.B) , ymin=cilo, ymax=cihi), alpha=0.5, colour="steelblue4")+
geom_text(aes(x=reorder(predictor, mean.B), y=0, label=predictor, hjust=sign),
vjust= 0.5, size=4)+
labs(x="", y="mean B", title = "Bio mum's Only", subtitle = paste0("Mean R-squared = ", round(as.numeric(mumsonly.mod["Rsq"]), digits=2), "%" ) )+
theme_minimal()+
coord_flip()+
theme_classic()+
theme(legend.position="none",
plot.title = element_text(hjust=0.5, face="bold"),
plot.subtitle = element_text(hjust=0.5),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank())
SENS #3. It may be clinically useful to see how well we can predict clinically-significant attention problems, rather than increasing scores on the attention problem scale.
Choosing a cut-off (based on literature):
* For a mixed group of clinic-referred and general community children, Lampert et al. (2004) reported an optimum cutoff (raw score of 9)
* clinic-referred sample (best cutoff for ADHD = 9; AUC = 0.78; sensitivity = 75%; specificity = 68%)
* T >= 65 A CUT-OFF FOR “DEVIANT” (ACHENBACH ET AL., 2003)
* T > 62 WAS OPTIMAL CUT-OFF SCORE TO DISTINGUIGH ADHD FROM NO ADHD (GOMEZ ET AL., 2019)
* Eiraldi et al. (2000), also for a clinic-referred sample, reported sensitivity and specificity values of 78% and 89%, respectively, at a cutoff score of T = 65.
* Tehrani-Doost et al. (2011) found a lower optimal cut-off for predicting ADHD: raw scores of 6.5-7.5
# compare t-score and raw score cut off
ggplot(data=abcd)+
geom_smooth(aes(x=syn_attention_r, y = syn_attention_t), formula=y~poly(x,2), method="lm")+
scale_x_continuous(name="CBCL attention problems",
breaks=c(0:20))+
geom_hline(yintercept= 65,linetype="dashed")+
geom_hline(yintercept= 63.8,linetype="dotted", colour="blue")+
geom_vline(xintercept= 9.6,linetype="dashed")+
geom_vline(xintercept= 9,linetype="dotted", colour="blue")
# view cut-off of 9 imposed on the continous distibution of scores
ggplot(data=abcd_m)+
geom_histogram(aes(x=cbclatt), binwidth = 1, fill="purple", colour="black")+
geom_vline(xintercept=9)+
theme_minimal()
abcd_bin <- abcd_birth
# dichotomize raw score (use cut-off of 9 on the raw scale)
abcd_bin$cbcl.bin <- ifelse(abcd_bin$cbclatt>=9, 1, 0)
table(abcd_bin$cbcl.bin) %>%
prop.table() # 907 kids (~9%)
##
## 0 1
## 0.90907268 0.09092732
# place this binary outcome var in column 2 and remove continuous var
abcd_bin <- move_columns(abcd_bin, cbcl.bin, .after = "myid")
abcd_bin <- subset(abcd_bin, select= -c(cbclatt))
# run elastic net (note the reduction of CV folds from 5 to 4; but increase in number of outer folds)
#logistic.mod <- balanced.logisticEN(abcd_bin, 25, 4)
Bs <- logistic.mod[["Bs"]] # Bs, selection frequency, etc
t(logistic.mod[["summary"]])
## [,1]
## sample_n 9975.00
## num.runs 100.00
## num.inputs 40.00
## num.meetingthreshfreq 5.00
## intercept -0.25
## mean.auc 67.39
## sd.auc 1.16
## aucCIlo 65.37
## aucCIhi 69.62
## mean.sensit 0.60
## mean.specif 0.63
# there were 227 children in each test-set, ~50% were "cases" (n between 100 and 120)
# saved: true positve rate & true negative rate
# calculate: fnr, fpr, ppv, npv
# also observe the alpha value for each run, it may influence discrimination
a<-as.data.frame(logistic.mod[["Bs"]])
b<-names(a)
b<-b[str_detect(b,"run")]
c<-NA
for (x in 1:100){
c[x]<-str_sub(b[x], 2, (str_locate(b[x], "r")[1]-1) )
}
log.hx <- data.frame(true.pos= logistic.mod[["log"]][["test_tpr"]],
true.neg= logistic.mod[["log"]][["test_tnr"]],
n.cases= logistic.mod[["log"]][["testn_cases"]],
n.controls= logistic.mod[["log"]][["testn"]]-
logistic.mod[["log"]][["testn_cases"]],
alpha= c)
log.hx$true.pos.n <- log.hx$true.pos*log.hx$n.cases
log.hx$true.neg.n <- log.hx$true.neg*log.hx$n.controls
log.hx$false.pos.n <- log.hx$n.controls-log.hx$true.neg.n
log.hx$false.neg.n <- log.hx$n.cases-log.hx$true.pos.n
log.hx$ppv <- round((log.hx$true.pos.n / (log.hx$true.pos.n + log.hx$false.pos.n)),2)
log.hx$npv <- round((log.hx$true.neg.n / (log.hx$true.neg.n + log.hx$false.neg.n)),2)
# ppv = % of individuals testing positive that really are positive
# npv = % of individuals testing negative are truly negative
mean(log.hx$ppv, na.rm=T)
## [1] 0.6307071
mean(log.hx$npv, na.rm=T)
## [1] 0.6123469
print(log.hx)
## true.pos true.neg n.cases n.controls alpha true.pos.n true.neg.n
## 1 0.13392857 0.9478261 112 115 0 15 109
## 2 0.73076923 0.6341463 104 123 0.75 76 78
## 3 0.63414634 0.7115385 123 104 0.1 78 74
## 4 0.50434783 0.7053571 115 112 0.45 58 79
## 5 0.60000000 0.6078431 125 102 0.8 75 62
## 6 0.59504132 0.6226415 121 106 0.6 72 66
## 7 0.64102564 0.6363636 117 110 0.35 75 70
## 8 0.65137615 0.6694915 109 118 0.75 71 79
## 9 0.52380952 0.7821782 126 101 0.75 66 79
## 10 0.74358974 0.4545455 117 110 0.8 87 50
## 11 0.52678571 0.6434783 112 115 0.4 59 74
## 12 0.77669903 0.4838710 103 124 0.25 80 60
## 13 0.66666667 0.4913793 111 116 0.45 74 57
## 14 0.57600000 0.7156863 125 102 0.35 72 73
## 15 0.59821429 0.5913043 112 115 0.45 67 68
## 16 0.69444444 0.5378151 108 119 0.25 75 64
## 17 0.61467890 0.5508475 109 118 0.45 67 65
## 18 0.77868852 0.5142857 122 105 0.65 95 54
## 19 0.72580645 0.4174757 124 103 0.75 90 43
## 20 0.52542373 0.7339450 118 109 0.55 62 80
## 21 0.66666667 0.5918367 129 98 0.2 86 58
## 22 0.60169492 0.7155963 118 109 0.85 71 78
## 23 0.70588235 0.4629630 119 108 0.85 84 50
## 24 0.70434783 0.5267857 115 112 0.6 81 59
## 25 0.72357724 0.4230769 123 104 0.35 89 44
## 26 0.64150943 0.4545455 106 121 0.65 68 55
## 27 0.59090909 0.6324786 110 117 0.15 65 74
## 28 0.51239669 0.6415094 121 106 0.35 62 68
## 29 0.54838710 0.6893204 124 103 0.35 68 71
## 30 0.60714286 0.6260870 112 115 0.05 68 72
## 31 0.50877193 0.6017699 114 113 0.2 58 68
## 32 0.58407080 0.6842105 113 114 0.6 66 78
## 33 0.47826087 0.7589286 115 112 0.05 55 85
## 34 0.52475248 0.7777778 101 126 0.15 53 98
## 35 0.78632479 0.4363636 117 110 0.5 92 48
## 36 0.61016949 0.6146789 118 109 0.7 72 67
## 37 0.49074074 0.6890756 108 119 0.35 53 82
## 38 0.62790698 0.7346939 129 98 0.4 81 72
## 39 0.50877193 0.7433628 114 113 0.95 58 84
## 40 1.00000000 0.0000000 104 123 0 104 0
## 41 0.79824561 0.4247788 114 113 0.15 91 48
## 42 0.51351351 0.6637931 111 116 0.05 57 77
## 43 0.50000000 0.7433628 114 113 0.15 57 84
## 44 0.63716814 0.6842105 113 114 0.35 72 78
## 45 0.58536585 0.6923077 123 104 0.15 72 72
## 46 0.64406780 0.5871560 118 109 0.15 76 64
## 47 0.68571429 0.6229508 105 122 0.15 72 76
## 48 0.52631579 0.6725664 114 113 0.45 60 76
## 49 0.48305085 0.7706422 118 109 0.9 57 84
## 50 0.57281553 0.7016129 103 124 0.7 59 87
## 51 0.53571429 0.6434783 112 115 0.8 60 74
## 52 0.78181818 0.3675214 110 117 0.25 86 43
## 53 0.53719008 0.7547170 121 106 0.25 65 80
## 54 0.67289720 0.6333333 107 120 0.1 72 76
## 55 0.54545455 0.7547170 121 106 0.05 66 80
## 56 0.46428571 0.8173913 112 115 0.05 52 94
## 57 0.65686275 0.6240000 102 125 0.45 67 78
## 58 0.70175439 0.4690265 114 113 0.25 80 53
## 59 0.56756757 0.5689655 111 116 1 63 66
## 60 0.62931034 0.5855856 116 111 0.35 73 65
## 61 0.55752212 0.6666667 113 114 0.15 63 76
## 62 0.49557522 0.8157895 113 114 0.15 56 93
## 63 0.61467890 0.6779661 109 118 0.25 67 80
## 64 0.66363636 0.6153846 110 117 0.1 73 72
## 65 0.60194175 0.6935484 103 124 0.55 62 86
## 66 0.62962963 0.6134454 108 119 0.65 68 73
## 67 0.68269231 0.6260163 104 123 0.4 71 77
## 68 0.33050847 0.7889908 118 109 0.05 39 86
## 69 0.00000000 1.0000000 119 108 0 0 108
## 70 0.33333333 0.8224299 120 107 0 40 88
## 71 0.50961538 0.7154472 104 123 0.5 53 88
## 72 1.00000000 0.0000000 112 115 0 112 0
## 73 0.72566372 0.4561404 113 114 0.25 82 52
## 74 0.40875912 0.8000000 137 90 0.2 56 72
## 75 0.68103448 0.6036036 116 111 0.15 79 67
## 76 0.70535714 0.5130435 112 115 0.3 79 59
## 77 0.57391304 0.5535714 115 112 0.2 66 62
## 78 0.59322034 0.6788991 118 109 0.2 70 74
## 79 0.73000000 0.4803150 100 127 0.75 73 61
## 80 0.53174603 0.8019802 126 101 0.15 67 81
## 81 0.55963303 0.7203390 109 118 0.9 61 85
## 82 0.57500000 0.6168224 120 107 0.75 69 66
## 83 0.62903226 0.7572816 124 103 0.1 78 78
## 84 0.60869565 0.6607143 115 112 0.6 70 74
## 85 0.67272727 0.5641026 110 117 0.2 74 66
## 86 0.08403361 0.9722222 119 108 0.05 10 105
## 87 0.67567568 0.6896552 111 116 0.75 75 80
## 88 0.60784314 0.5760000 102 125 0.55 62 72
## 89 0.78991597 0.3703704 119 108 0.6 94 40
## 90 0.65137615 0.6694915 109 118 0.45 71 79
## 91 0.56896552 0.7027027 116 111 1 66 78
## 92 0.75652174 0.3660714 115 112 0.05 87 41
## 93 0.68421053 0.5681818 95 132 0.65 65 75
## 94 0.50833333 0.7383178 120 107 0.95 61 79
## 95 0.51327434 0.7192982 113 114 0.5 58 82
## 96 0.76991150 0.4298246 113 114 0.9 87 49
## 97 0.47058824 0.7592593 119 108 0.2 56 82
## 98 0.58771930 0.7168142 114 113 0.5 67 81
## 99 0.56880734 0.6610169 109 118 0.4 62 78
## 100 0.55963303 0.7288136 109 118 0.1 61 86
## false.pos.n false.neg.n ppv npv
## 1 6 97 0.71 0.53
## 2 45 28 0.63 0.74
## 3 30 45 0.72 0.62
## 4 33 57 0.64 0.58
## 5 40 50 0.65 0.55
## 6 40 49 0.64 0.57
## 7 40 42 0.65 0.62
## 8 39 38 0.65 0.68
## 9 22 60 0.75 0.57
## 10 60 30 0.59 0.62
## 11 41 53 0.59 0.58
## 12 64 23 0.56 0.72
## 13 59 37 0.56 0.61
## 14 29 53 0.71 0.58
## 15 47 45 0.59 0.60
## 16 55 33 0.58 0.66
## 17 53 42 0.56 0.61
## 18 51 27 0.65 0.67
## 19 60 34 0.60 0.56
## 20 29 56 0.68 0.59
## 21 40 43 0.68 0.57
## 22 31 47 0.70 0.62
## 23 58 35 0.59 0.59
## 24 53 34 0.60 0.63
## 25 60 34 0.60 0.56
## 26 66 38 0.51 0.59
## 27 43 45 0.60 0.62
## 28 38 59 0.62 0.54
## 29 32 56 0.68 0.56
## 30 43 44 0.61 0.62
## 31 45 56 0.56 0.55
## 32 36 47 0.65 0.62
## 33 27 60 0.67 0.59
## 34 28 48 0.65 0.67
## 35 62 25 0.60 0.66
## 36 42 46 0.63 0.59
## 37 37 55 0.59 0.60
## 38 26 48 0.76 0.60
## 39 29 56 0.67 0.60
## 40 123 0 0.46 NaN
## 41 65 23 0.58 0.68
## 42 39 54 0.59 0.59
## 43 29 57 0.66 0.60
## 44 36 41 0.67 0.66
## 45 32 51 0.69 0.59
## 46 45 42 0.63 0.60
## 47 46 33 0.61 0.70
## 48 37 54 0.62 0.58
## 49 25 61 0.70 0.58
## 50 37 44 0.61 0.66
## 51 41 52 0.59 0.59
## 52 74 24 0.54 0.64
## 53 26 56 0.71 0.59
## 54 44 35 0.62 0.68
## 55 26 55 0.72 0.59
## 56 21 60 0.71 0.61
## 57 47 35 0.59 0.69
## 58 60 34 0.57 0.61
## 59 50 48 0.56 0.58
## 60 46 43 0.61 0.60
## 61 38 50 0.62 0.60
## 62 21 57 0.73 0.62
## 63 38 42 0.64 0.66
## 64 45 37 0.62 0.66
## 65 38 41 0.62 0.68
## 66 46 40 0.60 0.65
## 67 46 33 0.61 0.70
## 68 23 79 0.63 0.52
## 69 0 119 NaN 0.48
## 70 19 80 0.68 0.52
## 71 35 51 0.60 0.63
## 72 115 0 0.49 NaN
## 73 62 31 0.57 0.63
## 74 18 81 0.76 0.47
## 75 44 37 0.64 0.64
## 76 56 33 0.59 0.64
## 77 50 49 0.57 0.56
## 78 35 48 0.67 0.61
## 79 66 27 0.53 0.69
## 80 20 59 0.77 0.58
## 81 33 48 0.65 0.64
## 82 41 51 0.63 0.56
## 83 25 46 0.76 0.63
## 84 38 45 0.65 0.62
## 85 51 36 0.59 0.65
## 86 3 109 0.77 0.49
## 87 36 36 0.68 0.69
## 88 53 40 0.54 0.64
## 89 68 25 0.58 0.62
## 90 39 38 0.65 0.68
## 91 33 50 0.67 0.61
## 92 71 28 0.55 0.59
## 93 57 30 0.53 0.71
## 94 28 59 0.69 0.57
## 95 32 55 0.64 0.60
## 96 65 26 0.57 0.65
## 97 26 63 0.68 0.57
## 98 32 47 0.68 0.63
## 99 40 47 0.61 0.62
## 100 32 48 0.66 0.64
# ppv = true.pos.n / all those testing pos
Compare R2 from original model and those from sensitivity tests
# RESTRICTED (SENS #1) VS ORIGINAL SAMPLE
t.test(birth.mod[["log"]][["test.rsq"]],noNA.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: birth.mod[["log"]][["test.rsq"]] and noNA.mod[["log"]][["test.rsq"]]
## t = 6.9427, df = 196.58, p-value = 5.471e-11
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01008317 0.01808433
## sample estimates:
## mean of x mean of y
## 0.08132108 0.06723732
# BIOLOGICAL MOTHERS (SENS #2) ONLY VS ORIGINAL SAMPLE
t.test(birth.mod[["log"]][["test.rsq"]],mumsonly.mod[["log"]][["test.rsq"]])
##
## Welch Two Sample t-test
##
## data: birth.mod[["log"]][["test.rsq"]] and mumsonly.mod[["log"]][["test.rsq"]]
## t = 0.19391, df = 196.67, p-value = 0.8464
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.003611422 0.004399086
## sample estimates:
## mean of x mean of y
## 0.08132108 0.08092725
abcd4cor <- subset(abcd_birth, select=-c(myid, cbclatt))
cormat <- round(cor(abcd4cor, use="pairwise.complete.obs"), digits=3)
lower <- cormat
lower[lower.tri(cormat)] <- NA
meltedcormat <- melt(lower, na.rm=T)
ggplot(data=meltedcormat, aes(Var2, Var1, fill=value))+
geom_tile(color="white") +
scale_fill_gradient2(low="blue", high="red", mid="white",
midpoint=0, limit = c(-.7,.7), space="Lab",
name="Pearson's r") +
theme_minimal()+
theme(axis.text.x = element_text(angle=60,
vjust=1 , hjust=1),
axis.title.x=element_blank(),
axis.title.y=element_blank())+
coord_fixed()+
coord_flip()+
geom_text(aes(Var2, Var1, label=round(value,2)), color="black", size=1)
Calculate reliability of CBCL attention problem scale
1 score <- 10 items
# import AP items
# ap = read.csv("DownloadedData/ABCDbeh1/CBCLattitems.csv") # usually run
ap <- ap[, c("src_subject_id","eventname", "Att1","Att2","Att3","Att4","Att5","Att6","Att7","Att8","Att9","Att10")]
ap <- ap[ap$eventname=="baseline_year_1_arm_1",]
ap <- ap[c(2:length(ap$Att1)),] # remove var descr
ap[ap==""] <- NA
ap <- droplevels(ap)
ap$Att1 <- as.numeric(ap$Att1)-1
ap$Att2 <- as.numeric(ap$Att2)-1
ap$Att3 <- as.numeric(ap$Att3)-1
ap$Att4 <- as.numeric(ap$Att4)-1
ap$Att5 <- as.numeric(ap$Att5)-1
ap$Att6 <- as.numeric(ap$Att6)-1
ap$Att7 <- as.numeric(ap$Att7)-1
ap$Att8 <- as.numeric(ap$Att8)-1
ap$Att9 <- as.numeric(ap$Att9)-1
ap$Att10 <- as.numeric(ap$Att10)-1
# limit sample to those included in your analysis (only 1 sibling was allowed from each hsd in my analysis sample)
toremove <- setdiff(as.character(ap$src_subject_id), as.character(abcd$src_subject_id))
ap2 <- c()
for (x in 1:length(ap$src_subject_id)){
if (!str_contains(toremove, as.character(ap$src_subject_id[x]))) {
ap2 <- rbind(ap2, ap[x,])
}
}
# calculate alpha & omega reliability estimates
library(psych)
omega(ap2[,3:length(ap2)], nfactors=1) # alpha=.86, omega total=.86
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.86
## G.6: 0.86
## Omega Hierarchical: 0.86
## Omega H asymptotic: 1
## Omega Total 0.86
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* h2 u2 p2
## Att1 0.48 0.23 0.77 1
## Att2 0.66 0.44 0.56 1
## Att3 0.83 0.70 0.30 1
## Att4 0.69 0.48 0.52 1
## Att5 0.45 0.20 0.80 1
## Att6 0.53 0.29 0.71 1
## Att7 0.64 0.41 0.59 1
## Att8 0.54 0.30 0.70 1
## Att9 0.83 0.68 0.32 1
## Att10 0.50 0.25 0.75 1
##
## With eigenvalues of:
## g F1*
## 4 0
##
## general/max 3.572921e+16 max/min = 1
## mean percent general = 1 with sd = 0 and cv of 0
## Explained Common Variance of the general factor = 1
##
## The degrees of freedom are 35 and the fit is 0.33
## The number of observations was 9974 with Chi Square = 3300.31 with prob < 0
## The root mean square of the residuals is 0.06
## The df corrected root mean square of the residuals is 0.06
## RMSEA index = 0.097 and the 10 % confidence intervals are 0.094 0.1
## BIC = 2978.04
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 35 and the fit is 0.33
## The number of observations was 9974 with Chi Square = 3300.31 with prob < 0
## The root mean square of the residuals is 0.06
## The df corrected root mean square of the residuals is 0.06
##
## RMSEA index = 0.097 and the 10 % confidence intervals are 0.094 0.1
## BIC = 2978.04
##
## Measures of factor score adequacy
## g F1*
## Correlation of scores with factors 0.94 0
## Multiple R square of scores with factors 0.89 0
## Minimum correlation of factor score estimates 0.79 -1
##
## Total, General and Subset omega for each subset
## g F1*
## Omega total for total scores and subscales 0.86 0.86
## Omega general for total scores and subscales 0.86 0.86
## Omega group for total scores and subscales 0.00 0.00
alpha(ap2[,3:length(ap2)]) # alpha=.86
##
## Reliability analysis
## Call: alpha(x = ap2[, 3:length(ap2)])
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.86 0.38 6.1 0.0019 0.31 0.35 0.35
##
## lower alpha upper 95% confidence boundaries
## 0.86 0.86 0.87
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## Att1 0.86 0.86 0.86 0.40 5.9 0.0019 0.0152 0.37
## Att2 0.84 0.84 0.85 0.37 5.3 0.0021 0.0151 0.34
## Att3 0.83 0.83 0.83 0.35 4.9 0.0024 0.0095 0.33
## Att4 0.84 0.84 0.84 0.37 5.3 0.0021 0.0127 0.35
## Att5 0.86 0.86 0.86 0.40 6.0 0.0019 0.0147 0.37
## Att6 0.85 0.85 0.85 0.39 5.7 0.0019 0.0160 0.35
## Att7 0.85 0.84 0.85 0.38 5.4 0.0021 0.0151 0.34
## Att8 0.85 0.85 0.85 0.39 5.7 0.0020 0.0160 0.35
## Att9 0.83 0.83 0.83 0.35 4.9 0.0024 0.0099 0.33
## Att10 0.86 0.85 0.85 0.39 5.8 0.0019 0.0158 0.35
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Att1 9974 0.55 0.56 0.48 0.45 0.248 0.49
## Att2 9974 0.72 0.70 0.66 0.62 0.475 0.61
## Att3 9974 0.84 0.81 0.82 0.77 0.484 0.65
## Att4 9974 0.74 0.71 0.68 0.64 0.396 0.62
## Att5 9974 0.48 0.55 0.48 0.42 0.072 0.29
## Att6 9974 0.60 0.61 0.55 0.49 0.296 0.53
## Att7 9974 0.69 0.68 0.63 0.60 0.299 0.55
## Att8 9974 0.59 0.61 0.54 0.51 0.163 0.42
## Att9 9974 0.84 0.81 0.81 0.76 0.539 0.68
## Att10 9974 0.53 0.60 0.53 0.46 0.102 0.33
##
## Non missing response frequency for each item
## 0 1 2 miss
## Att1 0.78 0.20 0.03 0
## Att2 0.58 0.36 0.06 0
## Att3 0.60 0.31 0.09 0
## Att4 0.68 0.25 0.07 0
## Att5 0.94 0.06 0.01 0
## Att6 0.74 0.22 0.04 0
## Att7 0.75 0.21 0.04 0
## Att8 0.86 0.12 0.02 0
## Att9 0.57 0.33 0.11 0
## Att10 0.91 0.08 0.01 0
# repeat using a different package
library(ltm)
cronbach.alpha(ap2[,3:length(ap2)], CI=TRUE)[["alpha"]] #.86
## [1] 0.8614551