#Load and install packages
library(plyr)
library(dplyr)
install.packages("robustHD")
install.packages("devtools")
library(devtools)
devtools::install_github("explodecomputer/alspac")
library(alspac)

ALSPAC data

#Searched for and loaded the anxiety variables via the shiny app
 <https://alspac-example.shinyapps.io/alspac-dt/>

#Got the specific anxiety variables: 
#some needed recoding in an ALSPAC-specific manner as outlined on page 104 of:
# R:\Data\documentation\built_pdf\Quest\Mother\D1108_H.pdf
#These are the set that don't need ALSPAC-specific recoding
#They come from the data labeled 'Crown Crisp - Anxiety'
#Here are the variable names (provided by Lotte):

#'b328','b330','b333','b336','b339','b342','b344','b347',
#'c550','c552','c555','c558','c561','c564','c566','c569',
results_1 <- extractWebOutput("anxiety_vars_data-2018-02-15.csv")
head(results_1)
#I looked at how the variables were categorically coded and created in the H file first, then I tabled each variable 
#to see what categories were in the b and c variables, so that I could set the categories that didn't have scale data to 'NA'
#(the non-scale data categories were different in the b, c, and h variables)

Coded the ‘b’ variables

‘b328’,‘b330’,‘b333’,‘b336’,‘b339’,‘b342’,‘b344’,‘b347’,

#recode b238
table(results_1$b328)
results_1$b328_recoded  <- revalue(results_1$b328, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b328_recoded)
results_1$b328_recoded <- as.numeric(results_1$b328_recoded)


#recode b330
table(results_1$b330)
results_1$b330_recoded  <- revalue(results_1$b330, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b330_recoded)
results_1$b330_recoded <- as.numeric(results_1$b330_recoded)

#recode b333
table(results_1$b333)
results_1$b333_recoded  <- revalue(results_1$b333, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b333_recoded)
results_1$b333_recoded <- as.numeric(results_1$b333_recoded)

#recode b336
table(results_1$b336)
results_1$b336_recoded  <- revalue(results_1$b336, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b336_recoded)
results_1$b336_recoded <- as.numeric(results_1$b336_recoded)

#recode b339
table(results_1$b339)
results_1$b339_recoded  <- revalue(results_1$b339, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b339_recoded)
results_1$b339_recoded <- as.numeric(results_1$b339_recoded)

#recode b342
table(results_1$b342)
results_1$b342_recoded  <- revalue(results_1$b342, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b342_recoded)
results_1$b342_recoded <- as.numeric(results_1$b342_recoded)

#recode b344
table(results_1$b344)
results_1$b344_recoded  <- revalue(results_1$b344, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b344_recoded)
results_1$b344_recoded <- as.numeric(results_1$b344_recoded)

#recode b347
table(results_1$b347)
results_1$b347_recoded  <- revalue(results_1$b347, c("Never"="1", "Not V often"="2", "Often"="3", "V often"="4", "HaB short / YHL"=NA, "Missing"=NA,"Other"=NA, "DK"=NA))
table(results_1$b347_recoded)
results_1$b347_recoded <- as.numeric(results_1$b347_recoded)

Coded the ‘c’ variables

‘c550’,‘c552’,‘c555’,‘c558’,‘c561’,‘c564’,‘c566’,‘c569’

#recode c550
table(results_1$c550)
results_1$c550_recoded  <- revalue(results_1$c550, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c550_recoded)
results_1$c550_recoded <- as.numeric(results_1$c550_recoded)

#recode c552
table(results_1$c552)
results_1$c552_recoded  <- revalue(results_1$c552, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c552_recoded)
results_1$c552_recoded <- as.numeric(results_1$c552_recoded)

#recode c555
table(results_1$c555)
results_1$c555_recoded  <- revalue(results_1$c555, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c555_recoded)
results_1$c555_recoded <- as.numeric(results_1$c555_recoded)

#recode c558
table(results_1$c558)
results_1$c558_recoded  <- revalue(results_1$c558, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c558_recoded)
results_1$c558_recoded <- as.numeric(results_1$c558_recoded)

#recode c561
table(results_1$c561)
results_1$c561_recoded  <- revalue(results_1$c561, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c561_recoded)
results_1$c561_recoded <- as.numeric(results_1$c561_recoded)

#recode c564
table(results_1$c564)
results_1$c564_recoded  <- revalue(results_1$c564, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c564_recoded)
results_1$c564_recoded <- as.numeric(results_1$c564_recoded)

#recode c566
table(results_1$c566)
results_1$c566_recoded  <- revalue(results_1$c566, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c566_recoded)
results_1$c566_recoded <- as.numeric(results_1$c566_recoded)

#recode c569
table(results_1$c569)
results_1$c569_recoded  <- revalue(results_1$c569, c("Never"="1", "Not often"="2", "Often"="3", "V often"="4", "YP short / FTG"=NA, "Missing"=NA,"Other"=NA, "DK"=NA, "FTG Missing"=NA))
table(results_1$c569_recoded)
results_1$c569_recoded <- as.numeric(results_1$c569_recoded)

Coded the ‘h’ variables (ALSPAC-specific scoring)

from entire crown crisp (d1-d23), only selected anxiety questions d1 + d3 + d6 + d9 + d12 + d15 + d17 + d20

results_h <- extractWebOutput("anxiety_vars_H_data-2018-02-15.csv")

#recode d1: h155
head(results_h)
table(results_h$h155)
results_h$h155_recoded  <- revalue(results_h$h155, c("Never"=0, "Not very often"="0", "Often"="2", "Very often"="2", "Section D omitted"=NA, "Not stated"=NA))
head(results_h$h155_recoded, n=40 )
table(results_h$h155_recoded)
results_h$h155_recoded <- as.numeric(results_h$h155_recoded)

#recode d3: h157
head(results_h)
table(results_h$h157)
results_h$h157_recoded  <- revalue(results_h$h157, c("Never"="0", "Not very often"="1", "Often"="2", "Very often"="2", "Section D omitted"=NA, "Not stated"=NA, "See text"=NA))
head(results_h$h157_recoded, n=40 )
table(results_h$h157_recoded)
results_h$h157_recoded <- as.numeric(results_h$h157_recoded)

#recode d6: h160
head(results_h)
table(results_h$h160)
results_h$h160_recoded  <- revalue(results_h$h160, c("Never"="0", "Not very often"="1", "Often"="2", "Very often"="2", "Section D omitted"=NA, "Not stated"=NA))
head(results_h$h160_recoded, n=40 )
table(results_h$h160_recoded)
results_h$h160_recoded <- as.numeric(results_h$h160_recoded)

#recode d9: h163
head(results_h)
table(results_h$h163)
results_h$h163_recoded  <- revalue(results_h$h163, c("Never"="0", "Not very often"="2", "Often"="2", "Very often"="2", "Section D omitted"=NA, "Not stated"=NA))
head(results_h$h163_recoded, n=40 )
table(results_h$h163_recoded)
results_h$h163_recoded <- as.numeric(results_h$h163_recoded)

#recode d12: h166
head(results_h)
table(results_h$h166)
results_h$h166_recoded  <- revalue(results_h$h166, c("Never"="0", "Not very often"="1", "Often"="2", "Very often"="2","Section D omitted"=NA, "Not stated"=NA, "See text"=NA))
head(results_h$h166_recoded, n=40 )
table(results_h$h166_recoded)
results_h$h166_recoded <- as.numeric(results_h$h166_recoded)


#recode d15: h169
head(results_h)
table(results_h$h169)
results_h$h169_recoded  <- revalue(results_h$h169, c("Never"="0", "Not very often"="0", "Often"="2", "Very often"="2", "Section D omitted"=NA, "Not stated"=NA))
head(results_h$h169_recoded, n=40 )
table(results_h$h169_recoded)
results_h$h169_recoded <- as.numeric(results_h$h169_recoded)


#recode d17: h171
head(results_h)
table(results_h$h171)
results_h$h171_recoded  <- revalue(results_h$h171, c("Never"="0", "Not very often"="0", "Often"="2", "Very often"="2","Section D omitted"=NA, "Not stated"=NA, "See text"=NA))
head(results_h$h171_recoded, n=40 )
table(results_h$h171_recoded)
results_h$h171_recoded <- as.numeric(results_h$h171_recoded)


#recode d20: h174
head(results_h)
table(results_h$h174)
results_h$h174_recoded  <- revalue(results_h$h174, c("Never"="0", "Not very often"="1", "Often"="2", "Very often"="2", "Section D omitted"=NA, "Not stated"=NA))
head(results_h$h174_recoded, n=40 )
table(results_h$h174_recoded)
results_h$h174_recoded <- as.numeric(results_h$h174_recoded)

Merged the recoded anxiety variable files

pheno <- merge(results_1,results_h,by="alnqlet")
head(pheno)

myvars <- c( "alnqlet", "aln.x",
             "b328_recoded", "b330_recoded", "b333_recoded", "b336_recoded", 
             "b339_recoded", "b342_recoded", "b344_recoded", "b347_recoded", 
             "c550_recoded", "c552_recoded", "c555_recoded", "c558_recoded", 
             "c561_recoded", "c564_recoded", "c566_recoded", "c569_recoded",  
             "h155_recoded", "h157_recoded", "h160_recoded", "h166_recoded", 
             "h169_recoded", "h171_recoded", "h174_recoded", "h163_recoded")

newdata <- pheno[myvars]
head(newdata)

newdata=rename(newdata, c("aln.x"="aln"))

Made the weighted sum score

#Step 1
#Determine how many items were answered for each individual
#Name number of items answered "items_answered"
##########################
newdata$not_missing <- apply(newdata, 1, function(x) length(which(!is.na(x))))
head(newdata$not_missing, n=50)
head(newdata, n=50)

newdata$items_answered <- (newdata$not_missing-2)
head(newdata$items_answered)

#Step 2
#Sum across answered items
##########################
newdata$sum <- rowSums(newdata[,3:26], na.rm=TRUE)
head(newdata$sum, n=20)

#Step 3
#Divide "Sum" by "items_answered" and then multiple by number of items (24)
##########################
newdata$continuous_anx_score <- (newdata$sum/newdata$items_answered)*24
head(newdata$continuous_anx_score)
summary(newdata$continuous_anx_score)

Found the 85th percentile for maternal general anxiety

quantile(newdata$continuous_anx_score, c(.85), na.rm=TRUE) 

Made the dichotomous maternal general anxiety variable

where highest-scoring 15% are the high-anxiety category

newdata$dichotomous_anx_score <- ifelse(newdata$continuous_anx_score>=77, 1, 0)
table(newdata$dichotomous_anx_score)
dataGeneralAnx <- newdata

Dichotomous Anxiety

(before removing non-consents and before winsorizing)

Low High
8080 1484