Maternal Anxiety Variable Selection and Score for PACE

setwd("C:/Users/ca16591/Dropbox/Bristol") #Oakfield

#Load and install packages
install.packages("plyr")
install.packages("dplyr")
library(plyr)
library(dplyr)
install.packages("robustHD")
install.packages("devtools")
library(devtools)
devtools::install_github("explodecomputer/alspac")
library(alspac)

setDataDir("R:/Data/Current/Quest/Mother") # to access the 'b' and 'c' questionnaire data
#setDataDir("R:/Data") # to access to 'h' questionnaire data, which I extracted with the Shiny App and saved as a .csv (see below)

##########################
#Read in the 'b' variables
#Code the 'b' variables ALSPAC-specific scoring
##########################
require(foreign)
b<-read.spss("R://data/Current/Quest/Mother/b_4d.sav",to.data.frame = TRUE)
attr(b, "variable.labels")
b_selection<-b[,c('aln','b328','b330','b333','b336','b339','b342','b344','b347','b351')]
#Code according to the 'B Doc': 
#d1 (b328), d15 (b342), d17 (b344)==(1,2=2)(3,4=0); 
#d3 (b330), d6 (b333), d12 (b339), d20 (b347)==(1,2=2)(3=1)(4=0)
#d9 (b336)==(1,2,3=2)(4=0)

b_selection[,2] <- ifelse(b_selection[,2] == "Never", 0, ifelse(b_selection[,2] == "Not V often", 0, ifelse(b_selection[,2] == "Often", 2, ifelse(b_selection[,2] == "V often", 2, NA))))
b_selection[,3] <- ifelse(b_selection[,3] == "Never", 0, ifelse(b_selection[,3] == "Not V often", 1, ifelse(b_selection[,3] == "Often", 2, ifelse(b_selection[,3] == "V often", 2, NA))))
b_selection[,4] <- ifelse(b_selection[,4] == "Never", 0, ifelse(b_selection[,4] == "Not V often", 1, ifelse(b_selection[,4] == "Often", 2, ifelse(b_selection[,4] == "V often", 2, NA))))
b_selection[,5] <- ifelse(b_selection[,5] == "Never", 0, ifelse(b_selection[,5] == "Not V often", 2, ifelse(b_selection[,5] == "Often", 2, ifelse(b_selection[,5] == "V often", 2, NA))))
b_selection[,6] <- ifelse(b_selection[,6] == "Never", 0, ifelse(b_selection[,6] == "Not V often", 1, ifelse(b_selection[,6] == "Often", 2, ifelse(b_selection[,6] == "V often", 2, NA))))
b_selection[,7] <- ifelse(b_selection[,7] == "Never", 0, ifelse(b_selection[,7] == "Not V often", 0, ifelse(b_selection[,7] == "Often", 2, ifelse(b_selection[,7] == "V often", 2, NA))))
b_selection[,8] <- ifelse(b_selection[,8] == "Never", 0, ifelse(b_selection[,8] == "Not V often", 0, ifelse(b_selection[,8] == "Often", 2, ifelse(b_selection[,8] == "V often", 2, NA))))
b_selection[,9] <- ifelse(b_selection[,9] == "Never", 0, ifelse(b_selection[,9] == "Not V often", 1, ifelse(b_selection[,9] == "Often", 2, ifelse(b_selection[,9] == "V often", 2, NA))))

#Remove 'NAs' to test whether the complete 'b' cases (answered all 'b' questions) adds up bo b351: they do! (11799=TRUE; no false in the table)
b_selection_complete=na.omit(b_selection)
b_selection_complete$b351_new=as.numeric(as.character(b_selection_complete$b351))
table(b_selection_complete$b351_new)

b_selection_complete$test_b <- rowSums(b_selection_complete[,2:9])
table(b_selection_complete$test_b==b_selection_complete$b351_new)

##########################
#Read in the 'c' variables
#Code the 'c' variables ALSPAC-specific scoring
##########################
c <- read.spss("R://data/Current/Quest/Mother/c_7d.sav",to.data.frame = TRUE)
attr(c, "variable.labels")
c_selection <- c[,c('aln','c550','c552','c555','c558','c561','c564','c566','c569', 'c573')]
table(c_selection$c573)
c_selection$c573 <- revalue(c_selection$c573, c("not anxious"="0", "very anxious"="16"))
head(c_selection[,2], n=30)
colnames(c_selection)

#Set the 'c' variables up according to the C Doc: F1 (c550), F15 (c564), F17 (c566)==(1,2=2)(3=1)(4=0); 
#F3 (c552), F6 (c555), F12 (c561), F20 (c569)==(1,2=2)(3=1)(4=0); 
#F9 (c558)==(1,2,3=2)(4=0)

c_selection[,2] <- ifelse(c_selection[,2] == "Never", 0, ifelse(c_selection[,2] == "Not often", 0, ifelse(c_selection[,2] == "Often", 2, ifelse(c_selection[,2] == "V often", 2, NA))))
c_selection[,3] <- ifelse(c_selection[,3] == "Never", 0, ifelse(c_selection[,3] == "Not often", 1, ifelse(c_selection[,3] == "Often", 2, ifelse(c_selection[,3] == "V often", 2, NA))))
c_selection[,4] <- ifelse(c_selection[,4] == "Never", 0, ifelse(c_selection[,4] == "Not often", 1, ifelse(c_selection[,4] == "Often", 2, ifelse(c_selection[,4] == "V often", 2, NA))))
c_selection[,5] <- ifelse(c_selection[,5] == "Never", 0, ifelse(c_selection[,5] == "Not often", 2, ifelse(c_selection[,5] == "Often", 2, ifelse(c_selection[,5] == "V often", 2, NA))))
c_selection[,6] <- ifelse(c_selection[,6] == "Never", 0, ifelse(c_selection[,6] == "Not often", 1, ifelse(c_selection[,6] == "Often", 2, ifelse(c_selection[,6] == "V often", 2, NA))))
c_selection[,7] <- ifelse(c_selection[,7] == "Never", 0, ifelse(c_selection[,7] == "Not often", 0, ifelse(c_selection[,7] == "Often", 2, ifelse(c_selection[,7] == "V often", 2, NA))))
c_selection[,8] <- ifelse(c_selection[,8] == "Never", 0, ifelse(c_selection[,8] == "Not often", 0, ifelse(c_selection[,8] == "Often", 2, ifelse(c_selection[,8] == "V often", 2, NA))))
c_selection[,9] <- ifelse(c_selection[,9] == "Never", 0, ifelse(c_selection[,9] == "Not often", 1, ifelse(c_selection[,9] == "Often", 2, ifelse(c_selection[,9] == "V often", 2, NA))))

##########################
#Test whether the complete cases (answered all 'c' questions) adds up to c573: they do! (10954=TRUE; no false in the table)
##########################
c_selection_complete=na.omit(c_selection)
c_selection_complete$c573_new=as.numeric(as.character(c_selection_complete$c573))
table(c_selection_complete$c573_new)

c_selection_complete$test_c <- rowSums(c_selection_complete[,2:9])
table(c_selection_complete$test_c==c_selection_complete$c573_new)

##########################
#Read in the 'h' variables from the .csv file, extracted from the Shiny App (pulling in the h_6c.sav file directly with R didn't work...)
#Code the 'h' variables ALSPAC-specific scoring
#from entire Crown Crisp (d1-d23), select anxiety questions: d1 + d3 + d6 + d9 + d12 + d15 + d17 + d20 

#This code is greened out because it didn't work
#require(foreign)
#h <- read.spss("R://data/Current/Quest/Mother/h_6c.sav",to.data.frame = TRUE)
#attr(h, "variable.labels")
#h_selection <- h[,h('aln','h155','h157','h160','h163','h166','h169','h171','h174')]

##########################
setDataDir("R:/Data") # On my work PC
h_selection <- extractWebOutput("anxiety_vars_H_data-2018-02-15.csv")
colnames(h_selection)

#recode d1: h155
h_selection[,2] <- ifelse(h_selection[,2] == "Never", 0, ifelse(h_selection[,2] == "Not very often", 0, ifelse(h_selection[,2] == "Often", 2, ifelse(h_selection[,2] == "Very often", 2, NA))))
head(h_selection$h155, n=20)

#recode d3: h157
h_selection[,3] <- ifelse(h_selection[,3] == "Never", 0, ifelse(h_selection[,3] == "Not very often", 1, ifelse(h_selection[,3] == "Often", 2, ifelse(h_selection[,3] == "Very often", 2, NA))))
head(h_selection$h157, n=20)

#recode d6: h160
h_selection[,4] <- ifelse(h_selection[,4] == "Never", 0, ifelse(h_selection[,4] == "Not very often", 1, ifelse(h_selection[,4] == "Often", 2, ifelse(h_selection[,4] == "Very often", 2, NA))))
head(h_selection$h160, n=20)

#recode d9: h163
h_selection[,5] <- ifelse(h_selection[,5] == "Never", 0, ifelse(h_selection[,5] == "Not very often", 2, ifelse(h_selection[,5] == "Often", 2, ifelse(h_selection[,5] == "Very often", 2, NA))))
head(h_selection$h163, n=20)

#recode d12: h166
h_selection[,6] <- ifelse(h_selection[,6] == "Never", 0, ifelse(h_selection[,6] == "Not very often", 1, ifelse(h_selection[,6] == "Often", 2, ifelse(h_selection[,6] == "Very often", 2, NA))))
head(h_selection$h166, n=20)

#recode d15: h169
h_selection[,7] <- ifelse(h_selection[,7] == "Never", 0, ifelse(h_selection[,7] == "Not very often", 0, ifelse(h_selection[,7] == "Often", 2, ifelse(h_selection[,7] == "Very often", 2, NA))))
head(h_selection$h169, n=20)

#recode d17: h171
h_selection[,8] <- ifelse(h_selection[,8] == "Never", 0, ifelse(h_selection[,8] == "Not very often", 0, ifelse(h_selection[,8] == "Often", 2, ifelse(h_selection[,8] == "Very often", 2, NA))))
head(h_selection$h171, n=20)

#recode d20: h174
h_selection[,9] <- ifelse(h_selection[,9] == "Never", 0, ifelse(h_selection[,9] == "Not very often", 1, ifelse(h_selection[,9] == "Often", 2, ifelse(h_selection[,9] == "Very often", 2, NA))))
head(h_selection$h174, n=20)

head(h_selection)

##########################
#Merge the b_selection, c_selection, and h_selection anxiety variables
##########################

anxiety_pheno_b_c <- merge(b_selection,c_selection, by="aln")
head(anxiety_pheno_b_c)

anxiety_pheno_complete <- merge(anxiety_pheno_b_c,h_selection, by="aln")

myvars <- c( "aln",
             "b328", "b330", "b333", "b336", 
             "b339", "b342", "b344", "b347", 
             "c550", "c552", "c555", "c558", 
             "c561", "c564", "c566", "c569",
             "h155", "h157", "h160", "h166", 
             "h169", "h171", "h174", "h163")

anxiety_pheno_complete <- anxiety_pheno_complete[myvars]
head(anxiety_pheno_complete)

colnames(anxiety_pheno_complete)

##########################
#Create the continuous (general maternal) anxiety score (3 steps)
##########################
#Step 1
#Determine how many items were answered for each individual
#Name number of items answered "items_answered"
##########################
anxiety_pheno_complete$not_missing <- apply(anxiety_pheno_complete, 1, function(x) length(which(!is.na(x))))
head(anxiety_pheno_complete$not_missing, n=10)
head(anxiety_pheno_complete, n=50)

anxiety_pheno_complete$items_answered <- (anxiety_pheno_complete$not_missing-1)
head(anxiety_pheno_complete$items_answered)

##########################
#Step 2
#Sum across answered items
##########################
anxiety_pheno_complete$sum <- rowSums(anxiety_pheno_complete[,2:25], na.rm=TRUE)
head(anxiety_pheno_complete$sum, n=20)

##########################
#Step 3
#Divide "Sum" by "items_answered" and then multiple by number of items (24)
##########################
anxiety_pheno_complete$continuous_anx_score <- (anxiety_pheno_complete$sum/anxiety_pheno_complete$items_answered)*24
head(anxiety_pheno_complete$continuous_anx_score)
summary(anxiety_pheno_complete$continuous_anx_score)

##########################
#Find out if there are any 'NAs'--mums included but who don't have an anxiety score
##########################
dim(anxiety_pheno_complete)
str(anxiety_pheno_complete)
missings <- anxiety_pheno_complete[which(anxiety_pheno_complete$continuous_anx_score=='NA'),] # 2 missing

no_score_ALNs<-anxiety_pheno_complete[which(is.na(anxiety_pheno_complete$continuous_anx_score)),] # aln==36867, 45286

##########################
#Find the 85th percentile for (maternal general) anxiety
##########################
quantile(anxiety_pheno_complete$continuous_anx_score, c(.85), na.rm=TRUE)

##########################
#Make the dichotomous (maternal general anxiety) variable
#(where highest-scoring 15% are the high-anxiety category)

anxiety_pheno_complete$dichotomous_anx_score <- ifelse(anxiety_pheno_complete$continuous_anx_score>=24, 1, 0)
table(anxiety_pheno_complete$dichotomous_anx_score)
dataGeneralAnx <- anxiety_pheno_complete

##########################
#Dichotomous Anxiety Table for Markdown
#(before removing non-consents and before winsorizing)
##########################

Dichotomous Score | Low | High| | 7789| 1485|

##########################
#Pull in the anxiety and anti-depressant medication variables
#medication use 18w(=B) or 32w(=C) gestation, for completeness added 8w(=E) and 8m(=F). PACE does not ask for the E/F questions
#anxiolytics
#"b106","b107","c093","f060","f062","f060a","f062a"
#antidepressants
#"b122","b123","c101",'e326','e327',"f063","f063a"
###########################
setDataDir("R:/Data") # On my work PC
meds_selection <- extractWebOutput("anxioletics_antidepression_meds_ALSPAC_pace.csv")
colnames(meds_selection)

head(meds_selection)
table(meds_selection$b106)
meds_selection$b106_any <- ifelse(meds_selection[,3] == "Missing", 0, ifelse(meds_selection[,3] == "not at all", 0, 1))
head(meds_selection$b106_any, n=20)

table(meds_selection$b107)
meds_selection$b107_any <- ifelse(meds_selection[,4] == "Missing", 0, ifelse(meds_selection[,4] == "No", 0, 1))
head(meds_selection$b107_any, n=20)

table(meds_selection$b122)
meds_selection$b122_any <- ifelse(meds_selection[,5] == "Missing", 0, ifelse(meds_selection[,5] == "not at all", 0, 1))
head(meds_selection$b122_any, n=20)

table(meds_selection$b123)
meds_selection$b123_any <- ifelse(meds_selection[,6] == "Missing", 0, ifelse(meds_selection[,6] == "No", 0, 1))
head(meds_selection$b123_any, n=20)

table(meds_selection$c093)
meds_selection$c093_any <- ifelse(meds_selection[,7] == "Y", 1, 0)
head(meds_selection$c093_any, n=20)
table(meds_selection$c093_any)

colnames(meds_selection)

#FTG?
table(meds_selection$c101)
meds_selection$c101_any <- ifelse(meds_selection[,8] == "Y", 1, 0)
head(meds_selection$c101_any, n=20)
table(meds_selection$c101_any)

table(meds_selection$e326)
meds_selection$e326_any <- ifelse(meds_selection[,9] == "Almost daily" | meds_selection[,9] == "SMTS", 1, 0)
head(meds_selection$e326_any, n=20)
table(meds_selection$e326_any)

table(meds_selection$e327)
meds_selection$e327_any <- ifelse(meds_selection[,10] == "Yes", 1, 0)
head(meds_selection$e327_any, n=20)
table(meds_selection$e327_any)

table(meds_selection$f060)
meds_selection$f060_any <- ifelse(meds_selection[,11] == "Daily" | meds_selection[,11] =="Often"| meds_selection[,11] == "SMTS", 1, 0)
head(meds_selection$f060_any, n=20)
table(meds_selection$f060_any)

table(meds_selection$f060a)
meds_selection$f060a_any <- ifelse(meds_selection[,12] == "Yes", 1, 0)
head(meds_selection$f060a_any, n=20)
table(meds_selection$f060a_any)

table(meds_selection$f062)
meds_selection$f062_any <- ifelse(meds_selection[,13] == "Daily" | meds_selection[,13] =="Often"| meds_selection[,13] == "SMTS", 1, 0)
head(meds_selection$f062_any, n=20)
table(meds_selection$f062_any)

table(meds_selection$f062a)
meds_selection$f062a_any <- ifelse(meds_selection[,14] == "Yes", 1, 0)
head(meds_selection$f062a_any, n=20)
table(meds_selection$f062a_any)

table(meds_selection$f063)
meds_selection$f063_any <- ifelse(meds_selection[,15] == "Daily" | meds_selection[,15] =="Often"| meds_selection[,15] == "SMTS", 1, 0)
head(meds_selection$f063_any, n=20)
table(meds_selection$f063_any)

table(meds_selection$f063a)
meds_selection$f063a_any <- ifelse(meds_selection[,16] == "Yes",1, 0)
head(meds_selection$f063a_any, n=20)
table(meds_selection$f063a_any)

head(meds_selection, n=70)

str(meds_selection)

##########################
#Create binary indication of any psychiatric medication during pregnancy: 947 mums (before removing non-consents) were on meds
##########################
colnames(meds_selection)
meds_selection$any_medication <- rowSums(meds_selection[,17:31], na.rm=TRUE)
table(meds_selection$any_medication)
meds_selection$any_medication_binary <- ifelse(meds_selection$any_medication !=0,1,0)
table(meds_selection$any_medication_binary)

Maternal Anxiety Variable Selection and Score for PACE

Charleen D. Adams

24 February 2018