Set up root directory
knitr::opts_knit$set(root.dir = '/data/joy/BBL/')
Read in files
allSubjects <- read.csv("./projects/beardt2star/cohort_list_n1601.csv")
means_striatum <- read.csv("/data/joy/BBL/projects/beardt2star/sampleFiles/means/n1514_striatalMeans.csv")
b0validation <- read.csv("./studies/pnc/n1601_dataFreeze/neuroimaging/n1601_pnc_protocol_validation_params_status_20161220.csv")
health <- read.csv("./studies/pnc/n1601_dataFreeze/health/n1601_health_20161214.csv")
t1 <- read.csv("./studies/pnc/n1601_dataFreeze/neuroimaging/t1struct/n1601_t1QaData_v2.csv")
t2missing <- read.csv("./projects/beardt2star/scripts/logs/t2starMissingLog.csv")
cnb <- read.csv("./studies/pnc/n1601_dataFreeze/cnb/n1601_cnb_factor_scores_tymoore_20151006.csv") #kosha will provide update for this at some poitn in near future
clinical1 <- read.csv("./studies/pnc/n1601_dataFreeze/clinical/n1601_goassess_itemwise_bifactor_scores_20161219.csv")
clinical2 <- read.csv("./studies/pnc/n1601_dataFreeze/clinical/n1601_goassess_itemwise_corrtraits_scores_20161219.csv")
demos <- read.csv("./studies/pnc/n1601_dataFreeze/demographics/n1601_demographics_go1_20161212.csv")
Count striatal regions
print(paste("means_striatum has", dim(means_striatum)[1],"samples, with",dim(means_striatum)[2]-1,"regions in the striatum."))
## [1] "means_striatum has 1514 samples, with 8 regions in the striatum."
Subject level exclusions
Health exclude
health <- subset(health,select = c(bblid,ltnExcludev2))
means_health <- merge(means_striatum,health, by="bblid")
dataComb1 <- subset(means_health, ltnExcludev2 == 0)
dataComb1 <- subset(dataComb1, select=-c(ltnExcludev2))
print(paste("Health excluded means_striatum has", dim(dataComb1)[1],"samples, with",(dim(means_striatum)[1]-dim(dataComb1)[1]),"being excluded"))
## [1] "Health excluded means_striatum has 1203 samples, with 311 being excluded"
Structural exclude
t1 <- subset(t1,select = c(bblid,t1Exclude))
dataComb2 <- merge(dataComb1,t1, by="bblid")
dataComb2 <- subset(dataComb2, t1Exclude == 0)
dataComb2 <- subset(dataComb2, select=-c(t1Exclude))
print(paste("Structure excluded means_striatum has", dim(dataComb2)[1],"samples, with",(dim(dataComb1)[1]-dim(dataComb2)[1]),"being excluded"))
## [1] "Structure excluded means_striatum has 1156 samples, with 47 being excluded"
B0 exclude
b0validation <- subset(b0validation,select = c(bblid,b0ProtocolValidationStatus))
dataComb3 <- merge(dataComb2, b0validation, by="bblid")
dataComb3 <- subset(dataComb3, b0ProtocolValidationStatus == 1)
dataComb3 <- subset(dataComb3, select=-c(b0ProtocolValidationStatus))
print(paste("B0 excluded means_striatum has", dim(dataComb3)[1],"samples, with",(dim(dataComb2)[1]-dim(dataComb3)[1]),"being excluded"))
## [1] "B0 excluded means_striatum has 1147 samples, with 9 being excluded"
T2Star exclude
dataComb4 <- subset(dataComb3, is.element(bblid,t2missing$bblid) == FALSE)
print(paste("t2 exclused means_striatum has", dim(dataComb4)[1],"samples, with",(dim(dataComb3)[1]-dim(dataComb4)[1]),"being excluded"))
## [1] "t2 exclused means_striatum has 1147 samples, with 0 being excluded"
Regional exclusions
Negative values exclude
striatum_final <- subset(dataComb4,select = c(bblid,Right.Pallidum:Left.Putamen))
striatum_final[striatum_final < 0] <- NA
boxplot(striatum_final[,-1])

SD exclude
## Replace all values above 3 SD with NA
striatum_final_uplim <- apply(striatum_final[,2:9],2,function(x) mean(x,na.rm = T)+3*sd(x,na.rm=T))
for (x in 1:8){
striatum_final[,x+1] [striatum_final[,x+1] > striatum_final_uplim[x]] <- NA
}
boxplot(striatum_final[,-1])

## Replace all values below 3 SD with NA
striatum_final_lowlim <- apply(striatum_final[,2:9],2,function(x) mean(x,na.rm = T)-3*sd(x,na.rm=T))
for (x in 1:8){
striatum_final[,x+1] [striatum_final[,x+1] < striatum_final_lowlim[x]] <- NA
}
boxplot(striatum_final[,-1])
## get only bblids from old dataComb4
dataComb4 <- subset(dataComb4, select= bblid)
dataComb5 <- merge(striatum_final,dataComb4,by="bblid")
boxplot(dataComb5[,2:9])

Convert t2* values to r2*
r2vals <- (1/dataComb6[,2:9])
dataComb7 <- as.data.frame(cbind(r2vals,dataComb6[,c(1,10:56)]))
## move bblid, scanid to first two columns
dataComb8 <- dataComb7[,c(9,10,1:8,11:56)]
Final sample construction
# Write out final csv
write.csv(dataComb8, "/data/joy/BBL/projects/beardt2star/sampleFiles/r2star_finalSample_03152017.csv", row.names=FALSE)