05_Birthweight_PAA_explore.R

frankdavenport — Feb 4, 2014, 3:45 PM


#-------------------Base Setup--------------------------------------------------
rm(list=ls())
svnDir<-'/Users/frankdavenport/Education/R_Work/SVN/'
rcodeDir<-paste0(svnDir,'fd_FEWS/R_code/PAA_bweights/')
latexDir<-paste0(svnDir,'LaTex/')
rdatDir<-'~/Dropbox/FEWS_clim_low_bweight/'
dhsDir<-'~/Google Drive/dhs/'
chirpsDir<-'~/Google Drive/chirps/'
datDir<-paste0(dhsDir,'all/')

setwd(datDir)


library(plyr)
library(reshape2)
library(ggplot2)
library(scales)
library(car)
library(stringr)
#library(dplyr)
#===============================================================================
load('all_children_selectvars.Rdata')

cvars<-c('birth_weight','birth_wgth_recall','.id','case','Cluster_Number','urban','Index_to_Birth_History','twin')

d<-d[,cvars]
names(d)[1]<-'bw'
names(d)[2]<-'recall'

d<-d[!is.na(d$bw),]
d<-droplevels(d[d$bw<=6000,]) #above this is NAs or coding errors

d<-droplevels(d[d$twin=='Single birth',])

levels(d$recall)<-c('card','recall','recall','card')

d<-na.omit(d) #get rid of the missing recalls for now

d$country<-str_sub(d$.id,end=-5)
d$year<-str_sub(d$.id,start=-4)

#=====Make some plots

#----Histograms by country
pd<-d[,c('bw','country','year','urban','recall')]

lbw<-geom_vline(x=2500,linetype=2) #the low birthweight cut off line

p0<-ggplot(pd)+facet_wrap(~country)+theme_bw()+lbw+labs(xlab='Dotted line is 2500g')

#By Country
p1<-p0+geom_density(aes(x=bw))+labs(title='By Country')
p1

plot of chunk unnamed-chunk-1



#By Urban/rural
p2<-p0+ geom_density(aes(x=bw,color=urban))+labs(title='By Urban/Rural')
p2

plot of chunk unnamed-chunk-1


#By Year
p3<-p0+ geom_density(aes(x=bw,color=year))+labs(title='By Year')
p3

plot of chunk unnamed-chunk-1


#By Birthweight Record Type3
p4<-p0+ geom_density(aes(x=bw,color=recall))+labs(title='Birthweight from Card or Recollection')
p4

plot of chunk unnamed-chunk-1


rm(pd)

#----Load the Precip Data and Explore Correlations---------
load('child_to_precip_all.Rdata') #precip data

#Caclulate standard deviation over terms


dall<-merge(d,dcp,by.x=c('.id','case','Index_to_Birth_History','country'),by.y=c('.id','CASEID','MIDX','country'))

#rm(d,dcp)

pvars<-paste0('b',1:12) #precip vars
vars<-c('.id','country','year','bw','Cluster_Number','recall','bdate','urban',pvars)
dp<-dall[,vars]
dp<-na.omit(dp) #CHECK NAS ON PRECIP

rm(dall)

#-------------------Correlations by exposure Month---------
library(gdata)
gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

Attaching package: 'gdata'

The following object is masked from 'package:stats':

    nobs

The following object is masked from 'package:utils':

    object.size
pd<-dp[,c('country','bw',pvars)]
pd<-melt(pd,id.vars=c('country','bw'))

fun.cor<-function(df){
  #rho<-cor(df$bw,df$value)
  rho<-cor.test(df$bw,df$value)
  rho<-data.frame('correlation'=rho$estimate,'low'=rho$conf.int[1],'high'=rho$conf.int[2])
 }

pd<-ddply(pd,c('country','variable'),fun.cor)
pd$variable<-reorder.factor(pd$variable,new.order=rev(unique(pd$variable)))
names(pd)[2]<-'mons.before.birth'

#Does the ci cross 0?
for(i in 1:nrow(pd)){pd$sig[i]<-findInterval(0,pd$low[i]:pd$high[i])}
pd$significant<-ifelse(pd$sig==1,'No','Yes')


p5<-ggplot(pd)+geom_pointrange(aes(x=mons.before.birth,y=correlation,ymin=low,ymax=high,color=significant))
p5<-p5+facet_wrap(~country)+theme_bw()+geom_hline(y=0)
p5<-p5+labs(title='Correlations between Birthweigth and Total Monthly Precip\n(in each month prior to and including the birth month)',x='b1=birth month; b12=12 months before')
p5<-p5+theme(legend.position = "top")
p5

plot of chunk unnamed-chunk-1



#====================================================================================================================================
load('child_to_tmax_all.Rdata')


dallt<-merge(d,dct,by.x=c('.id','case','Index_to_Birth_History','country'),by.y=c('.id','CASEID','MIDX','country'))


pvars<-paste0('b',1:12) #precip vars
vars<-c('.id','country','year','bw','Cluster_Number','recall','bdate','urban',pvars)
dt<-dallt[,vars]
dt<-na.omit(dt) #CHECK NAS ON PRECIP

#--Remove clusters with out accurate gps info (already done in precip) #check this, no records deleted
#dp$clus_id<-paste0(dp$.id,dp$Cluster_Number)
#dt$clus_id<-paste0(dt$.id,dt$Cluster_Number)
#dt2<-droplevels(dt[dt$clus_id %in% dp$clus_id,])


rm(dallt)
#------------Look at a similar plot for temp
pdt<-dt[,c('country','bw',pvars)]
pdt$kid<-1:nrow(pdt)
pdt<-melt(pdt,id.vars=c('kid','country','bw'))
pdt$term<-recode(pdt$variable,"c('b1','b2','b3','b4')='t3';c('b5','b6','b7')='t2';c('b8','b9','b10')='t1';else='t0'")
pdt<-dcast(pdt,kid+country+bw~term,sum,na.rm=T)
pdt<-melt(pdt,id.vars=c('kid','country','bw'))
#pdt<-ddply(pdt,c('country','term'),sum(pdt$value))

fun.cor<-function(df){
  #rho<-cor(df$bw,df$value)
  rho<-cor.test(df$bw,df$value)
  rho<-data.frame('correlation'=rho$estimate,'low'=rho$conf.int[1],'high'=rho$conf.int[2])
}


pdt<-ddply(pdt,c('country','variable'),fun.cor)
#pdt$variable<-reorder.factor(pdt$variable,new.order=rev(unique(pdt$variable)))
names(pdt)[2]<-'terms.before.birth'

#Does the ci cross 0?
for(i in 1:nrow(pdt)){pdt$sig[i]<-findInterval(0,pdt$low[i]:pdt$high[i])}
pdt$significant<-ifelse(pdt$sig==1,'No','Yes')


p6<-ggplot(pdt)+geom_pointrange(aes(x=terms.before.birth,y=correlation,ymin=low,ymax=high,color=significant))
p6<-p6+facet_wrap(~country)+theme_bw()+geom_hline(y=0)
p6<-p6+labs(title='Correlations between Birthweigth and Count of days Above Mean(Tmax)\n(in each month prior to and including the birth month)',x='b1=birth month; b12=12 months before')
p6<-p6+theme(legend.position = "top")
p6

plot of chunk unnamed-chunk-1





# 
# pd<-d[,c('country','bw',pvars)]
# pd<-melt(pd,id.vars=c('country','bw'))
# pd$term<-recode(pd$variable,"c('b1','b2','b3','b4')='t3';c('b5','b6','b7')='t2';c('b8','b9','b10')='t1';else='t0'")
# 
# fun.cor.sd<-function(df){
#   #rho<-cor(df$bw,df$value)
#   std<-sd(df$value) #have to calculate this above for every child, either from dall or dcp
#   rho<-cor.test(df$bw,std)
#   rho<-data.frame('correlation'=rho$estimate,'low'=rho$conf.int[1],'high'=rho$conf.int[2])
# }
# 
# pd<-ddply(pd,c('country','term'),fun.cor.sd)
# 
# 
# #Does the ci cross 0?
# for(i in 1:nrow(pd)){pd$sig[i]<-findInterval(0,pd$low[i]:pd$high[i])}
# pd$significant<-ifelse(pd$sig==1,'No','Yes')
# 
# 
# p6<-ggplot(pd)+geom_pointrange(aes(x=term,y=correlation,ymin=low,ymax=high,color=significant))
# p6<-p6+facet_wrap(~country)+theme_bw()+geom_hline(y=0)
# p6<-p6+labs(title='Correlations between Birthweigth and Std. Dev of Precipination \n in each Trimester',x='t1=first trimester; t3=final trimester')
# p6<-p6+theme(legend.position = "top")
# p6
#