frankdavenport — Feb 4, 2014, 3:45 PM
#-------------------Base Setup--------------------------------------------------
rm(list=ls())
svnDir<-'/Users/frankdavenport/Education/R_Work/SVN/'
rcodeDir<-paste0(svnDir,'fd_FEWS/R_code/PAA_bweights/')
latexDir<-paste0(svnDir,'LaTex/')
rdatDir<-'~/Dropbox/FEWS_clim_low_bweight/'
dhsDir<-'~/Google Drive/dhs/'
chirpsDir<-'~/Google Drive/chirps/'
datDir<-paste0(dhsDir,'all/')
setwd(datDir)
library(plyr)
library(reshape2)
library(ggplot2)
library(scales)
library(car)
library(stringr)
#library(dplyr)
#===============================================================================
load('all_children_selectvars.Rdata')
cvars<-c('birth_weight','birth_wgth_recall','.id','case','Cluster_Number','urban','Index_to_Birth_History','twin')
d<-d[,cvars]
names(d)[1]<-'bw'
names(d)[2]<-'recall'
d<-d[!is.na(d$bw),]
d<-droplevels(d[d$bw<=6000,]) #above this is NAs or coding errors
d<-droplevels(d[d$twin=='Single birth',])
levels(d$recall)<-c('card','recall','recall','card')
d<-na.omit(d) #get rid of the missing recalls for now
d$country<-str_sub(d$.id,end=-5)
d$year<-str_sub(d$.id,start=-4)
#=====Make some plots
#----Histograms by country
pd<-d[,c('bw','country','year','urban','recall')]
lbw<-geom_vline(x=2500,linetype=2) #the low birthweight cut off line
p0<-ggplot(pd)+facet_wrap(~country)+theme_bw()+lbw+labs(xlab='Dotted line is 2500g')
#By Country
p1<-p0+geom_density(aes(x=bw))+labs(title='By Country')
p1
#By Urban/rural
p2<-p0+ geom_density(aes(x=bw,color=urban))+labs(title='By Urban/Rural')
p2
#By Year
p3<-p0+ geom_density(aes(x=bw,color=year))+labs(title='By Year')
p3
#By Birthweight Record Type3
p4<-p0+ geom_density(aes(x=bw,color=recall))+labs(title='Birthweight from Card or Recollection')
p4
rm(pd)
#----Load the Precip Data and Explore Correlations---------
load('child_to_precip_all.Rdata') #precip data
#Caclulate standard deviation over terms
dall<-merge(d,dcp,by.x=c('.id','case','Index_to_Birth_History','country'),by.y=c('.id','CASEID','MIDX','country'))
#rm(d,dcp)
pvars<-paste0('b',1:12) #precip vars
vars<-c('.id','country','year','bw','Cluster_Number','recall','bdate','urban',pvars)
dp<-dall[,vars]
dp<-na.omit(dp) #CHECK NAS ON PRECIP
rm(dall)
#-------------------Correlations by exposure Month---------
library(gdata)
gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
Attaching package: 'gdata'
The following object is masked from 'package:stats':
nobs
The following object is masked from 'package:utils':
object.size
pd<-dp[,c('country','bw',pvars)]
pd<-melt(pd,id.vars=c('country','bw'))
fun.cor<-function(df){
#rho<-cor(df$bw,df$value)
rho<-cor.test(df$bw,df$value)
rho<-data.frame('correlation'=rho$estimate,'low'=rho$conf.int[1],'high'=rho$conf.int[2])
}
pd<-ddply(pd,c('country','variable'),fun.cor)
pd$variable<-reorder.factor(pd$variable,new.order=rev(unique(pd$variable)))
names(pd)[2]<-'mons.before.birth'
#Does the ci cross 0?
for(i in 1:nrow(pd)){pd$sig[i]<-findInterval(0,pd$low[i]:pd$high[i])}
pd$significant<-ifelse(pd$sig==1,'No','Yes')
p5<-ggplot(pd)+geom_pointrange(aes(x=mons.before.birth,y=correlation,ymin=low,ymax=high,color=significant))
p5<-p5+facet_wrap(~country)+theme_bw()+geom_hline(y=0)
p5<-p5+labs(title='Correlations between Birthweigth and Total Monthly Precip\n(in each month prior to and including the birth month)',x='b1=birth month; b12=12 months before')
p5<-p5+theme(legend.position = "top")
p5
#====================================================================================================================================
load('child_to_tmax_all.Rdata')
dallt<-merge(d,dct,by.x=c('.id','case','Index_to_Birth_History','country'),by.y=c('.id','CASEID','MIDX','country'))
pvars<-paste0('b',1:12) #precip vars
vars<-c('.id','country','year','bw','Cluster_Number','recall','bdate','urban',pvars)
dt<-dallt[,vars]
dt<-na.omit(dt) #CHECK NAS ON PRECIP
#--Remove clusters with out accurate gps info (already done in precip) #check this, no records deleted
#dp$clus_id<-paste0(dp$.id,dp$Cluster_Number)
#dt$clus_id<-paste0(dt$.id,dt$Cluster_Number)
#dt2<-droplevels(dt[dt$clus_id %in% dp$clus_id,])
rm(dallt)
#------------Look at a similar plot for temp
pdt<-dt[,c('country','bw',pvars)]
pdt$kid<-1:nrow(pdt)
pdt<-melt(pdt,id.vars=c('kid','country','bw'))
pdt$term<-recode(pdt$variable,"c('b1','b2','b3','b4')='t3';c('b5','b6','b7')='t2';c('b8','b9','b10')='t1';else='t0'")
pdt<-dcast(pdt,kid+country+bw~term,sum,na.rm=T)
pdt<-melt(pdt,id.vars=c('kid','country','bw'))
#pdt<-ddply(pdt,c('country','term'),sum(pdt$value))
fun.cor<-function(df){
#rho<-cor(df$bw,df$value)
rho<-cor.test(df$bw,df$value)
rho<-data.frame('correlation'=rho$estimate,'low'=rho$conf.int[1],'high'=rho$conf.int[2])
}
pdt<-ddply(pdt,c('country','variable'),fun.cor)
#pdt$variable<-reorder.factor(pdt$variable,new.order=rev(unique(pdt$variable)))
names(pdt)[2]<-'terms.before.birth'
#Does the ci cross 0?
for(i in 1:nrow(pdt)){pdt$sig[i]<-findInterval(0,pdt$low[i]:pdt$high[i])}
pdt$significant<-ifelse(pdt$sig==1,'No','Yes')
p6<-ggplot(pdt)+geom_pointrange(aes(x=terms.before.birth,y=correlation,ymin=low,ymax=high,color=significant))
p6<-p6+facet_wrap(~country)+theme_bw()+geom_hline(y=0)
p6<-p6+labs(title='Correlations between Birthweigth and Count of days Above Mean(Tmax)\n(in each month prior to and including the birth month)',x='b1=birth month; b12=12 months before')
p6<-p6+theme(legend.position = "top")
p6
#
# pd<-d[,c('country','bw',pvars)]
# pd<-melt(pd,id.vars=c('country','bw'))
# pd$term<-recode(pd$variable,"c('b1','b2','b3','b4')='t3';c('b5','b6','b7')='t2';c('b8','b9','b10')='t1';else='t0'")
#
# fun.cor.sd<-function(df){
# #rho<-cor(df$bw,df$value)
# std<-sd(df$value) #have to calculate this above for every child, either from dall or dcp
# rho<-cor.test(df$bw,std)
# rho<-data.frame('correlation'=rho$estimate,'low'=rho$conf.int[1],'high'=rho$conf.int[2])
# }
#
# pd<-ddply(pd,c('country','term'),fun.cor.sd)
#
#
# #Does the ci cross 0?
# for(i in 1:nrow(pd)){pd$sig[i]<-findInterval(0,pd$low[i]:pd$high[i])}
# pd$significant<-ifelse(pd$sig==1,'No','Yes')
#
#
# p6<-ggplot(pd)+geom_pointrange(aes(x=term,y=correlation,ymin=low,ymax=high,color=significant))
# p6<-p6+facet_wrap(~country)+theme_bw()+geom_hline(y=0)
# p6<-p6+labs(title='Correlations between Birthweigth and Std. Dev of Precipination \n in each Trimester',x='t1=first trimester; t3=final trimester')
# p6<-p6+theme(legend.position = "top")
# p6
#