# frankdavenport — Feb 7, 2014, 1:02 PM
#-------------------Base Setup--------------------------------------------------
# Start from an empty workspace so objects from an earlier session cannot leak in.
rm(list = ls())

# Machine-specific locations; every path is built from the roots below.
svnDir <- "/Users/frankdavenport/Education/R_Work/SVN/"
rcodeDir <- paste0(svnDir, "fd_FEWS/R_code/PAA_bweights/")
latexDir <- paste0(svnDir, "LaTex/")
rdatDir <- "~/Dropbox/FEWS_clim_low_bweight/"
dhsDir <- "~/Google Drive/dhs/"
chirpsDir <- "~/Google Drive/chirps/"
datDir <- paste0(dhsDir, "all/")

# The relative file names used later (load, read.csv, save) resolve against datDir.
setwd(datDir)
library(Biograph)
library(plyr)
library(reshape2)
library(stringr)
library(lubridate)
#> Attaching package: 'lubridate'
#> The following object is masked from 'package:plyr':
#>     here
library(car)
#library(dplyr)
#===============================================================================
# Load the saved workspace; everything below operates on the data frame `d`.
load("all_children_selectvars.Rdata")
# TODO: still need to add 92 months to the Ethiopia file,
#       see 02_..._LinkClusterEnvData

#----Birthweight
# Give the two birth-weight columns short names (columns are found by pattern).
names(d)[grepl("birth_weight", names(d))] <- "bw"
names(d)[grepl("birth_wgth_recall", names(d))] <- "recall"

#--Drop implausible weights and Twins
# Keep a row only when bw <= 6000 AND twin is 'Single birth'. Rows where either
# test is NA are dropped as well, which is what two chained subset() calls do.
d <- d[which(d[["bw"]] <= 6000 & d[["twin"]] == "Single birth"), , drop = FALSE]
d <- droplevels(d)

#--Recode the Recall Variable
# Levels are overwritten by position: the 1st and 4th become 'card',
# the 2nd and 3rd become 'recall'.
# NOTE(review): assumes `recall` has exactly four levels in the expected order
# after droplevels() - confirm against the data.
levels(d$recall) <- c("card", "recall", "recall", "card")
#-----Years in Residence---------------------
# Special codes per the original note: 95=Always, 96=Visitor, 97=Inconsistent.
d$years_in_residence <- as.numeric(as.character(d$years_in_residence))
# Anything above 95 is replaced by -1; 95 itself is left as is.
d$years_in_residence <- ifelse(
  d$years_in_residence > 95,
  -1,
  d$years_in_residence
)

# Does the time between the child's birth and the interview exceed the
# mother's years in residence?
# Helper: CMC value -> Date, going through date_convert() (presumably the
# Biograph function, given library(Biograph) above) and an m/d/Y string.
cmc_as_date <- function(cmc) {
  as.Date(
    date_convert(cmc, format.in = "CMC", format.out = "%m/%d/%Y"),
    format = "%m/%d/%Y"
  )
}
interview_date <- cmc_as_date(d$CMC_interview)
birth_date <- cmc_as_date(d$CMC_birth)

# Difference between the two dates as a plain number (days).
days_since_birth <- as.numeric(interview_date - birth_date)
d$c_age <- round(days_since_birth / 30.5, 1) # child's age in months

# Years between birth and interview, plus 0.75 (the original comment reads
# this as "+ 9 months").
years_needed <- (days_since_birth / 365) + 0.75

# Have they lived in residence long enough for treatment?
# 'no' when the required span is at least the reported years in residence.
d$in_res_long <- ifelse(years_needed >= d$years_in_residence, "no", "yes")

# Remove the temporaries so only `d` carries forward.
rm(cmc_as_date, interview_date, birth_date, days_since_birth, years_needed)
#========================================================================
#----------Electricity-----
# Collapse the original levels by position into no / not a resident / yes.
# NOTE(review): assumes exactly four levels in this order - confirm against the data.
levels(d$electricity) <- c('no', 'not a resident', 'not a resident', 'yes')
# Drop non-residents but KEEP rows with missing electricity. A bare
# `electricity != 'not a resident'` is NA for missing rows and subset() discards
# NA conditions, which previously removed those rows before the 'not recorded'
# recode below could label them.
d <- subset(d, is.na(electricity) | electricity != 'not a resident')
d$electricity <- recode(d$electricity, "NA='not recorded'")
#-Television and Radio------
# c(NA,'NA') matches true missing values as well as the literal string 'NA'.
# The previous spec quoted NA on its own, which only matches the literal string
# and left real missing values untouched.
d$television <- recode(d$television, "c(NA,'NA')='not recorded'")
d$radio <- recode(d$radio, "c(NA,'NA')='not recorded'")
#--Mothers Height
# Keep rows with a recorded height below 2500 ("no one over 8ft tall").
# Rows with a missing height are dropped too, exactly as subset() would do.
d <- d[which(d$m_height < 2500), , drop = FALSE]

#---------Mothers Education
# One rule per element; joined with ';' this is the recode spec for recode().
educ_rules <- c(
  "'Complete primary'='primary'",
  "'Complete secondary'='secondary and beyond'",
  "'Higher'='secondary and beyond'",
  "'Incomplete primary'='none'",
  "'Incomplete secondary'='primary'",
  "'No education'='none'",
  "NA='not recorded'"
)
d$m_educ <- recode(
  d$m_educ,
  paste(educ_rules, collapse = ";"),
  as.factor = TRUE,
  levels = c("none", "primary", "secondary and beyond", "not recorded")
)
rm(educ_rules)

#------Is the Mother the Household Head?
# 'Head' becomes 'yes'; every other value falls under the else rule -> 'no'.
d$m_housholdhead <- recode(
  d$m_housholdhead,
  "'Head'='yes';else='no'",
  as.factor = TRUE
)
#----------Mother's current union status (see old recode from Kenya project for more complicated recode)
# One rule per element; joined with ';' to form the recode spec.
# Fix: the original spec had a stray ';' between
# 'Currently married / living together' and ='yes', which split that rule in
# two, so the label was never mapped to 'yes'.
# NOTE(review): 'Formerly in union/living with a man' maps to 'yes' while
# 'Formerly married' maps to 'no' - kept as written, confirm this is intended.
marital_rules <- c(
  "'Currently in union/living with a man'='yes'",
  "'Currently married'='yes'",
  "'Currently married / living together'='yes'",
  "'Formerly in union/living with a man'='yes'",
  "'Formerly married'='no'",
  "'Never in union'='no'",
  "'Never married'='no'"
)
d$marital_status <- recode(
  d$m_married_base,
  paste(marital_rules, collapse = ";"),
  as.factor = TRUE
)
rm(marital_rules)
# % of Mothers Children who have died
# Deceased sons plus deceased daughters as a share of all births, expressed as
# a percentage rounded to a whole number.
children_died <- d$sons_died + d$daugt_died
d$child_dead <- round((children_died / d$births) * 100, 0)
rm(children_died)
#----Table-driven recodes: Floor Material, Water Source, Religion-----
# Recode a column using a lookup CSV that has a `Var1` column (raw label) and a
# `recode` column (replacement label).
#
# values:   vector to recode; converted to character first.
# csv_file: path of the lookup CSV, read with read.csv().
# Returns a character vector the same length as `values`, where
#   * missing values have become 'not recorded', and
#   * every element equal to a `Var1` entry has been replaced by that row's
#     `recode` entry.
# Rows are applied in file order, as in the original loops, so a label produced
# by an earlier row can still be matched by a later row.
recode_from_csv <- function(values, csv_file) {
  lookup <- read.csv(csv_file, stringsAsFactors = FALSE)
  values <- as.character(values)
  values[is.na(values)] <- 'not recorded'
  # seq_len() makes an empty lookup table a no-op (1:0 would iterate twice).
  for (i in seq_len(nrow(lookup))) {
    # which() ignores NA comparisons, so an NA in the table or in already
    # recoded values cannot abort the assignment.
    hits <- which(values == lookup$Var1[i])
    values[hits] <- lookup$recode[i]
  }
  values
}

#----Floor Material-----
d$floor_material <- recode_from_csv(d$floor_material, 'floor_recodes.csv')
# This one label is recoded explicitly here, after the CSV table has been applied.
d$floor_material <- recode(d$floor_material, "'Earth (terra não batida)'='natural'")
#------Water Source--------
d$water_source <- recode_from_csv(d$water_source, 'water_recodes.csv')
#---Religion-------
d$religion <- recode_from_csv(d$religion, 'religion_recodes.csv')
#-----Month of Birth
# Stored as a factor under a new column name.
d$c_month_of_birth <- as.factor(d$Month_of_Birth)

#================================================================
#-------------Clean up and Write Out------------------------
# Remove factor levels that no longer occur after the filtering above.
d <- droplevels(d)

#----MAY NEED TO RECODE REGION
# `.id` is split by position: the last four characters become `year`,
# everything before them becomes `country`.
d$country <- str_sub(d$.id, end = -5)
d$year <- str_sub(d$.id, start = -4)

# Column groups that make up the trimmed table `dc`.
idvars <- c(
  ".id", "country", "year", "Cluster_Number", "case",
  "Index_to_Birth_History", "in_res_long", "c_age", "region", "recall"
)
depvar <- "bw"
hvars <- c(
  "urban", "electricity", "water_source", "floor_material",
  "radio", "television", "children_under5", "household_members"
)
cvars <- c("birth_order", "m_births5years", "c_month_of_birth", "sex")
mvars <- c(
  "m_age", "m_age_at_first_birth", "m_height", "m_educ",
  "religion", "child_dead", "marital_status"
)
dc <- d[, c(idvars, depvar, hvars, cvars, mvars)]

# Time stamp saved alongside the full (`d`) and trimmed (`dc`) tables.
met <- date()
save(d, dc, met, file = "04_allCvars_cleanandrecoded.Rdata")
#test<-subset(dc,in_res_long=='yes')
#mod<-lmer(bw~recall+m_age+m_age_at_first_birth+sex+m_height+marital_status+child_dead+birth_order+m_births5years+electricity+floor_material+urban+religion+(1|.id),data=d)
#-Birth interval has 37k NA values, but births in past 5 years has none, use that with birth order
#======================OLD EXPLORATORY CODE BELOW HERE==========
#-----Do print outs for 'floor material', 'water_source','religion', and 'ethnicity'
# library(WriteXLS)
# #--Religion
# #Christian, Muslim, Traditional, Other, Not Recorded
# religion<-as.data.frame(table(d$religion))
# #--Floor Material--Finished, Unfinished, Other
# floor<-as.data.frame(table(d$floor_material))
# #--Water Source--Piped onto Property (property or dwelling),
# water<-as.data.frame(table(d$water_source))
# #--Ethnicity-- no idea, just put them in there, accommodate based on cluster??
# ethnicity<-as.data.frame(table(d$ethnicity))
#
# WriteXLS(c('religion','floor','water','ethnicity'),'complicated_variables.xlsx')
# funn<-catcolwise(as.factor)
# dn<-funn(d)
#
#
# dn2<-melt(dn,id.vars=c('.id','case'))
#
# funun<-function(df){
# length(unique(df$value))
# }
# dlevs<-ddply(dn2,'variable',funun)
# dlevs<-arrange(dlevs,V1)