# Load EDHS 2011
library(haven)
library(survival)
library(survey)
library(eha)
library(survminer)
library(car)
library(rgdal)
library(foreign)
library(muhaz)
library(leaflet)
library(sf)
library(sp)
library(raster)
library(rgeos)
library(mapview)

Used data from Ethiopan Demographic and Health Survey (EDHS - 2016)

# CHILDREN DHS DATA
dhs_2016<-read.dta("~/Google Drive/DHS Resources/ET_2016_DHS_08152017_2025_103714/etkr70dt/ETKR70FL.dta", convert.factors = F)
edhs16<-subset(dhs_2016,select=c("caseid","b0", "b1", "b2", "b2", "b3", "b4",  "b5", "b7","m1",  "m15", "m10", "m14", "m70","v005", "v008", "v021", "v022", "v024", "v025","v012", "v013", "h0",  "v190", "v201", "v208", "v106",  "v3a02", "v113", "m15", "b11", "bord", "v113", "v115", "h10", "h10", "h2", "h33", "v116","v130", "v137", "v212", "v364", "v025", "v445", "v501","v624", "v701", "v445" ))
edhs16$yr<-2016 
#WOMEN DHS DATA
#wdhs_2016<-read.dta("~/Google Drive/DHS Resources/ET_2016_DHS_08152017_2025_103714/etir70dt/ETIR70FL.dta", convert.factors = F)
ethsp4<-readOGR(dsn="~/Google Drive/fikre/ethiopia_dhs/sdr_subnational_data_2017-03-28/shps",layer = "sdr_subnational_data_dhs_2016")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/fikrewoldbitew/Google Drive/fikre/ethiopia_dhs/sdr_subnational_data_2017-03-28/shps", layer: "sdr_subnational_data_dhs_2016"
## with 11 features
## It has 55 fields
#write.csv(ethsp4, "/Users/fikrewoldbitew/Google Drive/Data/dt.csv")
edhs16$REGION<- Recode(edhs16$v024, recodes='11="Tigray"; 2="Afar"; 3= "Amhara"; 8= "Oromia"; 10="Somali"; 4="Benishangul"; 9="SNNPR"; 6="Gambela"; 7="Harari";1="Addis Adaba"; 5="Dire Dawa";else=NA', as.factor = F)
edhs16<-merge(edhs16, ethsp4@data, by.x="REGION", by.y="REGNAME" ,all.x=T)
## identify the working variables and the all the edhs datasets
edhs16$b5<-ifelse(edhs16$b2>=2004,edhs16$b5,NA)
edhs16$under_5<-ifelse(edhs16$b5==0,0,1)
#write.csv(edhs16, "/Users/fikrewoldbitew/Google Drive/PhD_Courses/Spring 2019/before16.csv")
edhs16$under_5[is.na(edhs16$under_5)]<-1
edhs16$chmort<-ifelse(edhs16$under_5==0,1,0)
#write.csv(edhs16, "~/Google Drive/PhD_Courses/after17.csv"

Survival Analysis Model Moels

## 
## Alive at 5  Dead by 5 
##      10006        635
## 
## Alive at 5  Dead by 5 
##      10006        635

#Used variables in the Model

edhs16$region<- Recode(edhs16$v024, recodes='11="Tigray"; 2="Afar"; 3= "Amhara"; 8= "1Oromia"; 10="Somali"; 4="Ben-Gumuz"; 9="SNNP"; 6="Gambella"; 7="Harari";1="Addis Ababa"; 5="Dire Dawa";else=NA', as.factor = F)
edhs16$child_sex<-ifelse(edhs16$b4,"Male", "Female")
edhs16$child_sex<-car::Recode(edhs16$b4, recodes='1="Male"; 2="Female"; else=NA')
#edhs16$sibling_alive<-recode(edhs16$mm2_01, recodes='0="Dead"; 1="Alive"; else=NA', as.factor.result = T)
#Education 
edhs16$educ<-Recode(edhs16$v106, recodes="0='0Noeducation'; 1='1Primary'; 2:3='2Secodary and Higher'; else=NA", as.factor=F)
edhs16$bmi<-ifelse(edhs16$v445==9999 | edhs16$v445==9998 , NA, edhs16$v445/100)
edhs16$age<-edhs16$v012
edhs16$mother_age<-Recode(edhs16$v013,recodes="1='15-19'; 2:3='20-34'; 4:7='35-49';else=NA", as.factor=T)
edhs16$births_5ys<-edhs16$v208
edhs16$ch_5<-Recode(edhs16$v137, recodes="0:1='0-1'; 2:18='2+';else=NA", as.factor=F)

edhs16$b_order<-Recode(edhs16$bord, recodes="1='1st-2nd'; 2='1st-2nd'; 3:14='3rd or later';else=NA", as.factor=T)
edhs16$b_interval<-Recode(edhs16$b11, recodes="7:23='< 2 years'; 24:48='2-4 yrs'; 48:215='> 4 yrs';else=NA", as.factor=T)

#wealth Index
edhs16$wealthindex<-Recode(edhs16$v190, recodes="1:2='1Poor'; 3='2Middle'; 4:5='3Rich'; else=NA", as.factor=F)
edhs16$religion<-Recode(edhs16$v130, recodes="1='1Orthodoxr'; 2='Others'; 3='3Protestant'; 4='2Muslim';5='Others';else=NA", as.factor=F)
edhs16$ceb<-edhs16$v201
edhs16$contra_use<-Recode(edhs16$v364, recodes=
"1='1Using modern method';  
 2='4Does not intend to use';
 3='3Non-user - intends to use later'; 
 4='4Does not intend to use';else=NA", 
 as.factor=F)
edhs16$contraceptive_use<-Recode(edhs16$v364, recodes="1='0Yes'; 2:5='No'; else=NA", as.factor=T)
edhs16$water_source<-Recode(edhs16$v113, recodes="10:14='Improved'; 
20='Improved'; 
21= 'Improved';                        
30='Unimproved';
31='Improved';                                    
32='Unimproved';
40='Unimproved';               
41='Improved';  
42='Unimproved';  
43='Unimproved';  
51='Improved';                                           
61='Unimproved';  
62='Unimproved';   
71='Improved';
96='Unimproved' ; else=NA", as.factor = F)
edhs16$time_water<-edhs16$v115
edhs16$toilet_facility<-Recode(edhs16$v116,
recodes='10 =   "Improved";
11  =   "Improved";
12= "Improved";
13  =   "Improved";
14= "Improved";
15= "Improved";
20= "Improved";
21= "Improved";
22= "Improved";
23= "Unimproved";
30= "Unimproved";
31= "Unimproved";
41= "Unimproved";
42= "Unimproved";
43= "Unimproved";
96= "Unimproved"; 
else=NA', as.factor = F)

edhs16$residence<-Recode(edhs16$v025, recodes='1="Urban"; 2="Rural"; else=NA',  as.factor = F)                     
edhs16$watersource<-Recode(edhs16$v113, recodes="10:13='1Piped/bottled'; 20:32= '2Rain/well/surface/tanker'; 61:62= '2Rain/well/surface/tanker'; 40:43='3River/stream/lake/spring'; 51='2Rain/well/surface/tanker';  71='1Piped/bottled'; else=NA", as.factor = F)
#Age at first Birth
edhs16$age_fb<-Recode(edhs16$v212, recodes="10:19='<20 years'; 20:49='20+years'; else=NA", as.factor=T)
edhs16$bmi<-ifelse(edhs16$v445==9999 | edhs16$v445==9998 , NA, edhs16$v445/100)
edhs16$bmi_rec<-Recode(edhs16$bmi, recodes="0:18.49='Underweight'; 18.5:25='1Normal'; 25:54='Overweight';
                       else=NA", as.factor=T)

edhs16$fac_CS_delivery<-Recode(edhs16$m15, recodes="11:12='Home'; 22:25='Fac with CS delivery'; 
                                 32:35='Fac with CS delivery'; 21='Fac without CS delivery';
                                 31='Fac without CS delivery';26='Fac without CS delivery';
                                 36='Fac without CS delivery';41:46='Fac without CS delivery'; 
                                 96='Fac without CS delivery';else=NA", 
                                 as.factor=T)
edhs16$child_wanted<-Recode(edhs16$m10, recodes="1='1Then'; 2='2Later'; 3='3Not at all'; else=NA", as.factor=T)

edhs16$ever_vaccin<-Recode(edhs16$h10, recodes="0='No'; 1='Yes'; else=NA", as.factor=T)
edhs16$bcg_vaccin<-Recode(edhs16$h2, recodes="0='No'; 1:3='Yes'; else=NA", as.factor=T)
edhs16$vitA_vaccin<-Recode(edhs16$h33, recodes="0='No'; 1:3='Yes'; else=NA", as.factor=T)
edhs16$Postnatal<-Recode(edhs16$m70, recodes="0='No'; 1='Yes'; else=NA", as.factor=T)
edhs16$antenatal_visits<-Recode(edhs16$m14, recodes="0='0_No Visit'; 1:4='1-4 visits'; 5:98='5+ Visits'; else=NA", as.factor=T)

Design for Total (Rural+Urban)

# Calculate the DHS sample weight 
##remove all missing the BMI parameter 
edhs16$wt<-edhs16$v005/1000000
options(survey.lonely.psu = "adjust")
edhs16$yrstratum<-paste(edhs16$SVYYEAR, edhs16$v022, sep="-")
edhs16$yrpsu<-paste(edhs16$SVYYEAR, edhs16$v021, sep="-")

table(edhs16$yrstratum)
## 
##  2016-1 2016-10 2016-11 2016-12 2016-13 2016-14 2016-15 2016-16 2016-17 
##     169     577     488     301    1204      50     829      95     500 
## 2016-18 2016-19  2016-2 2016-20 2016-21 2016-22 2016-23 2016-24 2016-25 
##     682     175     864     539     201     404     461     264     283 
##  2016-3  2016-4  2016-5  2016-6  2016-7  2016-8  2016-9 
##      94     968      89     507     381      75     441
des<-svydesign(ids=~yrpsu, strata=~yrstratum, weights=~wt, data = edhs16[is.na(edhs16$wt)==F,], nest=T)

## Calculate the proportion of Under-5 Mortality in the Design

prop.table(cat<-svytable(~age_fb+SVYYEAR,design=des), margin = 2)*100
##            SVYYEAR
## age_fb          2016
##   <20 years 63.96452
##   20+years  36.03548
prop.table(cat<-svytable(~v013+SVYYEAR,design=des), margin = 2)*100
##     SVYYEAR
## v013      2016
##    1  3.429153
##    2 18.759059
##    3 30.420495
##    4 22.583805
##    5 16.076227
##    6  6.558483
##    7  2.172777
prop.table(cat<-svytable(~births_5ys+SVYYEAR,design=des), margin = 2)*100
##           SVYYEAR
## births_5ys        2016
##          1 42.01802559
##          2 45.75616270
##          3 10.87498219
##          4  1.29207322
##          5  0.05875629
prop.table(cat<-svytable(~child_sex+SVYYEAR,design=des), margin = 2)*100
##          SVYYEAR
## child_sex    2016
##    Female 48.0625
##    Male   51.9375
prop.table(cat<-svytable(~residence+SVYYEAR,design=des), margin = 2)*100
##          SVYYEAR
## residence     2016
##     Rural 88.97224
##     Urban 11.02776
prop.table(cat<-svytable(~educ+SVYYEAR,design=des), margin = 2)*100
##                       SVYYEAR
## educ                        2016
##   0Noeducation         66.081299
##   1Primary             26.769240
##   2Secodary and Higher  7.149462
prop.table(cat<-svytable(~residence+SVYYEAR,design=des), margin = 2)*100
##          SVYYEAR
## residence     2016
##     Rural 88.97224
##     Urban 11.02776
prop.table(cat<-svytable(~wealthindex+SVYYEAR,design=des), margin = 2)*100
##            SVYYEAR
## wealthindex     2016
##     1Poor   46.77622
##     2Middle 20.68442
##     3Rich   32.53937
prop.table(cat<-svytable(~fac_CS_delivery+SVYYEAR,design=des), margin = 2)*100
##                          SVYYEAR
## fac_CS_delivery                2016
##   Fac with CS delivery    18.470333
##   Fac without CS delivery  8.977616
##   Home                    72.552051
prop.table(cat<-svytable(~contraceptive_use+SVYYEAR,design=des), margin = 2)*100
##                  SVYYEAR
## contraceptive_use     2016
##              0Yes 31.29328
##              No   68.70672
prop.table(cat<-svytable(~child_wanted+SVYYEAR,design=des), margin = 2)*100
##              SVYYEAR
## child_wanted       2016
##   1Then       75.111813
##   2Later      16.747325
##   3Not at all  8.140862
prop.table(cat<-svytable(~antenatal_visits+SVYYEAR,design=des), margin = 2)*100
##                 SVYYEAR
## antenatal_visits     2016
##       0_No Visit 37.13255
##       1-4 visits 46.58585
##       5+ Visits  16.28160
prop.table(cat<-svytable(~Postnatal+SVYYEAR,design=des), margin = 2)*100
##          SVYYEAR
## Postnatal      2016
##       No  91.665432
##       Yes  8.334568
prop.table(cat<-svytable(~region+SVYYEAR,design=des), margin = 2)*100
##              SVYYEAR
## region              2016
##   1Oromia      0.2439650
##   Addis Ababa  6.4945668
##   Afar         1.0365847
##   Amhara      18.8002391
##   Ben-Gumuz   44.0083893
##   Dire Dawa    4.6080560
##   Gambella     1.1035299
##   Harari      20.8315225
##   SNNP         0.2343264
##   Somali       2.2131130
##   Tigray       0.4257073
prop.table(cat<-svytable(~bmi_rec+SVYYEAR,design=des), margin = 2)*100
##              SVYYEAR
## bmi_rec            2016
##   1Normal     74.159151
##   Overweight   6.280034
##   Underweight 19.560815
prop.table(cat<-svytable(~chmort+SVYYEAR,design=des), margin = 2)*100
##       SVYYEAR
## chmort      2016
##      0 94.902101
##      1  5.097899
reg.est<-svyby(~chmort,~REGION+SVYYEAR,FUN = svymean, na.rm=T, design =des)

reg.est.wide<-reshape(reg.est, idvar = "REGION", timevar = "SVYYEAR", direction = "wide")
reg.est.wide
##                       REGION chmort.2016     se.2016
## Addis Adaba.2016 Addis Adaba  0.03326519 0.005740422
## Afar.2016               Afar  0.08346723 0.013123181
## Amhara.2016           Amhara  0.04706449 0.007561748
## Benishangul.2016 Benishangul  0.05414081 0.006569638
## Dire Dawa.2016     Dire Dawa  0.05970065 0.006738315
## Gambela.2016         Gambela  0.06254413 0.007546533
## Harari.2016           Harari  0.05105880 0.008868284
## Oromia.2016           Oromia  0.05390933 0.009321645
## SNNPR.2016             SNNPR  0.06140410 0.011441805
## Somali.2016           Somali  0.03070644 0.008126967
## Tigray.2016           Tigray  0.05780495 0.020216925

popch = paste0("<strong>", eth.geo$REGNAME, "</strong>", "</br>", eth.geo$CMECMRCU5M)
bins <- c(30,40, 50,60, 70,80, 90,100, 110, 130)
bin <- c(3, 5, 10, 15, 20, 25,30,40, 50,60,70, 100, Inf)
pall <- colorBin("YlOrRd", domain =  eth.geo$CMECMRCU5M, bins = bins)

eth.geo$pch_mort<-eth.geo$CMECMRCU5M*100


mypal <- colorQuantile(palette = "YlOrRd", domain = eth.geo$CMECMRCU5M, n = 5, reverse = FALSE)

U5M <- leaflet(eth.geo) %>%
  addProviderTiles("OpenStreetMap.Mapnik") %>%
#setView(lat = 9, lng = 41, zoom = 6.2) %>%
  setView(lat = 9.292, lng =  40.71 , zoom = 6.2) %>%
 
   addProviderTiles("CartoDB.Positron") %>%
   addPolygons( data = eth.geo, fillColor = ~pall(eth.geo$CMECMRCU5M), 
   fillOpacity = 1,
   weight = 1,
   color = "white",
   popup = popch)%>%
  addLabelOnlyMarkers(data = eth.geo, lng = ~X1, lat = ~X2, label = ~REGNAME,
                    labelOptions = labelOptions(noHide = TRUE, direction = 'center', textOnly = TRUE)) %>%
  addLegend(pal = pall, values = ~CMECMRCU5M, title = "U5 Mortality Rate",
                                       opacity = 0.5, position = "bottomright")

U5M
mapshot(U5M, file = "~/Google Drive/PhD_Courses/Spring 2019/PublicHealth/FinalProject/Maps/U5M.png")
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.

Fit the GLM model for predicting Under-5 Mortality

#Fit Logistic Regression Model for Under-5 Mortality

#Demographic factors
library(lme4)
library(lmerTest)
require(MASS)
library(pROC)

#demographic Factors
#Demographic Factors and Socio-economic Factors

fit<-glm(chmort~age_fb+child_sex+b_order+b_interval+time_water+water_source+toilet_facility+births_5ys+residence+educ+wealthindex+contraceptive_use+region+bmi_rec+fac_CS_delivery+antenatal_visits+Postnatal+child_wanted,family=binomial(link = "logit"), data=train.m, weights = wt)
summary(fit)
## 
## Call:
## glm(formula = chmort ~ age_fb + child_sex + b_order + b_interval + 
##     time_water + water_source + toilet_facility + births_5ys + 
##     residence + educ + wealthindex + contraceptive_use + region + 
##     bmi_rec + fac_CS_delivery + antenatal_visits + Postnatal + 
##     child_wanted, family = binomial(link = "logit"), data = train.m, 
##     weights = wt)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7221  -0.2993  -0.1406  -0.0680   5.6020  
## 
## Coefficients:
##                                          Estimate Std. Error z value
## (Intercept)                            -3.8725243  1.6859802  -2.297
## age_fb20+years                         -0.4809285  0.1931829  -2.489
## child_sexMale                           0.6582764  0.1722778   3.821
## b_order3rd or later                     0.4466072  0.2814350   1.587
## b_interval> 4 yrs                      -0.8167354  0.2838838  -2.877
## b_interval2-4 yrs                      -0.8283873  0.2033067  -4.075
## time_water                             -0.0005096  0.0004223  -1.207
## water_sourceUnimproved                 -0.5895874  0.1811946  -3.254
## toilet_facilityUnimproved               0.4529802  0.4155196   1.090
## births_5ys                              0.1838833  0.1423564   1.292
## residenceUrban                         -0.2075049  0.4708196  -0.441
## educ1Primary                           -0.2382169  0.2224136  -1.071
## educ2Secodary and Higher                0.5614059  0.4767246   1.178
## wealthindex2Middle                     -0.0598605  0.2197083  -0.272
## wealthindex3Rich                        0.0924289  0.2122506   0.435
## contraceptive_useNo                     0.4823318  0.2056633   2.345
## regionAddis Ababa                      -0.1742372  1.5852972  -0.110
## regionAfar                             -0.5213084  1.8084809  -0.288
## regionAmhara                           -0.3179768  1.5579480  -0.204
## regionBen-Gumuz                         0.2332658  1.5456120   0.151
## regionDire Dawa                        -0.3433115  1.5962251  -0.215
## regionGambella                         -0.2642134  1.7697858  -0.149
## regionHarari                            0.1732624  1.5509654   0.112
## regionSNNP                             -0.1348778  2.4121574  -0.056
## regionSomali                            0.2096552  1.8048188   0.116
## regionTigray                           -0.0822718  2.2897300  -0.036
## bmi_recOverweight                      -0.4126576  0.4371340  -0.944
## bmi_recUnderweight                      0.1085875  0.1964976   0.553
## fac_CS_deliveryFac without CS delivery  1.0613701  0.3794836   2.797
## fac_CS_deliveryHome                     0.1833479  0.2748270   0.667
## antenatal_visits1-4 visits             -0.4555848  0.1886194  -2.415
## antenatal_visits5+ Visits              -0.7835006  0.3149680  -2.488
## PostnatalYes                           -0.9087024  0.4936503  -1.841
## child_wanted2Later                     -0.3051008  0.2435738  -1.253
## child_wanted3Not at all                 0.3404207  0.2283477   1.491
##                                        Pr(>|z|)    
## (Intercept)                            0.021625 *  
## age_fb20+years                         0.012792 *  
## child_sexMale                          0.000133 ***
## b_order3rd or later                    0.112537    
## b_interval> 4 yrs                      0.004015 ** 
## b_interval2-4 yrs                      4.61e-05 ***
## time_water                             0.227472    
## water_sourceUnimproved                 0.001138 ** 
## toilet_facilityUnimproved              0.275645    
## births_5ys                             0.196457    
## residenceUrban                         0.659408    
## educ1Primary                           0.284145    
## educ2Secodary and Higher               0.238943    
## wealthindex2Middle                     0.785272    
## wealthindex3Rich                       0.663221    
## contraceptive_useNo                    0.019014 *  
## regionAddis Ababa                      0.912482    
## regionAfar                             0.773150    
## regionAmhara                           0.838276    
## regionBen-Gumuz                        0.880038    
## regionDire Dawa                        0.829707    
## regionGambella                         0.881324    
## regionHarari                           0.911051    
## regionSNNP                             0.955409    
## regionSomali                           0.907522    
## regionTigray                           0.971338    
## bmi_recOverweight                      0.345166    
## bmi_recUnderweight                     0.580527    
## fac_CS_deliveryFac without CS delivery 0.005160 ** 
## fac_CS_deliveryHome                    0.504683    
## antenatal_visits1-4 visits             0.015719 *  
## antenatal_visits5+ Visits              0.012862 *  
## PostnatalYes                           0.065654 .  
## child_wanted2Later                     0.210351    
## child_wanted3Not at all                0.136014    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1380.5  on 3664  degrees of freedom
## Residual deviance: 1258.3  on 3630  degrees of freedom
## AIC: 1279.6
## 
## Number of Fisher Scoring iterations: 7
exp(cbind(coef(fit), confint(fit)))
##                                                          2.5 %      97.5 %
## (Intercept)                            0.02080578 0.0001095749   0.3001960
## age_fb20+years                         0.61820910 0.4185861395   0.8942905
## child_sexMale                          1.93146037 1.3846750199   2.7240020
## b_order3rd or later                    1.56300030 0.9241865038   2.8026020
## b_interval> 4 yrs                      0.44187182 0.2525963594   0.7702819
## b_interval2-4 yrs                      0.43675306 0.2939902563   0.6532502
## time_water                             0.99949050 0.9986141820   1.0002749
## water_sourceUnimproved                 0.55455604 0.3868734248   0.7880703
## toilet_facilityUnimproved              1.57299305 0.7403141889   3.8394442
## births_5ys                             1.20187554 0.9075571929   1.5864558
## residenceUrban                         0.81260929 0.3012050863   1.9346011
## educ1Primary                           0.78803172 0.5019038991   1.2032709
## educ2Secodary and Higher               1.75313554 0.6408699442   4.2382907
## wealthindex2Middle                     0.94189588 0.6065234789   1.4384965
## wealthindex3Rich                       1.09683514 0.7199280001   1.6569856
## contraceptive_useNo                    1.61984720 1.0930608963   2.4527838
## regionAddis Ababa                      0.84009764 0.0754366179 147.2656061
## regionAfar                             0.59374321 0.0198139302 121.6787426
## regionAmhara                           0.72761968 0.0722655282 125.1408307
## regionBen-Gumuz                        1.26271706 0.1306716556 215.2327026
## regionDire Dawa                        0.70941720 0.0614443976 125.3148824
## regionGambella                         0.76780966 0.0310331079 153.2345799
## regionHarari                           1.18917805 0.1209291337 203.4925052
## regionSNNP                             0.87382271 0.0005402771 267.0524521
## regionSomali                           1.23325278 0.0465655535 253.4901450
## regionTigray                           0.92102161 0.0012610136 258.4563288
## bmi_recOverweight                      0.66188890 0.2542241914   1.4465225
## bmi_recUnderweight                     1.11470246 0.7504986095   1.6243882
## fac_CS_deliveryFac without CS delivery 2.89032825 1.3558753255   6.0674382
## fac_CS_deliveryHome                    1.20123224 0.7165662582   2.1158769
## antenatal_visits1-4 visits             0.63407704 0.4360630965   0.9144940
## antenatal_visits5+ Visits              0.45680413 0.2374622195   0.8223529
## PostnatalYes                           0.40304688 0.1315677820   0.9516973
## child_wanted2Later                     0.73704909 0.4470978512   1.1662373
## child_wanted3Not at all                1.40553881 0.8836990276   2.1695625

Fit Cox Proportional Hazard Model for CHILD MORTALITY

## Stratified 1 - level Cluster Sampling design (with replacement)
## With (616) clusters.
## svydesign(ids = ~yrpsu, strata = ~yrstratum, weights = ~wt, data = train.m[is.na(train.m$wt) == 
##     F, ], nest = T)
## Call:
## svycoxph(formula = Surv(death.age, d.event) ~ age_fb + child_sex + 
##     b_order + b_interval + time_water + water_source + toilet_facility + 
##     births_5ys + residence + educ + wealthindex + contraceptive_use + 
##     region + bmi_rec + fac_CS_delivery + antenatal_visits + Postnatal + 
##     child_wanted, design = des3)
## 
##   n= 3665, number of events= 130 
## 
##                                              coef  exp(coef)   se(coef)
## age_fb20+years                         -0.4197088  0.6572382  0.3513775
## child_sexMale                           0.6518537  1.9190950  0.2645349
## b_order3rd or later                     0.3838926  1.4679877  0.4111089
## b_interval> 4 yrs                      -0.4687223  0.6258013  0.4721817
## b_interval2-4 yrs                      -0.6234803  0.5360755  0.3214383
## time_water                             -0.0004918  0.9995084  0.0004743
## water_sourceUnimproved                 -0.5415862  0.5818246  0.2974175
## toilet_facilityUnimproved               0.4704848  1.6007701  0.5636677
## births_5ys                              0.4421816  1.5560984  0.2644649
## residenceUrban                         -0.2182127  0.8039544  0.5250125
## educ1Primary                           -0.2005803  0.8182558  0.3815028
## educ2Secodary and Higher                0.5866950  1.7980361  0.7030814
## wealthindex2Middle                     -0.0559098  0.9456245  0.3879999
## wealthindex3Rich                        0.0915972  1.0959232  0.3258061
## contraceptive_useNo                     0.5633544  1.7565547  0.2913205
## regionAddis Ababa                      -0.1011332  0.9038127  0.5793143
## regionAfar                             -0.4633316  0.6291839  0.5958243
## regionAmhara                           -0.2460628  0.7818731  0.6001792
## regionBen-Gumuz                         0.3052109  1.3569112  0.6021164
## regionDire Dawa                        -0.2625471  0.7690901  0.5527513
## regionGambella                         -0.2497411  0.7790025  0.6076630
## regionHarari                            0.2080125  1.2312286  0.5576088
## regionSNNP                             -0.0983328  0.9063472  0.7021022
## regionSomali                            0.1834710  1.2013801  1.0048936
## regionTigray                           -0.0317239  0.9687740  0.6144651
## bmi_recOverweight                      -0.4221119  0.6556606  0.6141702
## bmi_recUnderweight                      0.0724606  1.0751504  0.3092018
## fac_CS_deliveryFac without CS delivery  1.0245328  2.7857935  0.5649788
## fac_CS_deliveryHome                     0.1073052  1.1132740  0.4083747
## antenatal_visits1-4 visits             -0.4404657  0.6437365  0.2685589
## antenatal_visits5+ Visits              -0.7595519  0.4678760  0.4716453
## PostnatalYes                           -0.8757972  0.4165298  0.6870792
## child_wanted2Later                     -0.3361474  0.7145178  0.4795792
## child_wanted3Not at all                 0.3416369  1.4072492  0.3268126
##                                             z Pr(>|z|)  
## age_fb20+years                         -1.194   0.2323  
## child_sexMale                           2.464   0.0137 *
## b_order3rd or later                     0.934   0.3504  
## b_interval> 4 yrs                      -0.993   0.3209  
## b_interval2-4 yrs                      -1.940   0.0524 .
## time_water                             -1.037   0.2999  
## water_sourceUnimproved                 -1.821   0.0686 .
## toilet_facilityUnimproved               0.835   0.4039  
## births_5ys                              1.672   0.0945 .
## residenceUrban                         -0.416   0.6777  
## educ1Primary                           -0.526   0.5991  
## educ2Secodary and Higher                0.834   0.4040  
## wealthindex2Middle                     -0.144   0.8854  
## wealthindex3Rich                        0.281   0.7786  
## contraceptive_useNo                     1.934   0.0531 .
## regionAddis Ababa                      -0.175   0.8614  
## regionAfar                             -0.778   0.4368  
## regionAmhara                           -0.410   0.6818  
## regionBen-Gumuz                         0.507   0.6122  
## regionDire Dawa                        -0.475   0.6348  
## regionGambella                         -0.411   0.6811  
## regionHarari                            0.373   0.7091  
## regionSNNP                             -0.140   0.8886  
## regionSomali                            0.183   0.8551  
## regionTigray                           -0.052   0.9588  
## bmi_recOverweight                      -0.687   0.4919  
## bmi_recUnderweight                      0.234   0.8147  
## fac_CS_deliveryFac without CS delivery  1.813   0.0698 .
## fac_CS_deliveryHome                     0.263   0.7927  
## antenatal_visits1-4 visits             -1.640   0.1010  
## antenatal_visits5+ Visits              -1.610   0.1073  
## PostnatalYes                           -1.275   0.2024  
## child_wanted2Later                     -0.701   0.4834  
## child_wanted3Not at all                 1.045   0.2959  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                                        exp(coef) exp(-coef) lower .95
## age_fb20+years                            0.6572     1.5215    0.3301
## child_sexMale                             1.9191     0.5211    1.1427
## b_order3rd or later                       1.4680     0.6812    0.6558
## b_interval> 4 yrs                         0.6258     1.5980    0.2480
## b_interval2-4 yrs                         0.5361     1.8654    0.2855
## time_water                                0.9995     1.0005    0.9986
## water_sourceUnimproved                    0.5818     1.7187    0.3248
## toilet_facilityUnimproved                 1.6008     0.6247    0.5303
## births_5ys                                1.5561     0.6426    0.9267
## residenceUrban                            0.8040     1.2439    0.2873
## educ1Primary                              0.8183     1.2221    0.3874
## educ2Secodary and Higher                  1.7980     0.5562    0.4532
## wealthindex2Middle                        0.9456     1.0575    0.4420
## wealthindex3Rich                          1.0959     0.9125    0.5787
## contraceptive_useNo                       1.7566     0.5693    0.9924
## regionAddis Ababa                         0.9038     1.1064    0.2904
## regionAfar                                0.6292     1.5894    0.1957
## regionAmhara                              0.7819     1.2790    0.2411
## regionBen-Gumuz                           1.3569     0.7370    0.4169
## regionDire Dawa                           0.7691     1.3002    0.2603
## regionGambella                            0.7790     1.2837    0.2368
## regionHarari                              1.2312     0.8122    0.4128
## regionSNNP                                0.9063     1.1033    0.2289
## regionSomali                              1.2014     0.8324    0.1676
## regionTigray                              0.9688     1.0322    0.2905
## bmi_recOverweight                         0.6557     1.5252    0.1967
## bmi_recUnderweight                        1.0752     0.9301    0.5865
## fac_CS_deliveryFac without CS delivery    2.7858     0.3590    0.9205
## fac_CS_deliveryHome                       1.1133     0.8983    0.5000
## antenatal_visits1-4 visits                0.6437     1.5534    0.3803
## antenatal_visits5+ Visits                 0.4679     2.1373    0.1856
## PostnatalYes                              0.4165     2.4008    0.1083
## child_wanted2Later                        0.7145     1.3995    0.2791
## child_wanted3Not at all                   1.4072     0.7106    0.7416
##                                        upper .95
## age_fb20+years                             1.309
## child_sexMale                              3.223
## b_order3rd or later                        3.286
## b_interval> 4 yrs                          1.579
## b_interval2-4 yrs                          1.007
## time_water                                 1.000
## water_sourceUnimproved                     1.042
## toilet_facilityUnimproved                  4.832
## births_5ys                                 2.613
## residenceUrban                             2.250
## educ1Primary                               1.728
## educ2Secodary and Higher                   7.133
## wealthindex2Middle                         2.023
## wealthindex3Rich                           2.075
## contraceptive_useNo                        3.109
## regionAddis Ababa                          2.813
## regionAfar                                 2.023
## regionAmhara                               2.535
## regionBen-Gumuz                            4.416
## regionDire Dawa                            2.272
## regionGambella                             2.563
## regionHarari                               3.673
## regionSNNP                                 3.589
## regionSomali                               8.611
## regionTigray                               3.230
## bmi_recOverweight                          2.185
## bmi_recUnderweight                         1.971
## fac_CS_deliveryFac without CS delivery     8.431
## fac_CS_deliveryHome                        2.479
## antenatal_visits1-4 visits                 1.090
## antenatal_visits5+ Visits                  1.179
## PostnatalYes                               1.601
## child_wanted2Later                         1.829
## child_wanted3Not at all                    2.670
## 
## Concordance= 0.737  (se = 0.032 )
## Rsquare= NA   (max possible= NA )
## Likelihood ratio test= NA  on 34 df,   p=NA
## Wald test            = 123.8  on 34 df,   p=4e-12
## Score (logrank) test = NA  on 34 df,   p=NA
##                   [,1]
## sensitivity 0.57142857
## specificity 0.72992701
## cutoff      0.04504857
## Confusion Matrix and Statistics
## 
##    
##        0    1
##   0 1214  293
##   1   36   27
##                                           
##                Accuracy : 0.7904          
##                  95% CI : (0.7695, 0.8103)
##     No Information Rate : 0.7962          
##     P-Value [Acc > NIR] : 0.7255          
##                                           
##                   Kappa : 0.0793          
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.97120         
##             Specificity : 0.08438         
##          Pos Pred Value : 0.80557         
##          Neg Pred Value : 0.42857         
##              Prevalence : 0.79618         
##          Detection Rate : 0.77325         
##    Detection Prevalence : 0.95987         
##       Balanced Accuracy : 0.52779         
##                                           
##        'Positive' Class : 0               
## 
##                  [,1]
## sensitivity 0.5873016
## specificity 0.6589250
## cutoff      1.2832007
## Confusion Matrix and Statistics
## 
##    
##       0   1
##   0 790 717
##   1  21  42
##                                           
##                Accuracy : 0.5299          
##                  95% CI : (0.5049, 0.5549)
##     No Information Rate : 0.5166          
##     P-Value [Acc > NIR] : 0.1503          
##                                           
##                   Kappa : 0.0303          
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.97411         
##             Specificity : 0.05534         
##          Pos Pred Value : 0.52422         
##          Neg Pred Value : 0.66667         
##              Prevalence : 0.51656         
##          Detection Rate : 0.50318         
##    Detection Prevalence : 0.95987         
##       Balanced Accuracy : 0.51472         
##                                           
##        'Positive' Class : 0               
## 

Random Forest Model - Under-5 Mortality

## 
## Call:
##  randomForest(formula = chmort ~ v201 + v445 + v024 + v025 + v106 +      v445 + v012 + v013 + v208 + v137 + bord + b11 + v190 + v201 +      v113 + v115 + v116 + v364 + v212 + b4 + v106 + m15 + v130 +      m1 + m10 + m14 + m15 + m70, data = train.m) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 4
## 
##         OOB estimate of  error rate: 3.22%
## Confusion matrix:
##      0  1 class.error
## 0 3527  8 0.002263083
## 1  110 20 0.846153846

Performance of Random Forest Model on Testing Data

pred = predict(rf.m, newdata=test.m)
accuracy <- table(pred, test.m[,"chmort"])
sum(diag(accuracy))/sum(accuracy)
## [1] 0.9636943
confusionMatrix(data=pred, as.factor(test.m$chmort))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 1503   53
##          1    4   10
##                                           
##                Accuracy : 0.9637          
##                  95% CI : (0.9532, 0.9724)
##     No Information Rate : 0.9599          
##     P-Value [Acc > NIR] : 0.2428          
##                                           
##                   Kappa : 0.2488          
##  Mcnemar's Test P-Value : 2.047e-10       
##                                           
##             Sensitivity : 0.9973          
##             Specificity : 0.1587          
##          Pos Pred Value : 0.9659          
##          Neg Pred Value : 0.7143          
##              Prevalence : 0.9599          
##          Detection Rate : 0.9573          
##    Detection Prevalence : 0.9911          
##       Balanced Accuracy : 0.5780          
##                                           
##        'Positive' Class : 0               
## 
test_prob = predict(fit, newdata = test.m, type = "response")
test_roc = roc(test.m$chmort ~ test_prob, plot = TRUE, print.auc = TRUE,percent=TRUE)

test_prob = predict(cox.child, newdata = test.m, type = "risk")
test_roc2 = roc(test.m$chmort ~ test_prob, plot = TRUE, print.auc = TRUE,percent=TRUE)

test_prob5 = predict(rf.m, newdata=test.m, type = "prob")
test_roc5= roc(test.m$chmort, test_prob5[,c(2)],plot = TRUE, col='darkgoldenrod4', print.auc = TRUE,percent=TRUE)

plot(test_roc,  print.auc = TRUE,  col='blue',percent=TRUE, print.auc.y = 41, print.auc.x = 0)
plot(test_roc2 , add = TRUE, col='red', print.auc = TRUE, print.auc.y = 34, print.auc.x = 0)
plot(test_roc5 , add = TRUE, col='darkgoldenrod4', print.auc = TRUE, print.auc.y = 27, print.auc.x = 0)
abline(a=0, b=1, lty=2, lwd=3, col="black")
legend("topleft", legend = c("Logistic-Regression", "Cox-Proportional Hazard",
"Random Forest-Model") , pch = 15, bty = 'n', col = c("blue","red", "darkgoldenrod4"))