library(foreign)
library(haven)
## Warning: package 'haven' was built under R version 3.4.1
library(survival)
## Warning: package 'survival' was built under R version 3.4.1
library(car)
## Warning: package 'car' was built under R version 3.4.1
library(survey)
## Warning: package 'survey' was built under R version 3.4.1
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(muhaz)
## Warning: package 'muhaz' was built under R version 3.4.1
library(eha)
## Warning: package 'eha' was built under R version 3.4.2
library(survminer)
## Warning: package 'survminer' was built under R version 3.4.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.1
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 3.4.1
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.4.1
dhs_2016<-read.dta("C:/Users/atq765/Google Drive/Dem7283/DHS Resources/ET_2016_DHS_08152017_2025_103714/etir70dt/ETIR70FL.dta", convert.factors = F)
edhs16<-subset(dhs_2016,select=c( "caseid","v445","b0_01", "b1_01", "b2_01", "b3_01", "b4_01", "b5_01","b6_01","b7_01","v002", "v003", "v005", "v007", "v008",
"v011","v012", "v013","v022", "v024", "v025","v102", "v106", "v212","v113", "v115","v116", "v119", "v120", "bidx_01","bidx_02","bidx_03",
"v121", "v122", "v123", "v124", "v125", "v127", "v129", "v153", "v190" , "v201","v211", "v213", "v217", "v302", "v501",
"v511","v525", "v623", "v701","v705", "v717"))
Event Variable - Under 5 Mortality 1) Under 5 Mortality: Probability of dying between birth and exactly 5 years of age, expressed per 1,000 live births
If the child is dead at the time of interview, then the variable B5_01!=1, then the age at death in months is the variable B7_01.
If the child is alive at the time of interview, then the variable B5_01==1, and the age at death is censored. If the age at death is censored, then the age at the date of interview (censored age at death) is the date of the interview - date of birth (in months).
edhs16<-edhs16[edhs16$v201!=0,]
edhs16$death.age<-ifelse(edhs16$b5_01==1,((((edhs16$v008))+1900)-(((edhs16$b3_01))+1900)),edhs16$b7_01)
#censoring indicator for death by age 5, in months (60 months)
edhs16$d.event<-ifelse(is.na(edhs16$b7_01)==T|edhs16$b7_01>60,0,1)
edhs16$d.eventfac<-factor(edhs16$d.event); levels(edhs16$d.eventfac)<-c("Alive at 1", "Dead by 1")
table(edhs16$d.eventfac)
##
## Alive at 1 Dead by 1
## 9741 533
#Here we see the data
head(edhs16[,c("death.age","d.event")], n=20)
## death.age d.event
## 1 31 0
## 3 0 1
## 4 44 0
## 5 7 0
## 6 30 0
## 7 28 0
## 8 1 0
## 9 14 0
## 11 105 0
## 13 1 0
## 14 38 0
## 15 9 1
## 16 92 0
## 18 25 0
## 20 12 0
## 21 22 0
## 23 0 0
## 26 0 1
## 27 19 0
## 28 5 0
#The Surv() object
head(Surv(edhs16$death.age, edhs16$d.event), n=20)
## [1] 31+ 0 44+ 7+ 30+ 28+ 1+ 14+ 105+ 1+ 38+ 9 92+ 25+
## [15] 12+ 22+ 0+ 0 19+ 5+
mort<-survfit(Surv(death.age, d.event)~1, data=edhs16,conf.type="none")
plot(mort, ylim=c(.9,1), xlim=c(0,60), main="Survival Function for Under 5 Child Mortality", xlab="Child Age in Months")
summary(mort)
## Call: survfit(formula = Surv(death.age, d.event) ~ 1, data = edhs16,
## conf.type = "none")
##
## time n.risk n.event survival std.err
## 0 10274 273 0.973 0.00159
## 1 9903 26 0.971 0.00166
## 2 9689 16 0.969 0.00170
## 3 9503 18 0.967 0.00176
## 4 9300 9 0.966 0.00178
## 5 9119 12 0.965 0.00182
## 6 8917 21 0.963 0.00188
## 7 8695 12 0.962 0.00191
## 8 8496 16 0.960 0.00196
## 9 8311 14 0.958 0.00201
## 10 8117 7 0.957 0.00203
## 11 7972 8 0.956 0.00206
## 12 7830 22 0.954 0.00213
## 13 7587 2 0.953 0.00214
## 14 7377 3 0.953 0.00215
## 15 7189 2 0.953 0.00215
## 16 7026 3 0.952 0.00217
## 17 6852 1 0.952 0.00217
## 18 6689 7 0.951 0.00220
## 19 6538 2 0.951 0.00221
## 20 6395 2 0.951 0.00222
## 23 6030 2 0.950 0.00223
## 24 5916 32 0.945 0.00240
## 36 4476 12 0.943 0.00250
## 48 3563 5 0.941 0.00256
## 60 2845 6 0.939 0.00268
1000*(1-summary(mort)$surv[26])
## [1] 60.61247
The Overall under-five mortality rate is 60.6 deaths per 1000 live births s
library(muhaz)
haz<-kphaz.fit(time=edhs16$death.age, status=edhs16$d.event, method = "product-limit")
kphaz.plot(haz, main="Hazard function plot")
data.frame(haz)
## time haz var
## 1 0.5 0.0026471490 2.695226e-07
## 2 1.5 0.0016672234 1.737320e-07
## 3 2.5 0.0019159952 2.039577e-07
## 4 3.5 0.0009794258 1.065886e-07
## 5 4.5 0.0013300522 1.474248e-07
## 6 5.5 0.0023830811 2.704482e-07
## 7 6.5 0.0013960534 1.624202e-07
## 8 7.5 0.0019068440 2.272614e-07
## 9 8.5 0.0016990693 2.062093e-07
## 10 9.5 0.0008678057 1.075862e-07
## 11 10.5 0.0010124706 1.281397e-07
## 12 11.5 0.0028574081 3.711564e-07
## 13 12.5 0.0002673581 3.574208e-08
## 14 13.5 0.0004103285 5.612792e-08
## 15 14.5 0.0002793132 3.900838e-08
## 16 15.5 0.0004336751 6.269631e-08
## 17 16.5 0.0001470480 2.162312e-08
## 18 17.5 0.0010575851 1.597890e-07
## 19 18.5 0.0003075286 4.728720e-08
## 20 19.5 0.0003160352 4.993984e-08
## 21 21.5 0.0001119138 6.262389e-09
## 22 23.5 0.0054789456 9.381428e-07
## 23 30.0 0.0002268511 4.288603e-09
## 24 42.0 0.0001179906 2.784468e-09
## 25 54.0 0.0001769402 5.218012e-09
This illustrates, that while the largest drop in survivorship occurred between 0 and 5 years, the hazard is actually higher in the 1-12 month and 25-30 months range, illustrating the conditionality of that probability.
#cumulative hazard
plot(cumsum(haz$haz)~haz$time, main = "Cumulative Hazard function", ylab="H(t)",xlab="Time in Months", type="l",xlim=c(0,60), lwd=2,col=3)
#Survival function, I just store this in an object so I can use it
surv<-mort
#here is a cheap version of the pdf
ft<- -diff(mort$surv)
plot(ft, xlim=c(.5,59.5), type="s", ylab="f(t)",xlab="Time in Months",main="Probability Density Function")
#here is the cumulative distribution function
Ft<-cumsum(ft)
plot(Ft, xlim=c(0.5,60), type="s", ylab="F(t)",xlab="Time in Months", main="Cumulative Distribution Function")
Hypothesis:
Under-five mortality is higher in rural areas and among poorer and less educated communities. The risk of Under five child mortality also varies from one region to another.
edhs16<-edhs16[edhs16$v201!=0,]
edhs16$age_fb<-recode(edhs16$v212, recodes="0:19='<20'; 20:34='20-34'; 35:49='35-49'; else='NA'", as.factor.result=F)
fit8<-survfit(Surv(death.age, d.event)~age_fb, data=edhs16)
plot(fit8, ylim=c(0.9,1), xlim=c(0,60), col=c(1,3), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c('<20','20-29','30-39','40-49'), col=c(1,4), lty=1)
ggsurvplot(fit8,xlab = "Time in Months", xlim=c(0,60), ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
#two group compairison
survdiff(Surv(death.age, d.event)~age_fb, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ age_fb, data = edhs16)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## age_fb=<20 6430 350 335.8 0.602 1.65
## age_fb=20-34 3823 183 196.1 0.876 1.41
## age_fb=35-49 21 0 1.1 1.105 1.12
##
## Chisq= 2.6 on 2 degrees of freedom, p= 0.27
edhs16$age<-recode(edhs16$v013, recodes="1='15-19'; 2:3='20-29'; 4:5='30-39'; 6:7='40-49'; else='NA'", as.factor.result=T)
fit6<-survfit(Surv(death.age, d.event)~age, data=edhs16)
plot(fit6, ylim=c(0.9,1), xlim=c(0,60), col=c(1,3), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c('15-19','20-29', '30-39','40-49'), col=c(1,3), lty=1)
ggsurvplot(fit6,xlab = "Time in Months", xlim=c(0,60), ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
## Warning: Removed 7 rows containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_point).
#two group compairison
survdiff(Surv(death.age, d.event)~age, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ age, data = edhs16)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## age=15-19 359 22 15.6 2.65 2.79
## age=20-29 3874 151 188.8 7.55 11.99
## age=30-39 3861 175 203.4 3.97 6.52
## age=40-49 2180 185 125.2 28.51 38.28
##
## Chisq= 43.7 on 3 degrees of freedom, p= 1.72e-09
edhs16$mar_stat<-recode(edhs16$v501, recodes='0="1nevermarried"; 1:2="2married"; 3="3widowed"; 4:5="4divorced"; else=NA', as.factor.result = T)
fit5<-survfit(Surv(death.age, d.event)~mar_stat, data=edhs16)
plot(fit5, ylim=c(0.9,1), xlim=c(0,60), col=c(1,3), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c("1nevermarried", "2married", "3widowed", "4divorced"), col=c(1,3), lty=1)
ggsurvplot(fit5,xlab = "Time in Months", xlim=c(0,60), ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
#two group compairison
survdiff(Surv(death.age, d.event)~mar_stat, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ mar_stat, data = edhs16)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## mar_stat=1nevermarried 120 5 6.62 0.398 0.409
## mar_stat=2married 8839 422 451.50 1.928 12.856
## mar_stat=3widowed 443 37 25.74 4.922 5.255
## mar_stat=4divorced 872 69 49.13 8.038 8.999
##
## Chisq= 15.6 on 3 degrees of freedom, p= 0.00139
edhs16$wealth_index<-recode(edhs16$v190, recodes ='1:2 ="Poor"; 3="Middle"; 4:5="Rich"; else=NA')
fit1<-survfit(Surv(death.age, d.event)~wealth_index, data=edhs16)
plot(fit1, ylim=c(0.9,1), xlim=c(0,60), col=c(1,3), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c("Poor","Middle","Rich"), col=c(1,3), lty=1)
ggsurvplot(fit1,xlab = "Time in Months", ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
edhs16$partner_educ<-recode(edhs16$v701, recodes="0='0Noeducation'; 1='1Primary'; 2='2Secodary'; 3='3Higher'; else=NA", as.factor.result=T)
fit9<-survfit(Surv(death.age, d.event)~partner_educ, data=edhs16)
plot(fit9, ylim=c(0.9,1), xlim=c(0,60), col=c(1,4), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c('Noeducation','Primary','Secodary','Higher'), col=c(1,3), lty=1)
ggsurvplot(fit9,xlab = "Time in Months", ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
#two group compairison
survdiff(Surv(death.age, d.event)~partner_educ, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ partner_educ, data = edhs16)
##
## n=8762, 1512 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## partner_educ=0Noeducation 4162 235 198.7 6.64868 12.84932
## partner_educ=1Primary 2774 130 130.9 0.00654 0.00965
## partner_educ=2Secodary 1019 30 49.2 7.50466 8.62544
## partner_educ=3Higher 807 23 39.2 6.69410 7.49044
##
## Chisq= 21.1 on 3 degrees of freedom, p= 9.81e-05
edhs16$mother_educ<-recode(edhs16$v106, recodes="0='0Noeducation'; 1='1Primary'; 2:3='2Secodary and Higher'; else=NA", as.factor.result=T)
fit9<-survfit(Surv(death.age, d.event)~mother_educ, data=edhs16)
plot(fit9, ylim=c(0.9,1), xlim=c(0,60), col=c(1,3), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c('Noeducation','Primary','Secodary&Higher'), col=c(1,3), lty=1)
ggsurvplot(fit9,xlab = "Time in Months", ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
#two group compairison
survdiff(Surv(death.age, d.event)~mother_educ, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ mother_educ, data = edhs16)
##
## N Observed Expected (O-E)^2/E
## mother_educ=0Noeducation 6190 370 320.6 7.62
## mother_educ=1Primary 2731 119 141.3 3.51
## mother_educ=2Secodary and Higher 1353 44 71.2 10.38
## (O-E)^2/V
## mother_educ=0Noeducation 19.41
## mother_educ=1Primary 4.84
## mother_educ=2Secodary and Higher 12.15
##
## Chisq= 21.8 on 2 degrees of freedom, p= 1.82e-05
edhs16$mother_occup<-recode(edhs16$v106, recodes="0 ='1Unemployed'; 1:3='2Non-manual/Professional'; 4:9='3Agricultural/Manual(skilled/unskilled)'; else=NA", as.factor.result=T)
fit10<-survfit(Surv(death.age, d.event)~mother_occup, data=edhs16)
plot(fit10, ylim=c(0.9,1), xlim=c(0,60), col=c(1,3), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Low vs. High SES Households")
legend("topright", legend = c('Noeducation','Primary','Secodary','Higher'), col=c(1,3), lty=1)
ggsurvplot(fit10,xlab = "Time in Months", ylim=c(.9, 1), conf.int = F, censor.shape=120,risk.table = F, title = "Survivorship Function for Child Mortality")
#two group compairison
survdiff(Surv(death.age, d.event)~mother_occup, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ mother_occup, data = edhs16)
##
## N Observed Expected (O-E)^2/E
## mother_occup=1Unemployed 6190 370 321 7.62
## mother_occup=2Non-manual/Professional 4084 163 212 11.50
## (O-E)^2/V
## mother_occup=1Unemployed 19.4
## mother_occup=2Non-manual/Professional 19.4
##
## Chisq= 19.4 on 1 degrees of freedom, p= 1.05e-05
table(edhs16$v025)
##
## 1 2
## 2732 7542
edhs16$rural<-recode(edhs16$v025, recodes ="2 = '0rural'; 1='1urban'; else=NA", as.factor.result = T)
fit2<-survfit(Surv(death.age, d.event)~rural, data=edhs16, conf.type = "log")
plot(fit2, xlim=c(0,60), col=c(1,2), conf.int=T)
title(main="Survival Function for Child Mortality", sub="Rural vs Urban Residence")
legend("topright", legend = c("Urban","Rural" ), col=c(1,2), lty=1)
fit2
## Call: survfit(formula = Surv(death.age, d.event) ~ rural, data = edhs16,
## conf.type = "log")
##
## n events median 0.95LCL 0.95UCL
## rural=0rural 7542 417 NA NA NA
## rural=1urban 2732 116 NA NA NA
summary(fit2)
## Call: survfit(formula = Surv(death.age, d.event) ~ rural, data = edhs16,
## conf.type = "log")
##
## rural=0rural
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 7542 219 0.971 0.00193 0.967 0.975
## 1 7236 22 0.968 0.00203 0.964 0.972
## 2 7054 13 0.966 0.00208 0.962 0.970
## 3 6904 14 0.964 0.00214 0.960 0.968
## 4 6753 8 0.963 0.00218 0.959 0.967
## 5 6609 9 0.962 0.00222 0.957 0.966
## 6 6452 15 0.960 0.00229 0.955 0.964
## 7 6275 8 0.958 0.00233 0.954 0.963
## 8 6111 12 0.956 0.00238 0.952 0.961
## 9 5961 10 0.955 0.00243 0.950 0.960
## 10 5820 6 0.954 0.00246 0.949 0.959
## 11 5707 6 0.953 0.00249 0.948 0.958
## 12 5596 19 0.950 0.00259 0.945 0.955
## 13 5399 2 0.949 0.00261 0.944 0.954
## 14 5219 3 0.949 0.00262 0.944 0.954
## 15 5063 1 0.949 0.00263 0.943 0.954
## 16 4938 2 0.948 0.00264 0.943 0.953
## 18 4678 5 0.947 0.00268 0.942 0.952
## 19 4570 1 0.947 0.00269 0.942 0.952
## 20 4461 1 0.947 0.00269 0.941 0.952
## 23 4166 2 0.946 0.00271 0.941 0.952
## 24 4088 22 0.941 0.00291 0.936 0.947
## 36 2934 8 0.939 0.00304 0.933 0.945
## 48 2223 4 0.937 0.00315 0.931 0.943
## 60 1700 5 0.934 0.00337 0.928 0.941
##
## rural=1urban
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 2732 54 0.980 0.00266 0.975 0.985
## 1 2667 4 0.979 0.00276 0.973 0.984
## 2 2635 3 0.978 0.00283 0.972 0.983
## 3 2599 4 0.976 0.00292 0.970 0.982
## 4 2547 1 0.976 0.00295 0.970 0.982
## 5 2510 3 0.975 0.00302 0.969 0.981
## 6 2465 6 0.972 0.00316 0.966 0.978
## 7 2420 4 0.971 0.00326 0.964 0.977
## 8 2385 4 0.969 0.00335 0.962 0.976
## 9 2350 4 0.967 0.00345 0.961 0.974
## 10 2297 1 0.967 0.00347 0.960 0.974
## 11 2265 2 0.966 0.00352 0.959 0.973
## 12 2234 3 0.965 0.00360 0.958 0.972
## 15 2126 1 0.964 0.00362 0.957 0.971
## 16 2088 1 0.964 0.00365 0.957 0.971
## 17 2048 1 0.963 0.00368 0.956 0.971
## 18 2011 2 0.962 0.00374 0.955 0.970
## 19 1968 1 0.962 0.00377 0.955 0.969
## 20 1934 1 0.961 0.00380 0.954 0.969
## 24 1828 10 0.956 0.00412 0.948 0.964
## 36 1542 4 0.954 0.00430 0.945 0.962
## 48 1340 1 0.953 0.00435 0.944 0.962
## 60 1145 1 0.952 0.00443 0.944 0.961
#Two- sample test
survdiff(Surv(death.age, d.event)~rural, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ rural, data = edhs16)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## rural=0rural 7542 417 385 2.70 9.88
## rural=1urban 2732 116 148 7.01 9.88
##
## Chisq= 9.9 on 1 degrees of freedom, p= 0.00167
library(survminer)
ggsurvplot(fit2,conf.int = F, risk.table = F, title = "Survivorship Function for Child Mortality", xlab = "Child Age in Months", xlim = c(0,60), ylim=c(.9, 1))
prop.table(table(edhs16$d.event, edhs16$rural), margin = 2)
##
## 0rural 1urban
## 0 0.94470963 0.95754026
## 1 0.05529037 0.04245974
chisq.test(table(edhs16$d.event, edhs16$rural))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(edhs16$d.event, edhs16$rural)
## X-squared = 6.454, df = 1, p-value = 0.01107
edhs16$region<- recode(edhs16$v024, recodes='1="Tigray"; 2="Afar"; 3= "Amhara"; 4= "Oromia"; 5="Somali"; 6="Ben-Gumuz"; 7="SNNP"; 8="Gambella"; 9="Harari"; 10="Addis Ababa"; 11="Dire Dawa";else=NA', as.factor.result = F )
fit3<-survfit(Surv(death.age, d.event)~region, data=edhs16, conf.type = "log")
#Two- sample test
survdiff(Surv(death.age, d.event)~region, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ region, data = edhs16)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## region=Addis Ababa 760 20 42.1 11.637 12.830
## region=Afar 835 55 41.8 4.139 4.562
## region=Amhara 1160 51 62.0 1.950 2.239
## region=Ben-Gumuz 804 35 41.7 1.063 1.170
## region=Dire Dawa 654 47 34.8 4.238 4.601
## region=Gambella 756 44 39.9 0.419 0.459
## region=Harari 605 24 31.7 1.885 2.034
## region=Oromia 1366 78 69.0 1.182 1.379
## region=SNNP 1225 55 63.8 1.215 1.401
## region=Somali 1002 81 48.4 22.042 24.652
## region=Tigray 1107 43 57.7 3.764 4.284
##
## Chisq= 54.4 on 10 degrees of freedom, p= 4.01e-08
library(survminer)
ggsurvplot(fit3,conf.int = F, risk.table = F, title = "Survivorship Function for Child Mortality", xlab = "Time in Months", ylim=c(.9, 1))
## Warning: Removed 86 rows containing missing values (geom_path).
## Warning: Removed 86 rows containing missing values (geom_point).
## Warning: Removed 86 rows containing missing values (geom_path).
## Warning: Removed 86 rows containing missing values (geom_point).
prop.table(table(edhs16$d.event, edhs16$region), margin = 1)
##
## Addis Ababa Afar Amhara Ben-Gumuz Dire Dawa Gambella
## 0 0.07596756 0.08007391 0.11384868 0.07894467 0.06231393 0.07309311
## 1 0.03752345 0.10318949 0.09568480 0.06566604 0.08818011 0.08255159
##
## Harari Oromia SNNP Somali Tigray
## 0 0.05964480 0.13222462 0.12011087 0.09454881 0.10922903
## 1 0.04502814 0.14634146 0.10318949 0.15196998 0.08067542
chisq.test(table(edhs16$d.event, edhs16$region))
##
## Pearson's Chi-squared test
##
## data: table(edhs16$d.event, edhs16$region)
## X-squared = 46.673, df = 10, p-value = 1.083e-06
The primary goal of the study was to assess the risk factors of under- five mortality in Ethiopian by applying an Kaplan-Meier non-parametric method to provide valid estimates for correct statistical inference needed for policy decision making.
The overall under-five mortality rate in Ethiopia is found to be 61 deaths per 1000 live births. We found that region has the most significant risk factor for child mortality. Somali had higher risk of dying before age five, followed by children born in Dire Dawa, Ben-Gumuz, Afar, Gambela. Addis Ababa has the lowest under five mortality. This indicates that there is a differential of child mortality among the regions of Ethiopia.
Next, the mothers’ age emerge as a powerful background covariates of under-five mortality in the Ethiopia states. Children from mothers aged (15-19) and (40-49) have a higher risk of dying than mothers’ age (20-39).
Where there is a strong tradition for young women to be married at a very early age in rural parts of the country the relative risk of their under five children is high. Strong policy shall be implemented to protect early marriage in the country.
Mother’s education emerge as powerful background covariates of under-five mortality in the Ethiopia states, for the reason that both are known to be associated with better child care practices. Thus, policy makers shall focus on educating illiterate mothers about the child care and maternal education.
As expected children of mothers from wealthy households are found to be at a lower risk of dying than mothers’ from poor households. This study therefore urges for policy makers improving the status of women through education and creating more employment opportunities, by giving more rural women.
edhs16$secedu<-recode(edhs16$v106, recodes ="2:3 = 1; 0:1=0; else=NA")
table(edhs16$secedu, edhs16$d.eventfac)
##
## Alive at 1 Dead by 1
## 0 8432 489
## 1 1309 44
fit4<-survfit(Surv(death.age, d.event)~rural+secedu, data=edhs16)
summary(fit4)
## Call: survfit(formula = Surv(death.age, d.event) ~ rural + secedu,
## data = edhs16)
##
## rural=0rural, secedu=0
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 7217 210 0.971 0.00198 0.967 0.975
## 1 6925 21 0.968 0.00207 0.964 0.972
## 2 6757 13 0.966 0.00213 0.962 0.970
## 3 6620 14 0.964 0.00220 0.960 0.968
## 4 6477 8 0.963 0.00224 0.958 0.967
## 5 6340 8 0.962 0.00227 0.957 0.966
## 6 6195 15 0.959 0.00235 0.955 0.964
## 7 6027 8 0.958 0.00239 0.953 0.963
## 8 5871 10 0.956 0.00244 0.952 0.961
## 9 5729 10 0.955 0.00249 0.950 0.960
## 10 5596 5 0.954 0.00252 0.949 0.959
## 11 5488 6 0.953 0.00255 0.948 0.958
## 12 5382 19 0.949 0.00265 0.944 0.955
## 13 5191 2 0.949 0.00267 0.944 0.954
## 14 5020 3 0.949 0.00268 0.943 0.954
## 15 4870 1 0.948 0.00269 0.943 0.954
## 16 4753 2 0.948 0.00270 0.943 0.953
## 18 4502 5 0.947 0.00274 0.942 0.952
## 19 4399 1 0.947 0.00275 0.941 0.952
## 20 4293 1 0.946 0.00276 0.941 0.952
## 23 4017 2 0.946 0.00278 0.941 0.951
## 24 3940 22 0.941 0.00298 0.935 0.947
## 36 2838 8 0.938 0.00312 0.932 0.944
## 48 2162 4 0.936 0.00323 0.930 0.943
## 60 1652 5 0.933 0.00346 0.927 0.940
##
## rural=0rural, secedu=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 325 9 0.972 0.00910 0.955 0.990
## 1 311 1 0.969 0.00959 0.951 0.988
## 5 269 1 0.966 0.01021 0.946 0.986
## 8 240 2 0.958 0.01161 0.935 0.981
## 10 224 1 0.953 0.01232 0.929 0.978
##
## rural=1urban, secedu=0
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1704 37 0.978 0.00353 0.971 0.985
## 1 1661 4 0.976 0.00371 0.969 0.983
## 2 1641 2 0.975 0.00380 0.967 0.982
## 3 1620 3 0.973 0.00394 0.965 0.981
## 4 1586 1 0.972 0.00398 0.965 0.980
## 5 1564 2 0.971 0.00407 0.963 0.979
## 6 1539 4 0.969 0.00425 0.960 0.977
## 7 1511 4 0.966 0.00443 0.957 0.975
## 8 1488 4 0.963 0.00460 0.954 0.972
## 9 1467 4 0.961 0.00478 0.951 0.970
## 11 1413 2 0.959 0.00486 0.950 0.969
## 12 1396 2 0.958 0.00495 0.948 0.968
## 16 1312 1 0.957 0.00500 0.948 0.967
## 17 1291 1 0.957 0.00505 0.947 0.967
## 18 1270 2 0.955 0.00516 0.945 0.965
## 19 1247 1 0.954 0.00521 0.944 0.965
## 20 1225 1 0.954 0.00526 0.943 0.964
## 24 1166 6 0.949 0.00560 0.938 0.960
## 36 992 4 0.945 0.00590 0.933 0.956
## 48 869 1 0.944 0.00599 0.932 0.956
##
## rural=1urban, secedu=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1028 17 0.983 0.00398 0.976 0.991
## 2 994 1 0.982 0.00409 0.974 0.991
## 3 979 1 0.981 0.00421 0.973 0.990
## 5 946 1 0.980 0.00433 0.972 0.989
## 6 926 2 0.978 0.00458 0.969 0.987
## 10 866 1 0.977 0.00471 0.968 0.986
## 12 838 1 0.976 0.00484 0.967 0.986
## 15 794 1 0.975 0.00499 0.965 0.985
## 24 662 4 0.969 0.00576 0.958 0.980
## 60 394 1 0.966 0.00625 0.954 0.979
ggsurvplot(fit4,conf.int = F, risk.table = F, title = "Survivorship Function for child Mortality", xlab = "Time in Months", xlim = c(0,60), ylim=c(.9, 1))
plot(fit4, ylim=c(.85,1), xlim=c(0,60), col=c(1,1,2,2),lty=c(1,2,1,2), conf.int=F)
title(main="Survival Function for child Mortality", sub="Rural/Urban * Mother's Education")
legend("topright", legend = c("Rural, Low Edu","Urban, Low Edu", "Rural, High Edu","Urban, High Edu" ), col=c(1,1,2,2),lty=c(1,2,1,2))
# test
survdiff(Surv(death.age, d.event)~rural+secedu, data=edhs16)
## Call:
## survdiff(formula = Surv(death.age, d.event) ~ rural + secedu,
## data = edhs16)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## rural=0rural, secedu=0 7217 403 369.0 3.139 10.368
## rural=0rural, secedu=1 325 14 15.8 0.202 0.211
## rural=1urban, secedu=0 1704 86 92.9 0.506 0.623
## rural=1urban, secedu=1 1028 30 55.4 11.641 13.185
##
## Chisq= 15.7 on 3 degrees of freedom, p= 0.00129
We can also calculate the hazard function for each group using the kphaz.fit function in the muhaz library.
haz2<-kphaz.fit(edhs16$death.age, edhs16$d.event, edhs16$rural)
haz2
## $time
## [1] 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 10.5 11.5 12.5 13.5
## [15] 14.5 15.5 17.0 18.5 19.5 21.5 23.5 30.0 42.0 54.0 0.5 1.5 2.5 3.5
## [29] 4.5 5.5 6.5 7.5 8.5 9.5 10.5 11.5 13.5 15.5 16.5 17.5 18.5 19.5
## [43] 22.0 30.0 42.0 54.0
##
## $haz
## [1] 3.071240e-03 1.864027e-03 2.051801e-03 1.199911e-03 1.379496e-03
## [6] 2.358900e-03 1.293841e-03 1.991030e-03 1.692530e-03 1.037762e-03
## [11] 1.060348e-03 3.458828e-03 3.765343e-04 5.809352e-04 1.976675e-04
## [16] 4.143857e-04 5.407287e-04 2.194908e-04 2.275831e-04 1.619312e-04
## [21] 5.465815e-03 2.307431e-04 1.509894e-04 2.469013e-04 1.504190e-03
## [26] 1.143160e-03 1.555152e-03 3.966680e-04 1.200656e-03 2.448860e-03
## [31] 1.661832e-03 1.691527e-03 1.715326e-03 4.374453e-04 8.908688e-04
## [36] 1.358936e-03 1.579030e-04 4.803074e-04 4.909180e-04 1.002256e-03
## [41] 5.112474e-04 5.200208e-04 1.378284e-03 2.190115e-04 6.303580e-05
## [46] 7.322789e-05
##
## $var
## [1] 4.287669e-07 2.672856e-07 3.007223e-07 1.799790e-07 2.114522e-07
## [6] 3.709899e-07 2.092653e-07 3.303665e-07 2.864749e-07 1.794967e-07
## [11] 1.873946e-07 6.297257e-07 7.089434e-08 1.125079e-07 3.907245e-08
## [16] 8.585916e-08 5.848014e-08 4.817620e-08 5.179405e-08 1.311093e-08
## [21] 1.358062e-06 6.655660e-09 5.699677e-09 1.219216e-08 5.656494e-07
## [26] 4.356093e-07 6.046610e-07 1.573455e-07 4.805313e-07 9.995054e-07
## [31] 6.904264e-07 7.153229e-07 7.356116e-07 1.913584e-07 3.968237e-07
## [36] 6.155854e-07 2.493337e-08 2.306952e-07 2.410005e-07 5.022585e-07
## [41] 2.613739e-07 2.704216e-07 1.899718e-07 1.199159e-08 3.973513e-09
## [46] 5.362323e-09
##
## $strata
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
## [36] 2 2 2 2 2 2 2 2 2 2 2
plot(y=haz2$haz[1:60], x=haz2$time[1:60], col=1, lty=1, type="s")
lines(y=haz2$haz[0:24], x=haz2$time[0:24], col=2, lty=1, type="s")
haz3<-kphaz.fit(edhs16$death.age, edhs16$d.event, edhs16$wealth_index)
haz3
## $time
## [1] 0.5 1.5 2.5 4.5 6.5 7.5 9.5 11.5 12.5 13.5 15.0 17.0 19.0 22.0
## [15] 30.0 0.5 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 10.5 11.5 12.5
## [29] 13.5 14.5 16.5 18.5 21.0 23.5 30.0 42.0 54.0 0.5 1.5 2.5 3.5 4.5
## [43] 5.5 6.5 7.5 8.5 9.5 10.5 11.5 13.5 15.5 16.5 17.5 18.5 19.5 21.5
## [57] 23.5 30.0 42.0 54.0
##
## $haz
## [1] 2.239388e-03 1.515752e-03 2.318394e-03 8.134606e-04 2.505010e-03
## [6] 1.717590e-03 3.052503e-04 3.791981e-03 9.852217e-04 1.023541e-03
## [11] 5.359057e-04 1.670401e-03 5.834306e-04 2.584993e-03 2.822631e-04
## [16] 3.928962e-03 2.623533e-03 2.439841e-03 1.504130e-03 1.539074e-03
## [21] 2.379323e-03 1.092733e-03 1.680283e-03 1.720435e-03 1.471415e-03
## [26] 1.203384e-03 3.094886e-03 3.170577e-04 6.534894e-04 3.376097e-04
## [31] 1.243317e-04 3.815338e-04 1.052632e-04 5.616394e-03 2.707156e-04
## [36] 2.951872e-04 3.859166e-04 1.443126e-03 7.338104e-04 1.248325e-03
## [41] 7.645332e-04 1.550318e-03 2.370707e-03 1.339856e-03 2.184930e-03
## [46] 2.216435e-03 5.633825e-04 8.600312e-04 2.343843e-03 1.032311e-04
## [51] 6.330561e-04 3.212335e-04 9.800783e-04 3.334445e-04 3.394433e-04
## [56] 1.193033e-04 3.990242e-03 1.826718e-04 4.311088e-05 1.027855e-04
##
## $var
## [1] 1.671639e-06 1.148772e-06 1.791650e-06 2.205759e-07 2.091808e-06
## [6] 1.475155e-06 9.317775e-08 3.595261e-06 9.706617e-07 1.047637e-06
## [11] 2.871949e-07 9.300918e-07 3.403912e-07 8.353318e-07 3.983854e-08
## [16] 9.081016e-07 6.257471e-07 5.953224e-07 3.770816e-07 3.947968e-07
## [21] 6.290882e-07 2.985332e-07 4.705817e-07 4.933409e-07 4.330264e-07
## [26] 3.620379e-07 9.579097e-07 1.005256e-07 2.135247e-07 1.139803e-07
## [31] 1.545838e-08 1.455680e-07 1.108033e-08 2.426656e-06 1.465805e-08
## [36] 2.178432e-08 3.723313e-08 3.471046e-07 1.794962e-07 3.116810e-07
## [41] 1.948388e-07 4.005973e-07 6.244899e-07 3.590484e-07 5.967487e-07
## [46] 6.140899e-07 1.587005e-07 2.465574e-07 6.867354e-07 1.065667e-08
## [51] 2.003941e-07 1.031910e-07 3.201866e-07 1.111852e-07 1.152218e-07
## [56] 1.423327e-08 1.447506e-06 6.673925e-09 1.858548e-09 5.282430e-09
##
## $strata
## [1] "Middle" "Middle" "Middle" "Middle" "Middle" "Middle" "Middle"
## [8] "Middle" "Middle" "Middle" "Middle" "Middle" "Middle" "Middle"
## [15] "Middle" "Poor" "Poor" "Poor" "Poor" "Poor" "Poor"
## [22] "Poor" "Poor" "Poor" "Poor" "Poor" "Poor" "Poor"
## [29] "Poor" "Poor" "Poor" "Poor" "Poor" "Poor" "Poor"
## [36] "Poor" "Poor" "Rich" "Rich" "Rich" "Rich" "Rich"
## [43] "Rich" "Rich" "Rich" "Rich" "Rich" "Rich" "Rich"
## [50] "Rich" "Rich" "Rich" "Rich" "Rich" "Rich" "Rich"
## [57] "Rich" "Rich" "Rich" "Rich"
plot(y=haz3$haz[1:60], x=haz3$time[1:60], col=1, lty=1, type="s")
lines(y=haz3$haz[13:24], x=haz3$time[13:24], col=2, lty=1, type="s")
haz4<-kphaz.fit(edhs16$death.age, edhs16$d.event, edhs16$region)
haz4
## $time
## [1] 0.5 2.0 5.0 7.5 8.5 9.5 11.0 13.5 16.5 21.0 36.0 0.5 2.0 3.5
## [15] 5.0 6.5 7.5 8.5 9.5 10.5 11.5 13.0 17.0 22.0 30.0 42.0 0.5 1.5
## [29] 2.5 4.5 6.5 7.5 8.5 10.0 11.5 14.0 20.0 30.0 0.5 1.5 3.0 4.5
## [43] 5.5 6.5 8.0 9.5 11.0 15.5 21.0 23.5 42.0 0.5 1.5 3.0 4.5 5.5
## [57] 6.5 7.5 8.5 10.0 12.0 14.5 17.0 21.0 30.0 0.5 1.5 2.5 3.5 4.5
## [71] 5.5 7.0 11.0 14.5 16.5 21.0 30.0 42.0 0.5 3.0 5.5 6.5 7.5 8.5
## [85] 16.5 42.0 0.5 1.5 2.5 3.5 5.0 6.5 7.5 8.5 10.5 13.0 16.0 20.5
## [99] 23.5 30.0 42.0 54.0 0.5 1.5 2.5 3.5 5.0 7.0 8.5 10.5 12.5 15.5
## [113] 19.0 22.0 30.0 1.0 2.5 4.0 5.5 6.5 7.5 9.0 10.5 11.5 15.0 21.0
## [127] 30.0 42.0 1.5 4.0 6.0 7.5 9.5 14.0 17.5 18.5 21.5 30.0 48.0
##
## $haz
## [1] 1.342282e-03 6.906077e-04 3.628447e-04 1.472754e-03 1.483680e-03
## [6] 1.517451e-03 7.824726e-04 5.473454e-04 5.707763e-04 3.092146e-04
## [11] 9.850276e-05 5.014218e-03 2.008384e-03 1.356852e-03 2.878006e-03
## [16] 2.965174e-03 1.572327e-03 4.818558e-03 3.278724e-03 1.661130e-03
## [21] 6.862117e-03 9.074410e-04 3.745318e-04 1.777278e-03 3.063725e-04
## [26] 3.858025e-04 2.689804e-03 9.074410e-04 9.337068e-04 3.242542e-04
## [31] 1.975348e-03 1.010101e-03 2.040835e-03 1.588446e-03 1.089325e-03
## [36] 5.875473e-04 3.344663e-04 1.377410e-04 2.595902e-03 2.691912e-03
## [41] 6.993007e-04 1.422475e-03 1.440922e-03 1.479290e-03 7.692308e-04
## [46] 1.552795e-03 1.614231e-03 2.834467e-04 5.307856e-04 6.593808e-03
## [51] 1.316482e-04 4.769483e-03 3.246762e-03 8.445946e-04 6.826038e-03
## [56] 5.220548e-03 1.779359e-03 1.798561e-03 1.831502e-03 9.541985e-04
## [61] 9.803922e-04 6.872852e-04 1.084599e-03 2.036911e-03 7.676769e-04
## [66] 1.366120e-03 4.207689e-03 1.416431e-03 2.890179e-03 1.455604e-03
## [71] 1.499250e-03 2.359744e-03 2.888504e-04 1.769912e-03 6.422608e-04
## [76] 1.805344e-03 4.688464e-04 2.986858e-04 1.730104e-03 9.320061e-04
## [81] 1.890359e-03 1.980198e-03 3.980103e-03 2.016129e-03 1.851852e-04
## [86] 1.501502e-04 5.415120e-03 2.394270e-03 8.058018e-04 1.692067e-03
## [91] 2.681971e-03 9.149131e-04 1.874613e-03 9.496676e-04 1.719340e-03
## [96] 5.543237e-04 3.037667e-04 2.828854e-04 4.343936e-03 1.637197e-04
## [101] 2.131287e-04 5.473469e-04 3.378404e-03 2.583302e-03 3.541438e-03
## [106] 1.796139e-03 1.410660e-03 9.843292e-04 4.026232e-03 2.153880e-03
## [111] 1.122334e-03 2.496879e-04 6.535948e-04 7.097321e-04 1.633987e-04
## [116] 1.109290e-03 6.830655e-03 1.217308e-03 1.246883e-03 1.322751e-03
## [121] 2.762478e-03 2.217278e-03 3.091197e-03 4.777126e-03 3.346720e-04
## [126] 1.994718e-03 6.172924e-04 3.987241e-04 3.267974e-04 1.023050e-03
## [131] 1.064483e-03 1.094092e-03 3.875969e-04 2.204586e-04 1.367989e-03
## [136] 1.406470e-03 6.250015e-04 1.711157e-04 2.675234e-04
##
## $var
## [1] 1.801721e-06 4.769390e-07 1.316563e-07 2.169004e-06 2.201305e-06
## [6] 2.302657e-06 6.122634e-07 2.995870e-07 3.257855e-07 9.561367e-08
## [11] 9.702793e-09 6.285741e-06 1.344769e-06 1.841048e-06 2.070952e-06
## [16] 4.396150e-06 2.472212e-06 7.740414e-06 5.375073e-06 2.759351e-06
## [21] 1.177396e-05 8.234492e-07 1.402741e-07 1.052921e-06 9.386414e-08
## [26] 1.488435e-07 2.411704e-06 8.234492e-07 8.718084e-07 1.051408e-07
## [31] 1.951038e-06 1.020304e-06 2.082524e-06 8.410667e-07 1.186628e-06
## [36] 1.726069e-07 5.593687e-08 1.897260e-08 3.369593e-06 3.623359e-06
## [41] 4.890215e-07 2.023435e-06 2.076257e-06 2.188299e-06 5.917160e-07
## [46] 2.411172e-06 1.302893e-06 8.034204e-08 2.817333e-07 1.449365e-05
## [51] 1.733126e-08 7.582670e-06 5.270745e-06 7.133400e-07 1.164887e-05
## [56] 9.084933e-06 3.166120e-06 3.234822e-06 3.354399e-06 9.104947e-07
## [61] 9.611688e-07 4.723610e-07 1.176354e-06 8.299722e-07 1.964480e-07
## [66] 1.866284e-06 5.901712e-06 2.006276e-06 4.176577e-06 2.118783e-06
## [71] 2.247752e-06 1.856144e-06 8.343454e-08 3.132587e-06 4.124989e-07
## [76] 6.518639e-07 1.099139e-07 8.921320e-08 2.993259e-06 4.343362e-07
## [81] 3.573458e-06 3.921184e-06 7.920620e-06 4.064776e-06 3.429355e-08
## [86] 2.254507e-08 4.189197e-06 1.910856e-06 6.493165e-07 1.431561e-06
## [91] 1.198919e-06 8.370659e-07 1.757275e-06 9.018686e-07 5.912478e-07
## [96] 3.072748e-07 9.227421e-08 8.002417e-08 6.290371e-06 2.680414e-08
## [101] 4.542386e-08 1.497947e-07 2.853424e-06 2.224538e-06 3.135488e-06
## [106] 1.613057e-06 6.633217e-07 4.844900e-07 4.052698e-06 7.732914e-07
## [111] 1.259635e-06 6.234404e-08 4.271861e-07 2.518630e-07 2.669913e-08
## [116] 6.152775e-07 7.777255e-06 7.409323e-07 1.554717e-06 1.749671e-06
## [121] 3.815709e-06 1.639170e-06 4.777762e-06 7.607069e-06 1.120054e-07
## [126] 7.958373e-07 1.905276e-07 1.589809e-07 1.067965e-07 5.233325e-07
## [131] 5.666085e-07 1.197037e-06 1.502314e-07 4.860197e-08 1.871394e-06
## [136] 1.978157e-06 1.953139e-07 2.928057e-08 3.578448e-08
##
## $strata
## [1] "Addis Ababa" "Addis Ababa" "Addis Ababa" "Addis Ababa" "Addis Ababa"
## [6] "Addis Ababa" "Addis Ababa" "Addis Ababa" "Addis Ababa" "Addis Ababa"
## [11] "Addis Ababa" "Afar" "Afar" "Afar" "Afar"
## [16] "Afar" "Afar" "Afar" "Afar" "Afar"
## [21] "Afar" "Afar" "Afar" "Afar" "Afar"
## [26] "Afar" "Amhara" "Amhara" "Amhara" "Amhara"
## [31] "Amhara" "Amhara" "Amhara" "Amhara" "Amhara"
## [36] "Amhara" "Amhara" "Amhara" "Ben-Gumuz" "Ben-Gumuz"
## [41] "Ben-Gumuz" "Ben-Gumuz" "Ben-Gumuz" "Ben-Gumuz" "Ben-Gumuz"
## [46] "Ben-Gumuz" "Ben-Gumuz" "Ben-Gumuz" "Ben-Gumuz" "Ben-Gumuz"
## [51] "Ben-Gumuz" "Dire Dawa" "Dire Dawa" "Dire Dawa" "Dire Dawa"
## [56] "Dire Dawa" "Dire Dawa" "Dire Dawa" "Dire Dawa" "Dire Dawa"
## [61] "Dire Dawa" "Dire Dawa" "Dire Dawa" "Dire Dawa" "Dire Dawa"
## [66] "Gambella" "Gambella" "Gambella" "Gambella" "Gambella"
## [71] "Gambella" "Gambella" "Gambella" "Gambella" "Gambella"
## [76] "Gambella" "Gambella" "Gambella" "Harari" "Harari"
## [81] "Harari" "Harari" "Harari" "Harari" "Harari"
## [86] "Harari" "Oromia" "Oromia" "Oromia" "Oromia"
## [91] "Oromia" "Oromia" "Oromia" "Oromia" "Oromia"
## [96] "Oromia" "Oromia" "Oromia" "Oromia" "Oromia"
## [101] "Oromia" "Oromia" "SNNP" "SNNP" "SNNP"
## [106] "SNNP" "SNNP" "SNNP" "SNNP" "SNNP"
## [111] "SNNP" "SNNP" "SNNP" "SNNP" "SNNP"
## [116] "Somali" "Somali" "Somali" "Somali" "Somali"
## [121] "Somali" "Somali" "Somali" "Somali" "Somali"
## [126] "Somali" "Somali" "Somali" "Tigray" "Tigray"
## [131] "Tigray" "Tigray" "Tigray" "Tigray" "Tigray"
## [136] "Tigray" "Tigray" "Tigray" "Tigray"
plot(y=haz4$haz[1:60], x=haz4$time[1:60], col=1, lty=1, type="s")
lines(y=haz4$haz[13:24], x=haz4$time[13:24], col=2, lty=1, type="s")
My outcome variable: Age at first marriage
Age at First Marriage: Median age in years when women ages 15 to 49 first married or lived with a consensual partner.
sub3<-data.frame(CASEID=dhs_2016$caseid,
int.cmc=dhs_2016$v008,
fbir.cmc=dhs_2016$b3_01,
sbir.cmc=dhs_2016$b3_02,
rural=dhs_2016$v025,
region=dhs_2016$v024,
wealth.index=dhs_2016$v190,
marr.cmc=dhs_2016$v509,
mar.stat=dhs_2016$v501,
age.fm=dhs_2016$v511,
mar.dur=dhs_2016$v513,
educ=dhs_2016$v106,
age=dhs_2016$v012,
age_r=dhs_2016$v013,
weight=dhs_2016$v005/1000000,
psu=dhs_2016$v021, strata=dhs_2016$v022)
## Parametric Model for my outcome varaible "Age at First Marriage"
sub3$agefm<-ifelse(sub3$mar.stat==0, 0,sub3$age.fm)
sub3$fmevent<-ifelse(sub3$mar.stat==0,0,1)
fit<-survfit(Surv(agefm, fmevent)~1, sub3)
fit
## Call: survfit(formula = Surv(agefm, fmevent) ~ 1, data = sub3)
##
## n events median 0.95LCL 0.95UCL
## 15683 11405 17 16 17
plot(fit, conf.int=T, ylab="S(t)", xlab="Age in Years")
title(main="Survival Function for Age at First Marriage, EDHS - 2016")
###Estimating Parametric Hazard Model
fit.haz.km<-kphaz.fit(sub3$agefm, sub3$fmevent )
#this is a version of the hazard that is smoothed using a kernel-density method
fit.haz.sm<-muhaz(sub3$agefm[sub3$agefm>0], sub3$fmevent[sub3$agefm>0] )
#Empirical hazard function
kphaz.plot(fit.haz.km, main="Hazard Function for Age at First Marriage, EDHS - 2016")
lines(fit.haz.sm, col=2, lwd=3)
legend("topleft", legend = c("KM hazard", "Smoothed Hazard"), col=c(1,2), lty=c(1,1))
predictor variables: Woman’s education (secondary +, vs < secondary), Woman’s age^2, residence(rural, urban), wealth_index(poor, not poor);
sub3$educ.high<-ifelse(sub3$educ %in% c(2,3), 1, 0)
sub3$mother_educ<-recode(sub3$educ, recodes="0='0Noeducation'; 1='1Primary'; 2:3='2Secodary and Higher'; else=NA", as.factor.result=T)
sub3$wealth_index<-recode(sub3$wealth.index, recodes ='1:2 ="Poor"; 3="Middle"; 4:5="Rich"; else=NA')
sub3$age_new<-recode(sub3$age_r, recodes="1='15-19'; 2:3='20-29'; 4:5='30-39'; 6:7='40-49'; else='NA'", as.factor.result=T)
sub3$rural_c<-recode(sub3$rural, recodes ="2 = '0rural'; 1='1urban'; else=NA", as.factor.result = T)
sub3$age2<-(sub3$age/5)^2
sub3$rural2<-recode(sub3$rural, recodes='2=1;else=0')
sub3$poor<-recode(sub3$wealth.index, recodes='1:2=1;else=0')
sub3$educ.high<-ifelse(sub3$educ %in% c(2,3), 1, 0)
sub3$region<- recode(sub3$v024, recodes='1="Tigray"; 2="Afar"; 3= "Amhara"; 4= "Oromia"; 5="Somali"; 6="Ben-Gumuz"; 7="SNNP"; 8="Gambella"; 9="Harari"; 10="Addis Ababa"; 11="Dire Dawa";else=NA', as.factor.result = F )
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu, strata=~strata, data=sub3[sub3$agefm>0,], weight=~weight )
rep.des<-as.svrepdesign(des, type="bootstrap" )
fit.1<-phreg(Surv(agefm, fmevent)~educ.high+rural2+poor+I(age/5)+age2, data=sub3[sub3$agefm>0,],dist = "weibull", shape = 1)
summary(fit.1)
## Call:
## phreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 + poor +
## I(age/5) + age2, data = sub3[sub3$agefm > 0, ], dist = "weibull",
## shape = 1)
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## educ.high 0.172 -0.147 0.863 0.030 0.000
## rural2 0.697 0.058 1.060 0.026 0.024
## poor 0.420 -0.003 0.997 0.022 0.879
## I(age/5) 6.235 -0.100 0.904 0.042 0.016
## age2 41.621 0.007 1.007 0.003 0.041
##
## log(scale) 2.513 0.132 0.000
##
## Shape is fixed at 1
##
## Events 11405
## Total time at risk 196750
## Max. log. likelihood -43852
## LR test statistic 65.35
## Degrees of freedom 5
## Overall p-value 9.46465e-13
plot(fit.1)
fit.2<-phreg(Surv(agefm, fmevent)~educ.high+rural2+poor+I(age/5)+age2, data=sub3[sub3$agefm>0,],dist = "weibull")
summary(fit.2)
## Call:
## phreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 + poor +
## I(age/5) + age2, data = sub3[sub3$agefm > 0, ], dist = "weibull")
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## educ.high 0.172 -0.651 0.522 0.030 0.000
## rural2 0.697 0.322 1.380 0.026 0.000
## poor 0.420 0.025 1.025 0.022 0.265
## I(age/5) 6.235 -0.634 0.530 0.043 0.000
## age2 41.621 0.035 1.036 0.003 0.000
##
## log(scale) 2.430 0.029 0.000
## log(shape) 1.538 0.007 0.000
##
## Events 11405
## Total time at risk 196750
## Max. log. likelihood -31675
## LR test statistic 2173.13
## Degrees of freedom 5
## Overall p-value 0
plot(fit.2)
fit.1.aft<-survreg(Surv(agefm, fmevent)~educ.high+rural2+poor+I(age/5)+age2, data=sub3[sub3$agefm>0,],dist = "exponential")
summary(fit.1.aft)
##
## Call:
## survreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 +
## poor + I(age/5) + age2, data = sub3[sub3$agefm > 0, ], dist = "exponential")
## Value Std. Error z p
## (Intercept) 2.51303 0.13245 18.974 2.81e-80
## educ.high 0.14737 0.02973 4.957 7.18e-07
## rural2 -0.05846 0.02586 -2.261 2.38e-02
## poor 0.00332 0.02190 0.152 8.79e-01
## I(age/5) 0.10045 0.04186 2.400 1.64e-02
## age2 -0.00659 0.00322 -2.046 4.08e-02
##
## Scale fixed at 1
##
## Exponential distribution
## Loglik(model)= -43852.4 Loglik(intercept only)= -43885.1
## Chisq= 65.35 on 5 degrees of freedom, p= 9.5e-13
## Number of Newton-Raphson Iterations: 3
## n= 11405
#re-scaled beta's
(betaHat <- -coef(fit.1.aft) / fit.1.aft$scale)
## (Intercept) educ.high rural2 poor I(age/5)
## -2.513034921 -0.147370824 0.058459730 -0.003324933 -0.100450148
## age2
## 0.006594256
#beta's from the PH model
coef(fit.1)
## educ.high rural2 poor I(age/5) age2
## -0.147370824 0.058459730 -0.003324933 -0.100450148 0.006594256
## log(scale)
## 2.513034921
#log-normal distribution for hazard
fit.3<-phreg(Surv(agefm, fmevent)~educ.high+rural2+poor+I(age/5)+age2, data=sub3[sub3$agefm>0,], dist="lognormal", center=T)
summary(fit.3)
## Call:
## phreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 + poor +
## I(age/5) + age2, data = sub3[sub3$agefm > 0, ], dist = "lognormal",
## center = T)
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## (Intercept) 1.544 0.178 0.000
## educ.high 0.172 -0.585 0.557 0.030 0.000
## rural2 0.697 0.268 1.307 0.026 0.000
## poor 0.420 0.008 1.008 0.022 0.725
## I(age/5) 6.235 -0.651 0.522 0.043 0.000
## age2 41.621 0.042 1.043 0.003 0.000
##
## log(scale) 2.710 0.015 0.000
## log(shape) 1.816 0.033 0.000
##
## Events 11405
## Total time at risk 196750
## Max. log. likelihood -30158
## LR test statistic 1453.17
## Degrees of freedom 5
## Overall p-value 0
plot(fit.3)
#plot the hazard from the log normal vs the empirical hazard
plot(fit.3, fn="haz")
lines(fit.haz.sm, col=2)
We now see the age effect completely gone from the model.
So, the log-normal model fits the empirical hazard pretty well up to ~20 years, where the empirical rate increase at a lower rate after 20 years.
Log-logistic Model
#log-normal distribution for hazard
fit.4<-phreg(Surv(agefm, fmevent)~educ.high+rural2+I(age/5)+age2, data=sub3[sub3$agefm>0,], dist="loglogistic", center=T)
summary(fit.4)
## Call:
## phreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 + I(age/5) +
## age2, data = sub3[sub3$agefm > 0, ], dist = "loglogistic",
## center = T)
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## (Intercept) 1.987 0.149 0.000
## educ.high 0.172 -0.562 0.570 0.030 0.000
## rural2 0.697 0.253 1.288 0.023 0.000
## I(age/5) 6.235 -0.650 0.522 0.044 0.000
## age2 41.621 0.043 1.044 0.003 0.000
##
## log(scale) 2.784 0.006 0.000
## log(shape) 2.246 0.016 0.000
##
## Events 11405
## Total time at risk 196750
## Max. log. likelihood -30227
## LR test statistic 1245.91
## Degrees of freedom 4
## Overall p-value 0
plot(fit.4)
#plot the hazard from the log normal vs the empirical hazard
plot(fit.4, fn="haz")
lines(fit.haz.sm, col=2)
# here I must supply the times for the "pieces" where I expect the hazard to be constant
fit.5<-phreg(Surv(agefm, fmevent)~educ.high+rural2+I(age/5)+age2, data=sub3[sub3$agefm>0,], dist="pch", cuts=c(7, 10,13,15,20,23))
summary(fit.5)
## Call:
## phreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 + I(age/5) +
## age2, data = sub3[sub3$agefm > 0, ], dist = "pch", cuts = c(7,
## 10, 13, 15, 20, 23))
##
## Covariate W.mean Coef Exp(Coef) se(Coef) Wald p
## educ.high 0.172 -0.511 0.600 0.030 0.000
## rural2 0.697 0.229 1.257 0.024 0.000
## I(age/5) 6.235 -0.510 0.601 0.043 0.000
## age2 41.621 0.033 1.033 0.003 0.000
##
##
## Events 11405
## Total time at risk 196750
## Max. log. likelihood -31828
## LR test statistic 996.13
## Degrees of freedom 4
## Overall p-value 0
plot(fit.5)
plot(fit.5, fn="haz", ylim=c(0, .5))
lines(fit.haz.sm, col=2)
We may want to compare the models to one another based off AIC values. the eha package doesn’t give this to you, so we must calculate it:
AIC1<--2*fit.1$loglik[2]+2*length(fit.1$coefficients); AIC1
## [1] 87716.84
AIC2<--2*fit.2$loglik[2]+2*length(fit.2$coefficients); AIC2
## [1] 63364.98
AIC3<--2*fit.3$loglik[2]+2*length(fit.3$coefficients); AIC3
## [1] 60331.95
AIC4<--2*fit.4$loglik[2]+2*length(fit.4$coefficients); AIC4
## [1] 60468.6
AIC5<--2*fit.5$loglik[2]+2*length(fit.5$coefficients); AIC5
## [1] 63664.75
Which looks like it actually fits the data pretty good. The AIC’s show the log-normal is still fitting better.
emp<-coxreg(Surv(agefm, fmevent)~educ.high+rural2+I(age/5)+age2, data=sub3[sub3$agefm>0,])
check.dist(sp=emp,pp=fit.1, main = "Empirical vs. Exponential")
check.dist(sp=emp,pp=fit.2, main = "Empirical vs. Weibull")
check.dist(sp=emp,pp=fit.3, main = "Empirical vs. Log-Normal")
check.dist(sp=emp,pp=fit.4, main = "Empirical vs. Log-Logistic")
check.dist(sp=emp,pp=fit.5, main = "Empirical vs. PCH")
We see that the PCH model and the log-Normal models both appear to fit the empirical hazard function better than the other parametric models.
survey.fit <- withReplicates(rep.des, quote(coef(survreg(Surv(agefm, fmevent)~educ.high+rural2+poor +I(age/5)+age2,dist="lognormal", weights = .weights+.0001))))
survey.est<-as.data.frame(survey.fit)
survey.test<-data.frame(beta = rownames(survey.est), estimate=survey.est$theta, se.est= survey.est$SE)
survey.test$t<-survey.test$estimate/survey.test$se.est
survey.test$pval<-2*pnorm(survey.test$t,lower.tail = F )
survey.test
## beta estimate se.est t pval
## 1 (Intercept) 2.586382764 0.0413285887 62.5809602 0.000000e+00
## 2 educ.high 0.171339975 0.0102406561 16.7313474 7.746582e-63
## 3 rural2 -0.025472394 0.0114195679 -2.2305918 1.974292e+00
## 4 poor -0.003263082 0.0088316658 -0.3694753 1.288226e+00
## 5 I(age/5) 0.066791982 0.0124048276 5.3843539 7.270534e-08
## 6 age2 -0.004552940 0.0009821052 -4.6358988 1.999996e+00
fit.2.aft<-survreg(Surv(agefm, fmevent)~educ.high+rural2+poor+I(age/5)+age2 + age2, data=sub3[sub3$agefm>0,],dist = "lognormal")
fit.2.aft.sum<-summary(fit.2.aft)
fit.2.aft.sum
##
## Call:
## survreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 +
## poor + I(age/5) + age2 + age2, data = sub3[sub3$agefm > 0,
## ], dist = "lognormal")
## Value Std. Error z p
## (Intercept) 2.54000 0.027215 93.33 0.00e+00
## educ.high 0.14540 0.006128 23.73 1.89e-124
## rural2 -0.05396 0.005322 -10.14 3.70e-24
## poor 0.00466 0.004513 1.03 3.01e-01
## I(age/5) 0.08778 0.008612 10.19 2.14e-24
## age2 -0.00595 0.000664 -8.97 3.05e-19
## Log(scale) -1.57754 0.006621 -238.26 0.00e+00
##
## Scale= 0.206
##
## Log Normal distribution
## Loglik(model)= -30391.6 Loglik(intercept only)= -31003.5
## Chisq= 1223.84 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 3
## n= 11405
summary(fit.2.aft)
##
## Call:
## survreg(formula = Surv(agefm, fmevent) ~ educ.high + rural2 +
## poor + I(age/5) + age2 + age2, data = sub3[sub3$agefm > 0,
## ], dist = "lognormal")
## Value Std. Error z p
## (Intercept) 2.54000 0.027215 93.33 0.00e+00
## educ.high 0.14540 0.006128 23.73 1.89e-124
## rural2 -0.05396 0.005322 -10.14 3.70e-24
## poor 0.00466 0.004513 1.03 3.01e-01
## I(age/5) 0.08778 0.008612 10.19 2.14e-24
## age2 -0.00595 0.000664 -8.97 3.05e-19
## Log(scale) -1.57754 0.006621 -238.26 0.00e+00
##
## Scale= 0.206
##
## Log Normal distribution
## Loglik(model)= -30391.6 Loglik(intercept only)= -31003.5
## Chisq= 1223.84 on 5 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 3
## n= 11405
#Compare the se's of the parameters
survey.test$se.est/sqrt(diag(fit.2.aft.sum$var[-6, -6]))
## (Intercept) educ.high rural2 poor I(age/5) Log(scale)
## 1.5186110 1.6711439 2.1458376 1.9569308 1.4403422 0.1483272
#survey based errors are larger, as they should be.
cor.test(sub3$rural2, sub3$educ.high, method = c("pearson", "kendall", "spearman"))
##
## Pearson's product-moment correlation
##
## data: sub3$rural2 and sub3$educ.high
## t = -62.19, df = 15681, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4572681 -0.4321581
## sample estimates:
## cor
## -0.4448005
This shows that secondary education and residence rural are correlated to eachother with (cor :-0.4448005) and p-value:0.000
The statistical distribution and the associated factors was investigated. Five parametric survival models were fitted with the covariates, to determine the best parametric survival model that best described the waiting time to first marriage of the women. The results showed that, the time to first marriage is best modeled by the log-normal model, the median age at first marriage among Ethiopian womnen is 18.
An investigation into the impact of the prognostic factors on the waiting time by the Log-normal model evidenced that, the place of residence of a woman, her educational level, her wealth status and region were the statistically significant factors that determined her time to first marrriage.
Older women in this sample are more likely to be involved in first marriage than younger women.Women residing in rural areas are 6% (OR: 1.06) times more likely to be involved in first marriage than women residing in urban areas. Women with higher education are 13.7% (OR:0.836) less likely to be involved in early first marriage than women with no or lower education.