HomeWork 4 - Cox regression, DEM 7223 - Event History Analysis
load library
library(tidyverse)
library(haven)
library(survival)
library(car)
library(survey)
library(muhaz)
library(eha)
library(ggsurvfit)
library(dplyr)# load the data
d1<-read_dta("GHKR72DT/GHKR72FL.DTA")
d1<-zap_label(d1)tyhe outcome variable is the event of a child dying before age 5.
d2<-d1 %>%
mutate(caseid=caseid,
death.age=b7,
int.cmc=v008,
fbir.cmc=b3,
marr.cmc=v509,
birth_date=b3,
child_alive=b5,
highses=v190,
educ=v106,
partnedu=v701,
age=v012,
rural=v025,
agec=cut(v012, breaks = seq(15,50,5), include.lowest=T),
partnerage=v730,
weight=v005/1000000,
psu=v021,
strata=v022) %>%
select(caseid,death.age,int.cmc,fbir.cmc,marr.cmc,fbir.cmc,birth_date,child_alive,highses,educ,rural,agec,partnedu, partnerage, weight, psu, strata) %>%
mutate(highses =car::Recode(highses, recodes ="1:3 =0; 4:5 =1; else =NA"))
d2$death.age<-ifelse(d2$child_alive==1,
((((d2$int.cmc)))- (((d2$birth_date))))
,d2$death.age)
# censoring indicator for death by age 1, in months (12 months)
d2$d.event<-ifelse(is.na(d2$death.age) ==T| d2$death.age > 12,0,1)
d2$d.eventfac<-factor(d2$d.event)
levels(d2$d.eventfac)<-c("Alive at 1", "Death by 1")#Creating covariates
d2$educ.high<-ifelse(d2$educ %in% c(2,3), 1, 0)
d2$partnhiedu<-ifelse(d2$partnedu<3,0,
ifelse(d2$partnedu%in%c(8,9),NA,1 ))options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu, strata = ~strata,
data = d2, weight = ~weight)Fitting the Cox proportional Harzard Model
fit.cox<- coxph(Surv(death.age,d.event)~highses+rural+partnhiedu+educ.high + agec, data = d2)
summary(fit.cox)Call:
coxph(formula = Surv(death.age, d.event) ~ highses + rural +
partnhiedu + educ.high + agec, data = d2)
n= 5407, number of events= 1393
(477 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
highses -0.01252 0.98756 0.08141 -0.154 0.877828
rural -0.06417 0.93785 0.06868 -0.934 0.350172
partnhiedu 0.17275 1.18857 0.09608 1.798 0.072188 .
educ.high -0.03300 0.96754 0.06098 -0.541 0.588386
agec(20,25] -0.44491 0.64088 0.12023 -3.701 0.000215 ***
agec(25,30] -0.57020 0.56541 0.11766 -4.846 1.26e-06 ***
agec(30,35] -0.60272 0.54732 0.11969 -5.036 4.76e-07 ***
agec(35,40] -0.68377 0.50471 0.12589 -5.431 5.59e-08 ***
agec(40,45] -0.85388 0.42576 0.15644 -5.458 4.81e-08 ***
agec(45,50] -0.92629 0.39602 0.25264 -3.666 0.000246 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
highses 0.9876 1.0126 0.8419 1.1584
rural 0.9378 1.0663 0.8197 1.0730
partnhiedu 1.1886 0.8413 0.9846 1.4349
educ.high 0.9675 1.0336 0.8585 1.0904
agec(20,25] 0.6409 1.5603 0.5063 0.8112
agec(25,30] 0.5654 1.7686 0.4490 0.7121
agec(30,35] 0.5473 1.8271 0.4329 0.6920
agec(35,40] 0.5047 1.9813 0.3943 0.6460
agec(40,45] 0.4258 2.3487 0.3133 0.5785
agec(45,50] 0.3960 2.5251 0.2414 0.6498
Concordance= 0.542 (se = 0.008 )
Likelihood ratio test= 44.66 on 10 df, p=3e-06
Wald test = 47.84 on 10 df, p=7e-07
Score (logrank) test = 48.94 on 10 df, p=4e-07
Cox proportional Harzard Model with Interactions
fit.cox.interact<- coxph(Surv(death.age,d.event)~highses + rural + educ.high + agec+ (highses*educ.high), data = d2)
summary(fit.cox.interact)Call:
coxph(formula = Surv(death.age, d.event) ~ highses + rural +
educ.high + agec + (highses * educ.high), data = d2)
n= 5884, number of events= 1550
coef exp(coef) se(coef) z Pr(>|z|)
highses 0.0375638 1.0382782 0.1173073 0.320 0.749
rural -0.0244564 0.9758403 0.0641674 -0.381 0.703
educ.high 0.0001252 1.0001252 0.0638928 0.002 0.998
agec(20,25] -0.5210585 0.5938916 0.0962215 -5.415 6.12e-08 ***
agec(25,30] -0.6557914 0.5190312 0.0949502 -6.907 4.96e-12 ***
agec(30,35] -0.6999526 0.4966089 0.0978609 -7.153 8.52e-13 ***
agec(35,40] -0.7803758 0.4582338 0.1056496 -7.386 1.51e-13 ***
agec(40,45] -0.9605718 0.3826740 0.1400025 -6.861 6.83e-12 ***
agec(45,50] -1.0426143 0.3525319 0.2436453 -4.279 1.88e-05 ***
highses:educ.high -0.0425246 0.9583669 0.1330335 -0.320 0.749
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
highses 1.0383 0.9631 0.8250 1.3067
rural 0.9758 1.0248 0.8605 1.1066
educ.high 1.0001 0.9999 0.8824 1.1335
agec(20,25] 0.5939 1.6838 0.4918 0.7172
agec(25,30] 0.5190 1.9267 0.4309 0.6252
agec(30,35] 0.4966 2.0137 0.4099 0.6016
agec(35,40] 0.4582 2.1823 0.3725 0.5637
agec(40,45] 0.3827 2.6132 0.2908 0.5035
agec(45,50] 0.3525 2.8366 0.2187 0.5683
highses:educ.high 0.9584 1.0434 0.7384 1.2439
Concordance= 0.552 (se = 0.008 )
Likelihood ratio test= 75.58 on 10 df, p=4e-12
Wald test = 84.17 on 10 df, p=8e-14
Score (logrank) test = 87.29 on 10 df, p=2e-14
Plotting of Survival function and cummulative hazard function
plot(survfit(fit.cox),
xlim=c(0,5),
ylab="S(t)",
xlab="Age in Months")
title(main="Survival Plot for Under Five Mortality")Cox proportional Harzard Model with Interactions
sf1 <- survfit(fit.cox)
sf2 <- survfit(fit.cox,
newdata=data.frame(educ.high=0,
rural=1,
partnhiedu=0,
highses=1,
agec="(25,30]"))
sf3 <- survfit(fit.cox,
newdata=data.frame(educ.high=1,
rural=1,
partnhiedu=0,
highses=1,
agec="(25,30]"))
H1<--log(sf1$surv)
H2<--log(sf2$surv)
H3<--log(sf3$surv)
times <- sf1$time
hs1<-loess(diff(c(0,H1))~times, degree=1, span=.25)
hs2<-loess(diff(c(0,H2))~times, degree=1, span=.25)
hs3<-loess(diff(c(0,H3))~times, degree=1, span=.25)
plot(predict(hs1)~times,type="l", ylab="smoothed h(t)", xlab="Months",
ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs2)~times, type="l", col="red")
lines(predict(hs3)~times, type="l", col="green")
legend("topright",
legend=c("Means of Covariates",
"low edu, highses", "high edu,lowses", "rural, agec 25-30"),
lty=1, col=c(1,"red", "green"), cex=.8)Checking Proportionality of Hazards in the model
cox.s <- coxph(Surv(death.age,d.event)~highses+rural+partnhiedu+educ.high + agec, data = d2)
summary(cox.s)Call:
coxph(formula = Surv(death.age, d.event) ~ highses + rural +
partnhiedu + educ.high + agec, data = d2)
n= 5407, number of events= 1393
(477 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
highses -0.01252 0.98756 0.08141 -0.154 0.877828
rural -0.06417 0.93785 0.06868 -0.934 0.350172
partnhiedu 0.17275 1.18857 0.09608 1.798 0.072188 .
educ.high -0.03300 0.96754 0.06098 -0.541 0.588386
agec(20,25] -0.44491 0.64088 0.12023 -3.701 0.000215 ***
agec(25,30] -0.57020 0.56541 0.11766 -4.846 1.26e-06 ***
agec(30,35] -0.60272 0.54732 0.11969 -5.036 4.76e-07 ***
agec(35,40] -0.68377 0.50471 0.12589 -5.431 5.59e-08 ***
agec(40,45] -0.85388 0.42576 0.15644 -5.458 4.81e-08 ***
agec(45,50] -0.92629 0.39602 0.25264 -3.666 0.000246 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
highses 0.9876 1.0126 0.8419 1.1584
rural 0.9378 1.0663 0.8197 1.0730
partnhiedu 1.1886 0.8413 0.9846 1.4349
educ.high 0.9675 1.0336 0.8585 1.0904
agec(20,25] 0.6409 1.5603 0.5063 0.8112
agec(25,30] 0.5654 1.7686 0.4490 0.7121
agec(30,35] 0.5473 1.8271 0.4329 0.6920
agec(35,40] 0.5047 1.9813 0.3943 0.6460
agec(40,45] 0.4258 2.3487 0.3133 0.5785
agec(45,50] 0.3960 2.5251 0.2414 0.6498
Concordance= 0.542 (se = 0.008 )
Likelihood ratio test= 44.66 on 10 df, p=3e-06
Wald test = 47.84 on 10 df, p=7e-07
Score (logrank) test = 48.94 on 10 df, p=4e-07
# Schoenfield test
cox.zph(cox.s) chisq df p
highses 0.1021 1 0.75
rural 0.0803 1 0.78
partnhiedu 0.4503 1 0.50
educ.high 0.9780 1 0.32
agec 9.7605 6 0.14
GLOBAL 11.3824 10 0.33
par(mfrow=c(2, 2))
plot(cox.zph(cox.s))The formal test using the weighted residuals and the graphical representation suggested strong evidence of non-proportional hazards.