HomeWork 4 - Cox regression, DEM 7223 - Event History Analysis

Author

Assogba Albert

Published

October 6, 2022

load library

library(tidyverse)
library(haven)
library(survival)
library(car)
library(survey)
library(muhaz)
library(eha)
library(ggsurvfit)
library(dplyr)
# load the data

d1<-read_dta("GHKR72DT/GHKR72FL.DTA")
d1<-zap_label(d1)

tyhe outcome variable is the event of a child dying before age 5.

d2<-d1 %>%
    mutate(caseid=caseid,
           death.age=b7,
           int.cmc=v008,
           fbir.cmc=b3,
           marr.cmc=v509,
           birth_date=b3,
           child_alive=b5,
           highses=v190,
           educ=v106,
           partnedu=v701,
           age=v012,
           rural=v025,
           agec=cut(v012, breaks = seq(15,50,5), include.lowest=T),
           partnerage=v730,
           weight=v005/1000000,
           psu=v021,
           strata=v022) %>% 
  select(caseid,death.age,int.cmc,fbir.cmc,marr.cmc,fbir.cmc,birth_date,child_alive,highses,educ,rural,agec,partnedu, partnerage, weight, psu, strata) %>% 
    mutate(highses =car::Recode(highses, recodes ="1:3 =0; 4:5 =1; else =NA")) 
 

d2$death.age<-ifelse(d2$child_alive==1,
                      ((((d2$int.cmc)))- (((d2$birth_date))))
                      ,d2$death.age)

# censoring indicator for death by age 1, in months (12 months)

d2$d.event<-ifelse(is.na(d2$death.age) ==T| d2$death.age > 12,0,1)
d2$d.eventfac<-factor(d2$d.event)
levels(d2$d.eventfac)<-c("Alive at 1", "Death by 1")

#Creating covariates

d2$educ.high<-ifelse(d2$educ %in% c(2,3), 1, 0)
d2$partnhiedu<-ifelse(d2$partnedu<3,0,
                           ifelse(d2$partnedu%in%c(8,9),NA,1 ))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu, strata = ~strata,
               data = d2, weight = ~weight)

Fitting the Cox proportional Harzard Model

fit.cox<- coxph(Surv(death.age,d.event)~highses+rural+partnhiedu+educ.high + agec, data = d2)

summary(fit.cox)
Call:
coxph(formula = Surv(death.age, d.event) ~ highses + rural + 
    partnhiedu + educ.high + agec, data = d2)

  n= 5407, number of events= 1393 
   (477 observations deleted due to missingness)

                coef exp(coef) se(coef)      z Pr(>|z|)    
highses     -0.01252   0.98756  0.08141 -0.154 0.877828    
rural       -0.06417   0.93785  0.06868 -0.934 0.350172    
partnhiedu   0.17275   1.18857  0.09608  1.798 0.072188 .  
educ.high   -0.03300   0.96754  0.06098 -0.541 0.588386    
agec(20,25] -0.44491   0.64088  0.12023 -3.701 0.000215 ***
agec(25,30] -0.57020   0.56541  0.11766 -4.846 1.26e-06 ***
agec(30,35] -0.60272   0.54732  0.11969 -5.036 4.76e-07 ***
agec(35,40] -0.68377   0.50471  0.12589 -5.431 5.59e-08 ***
agec(40,45] -0.85388   0.42576  0.15644 -5.458 4.81e-08 ***
agec(45,50] -0.92629   0.39602  0.25264 -3.666 0.000246 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

            exp(coef) exp(-coef) lower .95 upper .95
highses        0.9876     1.0126    0.8419    1.1584
rural          0.9378     1.0663    0.8197    1.0730
partnhiedu     1.1886     0.8413    0.9846    1.4349
educ.high      0.9675     1.0336    0.8585    1.0904
agec(20,25]    0.6409     1.5603    0.5063    0.8112
agec(25,30]    0.5654     1.7686    0.4490    0.7121
agec(30,35]    0.5473     1.8271    0.4329    0.6920
agec(35,40]    0.5047     1.9813    0.3943    0.6460
agec(40,45]    0.4258     2.3487    0.3133    0.5785
agec(45,50]    0.3960     2.5251    0.2414    0.6498

Concordance= 0.542  (se = 0.008 )
Likelihood ratio test= 44.66  on 10 df,   p=3e-06
Wald test            = 47.84  on 10 df,   p=7e-07
Score (logrank) test = 48.94  on 10 df,   p=4e-07

Cox proportional Harzard Model with Interactions

fit.cox.interact<- coxph(Surv(death.age,d.event)~highses + rural + educ.high + agec+ (highses*educ.high), data = d2)

summary(fit.cox.interact)
Call:
coxph(formula = Surv(death.age, d.event) ~ highses + rural + 
    educ.high + agec + (highses * educ.high), data = d2)

  n= 5884, number of events= 1550 

                        coef  exp(coef)   se(coef)      z Pr(>|z|)    
highses            0.0375638  1.0382782  0.1173073  0.320    0.749    
rural             -0.0244564  0.9758403  0.0641674 -0.381    0.703    
educ.high          0.0001252  1.0001252  0.0638928  0.002    0.998    
agec(20,25]       -0.5210585  0.5938916  0.0962215 -5.415 6.12e-08 ***
agec(25,30]       -0.6557914  0.5190312  0.0949502 -6.907 4.96e-12 ***
agec(30,35]       -0.6999526  0.4966089  0.0978609 -7.153 8.52e-13 ***
agec(35,40]       -0.7803758  0.4582338  0.1056496 -7.386 1.51e-13 ***
agec(40,45]       -0.9605718  0.3826740  0.1400025 -6.861 6.83e-12 ***
agec(45,50]       -1.0426143  0.3525319  0.2436453 -4.279 1.88e-05 ***
highses:educ.high -0.0425246  0.9583669  0.1330335 -0.320    0.749    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                  exp(coef) exp(-coef) lower .95 upper .95
highses              1.0383     0.9631    0.8250    1.3067
rural                0.9758     1.0248    0.8605    1.1066
educ.high            1.0001     0.9999    0.8824    1.1335
agec(20,25]          0.5939     1.6838    0.4918    0.7172
agec(25,30]          0.5190     1.9267    0.4309    0.6252
agec(30,35]          0.4966     2.0137    0.4099    0.6016
agec(35,40]          0.4582     2.1823    0.3725    0.5637
agec(40,45]          0.3827     2.6132    0.2908    0.5035
agec(45,50]          0.3525     2.8366    0.2187    0.5683
highses:educ.high    0.9584     1.0434    0.7384    1.2439

Concordance= 0.552  (se = 0.008 )
Likelihood ratio test= 75.58  on 10 df,   p=4e-12
Wald test            = 84.17  on 10 df,   p=8e-14
Score (logrank) test = 87.29  on 10 df,   p=2e-14

Plotting of Survival function and cummulative hazard function

plot(survfit(fit.cox), 
xlim=c(0,5), 
ylab="S(t)", 
xlab="Age in Months") 
title(main="Survival Plot for Under Five Mortality")

Cox proportional Harzard Model with Interactions

sf1 <- survfit(fit.cox)

sf2 <- survfit(fit.cox,
newdata=data.frame(educ.high=0,
                   rural=1,
                   partnhiedu=0,
                   highses=1,
                   agec="(25,30]"))

sf3 <- survfit(fit.cox,
newdata=data.frame(educ.high=1, 
                   rural=1, 
                   partnhiedu=0,
                   highses=1,
                   agec="(25,30]"))

H1<--log(sf1$surv)
H2<--log(sf2$surv)
H3<--log(sf3$surv)


times <- sf1$time
hs1<-loess(diff(c(0,H1))~times, degree=1, span=.25)
hs2<-loess(diff(c(0,H2))~times, degree=1, span=.25)
hs3<-loess(diff(c(0,H3))~times, degree=1, span=.25)


plot(predict(hs1)~times,type="l", ylab="smoothed h(t)", xlab="Months",
ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs2)~times, type="l", col="red")
lines(predict(hs3)~times, type="l", col="green")


legend("topright",
legend=c("Means of Covariates",
"low edu, highses", "high edu,lowses", "rural, agec 25-30"),
lty=1, col=c(1,"red", "green"), cex=.8)

Checking Proportionality of Hazards in the model

cox.s <- coxph(Surv(death.age,d.event)~highses+rural+partnhiedu+educ.high + agec, data = d2)

summary(cox.s)
Call:
coxph(formula = Surv(death.age, d.event) ~ highses + rural + 
    partnhiedu + educ.high + agec, data = d2)

  n= 5407, number of events= 1393 
   (477 observations deleted due to missingness)

                coef exp(coef) se(coef)      z Pr(>|z|)    
highses     -0.01252   0.98756  0.08141 -0.154 0.877828    
rural       -0.06417   0.93785  0.06868 -0.934 0.350172    
partnhiedu   0.17275   1.18857  0.09608  1.798 0.072188 .  
educ.high   -0.03300   0.96754  0.06098 -0.541 0.588386    
agec(20,25] -0.44491   0.64088  0.12023 -3.701 0.000215 ***
agec(25,30] -0.57020   0.56541  0.11766 -4.846 1.26e-06 ***
agec(30,35] -0.60272   0.54732  0.11969 -5.036 4.76e-07 ***
agec(35,40] -0.68377   0.50471  0.12589 -5.431 5.59e-08 ***
agec(40,45] -0.85388   0.42576  0.15644 -5.458 4.81e-08 ***
agec(45,50] -0.92629   0.39602  0.25264 -3.666 0.000246 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

            exp(coef) exp(-coef) lower .95 upper .95
highses        0.9876     1.0126    0.8419    1.1584
rural          0.9378     1.0663    0.8197    1.0730
partnhiedu     1.1886     0.8413    0.9846    1.4349
educ.high      0.9675     1.0336    0.8585    1.0904
agec(20,25]    0.6409     1.5603    0.5063    0.8112
agec(25,30]    0.5654     1.7686    0.4490    0.7121
agec(30,35]    0.5473     1.8271    0.4329    0.6920
agec(35,40]    0.5047     1.9813    0.3943    0.6460
agec(40,45]    0.4258     2.3487    0.3133    0.5785
agec(45,50]    0.3960     2.5251    0.2414    0.6498

Concordance= 0.542  (se = 0.008 )
Likelihood ratio test= 44.66  on 10 df,   p=3e-06
Wald test            = 47.84  on 10 df,   p=7e-07
Score (logrank) test = 48.94  on 10 df,   p=4e-07
# Schoenfield test
cox.zph(cox.s)
             chisq df    p
highses     0.1021  1 0.75
rural       0.0803  1 0.78
partnhiedu  0.4503  1 0.50
educ.high   0.9780  1 0.32
agec        9.7605  6 0.14
GLOBAL     11.3824 10 0.33
par(mfrow=c(2, 2))
plot(cox.zph(cox.s))

The formal test using the weighted residuals and the graphical representation suggested strong evidence of non-proportional hazards.