library(haven)
library(survival)
library(car)
library(survey)
library(survminer)
library(ggplot2)
library(ggpubr)
library(muhaz)
library(car)
library(dplyr)
library(eha)
library(haven)
#load the data
dat3<-read_dta("https://github.com/coreysparks/data/blob/master/ZZIR62FL.DTA?raw=true")
dat3<-zap_labels(dat3)

Event: Event of 2nd birth

Time duration: 1st birth to 2nd birth

Independent variable: place of residence (Rural&Urban) and religion (Islam&Hinduism)

Censoring indicator: no 2nd child during the interview

Risk set:Women had 1st child

table(is.na(dat3$bidx_01))
## 
## FALSE  TRUE 
##  6161  2187
#now we extract those women
sub<-subset(dat3, dat3$bidx_01==1&dat3$b0_01==0)
#Here I keep only a few of the variables for the dates,
#and some characteristics of the women, and details of the survey
sub2<-data.frame(CASEID=sub$caseid, 
                 int.cmc=sub$v008,
                 fbir.cmc=sub$b3_01,
                 sbir.cmc=sub$b3_02,
                 marr.cmc=sub$v509,
                 rural=sub$v025,
                 religion=sub$v130,
                 educ=sub$v106,
                 age=sub$v012,
                 partneredu=sub$v701,
                 partnerage=sub$v730,
                 weight=sub$v005/1000000,
                 psu=sub$v021, strata=sub$v022)
sub2$agefb = (sub2$age - (sub2$int.cmc - sub2$fbir.cmc)/12)

Calculation of observed and censored

sub2$secbi<-ifelse(is.na(sub2$sbir.cmc)==T, 
                   ((sub2$int.cmc))-((sub2$fbir.cmc)),
                   (sub2$fbir.cmc-sub2$sbir.cmc))
sub2$b2event<-ifelse(is.na(sub2$sbir.cmc)==T,0,1) 

Creating covariates:

place of residence: rural & urban

Religion: Islam & Hinduism

sub2$rururb <- ifelse(sub2$rural==2,0,1)
sub2$relg <- Recode(sub2$religion, recodes="1='Islam'; 2='Hinduism'; else=NA", as.factor=T)

# sub2$educ.high<-ifelse(sub2$educ %in% c(2,3), 1, 0)
# sub2$age2<-(sub2$agefb/5)^2

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu, strata=~strata,
               data=sub2[sub2$secbi>0,], weight=~weight )

rep.des<-as.svrepdesign(des, type="bootstrap" )

Cox model

fit.cox2<-coxph(Surv(secbi,b2event)~rururb+relg,
                data=sub2)
summary(fit.cox2)
## Call:
## coxph(formula = Surv(secbi, b2event) ~ rururb + relg, data = sub2)
## 
##   n= 5987, number of events= 4760 
##    (39 observations deleted due to missingness)
## 
##               coef exp(coef) se(coef)       z Pr(>|z|)    
## rururb    -0.33333   0.71653  0.03125 -10.667  < 2e-16 ***
## relgIslam -0.18242   0.83325  0.03773  -4.835 1.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## rururb       0.7165      1.396    0.6740    0.7618
## relgIslam    0.8333      1.200    0.7739    0.8972
## 
## Concordance= 0.551  (se = 0.004 )
## Likelihood ratio test= 164.4  on 2 df,   p=<2e-16
## Wald test            = 157.8  on 2 df,   p=<2e-16
## Score (logrank) test = 159.4  on 2 df,   p=<2e-16
plot(survfit(fit.cox2),   xlim=c(0,90),
     ylab="S(t)", xlab="Age in Months")
title(main="Survival Function for Second Birth Interval")

Interpretation: The urban survival time for 2nd birth is 30% lower compared to rural and there is 17% less likely to have 2nd children among Hindus compared to Muslims. And the result is significant for both cases as the p-value is less than .0

Interaction terms

fit.cox3<-coxph(Surv(secbi,b2event)~rururb*relg,
                data=sub2)
summary(fit.cox3)
## Call:
## coxph(formula = Surv(secbi, b2event) ~ rururb * relg, data = sub2)
## 
##   n= 5987, number of events= 4760 
##    (39 observations deleted due to missingness)
## 
##                      coef exp(coef) se(coef)      z Pr(>|z|)    
## rururb           -0.28490   0.75209  0.03491 -8.161 3.32e-16 ***
## relgIslam        -0.08311   0.92025  0.04923 -1.688  0.09138 .  
## rururb:relgIslam -0.22762   0.79643  0.07595 -2.997  0.00273 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## rururb              0.7521      1.330    0.7024    0.8054
## relgIslam           0.9202      1.087    0.8356    1.0135
## rururb:relgIslam    0.7964      1.256    0.6863    0.9243
## 
## Concordance= 0.551  (se = 0.004 )
## Likelihood ratio test= 173.4  on 3 df,   p=<2e-16
## Wald test            = 160.4  on 3 df,   p=<2e-16
## Score (logrank) test = 163.4  on 3 df,   p=<2e-16
plot(survfit(fit.cox3),   xlim=c(0,90),
     ylab="S(t)", xlab="Age in Months")
title(main="Survival Function for Second Birth Interval")

Using survey design

des<-svydesign(ids=~psu, strata = ~strata ,
               weights=~weight, data=sub2)
cox.s<-svycoxph(Surv(secbi,b2event)~rururb+relg,
                design=des)
summary(cox.s)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (217) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~weight, data = sub2)
## Call:
## svycoxph(formula = Surv(secbi, b2event) ~ rururb + relg, design = des)
## 
##   n= 5987, number of events= 4760 
##    (39 observations deleted due to missingness)
## 
##               coef exp(coef) se(coef) robust se      z Pr(>|z|)    
## rururb    -0.41792   0.65841  0.03100   0.04417 -9.462  < 2e-16 ***
## relgIslam -0.21441   0.80702  0.03794   0.06279 -3.415 0.000639 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## rururb       0.6584      1.519    0.6038    0.7179
## relgIslam    0.8070      1.239    0.7136    0.9127
## 
## Concordance= 0.563  (se = 0.006 )
## Likelihood ratio test= NA  on 2 df,   p=NA
## Wald test            = 115.2  on 2 df,   p=<2e-16
## Score (logrank) test = NA  on 2 df,   p=NA
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
dat<-expand.grid(rururb=c(0,1), relg=c(1,2), b2event=1)
#head(dat3)

Creating plot

plot(survfit(fit.cox2, conf.int = F), xlim=c(0, 90),  ylab="S(t)",
     xlab="Months")
title (main = "Survival Plots for for Second Birth Interval")
lines(survfit(fit.cox2,
              newdata=data.frame(rururb=0, relg=1),
              conf.int = F), col="red")
lines(survfit(fit.cox2,
              newdata=data.frame(rururb=0, relg=2),
              conf.int = F), col="blue")

lines(survfit(fit.cox2,
              newdata=data.frame(rururb=1, relg=1),
              conf.int = F), col="green")
lines(survfit(fit.cox2,
              newdata=data.frame(rururb=1, relg=2),
              conf.int = F), col="yellow")
legend("bottomleft", legend=c("Means of Covariates", "rural, Islam", "rural, Hinduism", "urban, Islam", "urban, Hiduism"), lty=1, col=c(1, "red", "blue", "green", "yellow"), cex=.8)

Interpretation: Urban Hindus had higher survival time to have second birth across all the groups. rural Muslims had the lowest survival time compared to any other other group. This also implies that urban Hindus had lower hazard of 2nd birth and rural Muslims had the highest hazard compared to other groups.

Cumulative hazard functions for 2nd birth (place of residence & Religion)

sf1<-survfit(fit.cox2)
sf2<-survfit(fit.cox2,
             newdata=data.frame(rururb=0, relg=1))
sf3<-survfit(fit.cox2,
             newdata=data.frame(rururb=0, relg=2))
sf4<-survfit(fit.cox2,
             newdata=data.frame(rururb=1, relg=1))
sf5<-survfit(fit.cox2,
             newdata=data.frame(rururb=1, relg=2))
H1<--log(sf1$surv)
H2<--log(sf2$surv)
H3<--log(sf3$surv)
H4<--log(sf4$surv)
H5<--log(sf5$surv)
times<-sf1$time

plot(H1~times, type="s",  ylab="H(t)",ylim=c(0, 6), xlab="Months")
lines(H2~times,col="red", type="s")
lines(H3~times,col="blue", type="s")
lines(H4~times, col="green", type="s")
lines(H5~times, col="yellow", type="s")

legend("topright",
       legend=c("Means of Covariates", "rural, Islam", "rural, Hinduism", "urban, Islam", "urban, Hinduism"),
       lty=1, col=c(1,"red", "green"), cex=.8)

Hazard functions for 2nd birth (Rural & Religion)

hs1<-loess(diff(c(0,H1))~times, degree=1, span=.25)
hs2<-loess(diff(c(0,H2))~times, degree=1, span=.25)
hs3<-loess(diff(c(0,H3))~times, degree=1, span=.25)
plot(predict(hs1)~times,type="l", ylab="smoothed h(t)", xlab="Months",
    ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs2)~times, type="l", col="red")
lines(predict(hs3)~times, type="l", col="green")
legend("topright",
       legend=c("Means of Covariates", "rural, Islam", "rural, Hinduism"),
      lty=1, col=c(1,"red", "green"), cex=.8)

Interpretation: Right after the first birth both religious group had higher events however, Rural Muslims had higher tendency of 2nd birth in a shorter period of time compared to urban Hindus.

Hazard functions for 2nd birth (Urban & Religion)

hs1<-loess(diff(c(0,H1))~times, degree=1, span=.25)
hs4<-loess(diff(c(0,H4))~times, degree=1, span=.25)
hs5<-loess(diff(c(0,H5))~times, degree=1, span=.25)
plot(predict(hs1)~times,type="l", ylab="smoothed h(t)", xlab="Months",
     ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs4)~times, type="l", col="red")
lines(predict(hs5)~times, type="l", col="green")
legend("topright",
       legend=c("Means of Covariates", "Urban, Islam", "Urban, Hinduism"),
      lty=1, col=c(1,"red", "green"), cex=.8)

Interpretation: Right after the first birth both religious group had higher events however, Urban Muslims had higher tendency of 2nd birth in a shorter period of time compared to urban Hindus.

Interaction terms:

Cumulative hazard functions for 2nd birth (place of residence & Religion)

sf6<-survfit(fit.cox3)
sf7<-survfit(fit.cox3,
             newdata=data.frame(rururb=0, relg=1))
sf8<-survfit(fit.cox3,
             newdata=data.frame(rururb=0, relg=2))
sf9<-survfit(fit.cox3,
             newdata=data.frame(rururb=1, relg=1))
sf10<-survfit(fit.cox3,
             newdata=data.frame(rururb=1, relg=2))
H11<--log(sf6$surv)
H12<--log(sf7$surv)
H13<--log(sf8$surv)
H14<--log(sf9$surv)
H15<--log(sf10$surv)
times<-sf1$time

plot(H11~times, type="s",  ylab="H(t)",ylim=c(0, 6), xlab="Months")
lines(H12~times,col="red", type="s")
lines(H13~times,col="blue", type="s")
lines(H14~times, col="green", type="s")
lines(H15~times, col="yellow", type="s")

legend("topright",
       legend=c("Means of Covariates", "rural*Islam", "rural*Hinduism", "urban*Islam", "urban*Hinduism"),
       lty=1, col=c(1,"red", "green", "blue", "yellow"), cex=.8)

Hazard functions for 2nd birth (Rural & Religion)

hs11<-loess(diff(c(0,H11))~times, degree=1, span=.25)
hs12<-loess(diff(c(0,H12))~times, degree=1, span=.25)
hs13<-loess(diff(c(0,H13))~times, degree=1, span=.25)
plot(predict(hs11)~times,type="l", ylab="smoothed h(t)", xlab="Months",
    ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs12)~times, type="l", col="red")
lines(predict(hs13)~times, type="l", col="green")
legend("topright",
       legend=c("Means of Covariates", "rural*Islam", "rural*Hinduism"),
      lty=1, col=c(1,"red", "green"), cex=.8)

Hazard functions for 2nd birth (Urban & Religion)

hs11<-loess(diff(c(0,H11))~times, degree=1, span=.25)
hs14<-loess(diff(c(0,H14))~times, degree=1, span=.25)
hs15<-loess(diff(c(0,H15))~times, degree=1, span=.25)
plot(predict(hs11)~times,type="l", ylab="smoothed h(t)", xlab="Months",
     ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs14)~times, type="l", col="red")
lines(predict(hs15)~times, type="l", col="green")
legend("topright",
       legend=c("Means of Covariates", "Urban*Islam", "Urban*Hinduism"),
      lty=1, col=c(1,"red", "green"), cex=.8)