library(haven)
library(survival)
library(car)
library(survey)
library(survminer)
library(ggplot2)
library(ggpubr)
library(muhaz)
library(car)
library(dplyr)
library(eha)
library(haven)
#load the data
dat3<-read_dta("https://github.com/coreysparks/data/blob/master/ZZIR62FL.DTA?raw=true")
dat3<-zap_labels(dat3)
Event: Event of 2nd birth
Time duration: 1st birth to 2nd birth
Independent variable: place of residence (Rural&Urban) and religion (Islam&Hinduism)
Censoring indicator: no 2nd child during the interview
Risk set:Women had 1st child
table(is.na(dat3$bidx_01))
##
## FALSE TRUE
## 6161 2187
#now we extract those women
sub<-subset(dat3, dat3$bidx_01==1&dat3$b0_01==0)
#Here I keep only a few of the variables for the dates,
#and some characteristics of the women, and details of the survey
sub2<-data.frame(CASEID=sub$caseid,
int.cmc=sub$v008,
fbir.cmc=sub$b3_01,
sbir.cmc=sub$b3_02,
marr.cmc=sub$v509,
rural=sub$v025,
religion=sub$v130,
educ=sub$v106,
age=sub$v012,
partneredu=sub$v701,
partnerage=sub$v730,
weight=sub$v005/1000000,
psu=sub$v021, strata=sub$v022)
sub2$agefb = (sub2$age - (sub2$int.cmc - sub2$fbir.cmc)/12)
Calculation of observed and censored
sub2$secbi<-ifelse(is.na(sub2$sbir.cmc)==T,
((sub2$int.cmc))-((sub2$fbir.cmc)),
(sub2$fbir.cmc-sub2$sbir.cmc))
sub2$b2event<-ifelse(is.na(sub2$sbir.cmc)==T,0,1)
Creating covariates:
place of residence: rural & urban
Religion: Islam & Hinduism
sub2$rururb <- ifelse(sub2$rural==2,0,1)
sub2$relg <- Recode(sub2$religion, recodes="1='Islam'; 2='Hinduism'; else=NA", as.factor=T)
# sub2$educ.high<-ifelse(sub2$educ %in% c(2,3), 1, 0)
# sub2$age2<-(sub2$agefb/5)^2
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~psu, strata=~strata,
data=sub2[sub2$secbi>0,], weight=~weight )
rep.des<-as.svrepdesign(des, type="bootstrap" )
Cox model
fit.cox2<-coxph(Surv(secbi,b2event)~rururb+relg,
data=sub2)
summary(fit.cox2)
## Call:
## coxph(formula = Surv(secbi, b2event) ~ rururb + relg, data = sub2)
##
## n= 5987, number of events= 4760
## (39 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## rururb -0.33333 0.71653 0.03125 -10.667 < 2e-16 ***
## relgIslam -0.18242 0.83325 0.03773 -4.835 1.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## rururb 0.7165 1.396 0.6740 0.7618
## relgIslam 0.8333 1.200 0.7739 0.8972
##
## Concordance= 0.551 (se = 0.004 )
## Likelihood ratio test= 164.4 on 2 df, p=<2e-16
## Wald test = 157.8 on 2 df, p=<2e-16
## Score (logrank) test = 159.4 on 2 df, p=<2e-16
plot(survfit(fit.cox2), xlim=c(0,90),
ylab="S(t)", xlab="Age in Months")
title(main="Survival Function for Second Birth Interval")
Interpretation: The urban survival time for 2nd birth is 30% lower compared to rural and there is 17% less likely to have 2nd children among Hindus compared to Muslims. And the result is significant for both cases as the p-value is less than .0
Interaction terms
fit.cox3<-coxph(Surv(secbi,b2event)~rururb*relg,
data=sub2)
summary(fit.cox3)
## Call:
## coxph(formula = Surv(secbi, b2event) ~ rururb * relg, data = sub2)
##
## n= 5987, number of events= 4760
## (39 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## rururb -0.28490 0.75209 0.03491 -8.161 3.32e-16 ***
## relgIslam -0.08311 0.92025 0.04923 -1.688 0.09138 .
## rururb:relgIslam -0.22762 0.79643 0.07595 -2.997 0.00273 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## rururb 0.7521 1.330 0.7024 0.8054
## relgIslam 0.9202 1.087 0.8356 1.0135
## rururb:relgIslam 0.7964 1.256 0.6863 0.9243
##
## Concordance= 0.551 (se = 0.004 )
## Likelihood ratio test= 173.4 on 3 df, p=<2e-16
## Wald test = 160.4 on 3 df, p=<2e-16
## Score (logrank) test = 163.4 on 3 df, p=<2e-16
plot(survfit(fit.cox3), xlim=c(0,90),
ylab="S(t)", xlab="Age in Months")
title(main="Survival Function for Second Birth Interval")
Using survey design
des<-svydesign(ids=~psu, strata = ~strata ,
weights=~weight, data=sub2)
cox.s<-svycoxph(Surv(secbi,b2event)~rururb+relg,
design=des)
summary(cox.s)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (217) clusters.
## svydesign(ids = ~psu, strata = ~strata, weights = ~weight, data = sub2)
## Call:
## svycoxph(formula = Surv(secbi, b2event) ~ rururb + relg, design = des)
##
## n= 5987, number of events= 4760
## (39 observations deleted due to missingness)
##
## coef exp(coef) se(coef) robust se z Pr(>|z|)
## rururb -0.41792 0.65841 0.03100 0.04417 -9.462 < 2e-16 ***
## relgIslam -0.21441 0.80702 0.03794 0.06279 -3.415 0.000639 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## rururb 0.6584 1.519 0.6038 0.7179
## relgIslam 0.8070 1.239 0.7136 0.9127
##
## Concordance= 0.563 (se = 0.006 )
## Likelihood ratio test= NA on 2 df, p=NA
## Wald test = 115.2 on 2 df, p=<2e-16
## Score (logrank) test = NA on 2 df, p=NA
##
## (Note: the likelihood ratio and score tests assume independence of
## observations within a cluster, the Wald and robust score tests do not).
dat<-expand.grid(rururb=c(0,1), relg=c(1,2), b2event=1)
#head(dat3)
Creating plot
plot(survfit(fit.cox2, conf.int = F), xlim=c(0, 90), ylab="S(t)",
xlab="Months")
title (main = "Survival Plots for for Second Birth Interval")
lines(survfit(fit.cox2,
newdata=data.frame(rururb=0, relg=1),
conf.int = F), col="red")
lines(survfit(fit.cox2,
newdata=data.frame(rururb=0, relg=2),
conf.int = F), col="blue")
lines(survfit(fit.cox2,
newdata=data.frame(rururb=1, relg=1),
conf.int = F), col="green")
lines(survfit(fit.cox2,
newdata=data.frame(rururb=1, relg=2),
conf.int = F), col="yellow")
legend("bottomleft", legend=c("Means of Covariates", "rural, Islam", "rural, Hinduism", "urban, Islam", "urban, Hiduism"), lty=1, col=c(1, "red", "blue", "green", "yellow"), cex=.8)
Interpretation: Urban Hindus had higher survival time to have second birth across all the groups. rural Muslims had the lowest survival time compared to any other other group. This also implies that urban Hindus had lower hazard of 2nd birth and rural Muslims had the highest hazard compared to other groups.
Cumulative hazard functions for 2nd birth (place of residence & Religion)
sf1<-survfit(fit.cox2)
sf2<-survfit(fit.cox2,
newdata=data.frame(rururb=0, relg=1))
sf3<-survfit(fit.cox2,
newdata=data.frame(rururb=0, relg=2))
sf4<-survfit(fit.cox2,
newdata=data.frame(rururb=1, relg=1))
sf5<-survfit(fit.cox2,
newdata=data.frame(rururb=1, relg=2))
H1<--log(sf1$surv)
H2<--log(sf2$surv)
H3<--log(sf3$surv)
H4<--log(sf4$surv)
H5<--log(sf5$surv)
times<-sf1$time
plot(H1~times, type="s", ylab="H(t)",ylim=c(0, 6), xlab="Months")
lines(H2~times,col="red", type="s")
lines(H3~times,col="blue", type="s")
lines(H4~times, col="green", type="s")
lines(H5~times, col="yellow", type="s")
legend("topright",
legend=c("Means of Covariates", "rural, Islam", "rural, Hinduism", "urban, Islam", "urban, Hinduism"),
lty=1, col=c(1,"red", "green"), cex=.8)
Hazard functions for 2nd birth (Rural & Religion)
hs1<-loess(diff(c(0,H1))~times, degree=1, span=.25)
hs2<-loess(diff(c(0,H2))~times, degree=1, span=.25)
hs3<-loess(diff(c(0,H3))~times, degree=1, span=.25)
plot(predict(hs1)~times,type="l", ylab="smoothed h(t)", xlab="Months",
ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs2)~times, type="l", col="red")
lines(predict(hs3)~times, type="l", col="green")
legend("topright",
legend=c("Means of Covariates", "rural, Islam", "rural, Hinduism"),
lty=1, col=c(1,"red", "green"), cex=.8)
Interpretation: Right after the first birth both religious group had higher events however, Rural Muslims had higher tendency of 2nd birth in a shorter period of time compared to urban Hindus.
Hazard functions for 2nd birth (Urban & Religion)
hs1<-loess(diff(c(0,H1))~times, degree=1, span=.25)
hs4<-loess(diff(c(0,H4))~times, degree=1, span=.25)
hs5<-loess(diff(c(0,H5))~times, degree=1, span=.25)
plot(predict(hs1)~times,type="l", ylab="smoothed h(t)", xlab="Months",
ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs4)~times, type="l", col="red")
lines(predict(hs5)~times, type="l", col="green")
legend("topright",
legend=c("Means of Covariates", "Urban, Islam", "Urban, Hinduism"),
lty=1, col=c(1,"red", "green"), cex=.8)
Interpretation: Right after the first birth both religious group had higher events however, Urban Muslims had higher tendency of 2nd birth in a shorter period of time compared to urban Hindus.
Interaction terms:
Cumulative hazard functions for 2nd birth (place of residence & Religion)
sf6<-survfit(fit.cox3)
sf7<-survfit(fit.cox3,
newdata=data.frame(rururb=0, relg=1))
sf8<-survfit(fit.cox3,
newdata=data.frame(rururb=0, relg=2))
sf9<-survfit(fit.cox3,
newdata=data.frame(rururb=1, relg=1))
sf10<-survfit(fit.cox3,
newdata=data.frame(rururb=1, relg=2))
H11<--log(sf6$surv)
H12<--log(sf7$surv)
H13<--log(sf8$surv)
H14<--log(sf9$surv)
H15<--log(sf10$surv)
times<-sf1$time
plot(H11~times, type="s", ylab="H(t)",ylim=c(0, 6), xlab="Months")
lines(H12~times,col="red", type="s")
lines(H13~times,col="blue", type="s")
lines(H14~times, col="green", type="s")
lines(H15~times, col="yellow", type="s")
legend("topright",
legend=c("Means of Covariates", "rural*Islam", "rural*Hinduism", "urban*Islam", "urban*Hinduism"),
lty=1, col=c(1,"red", "green", "blue", "yellow"), cex=.8)
Hazard functions for 2nd birth (Rural & Religion)
hs11<-loess(diff(c(0,H11))~times, degree=1, span=.25)
hs12<-loess(diff(c(0,H12))~times, degree=1, span=.25)
hs13<-loess(diff(c(0,H13))~times, degree=1, span=.25)
plot(predict(hs11)~times,type="l", ylab="smoothed h(t)", xlab="Months",
ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs12)~times, type="l", col="red")
lines(predict(hs13)~times, type="l", col="green")
legend("topright",
legend=c("Means of Covariates", "rural*Islam", "rural*Hinduism"),
lty=1, col=c(1,"red", "green"), cex=.8)
Hazard functions for 2nd birth (Urban & Religion)
hs11<-loess(diff(c(0,H11))~times, degree=1, span=.25)
hs14<-loess(diff(c(0,H14))~times, degree=1, span=.25)
hs15<-loess(diff(c(0,H15))~times, degree=1, span=.25)
plot(predict(hs11)~times,type="l", ylab="smoothed h(t)", xlab="Months",
ylim=c(0, .04))
title(main="Smoothed hazard plots")
lines(predict(hs14)~times, type="l", col="red")
lines(predict(hs15)~times, type="l", col="green")
legend("topright",
legend=c("Means of Covariates", "Urban*Islam", "Urban*Hinduism"),
lty=1, col=c(1,"red", "green"), cex=.8)