Discrete time model_HW4

Event: 2nd birth

Time duration: birth interval of <=2 years, <=4 years, or <=6 years

Independent variable: Religion

Censoring indicator: no 2nd child by the time of the interview

Risk set: Each time point has a risk set for 2nd birth

Data: DHS, 2014

library(haven)
library(survival)
library(car)
library(survey)
library(survminer)
library(ggplot2)
library(ggpubr)
library(muhaz)
library(car)
library(dplyr)

Loading data

# Read the DHS Stata file and strip haven's value labels in one step, so the
# coded variables can be compared against plain numbers further down.
# NOTE(review): the path is absolute and specific to one machine.
dta <- zap_labels(
  read_dta("C:/Users/nahin/Google Drive/MSc Demography/Fall 2020/Event History/Term paper/BD_2014_DHS_09042018_147_123449/BDIR72DT/BDIR72FL.dta")
)

Creating subset

# Keep the respondents whose birth-history slot 01 satisfies bidx_01 == 1 and
# b0_01 == 0; subset() also drops rows where that condition evaluates to NA.
# NOTE(review): in DHS recode files index 01 is presumably the most recent
# birth, which would make the fbir/sbir names below misleading -- confirm
# against the recode manual.
sub <- subset(dta, bidx_01 == 1 & b0_01 == 0)

# Analysis frame: one row per retained respondent, with the raw DHS columns
# renamed to the working names used in the rest of the script.
sub2 <- with(sub, data.frame(
  CASEID     = caseid,
  int.cmc    = v008,
  fbir.cmc   = b3_01,
  sbir.cmc   = b3_02,
  marr.cmc   = v509,
  rural      = v025,
  educ       = v106,
  religion   = v130,
  age        = v012,
  partneredu = v701,
  partnerage = v730,
  weight     = v005 / 1000000,  # rescale the raw weight column
  psu        = v021,
  strata     = v022
))

# Mother's age (years) at the birth dated fbir.cmc: current age minus the time
# elapsed between that birth and the interview, converted from months to years.
sub2$agefb <- sub2$age - (sub2$int.cmc - sub2$fbir.cmc) / 12

# Duration (secbi) and event indicator (b2event) for the birth-interval
# analysis. The *.cmc columns are presumably DHS century-month codes, so the
# differences are in months -- confirm.
#   * sbir.cmc missing: no second recorded birth, so the case is censored and
#     the duration runs from fbir.cmc to the interview date.
#   * sbir.cmc present: the event occurred and the duration is the gap between
#     the two birth dates.
sub2$secbi <- ifelse(is.na(sub2$sbir.cmc),
                     sub2$int.cmc - sub2$fbir.cmc,
                     sub2$fbir.cmc - sub2$sbir.cmc)

# 1 = second birth observed, 0 = censored at interview.
sub2$b2event <- ifelse(is.na(sub2$sbir.cmc), 0, 1)
table(sub2$b2event)

## 
##     0     1 
##  3975 11959

# Complex-survey design restricted to cases with a positive duration.
# survey.lonely.psu = "adjust" selects the survey package's "adjust" handling
# for strata that contain a single PSU.
# Note: this design is built before rururb and educ.high are added to sub2 and
# `des` is reassigned further down, so it does not carry those covariates.
options(survey.lonely.psu = "adjust")
des <- svydesign(ids = ~psu, strata = ~strata,
                 data = sub2[sub2$secbi > 0, ],
                 weights = ~weight)  # full argument name, no partial matching

Creating covariates:

place of residence: rural & urban

Education: less than high school and more than high school

# Residence dummy: 0 when rural == 2, 1 for any other non-missing code.
# (presumably v025 is coded 1 = urban, 2 = rural -- confirm)
sub2$rururb <- ifelse(sub2$rural != 2, 1, 0)

# Education dummy: 1 when educ is 2 or 3, otherwise 0 (missing values give 0
# because %in% never returns NA).
sub2$educ.high <- as.numeric(sub2$educ %in% c(2, 3))


# Rebuild the survey design now that sub2 carries the derived covariates,
# again keeping only cases with a positive duration.
options(survey.lonely.psu = "adjust")
des <- svydesign(ids = ~psu, strata = ~strata,
                 data = sub2[sub2$secbi > 0, ],
                 weights = ~weight)  # full argument name, no partial matching

# Replicate-weights version of the same design using bootstrap replicates.
# NOTE(review): no seed is set before this call, so the replicate weights are
# presumably not reproducible between runs -- confirm whether that matters.
rep.des <- as.svrepdesign(des, type = "bootstrap")

Question a: You must form a person-period data set

The person-period file preparation

# Expand each case with a positive duration into one row per 24-month episode
# (cut points at 0, 24, 48 and 72 months); the episode counter is stored in
# year_birth.
pp <- survSplit(
  Surv(secbi, b2event) ~ .,
  data    = sub2[sub2$secbi > 0, ],
  cut     = c(0, 24, 48, 72),
  episode = "year_birth"
)

# Shift the episode counter down by one to get the period index used below.
pp[["year"]] <- pp[["year_birth"]] - 1

# Sort rows by respondent and then by episode.
pp <- pp[with(pp, order(CASEID, year_birth)), ]

# Show the first 20 person-period rows.
head(pp[, c("CASEID", "secbi", "b2event", "year", "religion", "agefb")],
     n = 20)

##             CASEID secbi b2event year religion    agefb
## 1          1  3  2    24       0    1        1 24.91667
## 2          1  3  2    48       0    2        1 24.91667
## 3          1  3  2    72       0    3        1 24.91667
## 4          1  3  2   110       1    4        1 24.91667
## 5          1  6  2    24       0    1        1 33.75000
## 6          1  6  2    48       0    2        1 33.75000
## 7          1  6  2    72       0    3        1 33.75000
## 8          1  6  2   114       1    4        1 33.75000
## 9          1  9  2    24       0    1        1 19.83333
## 10         1  9  2    30       1    2        1 19.83333
## 11         1 20  2    24       0    1        1 23.00000
## 12         1 20  2    48       0    2        1 23.00000
## 13         1 20  2    72       0    3        1 23.00000
## 14         1 20  2    98       1    4        1 23.00000
## 15         1 23  3    24       0    1        1 24.08333
## 16         1 23  3    48       0    2        1 24.08333
## 17         1 23  3    64       1    3        1 24.08333
## 18         1 26  1    24       0    1        1 32.08333
## 19         1 26  1    48       0    2        1 32.08333
## 20         1 26  1    72       0    3        1 32.08333

Question b: Consider both the general model and other time specifications

Descriptive analysis

# Empirical discrete-time hazard: the share of person-period rows in which the
# event occurs, for each period index. Each `year` value is one 24-month
# episode from the person-period split, not a calendar year.
# The proportions are unweighted (the survey weights are not used here).
pp %>%
  group_by(year) %>%
  summarise(prop_bir = mean(b2event, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = year, y = prop_bir)) +
  geom_line() +
  ggtitle(label = "Hazard of having a second birth by year after first birth")

## `summarise()` ungrouping output (override with `.groups` argument)

# Same empirical hazard, computed separately for each value of rururb and drawn
# as one line per group. `.groups = "drop"` returns an ungrouped summary instead
# of leaving it grouped by year.
# The proportions are unweighted (the survey weights are not used here).
pp %>%
  group_by(year, rururb) %>%
  summarise(prop_bir = mean(b2event, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = year, y = prop_bir)) +
  geom_line(aes(group = factor(rururb), color = factor(rururb))) +
  ggtitle(label = "Hazard of having a second birth by year after first birth and Rural&Urban")

## `summarise()` regrouping output by 'year' (override with `.groups` argument)

Interpretation:

In the 3-4 year time interval, the probability of a 2nd birth is higher than in the other time periods.

Rural areas have a higher probability of a 2nd birth than urban areas at each time point.