Let’s load the dataset.
#-------Dataset
adopt <- read.dta("~/Dropbox/event_history/dta/adopt_singleevent.dta")
#-------Data prepation
# Create survival object for dv
dv <- Surv(adopt$'_t', adopt$'_d')
First, we run each of the models.
# The conditional logit model is the same as the exact discrete method we used earlier
ed_mod <- coxph(dv ~ mooneymean, data = adopt, ties = "exact")
### Can't run Royston-Parmar model
# Weibull P/H/
weib_mod <- survreg(dv ~ mooneymean, data = adopt, dist = "weibull")
Again, we have to convert the Weibull A.F.T. to a P.H. parameterization.
# Conversion from Weibull A.F.T. to P.H.
intercept <- rep(1, 50)
adopt$intercept <- intercept
weib_mod1 <- survreg(dv ~ 0 + mooneymean + intercept, data = adopt)
weib_mod2 <- survreg(dv ~ 0 + intercept + mooneymean, data = adopt)
weib_mod1_ph <- ConvertWeibull(weib_mod1)
weib_mod2_ph <- ConvertWeibull(weib_mod2)
weib_ph <- rbind(weib_mod1_ph$vars, weib_mod2_ph$vars)
# Delete rows that are redundant/unnecessary
weib_ph <- weib_ph[-c(1,2,4),]
weib_ph <- weib_ph[c("mooneymean", "gamma", "intercept"),]
weib_ph
## Estimate SE
## mooneymean -0.2227881 0.08239561
## gamma 0.9830112 0.12000313
## intercept -2.0966280 0.31945773
Let’s format these models into a presentable table.
| Cox Model | Weibull Model | ||
|---|---|---|---|
| Pre-Roe | -0.22 (0.09) | -0.22 (0.08) | |
| Constant | -2.10 (0.32) | ||
| Log Likelihood | -96.60 | -133.07 | |
| N | 50 | 50 | |
| Shape Parameter | 0.98 | ||
We first calculate the baseline hazard for the Weibull.
# Let's create lambda from the Weibull model we ran earlier
lambda_base <- unname(exp(-(weib_mod$coef[1])))
p <- 1/weib_mod$scale
t <- adopt$'_t'
# We cangenerate the baseline hazard function, knowing that
# h(t) = lambda * p * (lambda * t)^(p-1)
haz_baseweib <- lambda_base * p * (lambda_base * t)^(p-1)
weib <- data.frame(cbind(t, haz_baseweib))
We do the same for the Cox model we already ran earlier.
# Calculates integrated baseline hazard, H(t)
haz_rte <- basehaz(ed_mod, centered = FALSE)
# To get the baseline hazard, we calculate H(t) - H(t-1), which gives us the corresponding # value for all obs. except for the first.
haz_cox <- data.frame(diff(haz_rte$hazard))
# Take out H(t) at t = 1 and merge with previous calculations
row <- data.frame(0.2208064)
colnames(row) <- "diff.haz_rte.hazard."
haz_cox <- rbind(row, haz_cox)
colnames(haz_cox) <- "baseline_hazard"
# Merge baseline hazards into master dataframe with integrated hazards
haz_rte$haz_cox <- haz_cox
#Drop last row for graphical purposes
haz_rte <- haz_rte[-c(16), ]
### Calculate baseline hazard rate for Royston-Parmar model
We are ready to plot now.
ggplot(data = haz_rte, aes(x = time, y = haz_cox, color = "black")) + geom_step() +
geom_line(data = weib, aes(x = t, y = haz_baseweib, color = "red")) +
theme_bw() +
ggtitle("Hazard Functions") +
xlab("Years Since Roe vs. Wade") +
ylab("") +
labs(colour = "Model") +
scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))