Chapter 4: Cabinet Data in R

Table 4.4

First, we run each of the models.

# Run Cox model with the Breslow approximation
b_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
               data = cabinet, ties = "breslow")

# Run Cox model with the Efron approximation

ef_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
                data = cabinet)

# Run Cox model with the Exact Discrete apprximation 

ed_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
                data = cabinet, ties = "exact")

Here is the output, with the standard errors in parantheses:

Cox Model of Cabinet Durations
	Breslow	Efron	Exact
Investiture	0.38 (0.14)	0.39 (0.14)	0.41 (0.14)
Polarization	0.02 (0.01)	0.02 (0.01)	0.02 (0.01)
Majority	-0.57 (0.13)	-0.58 (0.13)	-0.62 (0.14)
Formation	0.13 (0.04)	0.13 (0.04)	0.13 (0.05)
Post-Election	-0.83 (0.14)	-0.86 (0.14)	-0.88 (0.15)
Caretaker	1.54 (0.28)	1.71 (0.28)	1.86 (0.33)
Log Likelihood	-1299.89	-1287.74	-918.29
N	314	314	314

Table 4.5

Let’s compare the Cox Efron approximation with the Weibull proportional hazards.

To convert from the A.F.T. to Prop. Hazards parameterization, we will use the ConvertWeibull function from Chap. 3. We will prepare the data the same way we did in Chap. 3.

intercept <- rep(1, 314)

cabinet$intercept <- intercept

weib_mod1 <- survreg(dv ~ 0 + intercept + invest + polar + numst+ format + postelec
                     + caretakr, data = cabinet)

weib_mod2 <- survreg(dv ~ 0  + invest + intercept + polar + numst+ format + postelec
                     + caretakr, data = cabinet)

weib_mod1_ph <- ConvertWeibull(weib_mod1)

weib_mod2_ph <- ConvertWeibull(weib_mod2)

weib_ph <- rbind(weib_mod1_ph$vars, weib_mod2_ph$vars)


# Delete rows that are redundant/unnecessary 
weib_ph <- weib_ph[-c(1,2,9,12,13,14,15,16),]

weib_ph <- weib_ph[c("intercept", "invest", "polar", "numst", "format", "postelec",
                     "caretakr", "gamma"),]

weib_ph

##              Estimate          SE
## intercept -3.86270244 0.262780694
## invest     0.38274582 0.137174519
## polar      0.02321558 0.005585448
## numst     -0.60149819 0.131047758
## format     0.13245769 0.043749027
## postelec  -0.87931813 0.137687010
## caretakr   1.72601174 0.275890539
## gamma      1.29385218 0.064767331

Let’s format this into a presentable table and compare it with the Cox Efron approximation.

Cox and Weibull Estimates of Cabinet Duration
	Cox	Weibull
Investiture	0.39 (0.14)	0.38 (0.14)
Polarization	0.02 (0.01)	0.02 (0.01)
Majority	-0.58 (0.13)	-0.60 (0.13)
Formation	0.13 (0.04)	0.13 (0.04)
Post-Election	-0.86 (0.14)	-0.88 (0.14)
Caretaker	1.71 (0.28)	1.73 (0.28)
Constant		-3.86 (0.26)
Log Likelihood	-1287.74	-1014.62
N	314	314
Shape Parameter		1.29

Figure 4.1

In preparation for the plots, we mean center the polarization and formation attempts covariates.

cabinet$polarmean <- cabinet$polar - mean(cabinet$polar)
cabinet$formmean <- cabinet$format - mean(cabinet$format)

We run Cox and Weibull models with these mean centered variables. We will use these models for out plots.

cox_mod_plot <- coxph(dv ~ invest + polarmean + numst + formmean + postelec + caretakr,
                 data = cabinet)

weib_mod_plot <- survreg(dv ~invest + polarmean + numst + formmean + postelec
                    + caretakr, data = cabinet)

We now calculate the baseline integrated hazard function, baseline survivor function, and baseline hazard function from the Cox model.

# Calculate baseline integrated hazard function H(t) 
haz_rte <- basehaz(cox_mod_plot, centered = FALSE)

# Convert H(t) to the baseline hazard, h(t). h(t) = H(t) - H(t-1) 

haz_cox <- data.frame(diff(haz_rte$hazard))

# Take out H(t) at t = 1 and merge with previous calculations

row <- data.frame(0.03594616)
colnames(row) <- "diff.haz_rte.hazard."
haz_cox <- rbind(row, haz_cox)
colnames(haz_cox) <- "baseline_hazard"

# Merge baseline hazards into master dataframe with integrated hazards
haz_rte$haz_cox <- haz_cox

# Calculate baseline survivor function
surv_basecox <- exp(-haz_rte$hazard)

# Merge baseline survivor values into master dataframe with integrated hazards
haz_rte$surv_basecox <- surv_basecox

# A glimpse of the values for all three functions 
head(haz_rte)

##   time     hazard baseline_hazard surv_basecox
## 1  0.5 0.03594616      0.03594616    0.9646922
## 2  1.0 0.06974011      0.03379395    0.9326362
## 3  2.0 0.11021916      0.04047905    0.8956378
## 4  3.0 0.19579836      0.08557921    0.8221780
## 5  4.0 0.24758576      0.05178740    0.7806833
## 6  5.0 0.35689931      0.10931355    0.6998430

We do the same for the Weibull model.

# Plot baseline survivor function for Weibull

# Let's create lambda from Weibull model

lambda_base <- unname(exp(-(weib_mod_plot$coef[1])))

# Because S(t) = exp(-(lambda*t))^p, we can generate the survivor function for the
# baseline case. 

p <- 1/weib_mod_plot$scale

t <- cabinet$'_t'

surv_baseweib <- exp(-(lambda_base * t)^p)

weib <- data.frame(cbind(t, surv_baseweib))

# We can also generate the baseline hazard function, knowing that 
# h(t) = lambda * p * (lambda * t)^(p-1)

haz_baseweib <- lambda_base * p * (lambda_base * t)^(p-1)

weib$haz_baseweib <- haz_baseweib

# The baseline integrated hazard is thus H(t) = -log(S(t))

inthaz_baseweib <- -log(surv_baseweib)

weib$inthaz_baseweib <- inthaz_baseweib

# A glimpse of the values for all three functions 

head(weib)

##     t surv_baseweib haz_baseweib inthaz_baseweib
## 1 0.5     0.9843950   0.04069950      0.01572803
## 2 1.0     0.9621718   0.04989390      0.03856229
## 3 1.0     0.9621718   0.04989390      0.03856229
## 4 2.0     0.9097843   0.06116539      0.09454773
## 5 0.5     0.9843950   0.04069950      0.01572803
## 6 2.0     0.9097843   0.06116539      0.09454773

Using the values from the haz_rte and weib dataframes, we can plot the values for the baseline survivor, integrated hazard, and baseline hazards for both models. Let’s start with the baseline survivor function.

base_surv <- ggplot(data = haz_rte, aes(x = time, y = surv_basecox, color = "black")) + geom_step() + geom_line(data = weib, aes(x = t, y = surv_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Baseline Survivor Function") +
  xlab("Cabinet Duration") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Here is the integrated hazard function.

int_haz <- ggplot(data = haz_rte, aes(x = time, y = hazard, color = "black")) + geom_step() + geom_line(data = weib, aes(x = t, y = inthaz_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Baseline Integrated Hazard Function") +
  xlab("Cabinet Duration") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Lastly, this is the baseline hazard.

#Drop last seven rows for graphical purposes

haz_rte <- haz_rte[-c(48:54),]

#Plot

base_haz <- ggplot(data = haz_rte, aes(x = time, y = haz_cox, color = "black")) + geom_step() + geom_line(data = weib, aes(x = t, y = haz_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Baseline Hazard Function") +
  xlab("Cabinet Duration") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Lastly, we can arrange all the plots together.

grid.arrange(base_surv, int_haz, base_haz, ncol = 2)