Chapter 3: UN Data in R

Let’s load the packages we will need and the UN dataset.

#---- Packages 
library(foreign)
library(survival)
library(survminer)
library(flexsurv)
library(texreg)
library(ggplot2)
library(SurvRegCensCov)
library(gridExtra)   # for grid.arrange() in Chap. 4
library(dplyr)       # for %>% in Chap. 5
library(tidyr)       # for drop_na() in Chap. 5
#-------Dataset
un <- read.dta("~/Dropbox/event_history/dta/UNFINAL.dta")

We create the survival object for our dependent variable, using the syntax Surv(time, event).

#--------Data preparation

dv <- Surv(un$'_t', un$'_d')

Table 3.1

First, we run the exponential and Weibull A.F.T. models.

# Exponential Model

exp_mod <- survreg(dv ~ civil + interst, data = un, dist = "exp")

# Weibull A.F.T

weib_mod <- survreg(dv ~ civil + interst, data = un, dist = "weib")

The Weibull Prop. Hazards model is a little more involved to run, because the default in R is the accelerated failure time (A.F.T.) parameterization. To convert the Weibull A.F.T. to Prop. Hazards, we will use the ConvertWeibull function. Because this function does not report the converted intercept or its standard error, however, we will first have to create an intercept term manually by adding a vector of 1s to the dataset. We will then enter the intercept term as a covariate in the model.

intercept <- rep(1,58)

un$intercept <- intercept

We run the Weibull A.F.T. models with this intercept term. The ConvertWeibull function drops the first term, so we have to run two identical models with the terms ordered differently to get the conversions for all the variables.

weib_mod1 <- survreg(dv ~ 0 + civil + intercept + interst, data = un)

weib_mod2 <- survreg(dv ~ 0 + intercept + civil + interst, data = un)

weib_mod1_ph <- ConvertWeibull(weib_mod1)

weib_mod2_ph <- ConvertWeibull(weib_mod2)

We can extract the coefficients and standard errors from these two models, and delete the redundant rows.

#Extract the coefficients and standard errors

weib_ph <- rbind(weib_mod1_ph$vars, weib_mod2_ph$vars)

# Delete rows that are redundant/unnecessary 
weib_ph <- weib_ph[-c(1,2,5,8),]

weib_ph <- weib_ph[c("intercept", "civil", "interst", "gamma"),]

weib_ph
##             Estimate         SE
## intercept -3.4599094 0.49528585
## civil      0.8879243 0.38320176
## interst   -1.4014415 0.51178181
## gamma      0.8068950 0.09988463
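
As a cross-check, the conversion can also be done by hand: for a Weibull model fit by survreg, the P.H. coefficients equal the A.F.T. coefficients divided by the negative of the scale, and the shape parameter is gamma = 1/scale. A minimal sketch (the helper aft_to_ph is introduced here for illustration; it reproduces the point estimates above, though not the delta-method standard errors that ConvertWeibull supplies):

# Hand conversion from A.F.T. to P.H.: beta_PH = -beta_AFT / scale, gamma = 1 / scale

aft_to_ph <- function(fit) {
  list(coef_ph = -coef(fit) / fit$scale, gamma = 1 / fit$scale)
}

aft_to_ph(weib_mod)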
Weibull Model of U.N. Peacekeeping Missions

                      Exponential Model   Weibull A.F.T.   Weibull Prop. Hazards
Constant               4.35 (0.21)         4.29 (0.27)     -3.46 (0.50)
Civil War             -1.17 (0.36)        -1.10 (0.45)      0.89 (0.38)
Interstate Conflict    1.64 (0.50)         1.74 (0.62)     -1.40 (0.51)
Shape Parameter        1.00                1.24             0.81
Log Likelihood        -202.85             -201.15          -201.15
N                      54                  54               54

Figure 3.1

# Using the Weibull A.F.T. model we ran earlier, we first calculate lambda for each conflict type, using the fact that lambda = exp(-beta'x)
lambda_civil <- unname(exp(-(weib_mod$coef[1] + weib_mod$coef[2])))

lambda_interst <- unname(exp(-(weib_mod$coef[1] + weib_mod$coef[3])))

lambda_icw <- unname(exp(-(weib_mod$coef[1])))
# We can generate the hazard rate for each covariate profile, knowing that h(t) = 
# lambda * p * (lambda * t)^(p-1)

p <- 1/weib_mod$scale
t <- un$'_t'

hazard_civil <- lambda_civil * p * (lambda_civil * t)^(p-1)

hazard_interstate <- lambda_interst * p * (lambda_interst * t)^(p-1)

hazard_icw <- lambda_icw * p * (lambda_icw * t)^(p-1)
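
# Since the same formula is evaluated three times, a small helper keeps it in
# one place (a sketch; weib_hazard is not part of the original code):

weib_hazard <- function(lambda, p, t) lambda * p * (lambda * t)^(p - 1)

# e.g., weib_hazard(lambda_civil, p, t) reproduces hazard_civil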
#Plot hazard rates

ggplot(data = un, aes(x = t)) + 
  #geom_point(aes(y = hazard_civil)) +
  geom_line(aes(y = hazard_civil, colour = "Civil War")) +
  #geom_point(aes(y = hazard_interstate)) +
  geom_line(aes(y = hazard_interstate, colour = "Interstate Conflict")) +
  #geom_point(aes(y = hazard_icw)) +
  geom_line(aes(y = hazard_icw, colour = "Internationalized Civil War")) +
  theme_bw() +
  ylab(label = "Hazard Rates") +  xlab("Duration of U.N. Peacekeeping Missions") +
  labs(colour = "Type of Conflict") +
  ggtitle("Weibull Hazard Rates by Type of Conflict")

Chapter 3: UN Data in Stata

Let’s load the dataset.

/* Dataset is UNFINAL.DTA */

use "/Users/liwugan/Dropbox/event_history/dta/UNFINAL.dta"

Table 3.1

We can estimate and store each of the following models: exponential, Weibull A.F.T., and Weibull Prop. Hazards.

eststo clear

* Exponential model 

eststo: streg civil interst, dist(exp) time 

* Weibull model using accelerated failure time

eststo: streg civil interst, dist(weib) time 

* Weibull model using proportional hazards

eststo: streg civil interst, dist(weib) nohr 

Now we can generate the regression table for the three models.

* Reset working directory to collect following output 

cd ~/Dropbox/event_history/rmd

* Generate regression table output 

esttab using ch3_exp_weib.html, replace ///
    coeflabel(  _t:civil "Civil War"    _t:interst "Interstate Conflict" ///
                _t:_cons "Constant") ///
    title(Weibull Model of U.N. Peacekeeping Missions) ///
    mtitles("Exponential Model" "Weibull A.F.T." "Weibull Prop. Hazards") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) 

Figure 3.1

We generate hazard rates for each conflict type from the Weibull P.H. model.

*Weibull Model with P.H. Parameterization

streg civil interst, dist(weib) time

* First, note that lambda=exp(-beta'x). We can compute lambda for each conflict type:

gen lambda_civil=exp(-(_b[_cons]+_b[civil]))
gen lambda_interstate=exp(-(_b[_cons]+_b[interst]))
gen lambda_icw=exp(-(_b[_cons]))

* Second, note that h(t)=lambda*p*(lambda*t)^(p-1).  We can generate the hazard rate for each covariate profile:
  
gen haz_civil=lambda_civil*e(aux_p)*(lambda_civil*duration)^(e(aux_p)-1)
gen haz_interstate=lambda_interstate*e(aux_p)*(lambda_interstate*duration)^(e(aux_p)-1)
gen haz_icw=lambda_icw*e(aux_p)*(lambda_icw*duration)^(e(aux_p)-1)

We are now ready to plot the hazard rates.

twoway (line haz_civil duration, sort lpattern(solid) lcolor(red)) (line haz_icw duration, sort lpattern(solid) lcolor(green)) ///
    (line haz_interstate duration, sort lpattern(solid) lcolor(blue)), ///
    legend(pos(3) col(1) ring(0) ///
    lab(1 "Civil War") lab(2 "Internationalized Civil War") lab(3 "Interstate Conflict")) ///
    xtitle("Duration of U.N. Peacekeeping Missions") ///
    title("Hazard Rates", position (11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(cabcoxinthaz.gph, replace)

* Export graph as .png file.

graph export UNweibhaz.png, replace

Chapter 3: Cabinet Data in R

We begin by opening the cabinet dataset.

#-------Dataset
cabinet <- read.dta("~/Dropbox/event_history/dta/cabinet.dta")

We create the survival object for our dependent variable, using the syntax Surv(time, event).

#--------Data preparation

dv <- Surv(cabinet$durat, cabinet$'_d')

Table 3.3

We run the Generalized Gamma and Weibull models.

# Generalized Gamma model 

gg_mod <- flexsurvreg(dv ~ invest + polar + numst + format + postelec + caretakr,
                      data = cabinet, dist = "gengamma")


# Weibull model 

weib_mod <- survreg(dv ~ invest + polar + numst + format + postelec + caretakr,
                    data = cabinet)

Let’s compare the results of our models.

Generalized Gamma Model of Cabinet Durations

                 Generalized Gamma   Weibull
Constant          2.96 (0.14)         2.99 (0.13)
Investiture      -0.30 (0.11)        -0.30 (0.11)
Polarization     -0.02 (0.01)        -0.02 (0.01)
Majority          0.47 (0.10)         0.46 (0.10)
Formation        -0.10 (0.03)        -0.10 (0.03)
Post-Election     0.68 (0.11)         0.68 (0.10)
Caretaker        -1.33 (0.21)        -1.33 (0.20)
Shape Parameter   0.79                0.77
Scale Parameter   0.92
Log Likelihood   -1014.55            -1014.62
N                 314                 314

Chapter 3: Cabinet Data in Stata

Let’s load the dataset.

/* Dataset is cabinet.dta */

use "/Users/liwugan/Dropbox/event_history/dta/cabinet.dta"

Table 3.3

We estimate the generalized gamma and Weibull models, and store the results.

* Estimate generalized gamma model

eststo clear

eststo: streg invest polar numst format postelec caretakr, dist(ggamma) time

* Estimate weibull model 

eststo: streg invest polar numst format postelec caretakr, dist(weib) time

We can generate the results in a table now.

* Reset working directory to collect following output 

cd ~/Dropbox/event_history/rmd

* Generate regression table output 

esttab using ch3_gg_weib.html, replace ///
    coeflabel(  _t:invest "Investiture" _t:polar "Polarization" _t:numst "Majority" ///
                _t:format "Formation" _t:postelec "Post-Election" _t:caretakr "Caretaker" ///
                _t:_cons "Constant") ///
    title(Generalized Gamma Model of Cabinet Durations) ///
    mtitles("Generalized Gamma" "Weibull") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) nogaps

Chapter 4: Cabinet Data in R

Table 4.4

First, we run each of the models.

# Run Cox model with the Breslow approximation
b_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
               data = cabinet, ties = "breslow")

# Run Cox model with the Efron approximation

ef_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
                data = cabinet)

# Run Cox model with the Exact Discrete approximation

ed_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
                data = cabinet, ties = "exact")

Here is the output, with standard errors in parentheses:

Cox Model of Cabinet Durations

                Breslow         Efron           Exact
Investiture      0.38 (0.14)     0.39 (0.14)     0.41 (0.14)
Polarization     0.02 (0.01)     0.02 (0.01)     0.02 (0.01)
Majority        -0.57 (0.13)    -0.58 (0.13)    -0.62 (0.14)
Formation        0.13 (0.04)     0.13 (0.04)     0.13 (0.05)
Post-Election   -0.83 (0.14)    -0.86 (0.14)    -0.88 (0.15)
Caretaker        1.54 (0.28)     1.71 (0.28)     1.86 (0.33)
Log Likelihood  -1299.89        -1287.74        -918.29
N                314             314             314

Table 4.5

Let’s compare the Cox Efron approximation with the Weibull proportional hazards.

To convert from the A.F.T. to Prop. Hazards parameterization, we will use the ConvertWeibull function from Chap. 3. We will prepare the data the same way we did in Chap. 3.

intercept <- rep(1, 314)

cabinet$intercept <- intercept

weib_mod1 <- survreg(dv ~ 0 + intercept + invest + polar + numst + format + postelec
                     + caretakr, data = cabinet)

weib_mod2 <- survreg(dv ~ 0 + invest + intercept + polar + numst + format + postelec
                     + caretakr, data = cabinet)

weib_mod1_ph <- ConvertWeibull(weib_mod1)

weib_mod2_ph <- ConvertWeibull(weib_mod2)

weib_ph <- rbind(weib_mod1_ph$vars, weib_mod2_ph$vars)


# Delete rows that are redundant/unnecessary 
weib_ph <- weib_ph[-c(1,2,9,12,13,14,15,16),]

weib_ph <- weib_ph[c("intercept", "invest", "polar", "numst", "format", "postelec",
                     "caretakr", "gamma"),]

weib_ph
##              Estimate          SE
## intercept -3.86270244 0.262780694
## invest     0.38274582 0.137174519
## polar      0.02321558 0.005585448
## numst     -0.60149819 0.131047758
## format     0.13245769 0.043749027
## postelec  -0.87931813 0.137687010
## caretakr   1.72601174 0.275890539
## gamma      1.29385218 0.064767331

Let’s format this into a presentable table and compare it with the Cox Efron approximation.

Cox and Weibull Estimates of Cabinet Duration

                 Cox             Weibull
Investiture       0.39 (0.14)     0.38 (0.14)
Polarization      0.02 (0.01)     0.02 (0.01)
Majority         -0.58 (0.13)    -0.60 (0.13)
Formation         0.13 (0.04)     0.13 (0.04)
Post-Election    -0.86 (0.14)    -0.88 (0.14)
Caretaker         1.71 (0.28)     1.73 (0.28)
Constant                         -3.86 (0.26)
Log Likelihood   -1287.74       -1014.62
N                 314             314
Shape Parameter                   1.29

Figure 4.1

In preparation for the plots, we mean center the polarization and formation attempts covariates.

cabinet$polarmean <- cabinet$polar - mean(cabinet$polar)
cabinet$formmean <- cabinet$format - mean(cabinet$format)

We run Cox and Weibull models with these mean centered variables. We will use these models for our plots.

cox_mod_plot <- coxph(dv ~ invest + polarmean + numst + formmean + postelec + caretakr,
                 data = cabinet)

weib_mod_plot <- survreg(dv ~ invest + polarmean + numst + formmean + postelec
                         + caretakr, data = cabinet)

We now calculate the baseline integrated hazard function, baseline survivor function, and baseline hazard function from the Cox model.

# Calculate baseline integrated hazard function H(t) 
haz_rte <- basehaz(cox_mod_plot, centered = FALSE)

# Convert H(t) to the baseline hazard, h(t). h(t) = H(t) - H(t-1) 

haz_cox <- data.frame(diff(haz_rte$hazard))

# Add back H(t) at the first event time and merge with previous calculations

row <- data.frame(0.03594616)
colnames(row) <- "diff.haz_rte.hazard."
haz_cox <- rbind(row, haz_cox)
colnames(haz_cox) <- "baseline_hazard"
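
# Note: the hardcoded first value can be avoided entirely, since H(0) = 0 means
# the first increment is just H(t1). An equivalent one-step computation (the
# name haz_cox_alt is illustrative):

haz_cox_alt <- data.frame(baseline_hazard = diff(c(0, haz_rte$hazard)))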

# Merge baseline hazards into master dataframe with integrated hazards
haz_rte$haz_cox <- haz_cox

# Calculate baseline survivor function
surv_basecox <- exp(-haz_rte$hazard)

# Merge baseline survivor values into master dataframe with integrated hazards
haz_rte$surv_basecox <- surv_basecox
# A glimpse of the values for all three functions 
head(haz_rte)
##   time     hazard baseline_hazard surv_basecox
## 1  0.5 0.03594616      0.03594616    0.9646922
## 2  1.0 0.06974011      0.03379395    0.9326362
## 3  2.0 0.11021916      0.04047905    0.8956378
## 4  3.0 0.19579836      0.08557921    0.8221780
## 5  4.0 0.24758576      0.05178740    0.7806833
## 6  5.0 0.35689931      0.10931355    0.6998430

We do the same for the Weibull model.

# Compute the baseline survivor function for the Weibull

# First, create lambda from the Weibull model

lambda_base <- unname(exp(-(weib_mod_plot$coef[1])))

# Because S(t) = exp(-(lambda*t)^p), we can generate the survivor function for
# the baseline case. 

p <- 1/weib_mod_plot$scale

t <- cabinet$'_t'

surv_baseweib <- exp(-(lambda_base * t)^p)

weib <- data.frame(cbind(t, surv_baseweib))

# We can also generate the baseline hazard function, knowing that 
# h(t) = lambda * p * (lambda * t)^(p-1)

haz_baseweib <- lambda_base * p * (lambda_base * t)^(p-1)

weib$haz_baseweib <- haz_baseweib

# The baseline integrated hazard is thus H(t) = -log(S(t))

inthaz_baseweib <- -log(surv_baseweib)

weib$inthaz_baseweib <- inthaz_baseweib

# A glimpse of the values for all three functions 

head(weib)
##     t surv_baseweib haz_baseweib inthaz_baseweib
## 1 0.5     0.9843950   0.04069950      0.01572803
## 2 1.0     0.9621718   0.04989390      0.03856229
## 3 1.0     0.9621718   0.04989390      0.03856229
## 4 2.0     0.9097843   0.06116539      0.09454773
## 5 0.5     0.9843950   0.04069950      0.01572803
## 6 2.0     0.9097843   0.06116539      0.09454773

Using the values from the haz_rte and weib dataframes, we can plot the values for the baseline survivor, integrated hazard, and baseline hazards for both models. Let’s start with the baseline survivor function.

base_surv <- ggplot(data = haz_rte, aes(x = time, y = surv_basecox, color = "black")) + geom_step() + geom_line(data = weib, aes(x = t, y = surv_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Baseline Survivor Function") +
  xlab("Cabinet Duration") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Here is the integrated hazard function.

int_haz <- ggplot(data = haz_rte, aes(x = time, y = hazard, color = "black")) + geom_step() + geom_line(data = weib, aes(x = t, y = inthaz_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Baseline Integrated Hazard Function") +
  xlab("Cabinet Duration") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Lastly, this is the baseline hazard.

#Drop last seven rows for graphical purposes

haz_rte <- haz_rte[-c(48:54),]

#Plot

base_haz <- ggplot(data = haz_rte, aes(x = time, y = haz_cox, color = "black")) + geom_step() + geom_line(data = weib, aes(x = t, y = haz_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Baseline Hazard Function") +
  xlab("Cabinet Duration") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Finally, we can arrange all the plots together.

grid.arrange(base_surv, int_haz, base_haz, ncol = 2)

Chapter 4: Cabinet Data in Stata

Table 4.4

We estimate the Cox model in four different ways, one for each method of handling ties.

eststo clear

* Breslow Method (Stata default)
eststo: stcox  invest polar numst format postelec caretakr, nohr breslow

* Efron Method
eststo: stcox  invest polar numst format postelec caretakr, nohr efron

* Averaged Likelihood
eststo: stcox  invest polar numst format postelec caretakr, nohr exactm

* Exact Discrete
eststo: stcox  invest polar numst format postelec caretakr, nohr exactp

Now we can format the results into one table.

* Reset working directory to collect following output 

cd ~/Dropbox/event_history/rmd

* Generate regression table output 

esttab using ch4_cox.html, replace ///
    coeflabel(invest "Investiture" polar "Polarization" numst "Majority" ///
    format "Formation" postelec "Post-Election" caretakr "Caretaker" ///
    _cons "Constant") ///
    title(Cox Model of Cabinet Durations) ///
    mtitles("Breslow" "Efron" "Avg. Lik." "Exact") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) 
    
eststo clear

Table 4.5

We want to compare the Cox model using the averaged likelihood approximation with the Weibull proportional hazards model.

*Averaged Likelihood*
eststo: stcox  invest polar numst format postelec caretakr, nohr exactm


*Weibull*
eststo: streg  invest polar numst format postelec caretakr, dist(weib) nohr 

Again, we can format the results into a table.

* Generate regression table output 

esttab using ch4_cox_weib.html, replace ///
    coeflabel(invest "Investiture" polar "Polarization" numst "Majority" ///
    format "Formation" postelec "Post-Election" caretakr "Caretaker" ///
    _cons "Constant") ///
    title(Cox and Weibull Estimates of Cabinet Durations) ///
    mtitles("Cox" "Weibull") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) nogaps

Figure 4.1

In preparation for the plots, we mean center the polarization and formation attempts covariates.

* To obtain a natural 0 point on the covariates, we mean center the polarization and formation attempts covariates.  We do this using Stata's egen (extended generate) command:

egen meanpolar=mean(polar)
egen meanform=mean(format)

* Now we mean center these two variables:

gen polarmean=polar-meanpolar
gen formmean=format-meanform

Using the mean centered variables, we run the Cox and Weibull models that will serve as the foundation for our plots. When we run the Cox model, we can immediately generate the baseline survivor, integrated hazard, and hazard functions.

* Estimate Cox model. The last three options in the stcox statement produce the estimates of the baseline functions, which we can graph.

stcox invest polarmean numst formmean postelec caretakr, nohr exactm basech(inthaz) basehc(haz) basesurv(surv)

predict hr, hr

* hr is the relative hazard for each observation, i.e., exp(xb)

* To compute the survivor function for each observation, we generate a new variable:

gen surv_i=surv^hr

* Weibull model

streg invest polarmean numst formmean postelec caretakr, dist(weib)  time

Now we can generate the baseline survivor, integrated hazard, and hazard functions from the Weibull model.

* Estimate the baseline survivor function from the Weibull

* First create lambda

gen lambda_base=exp(-(_b[_cons]))

*Second, note that S(t)=exp(-(lambda*t)^p).  We can generate the survivor function for the baseline case:

gen surv_baseweib=exp(-(lambda_base*durat)^e(aux_p))

* Now we generate the "baseline hazard" from the Weibull

* Note that h(t)=lambda*p*(lambda*t)^(p-1).  We can generate the hazard rate for the "baseline case."

gen haz_baseweib=lambda_base*e(aux_p)*(lambda_base*durat)^(e(aux_p)-1)

*The baseline integrated hazard is thus H(t)=-log(S(t))

gen inthaz_baseweib=-log(surv_baseweib)

We can graph each of the three functions from the Cox and Weibull models.

*Graph the baseline functions from the Cox and Weibull Models

twoway (line surv durat, sort connect(stairstep)) (line surv_baseweib durat, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Cabinet Duration") ///
    title("Baseline Survivor Function", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(cabcoxst.gph, replace)

* Graph the baseline integrated hazard from the Cox and Weibull Models

twoway (line inthaz durat, sort connect(stairstep)) (line inthaz_baseweib durat, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Cabinet Duration") ///
    title("Baseline Integrated Hazard Function", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(cabcoxinthaz.gph, replace)

*Graph the baseline hazard from the Cox and Weibull Models

twoway (line haz durat, sort connect(stairstep)) (line haz_baseweib durat, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Cabinet Duration") ///
    title("Baseline Hazard Function", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(cabcoxht.gph, replace)

Now we can combine all three graphs and export them to a .png file.

graph combine cabcoxst.gph cabcoxinthaz.gph cabcoxht.gph, graphregion(color(white) ///
icolor(none)) saving(cabcoxbase.gph, replace)

graph export cabcoxbase.png, replace

Chapter 5: Militarized Interventions Data in R

Note that for two of the models, the replication of Table 5.3 will be based on a smaller dataset (which I name “imi”) than the one used in Stata and in the book. This is because the exact discrete Cox model using the larger dataset (“imi2”) is too computationally intense for R to estimate within a reasonable time. In addition, the Weibull model cannot handle start-stop Surv objects. Nevertheless, the results using the smaller dataset are essentially the same, so I still reproduce them below.

First, we prepare both the large and small datasets for analysis by renaming certain variables to avoid clashing with R’s reserved words, creating the Surv objects, and creating mean centered variables for analysis of the larger dataset.

#-------Dataset

# This is the dataset used in the Stata version, but because it takes so long to run, one can get the same results with the smaller dataset. This is important when running the exact discrete Cox model, because that model is computationally intense. 

imi2 <- read.dta("~/Dropbox/event_history/dta/omifinal2.dta")


imi <- read.dta("~/Dropbox/event_history/dta/omismall.dta")


#-------Data preparation

# Rename dependent variable in both datasets to avoid a clash with R's
# reserved word 'break'

names(imi2)[names(imi2) == 'break'] <- 'breakdown'
names(imi)[names(imi) == 'break'] <- 'breakdown'

# Create survival object for dv in the larger dataset

dv2 <- Surv(imi2$'_t0', imi2$'_t', imi2$event)

# Create survival object for dv in the smaller dataset

dv <- Surv(imi$'_t', imi$'_d')

# We only want to run the models on data with no missing observations for the following logit equation. 

#m1 <- glm(fail ~ ctg + ali + idem + tdem + pbal + breakdown, data = imi2)

imi2 <- imi2 %>% 
  drop_na(fail, ctg, ali, idem, tdem, pbal, breakdown)


# Lastly, we mean center the pbal, idem, and tdem variables.

imi2$idemmean <- imi2$idem - mean(imi2$idem)

imi2$tdemmean <- imi2$tdem - mean(imi2$tdem)

imi2$pbalmean <- imi2$pbal - mean(imi2$pbal)

Table 5.3

We first run the Cox exact discrete model (which is essentially the same as the conditional logit except it does not drop observations) and the Weibull model using the smaller dataset.

# Cox exact discrete

ed_mod <- coxph(dv ~ pbal + ctg + ali + idem + tdem + breakdown, data = imi, ties = "exact")

# Weibull A.F.T. 

weib_mod <- survreg(dv ~ pbal + ctg + ali + idem + tdem + breakdown, data = imi, dist = "weibull")

Again, we have to convert the Weibull A.F.T. to a P.H. parameterization.

# Conversion from Weibull A.F.T.  to P.H. 

intercept <- rep(1, 656)

imi$intercept <- intercept

weib_mod1 <- survreg(dv ~ 0 + intercept +  pbal + ctg + ali + idem + tdem + breakdown, data = imi, dist = "weibull")

weib_mod2 <- survreg(dv ~ 0 + pbal + intercept +  ctg + ali + idem + tdem + breakdown, data = imi, dist = "weibull")

weib_mod1_ph <- ConvertWeibull(weib_mod1)

weib_mod2_ph <- ConvertWeibull(weib_mod2)

weib_ph <- rbind(weib_mod1_ph$vars, weib_mod2_ph$vars)

# Delete rows that are redundant/unnecessary 
weib_ph <- weib_ph[-c(1,2,9,12,13,14,15,16),]

weib_ph <- weib_ph[c("intercept", "pbal", "ctg", "ali", "idem", "tdem",
                     "breakdown", "gamma"),]

weib_ph
##              Estimate          SE
## intercept -1.32153217 0.151345243
## pbal      -0.50188655 0.152589537
## ctg       -0.28344841 0.101406267
## ali        0.25609560 0.095594264
## idem       0.01247340 0.006120223
## tdem       0.02285714 0.006710263
## breakdown -0.44311104 0.197807373
## gamma      0.65543389 0.022078980

Let’s format these models into a presentable table. I show how to do this for the exact discrete model and repeat the same steps for the Weibull (not shown).

# Create texreg object for exact discrete/conditional logit model. 

tex_reg_converter <- function(original_model){
  
  
  coefficient.names <- c("Relative Capabilities", "Territorial Contiguity", "Intervenor Allied to Target", "Intervenor Democracy", "Target Democracy",
                       "Breakdown of Authority")
  coefficients <- original_model$coef 
  se <- sqrt(diag(vcov(original_model)))
  n <- original_model$n
  lik <- logLik(original_model)
  gof <- c(lik, n)
  gof.names <- c("Log Likelihood", "N" )
  decimal.places <- c(TRUE, FALSE)
  
  createTexreg(
    coef.names = coefficient.names, coef = coefficients, se = se,
    gof.names = gof.names, gof = gof, gof.decimal = decimal.places)
  
}

ed_mod <- tex_reg_converter(ed_mod)
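
The Weibull step is analogous; here is a minimal sketch of one way to build the omitted weib_mod_ph object from the converted estimates in weib_ph (the coefficient labels and the N of 520 follow the table below; the actual code was not shown above and may differ):

# Hypothetical reconstruction of the omitted Weibull texreg object

weib_mod_ph <- createTexreg(
  coef.names = c("Constant", "Relative Capabilities", "Territorial Contiguity",
                 "Intervenor Allied to Target", "Intervenor Democracy",
                 "Target Democracy", "Breakdown of Authority", "Shape Parameter"),
  coef = weib_ph[, "Estimate"], se = weib_ph[, "SE"],
  gof.names = c("Log Likelihood", "N"),
  gof = c(logLik(weib_mod), 520), gof.decimal = c(TRUE, FALSE))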

Now we can run the logit model with the lowess term using the large dataset.

log_mod <- glm(event ~ pbalmean + ctg + ali + idemmean + tdemmean + breakdown + lowesst2, data = imi2, family = "binomial")

We format the results of the logit and put them together with the Cox and Weibull results.

tex_reg_converter <- function(original_model){

coefficient.names <- c("Constant","Relative Capabilities", "Territorial Contiguity", "Intervenor Allied to Target", "Intervenor Democracy", "Target Democracy", "Breakdown of Authority", "Duration Dependency")
coefficients <- original_model$coef 
se <- sqrt(diag(vcov(original_model)))
n <- 9374
lik <- logLik(original_model)
gof <- c(lik, n)
gof.names <- c("Log Likelihood", "N" )
decimal.places <- c(TRUE, FALSE)

createTexreg(
    coef.names = coefficient.names, coef = coefficients, se = se,
    gof.names = gof.names, gof = gof, gof.decimal = decimal.places)
  
}

logit_mod <- tex_reg_converter(log_mod)
# Put all the tables together

htmlreg(list(ed_mod, logit_mod, weib_mod_ph), stars = c(), caption = "Models of Militarized Interventions", caption.above = TRUE, custom.model.names = c("Conditional Logit", "Logit", "Weibull"), center = FALSE, single.row = TRUE)
Models of Militarized Interventions

                              Conditional Logit   Logit           Weibull
Relative Capabilities         -0.48 (0.16)        -0.43 (0.16)    -0.50 (0.15)
Territorial Contiguity        -0.26 (0.11)        -0.25 (0.11)    -0.28 (0.10)
Intervenor Allied to Target    0.24 (0.10)         0.22 (0.10)     0.26 (0.10)
Intervenor Democracy           0.01 (0.01)         0.01 (0.01)     0.01 (0.01)
Target Democracy               0.02 (0.01)         0.02 (0.01)     0.02 (0.01)
Breakdown of Authority        -0.46 (0.21)        -0.43 (0.20)    -0.44 (0.20)
Constant                                          -4.07 (0.13)    -1.32 (0.15)
Duration Dependency                               16.20 (0.95)
Log Likelihood                -1591.49            -1779.31        -1931.50
N                              520                 9374            520
Shape Parameter                                                    0.66

Chapter 5: Militarized Interventions Data in Stata

Let’s load the dataset.

/* Dataset is omifinal2.dta */

use "/Users/liwugan/Dropbox/event_history/dta/omifinal2.dta"

Table 5.3

Before we estimate the models, we want to mean center the intervenor democracy, target democracy, and relative capabilities covariates, using only the observations that are not missing in a preliminary logit model.

/* First mean center quantitative variables */

* To get sample estimates, create esample variable

logit fail ctg  ali idem tdem pbal break

gen insample=1 if e(sample)

egen meanidem=mean(idem)  if insample==1
egen meantdem=mean(tdem) if insample==1
egen meanpbal=mean(pbal) if insample==1

gen idemmean=idem-meanidem
gen tdemmean=tdem-meantdem
gen pbalmean=pbal-meanpbal

Now we can estimate the three models with the mean centered variables.

* Estimate Cox model with exact discrete

eststo clear 

eststo: stcox pbalmean ctg ali idemmean tdemmean break, nohr exactp

* Estimate a conditional logit through clogit. It should give the same results as the Cox model with exact discrete ties, but with some dropped observations. We do not include this model in the final regression output.

clogit event ctg ali idem tdem pbal break, group(durmths)

* Estimate Logit Model with Lowess Term

eststo: logit _d lowesst2 pbalmean ctg ali idemmean tdemmean break

* Now estimate Weibull for comparison

eststo: streg pbalmean ctg ali idemmean tdemmean break, nohr dist(weib)

Lastly, we can generate the regression table for the three models.

* Reset working directory to collect following output 

cd ~/Dropbox/event_history/rmd

* Generate regression table output 

esttab using ch5_log_weib.html, replace ///
    coeflabel(  lowesst2 "Duration Dependency"  ctg "Territorial Contiguity" ///
                 ali "Intervenor Allied to Target" idemmean "Intervenor Democracy" ///
                 tdemmean "Target Democracy" pbalmean "Relative Capabilities" ///
                 break "Breakdown of Authority" _cons "Constant") ///
    title(Models of Militarized Interventions) ///
    mtitles("Conditional Logit" "Logit" "Weibull") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) 

Chapter 6: Restrictive Abortion Legislation Data in R

Let’s load the dataset.

#-------Dataset

adopt <- read.dta("~/Dropbox/event_history/dta/adopt_singleevent.dta")

#-------Data preparation

# Create survival object for dv 

dv <- Surv(adopt$'_t', adopt$'_d')

Table 6.1

First, we run each of the models.

# The conditional logit model is the same as the exact discrete method we used earlier

ed_mod <- coxph(dv ~ mooneymean, data = adopt, ties = "exact")

### The Royston-Parmar model is not estimated here; see the sketch below


# Weibull A.F.T. (converted to P.H. below)
weib_mod <- survreg(dv ~ mooneymean, data = adopt, dist = "weibull")
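
For reference, flexsurv (already loaded above) does offer a Royston-Parmar implementation through flexsurvspline. A minimal sketch, assuming two interior knots (k = 2) to mirror Stata’s df(3); it is not used in the table below:

# Royston-Parmar flexible parametric model on the log cumulative hazard scale

rp_mod <- flexsurvspline(dv ~ mooneymean, data = adopt, k = 2, scale = "hazard")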

Again, we have to convert the Weibull A.F.T. to a P.H. parameterization.

# Conversion from Weibull A.F.T.  to P.H. 

intercept <- rep(1, 50)

adopt$intercept <- intercept

weib_mod1 <- survreg(dv ~ 0 + mooneymean + intercept, data = adopt)

weib_mod2 <- survreg(dv ~ 0 + intercept + mooneymean, data = adopt)

weib_mod1_ph <- ConvertWeibull(weib_mod1)

weib_mod2_ph <- ConvertWeibull(weib_mod2)

weib_ph <- rbind(weib_mod1_ph$vars, weib_mod2_ph$vars)

# Delete rows that are redundant/unnecessary 
weib_ph <- weib_ph[-c(1,2,4),]


weib_ph <- weib_ph[c("mooneymean", "gamma", "intercept"),]

weib_ph
##              Estimate         SE
## mooneymean -0.2227881 0.08239561
## gamma       0.9830112 0.12000313
## intercept  -2.0966280 0.31945773

Let’s format these models into a presentable table.

Models of Adoption of Restrictive Abortion Legislation

                 Cox Model       Weibull Model
Pre-Roe          -0.22 (0.09)    -0.22 (0.08)
Constant                         -2.10 (0.32)
Log Likelihood   -96.60         -133.07
N                 50              50
Shape Parameter                   0.98

Figure 6.1

We first calculate the baseline hazard for the Weibull.

# Let's create lambda from the Weibull model we ran earlier
  
lambda_base <- unname(exp(-(weib_mod$coef[1])))

p <- 1/weib_mod$scale

t <- adopt$'_t'

# We can generate the baseline hazard function, knowing that 
# h(t) = lambda * p * (lambda * t)^(p-1)

haz_baseweib <- lambda_base * p * (lambda_base * t)^(p-1)

weib <- data.frame(cbind(t, haz_baseweib))

We do the same for the Cox model we ran earlier.

# Calculates integrated baseline hazard, H(t)

haz_rte <- basehaz(ed_mod, centered = FALSE)

# To get the baseline hazard, we calculate H(t) - H(t-1), which gives us the
# corresponding value for all obs. except the first. 

haz_cox <- data.frame(diff(haz_rte$hazard))

# Add back H(t) at the first event time and merge with previous calculations

row <- data.frame(0.2208064)
colnames(row) <- "diff.haz_rte.hazard."
haz_cox <- rbind(row, haz_cox)
colnames(haz_cox) <- "baseline_hazard"
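
# (As in Chap. 4, diff(c(0, haz_rte$hazard)) would avoid the hardcoded first
# value, since H(0) = 0.)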

# Merge baseline hazards into master dataframe with integrated hazards

haz_rte$haz_cox <- haz_cox

#Drop last row for graphical purposes

haz_rte <- haz_rte[-c(16), ]

### Calculate baseline hazard rate for Royston-Parmar model

We are ready to plot now.

ggplot(data = haz_rte, aes(x = time, y = haz_cox, color = "black")) + geom_step() +
  geom_line(data = weib, aes(x = t, y = haz_baseweib, color = "red")) + 
  theme_bw() +
  ggtitle("Hazard Functions") +
  xlab("Years Since Roe vs. Wade") +
  ylab("") +
  labs(colour = "Model") +
  scale_color_manual(labels = c("Cox", "Weibull"), values = c("red", "black"))

Chapter 6: Restrictive Abortion Legislation Data in Stata

Let’s load the dataset.

/* Dataset is adopt_singleevent.dta */

use "~/Dropbox/event_history/dta/adopt_singleevent.dta"

Table 6.1

We begin by estimating the Cox and Weibull models, and storing the results to make a table.

* Cox model 

eststo clear 

eststo: stcox mooneymean, exactp nohr basehc(haz_cox)

* Weibull model

eststo: streg mooneymean, dist(weib) nohr

* Reset working directory to collect output

cd ~/Dropbox/event_history/rmd

* Generate regression table output for Cox and Weibull 

esttab using ch6_cox_weib.html, replace ///
    coeflabel(mooneymean "Pre-Roe" _cons "Constant") ///
    title(Models of Adoption of Restrictive Abortion Legislation) ///
    mtitles("Cox Model" "Weibull Model") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) 

Let’s estimate the Royston-Parmar model in comparison, and display the results in a table.

eststo clear 

eststo: stpm mooneymean, scale(h) df(3)

* Generate regression table output for Royston-Parmar model

esttab using ch6_rp.html, replace ///
    coeflabel(s0:_cons "Spline 1" s1:_cons "Spline 2" s2:_cons "Spline 3" xb:mooneymean "Pre-Roe" ///
    xb:_cons "Constant") ///
    title(Models of Adoption of Restrictive Abortion Legislation) ///
    mtitles("Royston-Parmar Model") ///
    eqlabels("", none) ///
    b(2) se(2) nostar ///
    stats(ll N, label("Log-Likelihood" "<em>N</em>") fmt(2 0)) 

Figure 6.1

We already computed the baseline hazard for the Cox model when we estimated it earlier, so now we compute the baseline hazards for the Royston-Parmar and Weibull models.

* Compute baseline hazard for Royston-Parmar model 

predict haz_rp, haz zero

* Compute baseline hazard for Weibull based on P.H. parameterization.

streg mooneymean, dist(weib) time

* First, note that lambda=exp(-beta'x).  We can compute lambda for the baseline case.

gen lambda_base=exp(-(_b[_cons])) if e(sample)

* Second, note that h(t)=lambda*p*(lambda*t)^(p-1).  We can generate the hazard rate for the baseline.

gen haz_weib=lambda_base*e(aux_p)*(lambda_base*yrtoadp)^(e(aux_p)-1)

twoway (line haz_rp yrtoadp, sort lpattern(solid)) (line haz_weib yrtoadp, sort lpattern(solid)) ///
    (line haz_cox yrtoadp, sort connect(stairstep) lpattern(solid)), ///
    legend(off) ///
    xtitle("Years Since Roe vs. Wade") ///
    title("Hazard Functions", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(hazadopt.gph, replace)

graph export hazadopt.png, replace

Chapter 8: Cabinet Data in R

Figure 8.1

First we run the Cox model with the exact discrete approximation.

# Re-create the survival object for the cabinet data, since dv was redefined in Chap. 6

dv <- Surv(cabinet$durat, cabinet$'_d')

ed_mod <- coxph(dv ~ invest + polar + numst + format + postelec + caretakr,
                data = cabinet, ties = "exact")

We calculate the Cox-Snell residuals in the following way. Then we plot the residuals on the x-axis and the integrated hazard of the residuals on the y-axis, against a 45 degree line that serves as a reference. If the model holds, the plot of the residuals against the integrated hazard should fall roughly on that line.

# First we calculate Martingale residuals

ed_resid <- resid(ed_mod, type="martingale")

# We subtract these residuals from the actual values of the event indicator to
# get the Cox-Snell residuals.

ed_res <- cabinet$'_d' - ed_resid

# Compute S(t)

ed_surv <- survfit(Surv(ed_res, cabinet$'_d') ~ 1)

# Plot the integrated hazard function, which is H(t) = -log(S(t)), on the y-axis

plot(ed_surv$time, -log(ed_surv$surv), type = "l", xlab="Time", 
     ylab = "H(t) based on Cox-Snell Residuals", main="Cox-Snell Residuals from Cabinet Data")
lines(ed_res, ed_res, type = "l")

Figure 8.2

We estimate the models that will be used for the figure and calculate their Martingale residuals. The goal is to assess the functional form of particular covariates. For example, can we assume that it is linear or are adjustments necessary?

# The model for the top two panels of the figure. 

mgale_mod <- coxph(dv ~ format + polar, data = cabinet, ties = "exact")

# This model only has polarization covariate and will be used for the bottom right panel

polar_mod <- coxph(dv ~ polar, data = cabinet)

# This model only has the formation attempts covariate, and will be used for the bottom left panel.
     
format_mod <- coxph(dv ~ format, data = cabinet, ties = "exact")

# Calculate Martingale residuals for each model 

mgale_mod_resid <- resid(mgale_mod,type='martingale')

polar_mod_resid <- resid(polar_mod,type='martingale')

format_mod_resid <- resid(format_mod,type='martingale')

Now we are ready to plot. In all four panels, we plot the Martingale residuals and the smoothed residuals using lowess against either the polarization or formation attempts covariate. The top two panels are based on a Cox model that includes both covariates, the bottom right panel uses only the polarization covariate, and the bottom left panel uses only the formation attempts variable. In all four plots, we see mostly flat lines centered around 0, which indicates that no adjustments need to be made to the functional form.

par(mfrow=c(2,2))

# Plot Martingale residuals against the polarization covariate

plot(cabinet$polar, mgale_mod_resid, xlab="Polarization Index", 
     ylab = "Residuals", main="Martingale Residuals: Approach 1")
lines(lowess(cabinet$polar, mgale_mod_resid), col='red')

# Plot Martingale residuals against the formation attempts covariate

plot(cabinet$format, mgale_mod_resid, xlab="Formation attempts", 
     ylab = "Residuals", main="Martingale Residuals: Approach 1")
lines(lowess(cabinet$format, mgale_mod_resid), col='red')
     
# Plot Martingale residuals against the polarization covariate

plot(cabinet$polar, format_mod_resid, xlab="Polarization index", 
     ylab = "Residuals", main="Martingale Residuals: Approach 2")
lines(lowess(cabinet$polar, format_mod_resid),col='red')

# Plot Martingale residuals against the formation attempts covariate

plot(cabinet$format, polar_mod_resid, xlab="Formation attempts", 
     ylab = "Residuals", main="Martingale Residuals: Approach 2")
lines(lowess(cabinet$format, polar_mod_resid),col='red')

Chapter 8: Cabinet Data in Stata

Figure 8.1

First we run the Cox model with the averaged likelihood (exact marginal) approximation and compute the martingale residuals.

stcox invest polar  numst format postelec caretakr, exactm nohr mgale(martingale) 

We calculate the Cox-Snell residuals in the following way.

*Use predict to derive Cox-Snell residuals*

predict CoxSnell, csnell

*Re-stset the data to treat the Cox-Snell residuals as "the data" (i.e. the time variable)*

stset CoxSnell, fail(censor)

*Generate the K-M estimates for the new data*

sts generate km=s

Then we plot the residuals on the x-axis and the integrated hazard of the residuals on the y-axis, against a 45 degree line that serves as a reference. If the model holds, the plot of the residuals against the integrated hazard should fall roughly on that line.

*Generate the integrated hazard (using double option for increased computer precision)*

gen double H_cs=-log(km) 

*Reset working directory to output folder

cd ~/Dropbox/event_history/rmd

twoway (line H_cs CoxSnell, sort) (line CoxSnell CoxSnell, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Cox-Snell Residuals from Cabinet Data") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellcab.gph, replace)
    
graph export coxsnellcab.png, replace

Figure 8.2

Before plotting the martingale residuals on the polarization index and formation attempts variables, we drop the variables from our previous analysis and re-stset the data.

*Drop previous data
drop martingale CoxSnell H_cs km

*Resetting the data to original form*

stset durat, fail(censor)

We start by estimating the model for Approach 1.

* Estimate model of interest

stcox format polar, exactp nohr mgale(mg) 

Then we can plot the martingales and lowess term against either the polarization or formation attempts covariate (Approach 1).

twoway (scatter mg polar, sort mfcolor(white) mlcolor(black)) (lowess mg polar, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Polarization Index") ///
    title("Martingale Residuals: Approach 1", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(polarff1.gph, replace)

twoway (scatter mg format, sort mfcolor(white) mlcolor(black)) (lowess mg format, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Formation Attempts") ///
    title("Martingale Residuals: Approach 1", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(formatff1.gph, replace)

drop mg

Now we run two different models, one with only the formation attempts covariate and the other with only the polarization index covariate. From the first model, we plot the Martingale residuals and the smoothed residuals against the polarization variable while in the second, we plot the residuals against the formation attempts covariate.

* First estimate submodel

stcox format, exactp nohr mgale(mg)

* Plot of martingales vs. polarization variable with lowess term

twoway (scatter mg polar, sort mfcolor(white) mlcolor(black)) (lowess mg polar, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Polarization Index") ///
    title("Martingale Residuals: Approach 2", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(polarff2.gph, replace)

drop mg

* Now test for functional form of formation attempts

stcox polar, exactp nohr mgale(mg)

* Plot of martingales vs. formation attempts variable with lowess term  

  twoway (scatter mg format, sort mfcolor(white) mlcolor(black)) (lowess mg format, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Formation Attempts") ///
    title("Martingale Residuals: Approach 2", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(formatff2.gph, replace)

drop mg

Lastly, we combine all four graphs. In all four plots, we see mostly flat lines centered around 0, which indicates that no adjustments need to be made to the functional form.

graph combine polarff1.gph formatff1.gph polarff2.gph ///
 formatff2.gph, graphregion(color(white) ///
icolor(none)) saving(funcformcab.gph, replace)

graph export funcformcab.png, replace

Figure 8.6

We want to plot the Cox-Snell residuals on the x-axis and the integrated hazard function on the y-axis for a variety of parametric models. Let’s start with the exponential.

streg invest polar  numst format postelec caretakr, dist(exp) time

*Now compute Cox-Snell residuals*

predict double cs, csnell

*Now re-stset the data*

stset cs, failure(censor)

*Now generate K-M estimates*
  
sts generate km=s

*Back out the estimated cumulative hazard:*

gen double H=-log(km)

*Graph functions*

twoway (line H cs, sort) (line cs cs, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Exponential Model") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellexp.gph, replace)
    
drop H cs km 
stset durat, fail(censor)

Let’s estimate the Weibull next and calculate the Cox-Snell residuals and integrated hazard.

streg invest polar  numst format postelec caretakr, dist(weibull) time

*Now compute Cox-Snell residuals*

predict double cs, csnell

*Now re-stset the data*
  
stset cs, failure(censor)

*Now generate K-M estimates*

sts generate km=s

*Back out the estimated cumulative hazard:*

gen double H=-log(km)

*Graph functions*

twoway (line H cs, sort) (line cs cs, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Weibull Model") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellweib.gph, replace)

drop H cs km    
stset durat, fail(censor)

We do the same for the log-log model.

streg invest polar  numst format postelec caretakr, dist(loglog) 

*Now compute Cox-Snell residuals*

predict double cs, csnell

*Now re-stset the data*

stset cs, failure(censor)

*Now generate K-M estimates*

sts generate km=s

*Back out the estimated cumulative hazard:*

gen double H=-log(km)

*Graph functions*

twoway (line H cs, sort) (line cs cs, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Log-Logistic Model") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellll.gph, replace)

drop H cs km 
stset durat, fail(censor)

The log-normal model is next.

streg invest polar  numst format postelec caretakr, dist(lognorm) time

*Now compute Cox-Snell residuals*

predict double cs, csnell

*Now re-stset the data*

stset cs, failure(censor)

*Now generate K-M estimates*

sts generate km=s

*Back out the estimated cumulative hazard:*

gen double H=-log(km)

*Graph functions*

twoway (line H cs, sort) (line cs cs, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Log-Normal Model") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellln.gph, replace)
    
drop H cs km 
stset durat, fail(censor)

Here is the Gompertz model.

streg invest polar  numst format postelec caretakr, dist(gompertz) nohr

*Now compute Cox-Snell residuals*

predict double cs, csnell

*Now re-stset the data*

stset cs, failure(censor)

*Now generate K-M estimates*

sts generate km=s

*Back out the estimated cumulative hazard:*

gen double H=-log(km)

*Graph functions*

twoway (line H cs, sort) (line cs cs, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Gompertz Model") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellgomp.gph, replace)
    
drop H cs km 
stset durat, fail(censor)

Lastly, here is the generalized gamma model.

streg invest polar  numst format postelec caretakr, dist(ggamma) time

*Now compute Cox-Snell residuals*

predict double cs, csnell

*Now re-stset the data*

stset cs, failure(censor)

*Now generate K-M estimates*

sts generate km=s

*Back out the estimated cumulative hazard:*

gen double H=-log(km)

*Graph functions*

twoway (line H cs, sort) (line cs cs, sort lpattern(solid)), ///
    legend(off) ///
    xtitle("Generalized Gamma Model") ///
    title("H(t) based on Cox-Snell Residuals", position(11)) ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(coxsnellgg.gph, replace)
    
drop H cs km 

We can combine all six graphs.

graph combine coxsnellexp.gph coxsnellweib.gph coxsnellll.gph ///
 coxsnellln.gph coxsnellgomp.gph coxsnellgg.gph, col(2) graphregion(color(white) ///
icolor(none)) saving(coxsnellparm.gph, replace)

graph export coxsnellparm.png, replace

Chapter 8: Militarized Interventions Data in R

Let’s load the dataset and prepare the survival object.

#-------Dataset

imi <- read.dta("~/Dropbox/event_history/dta/omifinal.dta")

#-------Data preparation

names(imi)[names(imi) == 'break'] <- 'breakdown'

# Create survival object for dv 

#dv <- Surv(imi$'_t', imi$'_d')

dv <- Surv(imi$'_t0', imi$'_t', imi$'_d')

Figure 8.3

After running a Cox model (note that we are running the model with the Efron approximation because the exact discrete method used in Stata and in the book takes much longer to run), we want to assess whether any observations are exerting influence on the coefficient estimates. We can calculate the dfbeta residuals directly in R using the ggcoxdiagnostics command in the survminer package. The dfbeta residuals capture “the estimated changes in the regression coefficients upon deleting each observation in turn.”

cox_mod <- coxph(dv ~ pbal + ctg + ali + idem + tdem + breakdown, data = imi)



ggcoxdiagnostics(cox_mod, type = "dfbetas", linear.predictions = FALSE, ylab = "") + geom_line()
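
The underlying residuals can also be pulled directly with resid(), which returns one row per observation and one column per coefficient; a minimal sketch (the name dfb is illustrative):

# Raw dfbeta matrix: entry (i, j) approximates the change in coefficient j if
# observation i were deleted; type = "dfbetas" gives the scaled version.

dfb <- resid(cox_mod, type = "dfbeta")
colnames(dfb) <- names(coef(cox_mod))
head(dfb)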

Figure 8.4

To check for the presence of outliers, we use the ggcoxdiagnostics command again to calculate the deviance residuals and plot them against the observation number. We also plot the smoothed residuals using lowess for graphical purposes. What we want to see are residuals distributed uniformly around 0. As the plot shows, there are some observations with very large negative residuals.

ggcoxdiagnostics(cox_mod, type = "deviance", linear.predictions = FALSE, ylab = "", xlab = "Observation Number", title = "Deviance Residuals")

Chapter 8: Militarized Interventions Data in Stata

Figure 8.3

After running a Cox model, we want to assess whether any observations are exerting influence on the coefficient estimates. To do this, we create a matrix of score residuals and multiply it by the variance-covariance matrix generated from the Cox model. What we get is standard deviation changes to the estimates (called the dfbeta).

* Estimate a Cox model

stcox pbal idem tdem ctg ali break, nohr exactp esr(esr*)

* The esr(esr*) option tells Stata to compute six variables storing the values of the efficient score residuals (esr1 through esr6), one per covariate

* Next use Stata's matrix commands to generate an n x m matrix of score residuals

mkmat esr1 esr2 esr3 esr4 esr5 esr6 if e(sample), matrix(score_residuals)

* Now compute the var-cov matrix of beta

mat var_cov=e(V)

* and multiply the score residual matrix by the var-cov matrix to obtain the n x m matrix of scaled changes in the m coefficients

mat dfbeta=score_residuals*var_cov

* Now, we can name the columns, which will correspond to the influence values for the ith observation on the mth covariate

svmat dfbeta, names(x)

* For graphing purposes, we create a variable storing the observation number

gen obs=_n

* Also for display purposes, create a constant equal to 0

gen zero=0

For each covariate, we plot the dfbetas by the observation number to check for influential observations. Deviations from 0 suggest influential observations. From the plots, none appear large in magnitude.

* Reset working directory to collect following output 

cd ~/Dropbox/event_history/rmd

* Now we can graph them; for the power balance covariate, the graph command is

twoway (line x1 obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Power Balance") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(influencepb.gph, replace)
    
    
* And for the intervenor democracy score

twoway (line x2 obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Intervenor Democracy") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(influenceid.gph, replace)
    
    
* And for the target democracy score


twoway (line x3 obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Target Democracy") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(influencetd.gph, replace)
    
* And for the contiguity status covariate

twoway (line x4 obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Territorial Contiguity") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(influencec.gph, replace)
    
* And for the alliance status covariate

twoway (line x5 obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Alliance") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(influencea.gph, replace)
    
* And for the government breakdown covariate

twoway (line x6 obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Authority Breakdown") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(influenceb.gph, replace)


* Now we can combine all six graphs 

graph combine influencepb.gph influenceid.gph influencetd.gph ///
 influencec.gph influencea.gph influenceb.gph, graphregion(color(white) ///
icolor(none)) saving(influence.gph, replace)

* Export graph to .png file

graph export influence.png, replace

Figure 8.4

We want to check for the presence of outliers. The deviance residuals are calculated from a Cox model, and we plot them against the observation number. We also plot the smoothed residuals using lowess for graphical purposes. What we want to see are residuals distributed uniformly around 0. As the plot shows, there are some observations with very large negative residuals.

* Estimate Cox model and output martingale residuals

stcox  ctg idem tdem pbal break ali, nohr exactp mgale(mg)

* Now, create deviance residuals using predict option

predict double deviance, deviance

* Use twoway with lowess to graph the deviance residuals

twoway (scatter deviance obs, sort) (lowess deviance obs, sort) (line zero obs, sort), ///
    legend(off) ///
    xtitle("Observation Number") ///
    title("Deviance Residuals") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(outliersomi.gph, replace)
    
* Export graph to .png file

graph export outliersomi.png, replace

Figure 8.5

Here, the deviance residuals are plotted against duration times. We also include the smoothed residuals using lowess again. We can see that interventions which last for a long time tend to have large negative residuals. This suggests that for longer interventions, the probability of an intervention terminating is overestimated by the Cox model.

twoway (scatter deviance durmths, sort) (lowess deviance durmths, sort) (line zero durmths, sort), ///
    legend(off) ///
    xtitle("Duration of Militarized Intervention") ///
    title("Deviance Residuals") ///
    scheme(s2mono) graphregion(color(white) icolor(none)) ///
    saving(outliersomi2.gph, replace)

    
*Export graph to .png file

graph export outliersomi2.png, replace