library(foreign)
library(ggplot2)
library(stargazer)
library(tidyverse)
library(rdd)
mydata <- read.dta("AEJfigs.dta")
summary(mydata)
## agecell all allfitted internal
## Min. :19.07 Min. : 88.43 Min. : 91.71 Min. :15.98
## 1st Qu.:20.08 1st Qu.: 92.79 1st Qu.: 93.04 1st Qu.:18.60
## Median :21.00 Median : 95.69 Median : 95.18 Median :20.29
## Mean :21.00 Mean : 95.67 Mean : 95.80 Mean :20.29
## 3rd Qu.:21.92 3rd Qu.: 98.03 3rd Qu.: 97.79 3rd Qu.:21.98
## Max. :22.93 Max. :105.27 Max. :102.89 Max. :24.37
## NA's :2 NA's :2
## internalfitted external externalfitted alcohol
## Min. :16.74 Min. :71.34 Min. :73.16 Min. :0.6391
## 1st Qu.:18.67 1st Qu.:73.04 1st Qu.:74.06 1st Qu.:0.9962
## Median :20.54 Median :74.81 Median :74.74 Median :1.2119
## Mean :20.28 Mean :75.39 Mean :75.52 Mean :1.2573
## 3rd Qu.:21.66 3rd Qu.:77.24 3rd Qu.:76.06 3rd Qu.:1.4701
## Max. :24.04 Max. :83.33 Max. :81.78 Max. :2.5193
## NA's :2 NA's :2
## alcoholfitted homicide homicidefitted suicide
## Min. :0.7943 Min. :14.95 Min. :16.26 Min. :10.89
## 1st Qu.:1.0724 1st Qu.:16.61 1st Qu.:16.54 1st Qu.:11.61
## Median :1.2471 Median :16.99 Median :16.99 Median :12.20
## Mean :1.2674 Mean :16.91 Mean :16.95 Mean :12.35
## 3rd Qu.:1.4455 3rd Qu.:17.29 3rd Qu.:17.25 3rd Qu.:12.82
## Max. :1.8174 Max. :18.41 Max. :17.76 Max. :14.83
## NA's :2 NA's :2
## suicidefitted mva mvafitted drugs
## Min. :11.59 Min. :26.86 Min. :27.87 Min. :3.202
## 1st Qu.:11.61 1st Qu.:30.12 1st Qu.:30.17 1st Qu.:3.755
## Median :12.25 Median :31.64 Median :31.73 Median :4.314
## Mean :12.36 Mean :31.62 Mean :31.68 Mean :4.250
## 3rd Qu.:13.04 3rd Qu.:33.10 3rd Qu.:33.40 3rd Qu.:4.756
## Max. :13.55 Max. :36.39 Max. :34.82 Max. :5.565
## NA's :2 NA's :2
## drugsfitted externalother externalotherfitted
## Min. :3.449 Min. : 7.973 Min. : 8.388
## 1st Qu.:3.769 1st Qu.: 9.149 1st Qu.: 9.347
## Median :4.323 Median : 9.561 Median : 9.690
## Mean :4.255 Mean : 9.599 Mean : 9.610
## 3rd Qu.:4.679 3rd Qu.:10.122 3rd Qu.: 9.939
## Max. :5.130 Max. :11.483 Max. :10.353
## NA's :2
# Create dummy for over21
mydata$over21 = ifelse(mydata$agecell >= 21,1,0)
# Create dataframes for each category
df1<-data.frame(age=mydata$agecell,deaths=mydata$all)
df2<-data.frame(age=mydata$agecell,deaths=mydata$internal)
df3<-data.frame(age=mydata$agecell,deaths=mydata$external)
df4<-data.frame(age=mydata$agecell,deaths=mydata$allfitted)
df5<-data.frame(age=mydata$agecell,deaths=mydata$internalfitted)
df6<-data.frame(age=mydata$agecell,deaths=mydata$externalfitted)
#Generate plot
plot1 <- ggplot(df1,aes(y=deaths, x=age)) +
geom_point(shape=5, color="gray15") +
geom_point(aes(y=deaths, x=age), data=df2, shape=0, color="gray20") +
geom_point(aes(y=deaths, x=age), data=df3, shape=2) +
geom_line(aes(y=deaths, x=age), data=df4[df4$age<21,], linetype="dotted") +
geom_line(aes(y=deaths, x=age), data=df4[df4$age>=21,], linetype="dotted") +
geom_line(aes(y=deaths, x=age), data=df5[df5$age<21,], linetype="dashed",color= "gray20") +
geom_line(aes(y=deaths, x=age), data=df5[df5$age>=21,], linetype="dashed",color="gray20") +
geom_line(aes(y=deaths, x=age), data=df6[df6$age<21,], linetype="solid") +
geom_line(aes(y=deaths, x=age), data=df6[df6$age>=21,], linetype="solid") +
scale_x_continuous(breaks = seq(19, 23, by = 0.5), expand=c(0,0),limits=c(19,23)) +
scale_y_continuous(breaks = seq(0, 120, by = 20), expand=c(0,0),limits=c(0,120)) +
xlab("Age") + ylab("Deaths per 100,000 person-years")
plot1
#Run the regression
rd <- lm(all ~ agecell + over21, data = mydata)
# Create table for results
stargazer(rd, type = "text", title = "Simplest Regression-Discontinuity Design",
align = TRUE, keep.stat = c("n","adj.rsq"), omit = "Constant",
dep.var.labels = c("Discontinuity in Deaths"), covariate.labels = c("agecell", "over21"))
##
## Simplest Regression-Discontinuity Design
## ========================================
## Dependent variable:
## ---------------------------
## Discontinuity in Deaths
## ----------------------------------------
## agecell -0.975
## (0.632)
##
## over21 7.663***
## (1.440)
##
## ----------------------------------------
## Observations 48
## Adjusted R2 0.577
## ========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
As age crosses 21, the mortality caused by drinking increases by 7.663 units
# Create the plot using regression results
plot2 <- ggplot(mydata,aes(y=all,x=agecell))+geom_point(shape = 1) +
geom_line(aes(y=-0.975*agecell+112.310,x=agecell),data=mydata[mydata$agecell<21,]) +
geom_line(aes(y=-0.975*agecell+112.310+7.663,x=agecell),data=mydata[mydata$agecell>=21,]) +
xlab("Age") + ylab("Deaths")
plot2
# Run regressions and create results table
rd1 <- lm(all ~ agecell, data = mydata[mydata$agecell<21,])
stargazer(rd1, type = "text", title = "Flexible Regression-Discontinuity Design 1",
align = TRUE, keep.stat = c("n","adj.rsq"))
##
## Flexible Regression-Discontinuity Design 1
## ========================================
## Dependent variable:
## ---------------------------
## all
## ----------------------------------------
## agecell 0.827
## (0.857)
##
## Constant 76.251***
## (17.153)
##
## ----------------------------------------
## Observations 24
## Adjusted R2 -0.003
## ========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
rd2 <- lm(all ~ agecell, data = mydata[mydata$agecell>=21,])
stargazer(rd2, type = "text", title = "Flexible Regression-Discontinuity Design 2",
align = TRUE, keep.stat = c("n","adj.rsq"))
##
## Flexible Regression-Discontinuity Design 2
## ========================================
## Dependent variable:
## ---------------------------
## all
## ----------------------------------------
## agecell -2.776***
## (0.779)
##
## Constant 159.585***
## (17.140)
##
## ----------------------------------------
## Observations 24
## Adjusted R2 0.337
## ========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
It can be seen from the results that after age 21, there is a negative effect of drinking on mortality, but not before.
# Create the plot using regression results
plot3 <- ggplot(mydata,aes(y=all,x=agecell)) +
geom_point(shape = 1) +
geom_line(aes(y=0.817*agecell+76.251,x=agecell),data = mydata[mydata$agecell<21,]) +
geom_line(aes(y=-2.776*agecell+159.585,x=agecell),data = mydata[mydata$agecell>=21,])
plot3
RDD1 <- RDestimate(all ~ agecell, data = mydata, cutpoint=21)
RDD1
##
## Call:
## RDestimate(formula = all ~ agecell, data = mydata, cutpoint = 21)
##
## Coefficients:
## LATE Half-BW Double-BW
## 9.001 9.579 7.953
plot(RDD1)
It does not differ much from part f but the local average treatment effect is 9.001 which is higher. After age 21 there is a negative effect of drinking on mortality but the effect is not linear
RDD2 <- RDestimate(all ~ agecell,data = mydata, cutpoint=21, kernel = "rectangular", bw=2)
RDD2
##
## Call:
## RDestimate(formula = all ~ agecell, data = mydata, cutpoint = 21,
## bw = 2, kernel = "rectangular")
##
## Coefficients:
## LATE Half-BW Double-BW
## 7.663 9.753 7.663
plot(RDD2)
Compared to the previous output, the local average treatment effect is 7.663 which is the same as the result obtained from the simplest regression.