Regression Discontinuity

b. Get the Carpenter & Dobkin data

library(foreign)
library(ggplot2)
library(stargazer)
library(tidyverse)
library(rdd)
mydata <- read.dta("AEJfigs.dta")
summary(mydata)
##     agecell           all           allfitted         internal    
##  Min.   :19.07   Min.   : 88.43   Min.   : 91.71   Min.   :15.98  
##  1st Qu.:20.08   1st Qu.: 92.79   1st Qu.: 93.04   1st Qu.:18.60  
##  Median :21.00   Median : 95.69   Median : 95.18   Median :20.29  
##  Mean   :21.00   Mean   : 95.67   Mean   : 95.80   Mean   :20.29  
##  3rd Qu.:21.92   3rd Qu.: 98.03   3rd Qu.: 97.79   3rd Qu.:21.98  
##  Max.   :22.93   Max.   :105.27   Max.   :102.89   Max.   :24.37  
##                  NA's   :2                         NA's   :2      
##  internalfitted     external     externalfitted     alcohol      
##  Min.   :16.74   Min.   :71.34   Min.   :73.16   Min.   :0.6391  
##  1st Qu.:18.67   1st Qu.:73.04   1st Qu.:74.06   1st Qu.:0.9962  
##  Median :20.54   Median :74.81   Median :74.74   Median :1.2119  
##  Mean   :20.28   Mean   :75.39   Mean   :75.52   Mean   :1.2573  
##  3rd Qu.:21.66   3rd Qu.:77.24   3rd Qu.:76.06   3rd Qu.:1.4701  
##  Max.   :24.04   Max.   :83.33   Max.   :81.78   Max.   :2.5193  
##                  NA's   :2                       NA's   :2       
##  alcoholfitted       homicide     homicidefitted     suicide     
##  Min.   :0.7943   Min.   :14.95   Min.   :16.26   Min.   :10.89  
##  1st Qu.:1.0724   1st Qu.:16.61   1st Qu.:16.54   1st Qu.:11.61  
##  Median :1.2471   Median :16.99   Median :16.99   Median :12.20  
##  Mean   :1.2674   Mean   :16.91   Mean   :16.95   Mean   :12.35  
##  3rd Qu.:1.4455   3rd Qu.:17.29   3rd Qu.:17.25   3rd Qu.:12.82  
##  Max.   :1.8174   Max.   :18.41   Max.   :17.76   Max.   :14.83  
##                   NA's   :2                       NA's   :2      
##  suicidefitted        mva          mvafitted         drugs      
##  Min.   :11.59   Min.   :26.86   Min.   :27.87   Min.   :3.202  
##  1st Qu.:11.61   1st Qu.:30.12   1st Qu.:30.17   1st Qu.:3.755  
##  Median :12.25   Median :31.64   Median :31.73   Median :4.314  
##  Mean   :12.36   Mean   :31.62   Mean   :31.68   Mean   :4.250  
##  3rd Qu.:13.04   3rd Qu.:33.10   3rd Qu.:33.40   3rd Qu.:4.756  
##  Max.   :13.55   Max.   :36.39   Max.   :34.82   Max.   :5.565  
##                  NA's   :2                       NA's   :2      
##   drugsfitted    externalother    externalotherfitted
##  Min.   :3.449   Min.   : 7.973   Min.   : 8.388     
##  1st Qu.:3.769   1st Qu.: 9.149   1st Qu.: 9.347     
##  Median :4.323   Median : 9.561   Median : 9.690     
##  Mean   :4.255   Mean   : 9.599   Mean   : 9.610     
##  3rd Qu.:4.679   3rd Qu.:10.122   3rd Qu.: 9.939     
##  Max.   :5.130   Max.   :11.483   Max.   :10.353     
##                  NA's   :2
# Create dummy for over21
mydata$over21 = ifelse(mydata$agecell >= 21,1,0)

c. Reproduce Figure 3 of the paper

# Create dataframes for each category
df1<-data.frame(age=mydata$agecell,deaths=mydata$all)
df2<-data.frame(age=mydata$agecell,deaths=mydata$internal)
df3<-data.frame(age=mydata$agecell,deaths=mydata$external)
df4<-data.frame(age=mydata$agecell,deaths=mydata$allfitted)
df5<-data.frame(age=mydata$agecell,deaths=mydata$internalfitted)
df6<-data.frame(age=mydata$agecell,deaths=mydata$externalfitted)
#Generate plot
plot1 <- ggplot(df1,aes(y=deaths, x=age)) + 
  geom_point(shape=5, color="gray15") +
  geom_point(aes(y=deaths, x=age), data=df2, shape=0, color="gray20") +
  geom_point(aes(y=deaths, x=age), data=df3, shape=2) +
  geom_line(aes(y=deaths, x=age), data=df4[df4$age<21,], linetype="dotted") +
  geom_line(aes(y=deaths, x=age), data=df4[df4$age>=21,], linetype="dotted") +
  geom_line(aes(y=deaths, x=age), data=df5[df5$age<21,], linetype="dashed",color= "gray20") +
  geom_line(aes(y=deaths, x=age), data=df5[df5$age>=21,], linetype="dashed",color="gray20") +
  geom_line(aes(y=deaths, x=age), data=df6[df6$age<21,], linetype="solid") +
  geom_line(aes(y=deaths, x=age), data=df6[df6$age>=21,], linetype="solid") +
  scale_x_continuous(breaks = seq(19, 23, by = 0.5), expand=c(0,0),limits=c(19,23)) +
  scale_y_continuous(breaks = seq(0, 120, by = 20), expand=c(0,0),limits=c(0,120)) +
  xlab("Age") + ylab("Deaths per 100,000 person-years")
plot1

d. Simplest regression-discontinuity design

#Run the regression
rd <- lm(all ~ agecell + over21, data = mydata)
# Create table for results
stargazer(rd, type = "text", title = "Simplest Regression-Discontinuity Design",
          align = TRUE, keep.stat = c("n","adj.rsq"), omit = "Constant",
          dep.var.labels = c("Discontinuity in Deaths"), covariate.labels = c("agecell", "over21"))
## 
## Simplest Regression-Discontinuity Design
## ========================================
##                  Dependent variable:    
##              ---------------------------
##                Discontinuity in Deaths  
## ----------------------------------------
## agecell                -0.975           
##                        (0.632)          
##                                         
## over21                7.663***          
##                        (1.440)          
##                                         
## ----------------------------------------
## Observations             48             
## Adjusted R2             0.577           
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01

As age crosses 21, the mortality caused by drinking increases by 7.663 units

e. Plot the results

# Create the plot using regression results
plot2 <- ggplot(mydata,aes(y=all,x=agecell))+geom_point(shape = 1) +
  geom_line(aes(y=-0.975*agecell+112.310,x=agecell),data=mydata[mydata$agecell<21,]) +
  geom_line(aes(y=-0.975*agecell+112.310+7.663,x=agecell),data=mydata[mydata$agecell>=21,]) +
  xlab("Age") + ylab("Deaths")
plot2

f. Allow more flexibility to your RD

# Run regressions and create results table
rd1 <- lm(all ~ agecell, data = mydata[mydata$agecell<21,])
stargazer(rd1, type = "text", title = "Flexible Regression-Discontinuity Design 1",
          align = TRUE, keep.stat = c("n","adj.rsq"))
## 
## Flexible Regression-Discontinuity Design 1
## ========================================
##                  Dependent variable:    
##              ---------------------------
##                          all            
## ----------------------------------------
## agecell                 0.827           
##                        (0.857)          
##                                         
## Constant              76.251***         
##                       (17.153)          
##                                         
## ----------------------------------------
## Observations             24             
## Adjusted R2            -0.003           
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01
rd2 <- lm(all ~ agecell, data = mydata[mydata$agecell>=21,])
stargazer(rd2, type = "text", title = "Flexible Regression-Discontinuity Design 2",
          align = TRUE, keep.stat = c("n","adj.rsq"))
## 
## Flexible Regression-Discontinuity Design 2
## ========================================
##                  Dependent variable:    
##              ---------------------------
##                          all            
## ----------------------------------------
## agecell               -2.776***         
##                        (0.779)          
##                                         
## Constant             159.585***         
##                       (17.140)          
##                                         
## ----------------------------------------
## Observations             24             
## Adjusted R2             0.337           
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01

It can be seen from the results that after age 21, there is a negative effect of drinking on mortality, but not before.

g. Again, plot the data

# Create the plot using regression results
plot3 <- ggplot(mydata,aes(y=all,x=agecell)) + 
  geom_point(shape = 1) +
  geom_line(aes(y=0.817*agecell+76.251,x=agecell),data = mydata[mydata$agecell<21,]) +
  geom_line(aes(y=-2.776*agecell+159.585,x=agecell),data = mydata[mydata$agecell>=21,])
plot3

h. Compare to pre-packaged RDD:

RDD1 <- RDestimate(all ~ agecell, data = mydata, cutpoint=21)
RDD1
## 
## Call:
## RDestimate(formula = all ~ agecell, data = mydata, cutpoint = 21)
## 
## Coefficients:
##      LATE    Half-BW  Double-BW  
##     9.001      9.579      7.953
plot(RDD1)

It does not differ much from part f but the local average treatment effect is 9.001 which is higher. After age 21 there is a negative effect of drinking on mortality but the effect is not linear

RDD2 <- RDestimate(all ~ agecell,data = mydata, cutpoint=21, kernel = "rectangular", bw=2)
RDD2
## 
## Call:
## RDestimate(formula = all ~ agecell, data = mydata, cutpoint = 21, 
##     bw = 2, kernel = "rectangular")
## 
## Coefficients:
##      LATE    Half-BW  Double-BW  
##     7.663      9.753      7.663
plot(RDD2)

Compared to the previous output, the local average treatment effect is 7.663 which is the same as the result obtained from the simplest regression.