library(haven)
library(ggplot2)
library(stargazer)
library(rdd)
data <- read_dta("AEJfigs.dta")
over21<-as.data.frame(ifelse(data[,'agecell']>21,1,0))
colnames(over21)<-'over21'
data<-cbind(data,over21)

Reproducing Figure 3

plot(data[,'agecell'],data[,'all'],ylim=c(0,140),main='Figure 3: Age profile for death rates',xlab='age',ylab='deaths', pch=18, col="azure3")
points(data[,'agecell'],data[,'external'], pch=17)
points(data[,'agecell'],data[,'internal'], pch=0)

#All
lines(data[,'agecell'],ifelse(data[,'agecell']<21,data[,'allfitted'],NA), lty = 3)
lines(data[,'agecell'],ifelse(data[,'agecell']>21,data[,'allfitted'],NA), lty = 3)

#External
lines(data[,'agecell'],ifelse(data[,'agecell']<21,data[,'externalfitted'],NA), lty = 5)
lines(data[,'agecell'],ifelse(data[,'agecell']>21,data[,'externalfitted'],NA), lty = 5)

#Internal
lines(data[,'agecell'],ifelse(data[,'agecell']<21,data[,'internalfitted'],NA), lty = 1)
lines(data[,'agecell'],ifelse(data[,'agecell']>21,data[,'internalfitted'],NA), lty = 1)

#Legend
legend("topleft", lty = c(0,0,0,3,3,5,5,1,1), pch = c(18,17,0,NA,NA,NA,NA,NA,NA), col=c("azure3", "black", "black", "black", "black", "black", "black", "black", "black"),
      legend = c("All","Internal","External", "All fitted", "Internal fitted", "External fitted"),
      ncol = 2, cex = 0.6)

Simple regression-discontinuity design

reg1<-lm(all ~ agecell + over21, data=data)
summary(reg1)
## 
## Call:
## lm(formula = all ~ agecell + over21, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.0559 -1.8483  0.1149  1.4909  5.8043 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 112.3097    12.6681   8.866 1.96e-11 ***
## agecell      -0.9747     0.6325  -1.541     0.13    
## over21        7.6627     1.4403   5.320 3.15e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.493 on 45 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.5946, Adjusted R-squared:  0.5765 
## F-statistic: 32.99 on 2 and 45 DF,  p-value: 1.508e-09

Results show that alcohol consumption leads to 7 additional deaths.

Plotting simple RDD results

reg2<-coef(reg1)[1]+coef(reg1)[2]*data[,'agecell']+coef(reg1)[3]*data[,'over21']
plot(data[,'agecell'],data[,'all'],ylim=c(80,110),main='Simple Regression Discontinuity Design',xlab='Age',ylab='No. of deaths')
lines(data[,'agecell'],reg2)

More flexible RDD

reg3<-lm(all~agecell+over21+agecell*over21,data=data)
summary(reg3)
## 
## Call:
## lm(formula = all ~ agecell + over21 + agecell * over21, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.368 -1.787  0.117  1.108  5.341 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     76.2515    16.3965   4.650 3.03e-05 ***
## agecell          0.8270     0.8189   1.010  0.31809    
## over21          83.3332    24.3568   3.421  0.00136 ** 
## agecell:over21  -3.6034     1.1581  -3.111  0.00327 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.283 on 44 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.6677, Adjusted R-squared:  0.645 
## F-statistic: 29.47 on 3 and 44 DF,  p-value: 1.325e-10

After summing estimated coefficients between over21 and _agecell*over21_, the results show that the estimate is the same as above.

Plotting flexible RDD

reg2<-coef(reg3)[1]+coef(reg3)[2]*data[,'agecell']+coef(reg3)[3]*data[,'over21']+
  coef(reg3)[4]*data[,'over21']*data[,'agecell']
plot(data[,'agecell'],data[,'all'],ylim=c(85,115),main='Flexible Regression Discontinuity Design',xlab='Age',ylab='No. of deaths')
lines(data[,'agecell'],reg2)

Pre-packaged RDD

reg4<-RDestimate(all~agecell,data=data,cutpoint=21)
plot(reg4)

Prepackaged linear RDD

reg5<-RDestimate(all~agecell,data=data,cutpoint=21,kernel = "rectangular",bw=2)
plot(reg5)

It is similar to what we got for simple RDD.