library(haven)
library(ggplot2)
library(stargazer)
library(rdd)
data <- read_dta("AEJfigs.dta")
over21<-as.data.frame(ifelse(data[,'agecell']>21,1,0))
colnames(over21)<-'over21'
data<-cbind(data,over21)
plot(data[,'agecell'],data[,'all'],ylim=c(0,140),main='Figure 3: Age profile for death rates',xlab='age',ylab='deaths', pch=18, col="azure3")
points(data[,'agecell'],data[,'external'], pch=17)
points(data[,'agecell'],data[,'internal'], pch=0)
#All
lines(data[,'agecell'],ifelse(data[,'agecell']<21,data[,'allfitted'],NA), lty = 3)
lines(data[,'agecell'],ifelse(data[,'agecell']>21,data[,'allfitted'],NA), lty = 3)
#External
lines(data[,'agecell'],ifelse(data[,'agecell']<21,data[,'externalfitted'],NA), lty = 5)
lines(data[,'agecell'],ifelse(data[,'agecell']>21,data[,'externalfitted'],NA), lty = 5)
#Internal
lines(data[,'agecell'],ifelse(data[,'agecell']<21,data[,'internalfitted'],NA), lty = 1)
lines(data[,'agecell'],ifelse(data[,'agecell']>21,data[,'internalfitted'],NA), lty = 1)
#Legend
legend("topleft", lty = c(0,0,0,3,3,5,5,1,1), pch = c(18,17,0,NA,NA,NA,NA,NA,NA), col=c("azure3", "black", "black", "black", "black", "black", "black", "black", "black"),
legend = c("All","Internal","External", "All fitted", "Internal fitted", "External fitted"),
ncol = 2, cex = 0.6)
reg1<-lm(all ~ agecell + over21, data=data)
summary(reg1)
##
## Call:
## lm(formula = all ~ agecell + over21, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0559 -1.8483 0.1149 1.4909 5.8043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 112.3097 12.6681 8.866 1.96e-11 ***
## agecell -0.9747 0.6325 -1.541 0.13
## over21 7.6627 1.4403 5.320 3.15e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.493 on 45 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.5946, Adjusted R-squared: 0.5765
## F-statistic: 32.99 on 2 and 45 DF, p-value: 1.508e-09
Results show that alcohol consumption leads to 7 additional deaths.
reg2<-coef(reg1)[1]+coef(reg1)[2]*data[,'agecell']+coef(reg1)[3]*data[,'over21']
plot(data[,'agecell'],data[,'all'],ylim=c(80,110),main='Simple Regression Discontinuity Design',xlab='Age',ylab='No. of deaths')
lines(data[,'agecell'],reg2)
reg3<-lm(all~agecell+over21+agecell*over21,data=data)
summary(reg3)
##
## Call:
## lm(formula = all ~ agecell + over21 + agecell * over21, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.368 -1.787 0.117 1.108 5.341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.2515 16.3965 4.650 3.03e-05 ***
## agecell 0.8270 0.8189 1.010 0.31809
## over21 83.3332 24.3568 3.421 0.00136 **
## agecell:over21 -3.6034 1.1581 -3.111 0.00327 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.283 on 44 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.6677, Adjusted R-squared: 0.645
## F-statistic: 29.47 on 3 and 44 DF, p-value: 1.325e-10
After summing estimated coefficients between over21 and _agecell*over21_, the results show that the estimate is the same as above.
reg2<-coef(reg3)[1]+coef(reg3)[2]*data[,'agecell']+coef(reg3)[3]*data[,'over21']+
coef(reg3)[4]*data[,'over21']*data[,'agecell']
plot(data[,'agecell'],data[,'all'],ylim=c(85,115),main='Flexible Regression Discontinuity Design',xlab='Age',ylab='No. of deaths')
lines(data[,'agecell'],reg2)
reg4<-RDestimate(all~agecell,data=data,cutpoint=21)
plot(reg4)
reg5<-RDestimate(all~agecell,data=data,cutpoint=21,kernel = "rectangular",bw=2)
plot(reg5)
It is similar to what we got for simple RDD.