setwd("C:/Users/Julie/Documents/Grad_School/PhD/2.2.p/MA_HW/HW_10")
library(foreign)
AEJ <- read.dta("AEJfigs.dta")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
AEJ_a<-AEJ
AEJ_ap<- mutate(AEJ_a, over21 = ifelse(agecell >= 21, 1, 0))
The data used in this exercise is sample data from the same larger data set used in:
Carpenter and Dobkin(2009) The Effect of Alcohol Consumption on Mortality: Regression Discontinuity Evidence from the Minimum Drinking Age AEJ: Applied Economics 1(1)164-82
library(ggplot2)
g = ggplot() +
geom_point(data = AEJ_ap, aes(x = agecell, y = all), color = "blue") +
geom_line(data = AEJ_ap, aes(x = agecell, y = allfitted), color = "blue4") +
geom_point(data = AEJ_ap, aes(x = agecell, y = internal), color = "red") +
geom_line(data = AEJ_ap, aes(x = agecell, y = internalfitted), color = "red4") +
geom_point(data = AEJ_ap, aes(x = agecell, y = external), color = "purple") +
geom_line(data = AEJ_ap, aes(x = agecell, y = externalfitted), color = "purple4") +
xlab('Age') +
ylab('Deaths per 100,000 person-years')+
ggtitle('Age Profile for Death Rates')
g
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).
This graph shows the results similar to Table 3 in the above mentioned paper. The purple line and data points refer to external deaths, the red refers to internal deaths, and the blue refers to all deaths.
RDD <-lm(all~agecell+over21, data = AEJ_ap)
print(RDD)
##
## Call:
## lm(formula = all ~ agecell + over21, data = AEJ_ap)
##
## Coefficients:
## (Intercept) agecell over21
## 112.3097 -0.9747 7.6627
agecoef<-RDD[["coefficients"]][["agecell"]]
over21coef<-RDD[["coefficients"]][["over21"]]
int1<-RDD[["coefficients"]][["(Intercept)"]]
AEJ_1<-mutate(AEJ_ap, pred_all1 = int1+over21coef*over21+agecell*agecoef)
#Actual Plot
g2 = ggplot() +
geom_point(data = AEJ_1, aes(x = agecell, y = all), color = "blue")+
geom_line(data = AEJ_1, aes(x = agecell, y=pred_all1), color = "blue4")+
xlab('Age') +
ylab('Deaths per 100,000 person-years')+
ggtitle('Age Profile for Death Rates - RDD')+theme_classic()
g2
## Warning: Removed 2 rows containing missing values (geom_point).
Ended up having to make a predicted all deaths variable to properly graph.
From the results of the regression, it is clear that turning 21 does have a positive impact on the number of overall deaths, consistent with the arguments laid out by Carpenter and Dobkin. It also makes sense that there is a slight decrease in deaths as age increases, especially looking at the ages that are covered in the data set. Forcing the two sides of the discontinuity to be the same slope may be a hindrance of getting an actual picture as to the impact of alcohol intake (if the explanation of the RDD from the paper is to be believed.)
AEJ_2 <- mutate(AEJ_ap, slopeshift = over21*agecell)
RDD_s <-lm(all~agecell+over21+slopeshift, data = AEJ_2)
print(RDD_s)
##
## Call:
## lm(formula = all ~ agecell + over21 + slopeshift, data = AEJ_2)
##
## Coefficients:
## (Intercept) agecell over21 slopeshift
## 76.251 0.827 83.333 -3.603
agecoef_s<-RDD_s[["coefficients"]][["agecell"]]
over21coef_s<-RDD_s[["coefficients"]][["over21"]]
int2<-RDD_s[["coefficients"]][["(Intercept)"]]
slopecoef<-RDD_s[["coefficients"]][["slopeshift"]]
AEJ_2<-mutate(AEJ_2, pred_all2 = int2+over21coef_s*over21+agecell*agecoef_s+slopeshift*slopecoef)
#Actual Plot
g3 = ggplot() +
geom_point(data = AEJ_2, aes(x = agecell, y = all), color = "blue")+
geom_line(data = AEJ_2, aes(x = agecell, y=pred_all2), color = "blue4")+
xlab('Age') +
ylab('Deaths per 100,000 person-years')+
ggtitle('Age Profile for Death Rates - Simple RDD')+theme_classic()
g3
## Warning: Removed 2 rows containing missing values (geom_point).
This regression follows common sense expectations a lot more than the previous regression (at least in graphical form). The coefficient on the over 21 dummy is still positive, meaning the intercept for over 21 is higher. The slope on this side of the regression however is negative, which makes sense in this context, as once a person becomes used to having access to alcohol they are less likely to engage in risky behaviors than then they just turn 21. The slope on the other side of the of the discontinuity the slope is positive, probably as people get close to turning 21 they become more risky in their alcohol habits.
library(rdd)
## Warning: package 'rdd' was built under R version 3.6.3
## Loading required package: sandwich
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: AER
## Warning: package 'AER' was built under R version 3.6.3
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: survival
## Loading required package: Formula
RDD_p<-RDestimate(all~agecell, AEJ_ap, cutpoint = 21)
print(RDD_p)
##
## Call:
## RDestimate(formula = all ~ agecell, data = AEJ_ap, cutpoint = 21)
##
## Coefficients:
## LATE Half-BW Double-BW
## 9.001 9.579 7.953
plot(RDD_p)
The coefficient on the LATE variable is much smaller the other previous over21 variables, but the graph does look very similar to the previous sections graph.
RDD_p2<-RDestimate(all~agecell, AEJ_ap, cutpoint = 21, kernel="rectangular", bw=2)
print(RDD_p2)
##
## Call:
## RDestimate(formula = all ~ agecell, data = AEJ_ap, cutpoint = 21,
## bw = 2, kernel = "rectangular")
##
## Coefficients:
## LATE Half-BW Double-BW
## 7.663 9.753 7.663
plot(RDD_p2)
The results of this are almost exactly the same as from the first simple RDD regression. meaning the impact of turning 21 results in about 7.5 more deaths.