In this homework, we explore regression discontinuity designs, based on Carpenter and Dobkin(2009) The Effect of Alcohol Consumption on Mortality: Regression Discontinuity Evidence from the Minimum Drinking Age AEJ: Applied Economics 1(1)164-82.
# Load dataset
df <- read.dta("AEJfigs.dta")
head(df)
## agecell all allfitted internal internalfitted external externalfitted
## 1 19.06849 92.82540 91.70615 16.61759 16.73813 76.20782 74.96801
## 2 19.15068 95.10074 91.88372 18.32768 16.92065 76.77306 74.96307
## 3 19.23288 92.14429 92.04906 18.91105 17.09884 73.23324 74.95023
## 4 19.31507 88.42776 92.20214 16.10177 17.27268 72.32598 74.92947
## 5 19.39726 88.70494 92.34292 17.36352 17.44216 71.34142 74.90076
## 6 19.47945 90.19179 92.47134 17.87210 17.60725 72.31968 74.86409
## alcohol alcoholfitted homicide homicidefitted suicide suicidefitted
## 1 0.6391380 0.7943445 16.31682 16.28457 11.20371 11.59210
## 2 0.6774093 0.8375749 16.85996 16.27070 12.19337 11.59361
## 3 0.8664426 0.8778347 15.21925 16.26288 11.71581 11.59513
## 4 0.8673084 0.9151149 16.74282 16.26115 11.27501 11.59665
## 5 1.0191631 0.9494066 14.94773 16.26551 10.98431 11.59819
## 6 1.1713219 0.9807007 15.64281 16.27599 12.16663 11.59973
## mva mvafitted drugs drugsfitted externalother externalotherfitted
## 1 35.82933 34.81778 3.872425 3.448835 8.534373 8.388236
## 2 35.63926 34.63389 3.236511 3.470022 8.655786 8.530174
## 3 34.20565 34.44674 3.202071 3.492069 8.513741 8.662681
## 4 32.27896 34.25630 3.280689 3.514980 8.258285 8.785728
## 5 32.65097 34.06259 3.548198 3.538755 8.417533 8.899288
## 6 32.72144 33.86558 3.211689 3.563399 7.972546 9.003332
# Create over21 dummy
df$over21 <- ifelse(df$agecell >= 21, 1, 0)
plot(df$agecell, df$all,
xlab = "Age",
xaxt = 'n',
ylab = "Death per 100,000 persons-years",
ylim = c(0, 120),
col = "#626669", pch = 18)
axis (side = 1, at = seq(19, 23, by = 0.5))
points(df$agecell, df$internal, pch = 22, cex = 0.6, col = "darkgray")
points(df$agecell, df$external, pch = 17, cex = 0.6, col = "black")
lines(df$agecell, ifelse(df$over21 == 1, df$allfitted, NA),
lty = 3, lwd = 2, col = "darkgray")
lines(df$agecell, ifelse(df$over21 == 0, df$allfitted, NA),
lty = 3, lwd = 2, col = "darkgray")
lines(df$agecell, ifelse(df$over21 == 1, df$internalfitted, NA),
lty = 5, col = "darkgrey")
lines(df$agecell, ifelse(df$over21 == 0, df$internalfitted, NA),
lty = 5, col = "darkgrey")
lines(df$agecell, ifelse(df$over21 == 1, df$externalfitted, NA),
lty = 1, col = "black")
lines(df$agecell, ifelse(df$over21 == 0, df$externalfitted, NA),
lty = 1, col = "black")
legend(21.60, 60, lty = c(0,0,0,3,3,5,5,1,1), pch = c(18,22,17,NA,NA,NA,NA,NA,NA),
col = c("#626669","darkgray","black","darkgray","darkgrey","darkgrey","darkgrey","black","black"),
legend = c("All","Internal","External",
"All fitted", "Internal fitted", "External fitted"),
ncol = 2, cex = 0.6)
mod <- lm(all ~ agecell + over21, data = df)
stargazer(mod, type = "text", title = "Regression-Discontinuity Design",
align = TRUE, keep.stat = c("n","rsq"),
dep.var.labels = c("Discontinuity in Deaths"), covariate.labels = c("agecell", "over21"))
##
## Regression-Discontinuity Design
## ========================================
## Dependent variable:
## ---------------------------
## Discontinuity in Deaths
## ----------------------------------------
## agecell -0.975
## (0.632)
##
## over21 7.663***
## (1.440)
##
## Constant 112.310***
## (12.668)
##
## ----------------------------------------
## Observations 48
## R2 0.595
## ========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
The coefficient of dummy variable over21 is the treatment effect. It can be interpreted as on average, the mortality rate per 100,000 individuals reaching the minimum drinking age is 7.66 times higher.
plot(df$agecell, df$all,
xlab = "Age",
xaxt = 'n',
ylab = "Death per 100,000 persons-years",
ylim = c(80, 110),
col = "#626669", pch = 18)
axis (side = 1, at = seq(19, 23, by = 0.5))
df$modPredict <- predict(mod, df) #getting the fitted values
lines(df$agecell, ifelse(df$over21 == 1, df$modPredict, NA),
lty = 1, lwd = 2, col = "#FF4500")
lines(df$agecell, ifelse(df$over21 == 0, df$modPredict, NA),
lty = 1, lwd = 2, col = "#FF4500")
legend(22.5, 90, lty = c(0,1,1), pch = c(18,NA,NA), col = c("#626669", "#FF4500", "#FF4500"), legend = c("All", "Fitted"), cex = 0.6)
mod1 <- lm(all ~ agecell + over21 + agecell*over21, data = df)
stargazer(mod1, type = "text", title = "Regression-Discontinuity Design",
align = TRUE, keep.stat = c("n","rsq"),
dep.var.labels = c("Discontinuity in Deaths"), covariate.labels = c("agecell", "over21", "agecell*over21"))
##
## Regression-Discontinuity Design
## ==========================================
## Dependent variable:
## ---------------------------
## Discontinuity in Deaths
## ------------------------------------------
## agecell 0.827
## (0.819)
##
## over21 83.333***
## (24.357)
##
## agecell*over21 -3.603***
## (1.158)
##
## Constant 76.251***
## (16.396)
##
## ------------------------------------------
## Observations 48
## R2 0.668
## ==========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
estimate <- summary(mod1)$coefficients[3, 1] +
summary(mod1)$coefficients[4,1] * 21
estimate
## [1] 7.662709
Here our interest is on the the sum of coefficients of over21 and (agecell * over21). The sum is 7.633 which is equal to the coefficient on d. So, there is no any change in the interpretation.
plot(df$agecell, df$all,
xlab = "Age",
xaxt = 'n',
ylab = "Death per 100,000 persons-years",
ylim = c(80, 110),
col = "#626669", pch = 18)
axis (side = 1, at = seq(19, 23, by = 0.5))
df$modPredict <- predict(mod1, df) #getting the fitted values
lines(df$agecell, ifelse(df$over21 == 1, df$modPredict, NA),
lty = 1, lwd = 2, col = "#FF4500")
lines(df$agecell, ifelse(df$over21 == 0, df$modPredict, NA),
lty = 1, lwd = 2, col = "#FF4500")
legend(22.5, 90, lty = c(0,1,1), pch = c(18,NA,NA), col = c("#626669", "#FF4500", "#FF4500"), legend = c("All", "Fitted"), cex = 0.6)
modrdd <- RDestimate(all ~ agecell, data = df, cutpoint = 21)
summary(modrdd)
##
## Call:
## RDestimate(formula = all ~ agecell, data = df, cutpoint = 21)
##
## Type:
## sharp
##
## Estimates:
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 1.6561 40 9.001 1.480 6.080 1.199e-09
## Half-BW 0.8281 20 9.579 1.914 5.004 5.609e-07
## Double-BW 3.3123 48 7.953 1.278 6.223 4.882e-10
##
## LATE ***
## Half-BW ***
## Double-BW ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## F-statistics:
## F Num. DoF Denom. DoF p
## LATE 33.08 3 36 3.799e-10
## Half-BW 29.05 3 16 2.078e-06
## Double-BW 32.54 3 44 6.129e-11
plot(modrdd)
Using a bandwidth of 1.65 and default triangular kernel, the estimated average treatment effect is 9.01 which is higher than what we got in part d.
modrdd1 <- RDestimate(all ~ agecell, data = df, cutpoint = 21,
kernel = "rectangular", bw = 2)
summary(modrdd1)
##
## Call:
## RDestimate(formula = all ~ agecell, data = df, cutpoint = 21,
## bw = 2, kernel = "rectangular")
##
## Type:
## sharp
##
## Estimates:
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 2 48 7.663 1.273 6.017 1.776e-09
## Half-BW 1 24 9.753 1.902 5.128 2.929e-07
## Double-BW 4 48 7.663 1.273 6.017 1.776e-09
##
## LATE ***
## Half-BW ***
## Double-BW ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## F-statistics:
## F Num. DoF Denom. DoF p
## LATE 29.47 3 44 2.651e-10
## Half-BW 16.82 3 20 2.167e-05
## Double-BW 29.47 3 44 2.651e-10
plot(modrdd1)
Here, with the band width of 2 and rectangular kernel, the average treatment effect is 7.66 which is similar to what we got in part d and f but smaller than part f.