#Loading required packages
library(prettydoc) #For the theme used in this document
library(tidyverse) #Required for renaming
library(haven) #Stata data loading to R
library(stargazer) #Nice tables
library(reshape2) #Required for reshaping the data
library(dplyr)
library(tidyr)
library(ggplot2)
library(rdd)Part 1: Reading and questions
Briefly answer these questions:
a. Download and read the paper:
Downloaded and read the paper.
b. Get the Carpenter & Dobkin and data:
#Setting up the directory
setwd("D:/UGA Coursework/Second Year/AAEC 8610/HWs/HW6")
df <- read_dta("AEJfigs.dta")
head(df)## # A tibble: 6 × 19
## agecell all allfit…¹ inter…² inter…³ exter…⁴ exter…⁵ alcohol alcoh…⁶ homic…⁷
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 19.1 92.8 91.7 16.6 16.7 76.2 75.0 0.639 0.794 16.3
## 2 19.2 95.1 91.9 18.3 16.9 76.8 75.0 0.677 0.838 16.9
## 3 19.2 92.1 92.0 18.9 17.1 73.2 75.0 0.866 0.878 15.2
## 4 19.3 88.4 92.2 16.1 17.3 72.3 74.9 0.867 0.915 16.7
## 5 19.4 88.7 92.3 17.4 17.4 71.3 74.9 1.02 0.949 14.9
## 6 19.5 90.2 92.5 17.9 17.6 72.3 74.9 1.17 0.981 15.6
## # … with 9 more variables: homicidefitted <dbl>, suicide <dbl>,
## # suicidefitted <dbl>, mva <dbl>, mvafitted <dbl>, drugs <dbl>,
## # drugsfitted <dbl>, externalother <dbl>, externalotherfitted <dbl>, and
## # abbreviated variable names ¹allfitted, ²internal, ³internalfitted,
## # ⁴external, ⁵externalfitted, ⁶alcoholfitted, ⁷homicide
df$over21 = ifelse(df$agecell >= 21,1,0)c. Reproduce Figure 3 of the paper
#Trying to reproduce figure 3
f3 <- ggplot(data = df, aes(x = agecell, y = all)) +
geom_point(size = 0.5, shape = 16, color = "steelblue") +
geom_point(aes(y = internal), size = 0.35, shape = 0, color = "green") +
geom_point(aes(y = external), size = 0.5, shape = 17, color = "red") +
geom_line(aes(y = allfitted, linetype = "All fitted"), color = "black", size = 1, linetype = "dashed") +
geom_line(aes(y = internalfitted, linetype = "Int. fitted"), color = "black", size = 1, linetype = "dotted") +
geom_line(aes(y = externalfitted, linetype = "Ext. fitted"), color = "black", size = 1) +
xlim(19, 23) +
ylim(0, 120) +
labs(x = "Age", y = "Deaths per 100,000 person-years",
title = "FIGURE 3") +
theme_classic()
f3d. Simplest regression-discontinuity design
#Treatment: age 21
simplestRDD <- lm(all ~ agecell + over21, data = df)
df$predicted_d <- predict(simplestRDD, newdata = df)
stargazer(simplestRDD, title = "Simple Regression Discontinuity Results", type = "text",
column.labels = c("Coefficients", "Std. Err.", "t value", "Pr(>|t|)"),
covariate.labels = c("Age", "Treatment=1 (for > Over 21)"),
dep.var.caption = "Dependent variable: All",
dep.var.labels.include = FALSE,
out = "regression_results.html")##
## Simple Regression Discontinuity Results
## =======================================================
## Dependent variable: All
## ---------------------------
## Coefficients
## -------------------------------------------------------
## Age -0.975
## (0.632)
##
## Treatment=1 (for > Over 21) 7.663***
## (1.440)
##
## Constant 112.310***
## (12.668)
##
## -------------------------------------------------------
## Observations 48
## R2 0.595
## Adjusted R2 0.577
## Residual Std. Error 2.493 (df = 45)
## F Statistic 32.995*** (df = 2; 45)
## =======================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
e. Plot the results
fig_e <- ggplot(data = df, aes(x = agecell, y = all)) +
geom_point(pch = 10) +
geom_line(data = subset(df, over21 == 0), aes(y = predicted_d), color = "red", size = 1.5, linetype = "solid") +
geom_line(data = subset(df, over21 == 1), aes(y = predicted_d), color = "green", size = 1.5, linetype = "solid")
fig_ef. Allow more flexibility to your RD
#Allow slope to vary as well:
df$interaction21xAGE=df$over21*df$agecell
nOtSOsimpleRDD <- lm(all ~ agecell + over21 + interaction21xAGE, data = df)
df$predicted_f <- predict(nOtSOsimpleRDD, newdata = df)
stargazer(nOtSOsimpleRDD, title = "Regression Discontinuity Results", type = "text",
dep.var.caption = "Dependent variable: All",
dep.var.labels.include = FALSE,
out = "regression_results.html")##
## Regression Discontinuity Results
## ===============================================
## Dependent variable: All
## ---------------------------
## -----------------------------------------------
## agecell 0.827
## (0.819)
##
## over21 83.333***
## (24.357)
##
## interaction21xAGE -3.603***
## (1.158)
##
## Constant 76.251***
## (16.396)
##
## -----------------------------------------------
## Observations 48
## R2 0.668
## Adjusted R2 0.645
## Residual Std. Error 2.283 (df = 44)
## F Statistic 29.466*** (df = 3; 44)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
g. Again, plot the data
fig_g <- ggplot(data = df, aes(x = agecell, y = all)) +
geom_point(pch = 10) +
geom_line(data = subset(df, over21 == 0), aes(y = predicted_f), color = "red", size = 1.5, linetype = "solid") +
geom_line(data = subset(df, over21 == 1), aes(y = predicted_f), color = "green", size = 1.5, linetype = "solid")
fig_gh. Compare to pre-packaged RDD:
packagedRDD <- RDestimate(all ~ agecell, data = df, cutpoint = 21)
parth <-summary(packagedRDD) #any fancy way to do this?##
## Call:
## RDestimate(formula = all ~ agecell, data = df, cutpoint = 21)
##
## Type:
## sharp
##
## Estimates:
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 1.6561 40 9.001 1.480 6.080 1.199e-09
## Half-BW 0.8281 20 9.579 1.914 5.004 5.609e-07
## Double-BW 3.3123 48 7.953 1.278 6.223 4.882e-10
##
## LATE ***
## Half-BW ***
## Double-BW ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## F-statistics:
## F Num. DoF Denom. DoF p
## LATE 33.08 3 36 3.799e-10
## Half-BW 29.05 3 16 2.078e-06
## Double-BW 32.54 3 44 6.129e-11
parth## $coefficients
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 1.6561482 40 9.000704 1.480302 6.080318 1.199444e-09
## Half-BW 0.8280741 20 9.578812 1.914144 5.004227 5.608665e-07
## Double-BW 3.3122964 48 7.952527 1.277953 6.222865 4.881563e-10
##
## $fstat
## F Num. DoF Denom. DoF p
## LATE 33.08419 3 36 3.799427e-10
## Half-BW 29.04964 3 16 2.077717e-06
## Double-BW 32.53625 3 44 6.128609e-11
##
## attr(,"class")
## [1] "summary.RD"
plot(packagedRDD)i. Prepackaged linear RDD:
linearpackagedRDD <- RDestimate(all~agecell, data = df,
cutpoint = 21,kernel = "rectangular", bw=2)
parti <- summary(linearpackagedRDD)##
## Call:
## RDestimate(formula = all ~ agecell, data = df, cutpoint = 21,
## bw = 2, kernel = "rectangular")
##
## Type:
## sharp
##
## Estimates:
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 2 48 7.663 1.273 6.017 1.776e-09
## Half-BW 1 24 9.753 1.902 5.128 2.929e-07
## Double-BW 4 48 7.663 1.273 6.017 1.776e-09
##
## LATE ***
## Half-BW ***
## Double-BW ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## F-statistics:
## F Num. DoF Denom. DoF p
## LATE 29.47 3 44 2.651e-10
## Half-BW 16.82 3 20 2.167e-05
## Double-BW 29.47 3 44 2.651e-10
parti## $coefficients
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 2 48 7.662709 1.273498 6.017055 1.776186e-09
## Half-BW 1 24 9.753311 1.901993 5.127942 2.929270e-07
## Double-BW 4 48 7.662709 1.273498 6.017055 1.776186e-09
##
## $fstat
## F Num. DoF Denom. DoF p
## LATE 29.46647 3 44 2.650711e-10
## Half-BW 16.81713 3 20 2.166598e-05
## Double-BW 29.46647 3 44 2.650711e-10
##
## attr(,"class")
## [1] "summary.RD"
plot(linearpackagedRDD)