Homework 6

Duflo’s Plumber Economist

2023-02-07

#Loading required packages
library(prettydoc) #For the theme used in this document
library(tidyverse) #Required for renaming
library(haven) #Stata data loading to R
library(stargazer) #Nice tables
library(reshape2) #Required for reshaping the data
library(dplyr)
library(tidyr)
library(ggplot2)
library(rdd)

Part 1: Reading and questions

Briefly answer these questions:

a. Download and read the paper:

Downloaded and read the paper.

b. Get the Carpenter & Dobkin and data:

#Setting up the directory
setwd("D:/UGA Coursework/Second Year/AAEC 8610/HWs/HW6")
df <- read_dta("AEJfigs.dta")
head(df)
## # A tibble: 6 × 19
##   agecell   all allfit…¹ inter…² inter…³ exter…⁴ exter…⁵ alcohol alcoh…⁶ homic…⁷
##     <dbl> <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1    19.1  92.8     91.7    16.6    16.7    76.2    75.0   0.639   0.794    16.3
## 2    19.2  95.1     91.9    18.3    16.9    76.8    75.0   0.677   0.838    16.9
## 3    19.2  92.1     92.0    18.9    17.1    73.2    75.0   0.866   0.878    15.2
## 4    19.3  88.4     92.2    16.1    17.3    72.3    74.9   0.867   0.915    16.7
## 5    19.4  88.7     92.3    17.4    17.4    71.3    74.9   1.02    0.949    14.9
## 6    19.5  90.2     92.5    17.9    17.6    72.3    74.9   1.17    0.981    15.6
## # … with 9 more variables: homicidefitted <dbl>, suicide <dbl>,
## #   suicidefitted <dbl>, mva <dbl>, mvafitted <dbl>, drugs <dbl>,
## #   drugsfitted <dbl>, externalother <dbl>, externalotherfitted <dbl>, and
## #   abbreviated variable names ¹​allfitted, ²​internal, ³​internalfitted,
## #   ⁴​external, ⁵​externalfitted, ⁶​alcoholfitted, ⁷​homicide
df$over21 = ifelse(df$agecell >= 21,1,0)

c. Reproduce Figure 3 of the paper

#Trying to reproduce figure 3

f3 <- ggplot(data = df, aes(x = agecell, y = all)) +
  geom_point(size = 0.5, shape = 16, color = "steelblue") +
  geom_point(aes(y = internal), size = 0.35, shape = 0, color = "green") +
  geom_point(aes(y = external), size = 0.5, shape = 17, color = "red") +
  geom_line(aes(y = allfitted, linetype = "All fitted"), color = "black", size = 1, linetype = "dashed") +
  geom_line(aes(y = internalfitted, linetype = "Int. fitted"), color = "black", size = 1, linetype = "dotted") +
  geom_line(aes(y = externalfitted, linetype = "Ext. fitted"), color = "black", size = 1) +
  xlim(19, 23) +
  ylim(0, 120) +
  labs(x = "Age", y = "Deaths per 100,000 person-years",
       title = "FIGURE 3") +
  theme_classic()

f3

d. Simplest regression-discontinuity design

#Treatment: age 21

simplestRDD <- lm(all ~ agecell + over21, data = df)
df$predicted_d <- predict(simplestRDD, newdata = df)
stargazer(simplestRDD, title = "Simple Regression Discontinuity Results", type = "text",
          column.labels = c("Coefficients", "Std. Err.", "t value", "Pr(>|t|)"),
          covariate.labels = c("Age", "Treatment=1 (for > Over 21)"),
          dep.var.caption = "Dependent variable: All",
          dep.var.labels.include = FALSE,
          out = "regression_results.html")
## 
## Simple Regression Discontinuity Results
## =======================================================
##                               Dependent variable: All  
##                             ---------------------------
##                                    Coefficients        
## -------------------------------------------------------
## Age                                   -0.975           
##                                       (0.632)          
##                                                        
## Treatment=1 (for > Over 21)          7.663***          
##                                       (1.440)          
##                                                        
## Constant                            112.310***         
##                                      (12.668)          
##                                                        
## -------------------------------------------------------
## Observations                            48             
## R2                                     0.595           
## Adjusted R2                            0.577           
## Residual Std. Error               2.493 (df = 45)      
## F Statistic                   32.995*** (df = 2; 45)   
## =======================================================
## Note:                       *p<0.1; **p<0.05; ***p<0.01

e. Plot the results

fig_e <- ggplot(data = df, aes(x = agecell, y = all)) +
  geom_point(pch = 10) +
  geom_line(data = subset(df, over21 == 0), aes(y = predicted_d), color = "red", size = 1.5, linetype = "solid") +
  geom_line(data = subset(df, over21 == 1), aes(y = predicted_d), color = "green", size = 1.5, linetype = "solid") 

fig_e

f. Allow more flexibility to your RD

#Allow slope to vary as well:

df$interaction21xAGE=df$over21*df$agecell
nOtSOsimpleRDD <- lm(all ~ agecell + over21 + interaction21xAGE, data = df)
df$predicted_f <- predict(nOtSOsimpleRDD, newdata = df)
stargazer(nOtSOsimpleRDD, title = "Regression Discontinuity Results", type = "text",
          dep.var.caption = "Dependent variable: All",
          dep.var.labels.include = FALSE,
          out = "regression_results.html")
## 
## Regression Discontinuity Results
## ===============================================
##                       Dependent variable: All  
##                     ---------------------------
## -----------------------------------------------
## agecell                        0.827           
##                               (0.819)          
##                                                
## over21                       83.333***         
##                              (24.357)          
##                                                
## interaction21xAGE            -3.603***         
##                               (1.158)          
##                                                
## Constant                     76.251***         
##                              (16.396)          
##                                                
## -----------------------------------------------
## Observations                    48             
## R2                             0.668           
## Adjusted R2                    0.645           
## Residual Std. Error       2.283 (df = 44)      
## F Statistic           29.466*** (df = 3; 44)   
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01

g. Again, plot the data

fig_g <- ggplot(data = df, aes(x = agecell, y = all)) +
  geom_point(pch = 10) +
  geom_line(data = subset(df, over21 == 0), aes(y = predicted_f), color = "red", size = 1.5, linetype = "solid") +
  geom_line(data = subset(df, over21 == 1), aes(y = predicted_f), color = "green", size = 1.5, linetype = "solid") 
fig_g

h. Compare to pre-packaged RDD:

packagedRDD <- RDestimate(all ~ agecell, data = df, cutpoint = 21)
parth <-summary(packagedRDD) #any fancy way to do this?
## 
## Call:
## RDestimate(formula = all ~ agecell, data = df, cutpoint = 21)
## 
## Type:
## sharp 
## 
## Estimates:
##            Bandwidth  Observations  Estimate  Std. Error  z value  Pr(>|z|) 
## LATE       1.6561     40            9.001     1.480       6.080    1.199e-09
## Half-BW    0.8281     20            9.579     1.914       5.004    5.609e-07
## Double-BW  3.3123     48            7.953     1.278       6.223    4.882e-10
##               
## LATE       ***
## Half-BW    ***
## Double-BW  ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## F-statistics:
##            F      Num. DoF  Denom. DoF  p        
## LATE       33.08  3         36          3.799e-10
## Half-BW    29.05  3         16          2.078e-06
## Double-BW  32.54  3         44          6.129e-11
parth
## $coefficients
##           Bandwidth Observations Estimate Std. Error  z value     Pr(>|z|)
## LATE      1.6561482           40 9.000704   1.480302 6.080318 1.199444e-09
## Half-BW   0.8280741           20 9.578812   1.914144 5.004227 5.608665e-07
## Double-BW 3.3122964           48 7.952527   1.277953 6.222865 4.881563e-10
## 
## $fstat
##                  F Num. DoF Denom. DoF            p
## LATE      33.08419        3         36 3.799427e-10
## Half-BW   29.04964        3         16 2.077717e-06
## Double-BW 32.53625        3         44 6.128609e-11
## 
## attr(,"class")
## [1] "summary.RD"
plot(packagedRDD)

i. Prepackaged linear RDD:

linearpackagedRDD <- RDestimate(all~agecell, data = df, 
                                cutpoint = 21,kernel = "rectangular", bw=2)
parti <- summary(linearpackagedRDD)
## 
## Call:
## RDestimate(formula = all ~ agecell, data = df, cutpoint = 21, 
##     bw = 2, kernel = "rectangular")
## 
## Type:
## sharp 
## 
## Estimates:
##            Bandwidth  Observations  Estimate  Std. Error  z value  Pr(>|z|) 
## LATE       2          48            7.663     1.273       6.017    1.776e-09
## Half-BW    1          24            9.753     1.902       5.128    2.929e-07
## Double-BW  4          48            7.663     1.273       6.017    1.776e-09
##               
## LATE       ***
## Half-BW    ***
## Double-BW  ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## F-statistics:
##            F      Num. DoF  Denom. DoF  p        
## LATE       29.47  3         44          2.651e-10
## Half-BW    16.82  3         20          2.167e-05
## Double-BW  29.47  3         44          2.651e-10
parti
## $coefficients
##           Bandwidth Observations Estimate Std. Error  z value     Pr(>|z|)
## LATE              2           48 7.662709   1.273498 6.017055 1.776186e-09
## Half-BW           1           24 9.753311   1.901993 5.127942 2.929270e-07
## Double-BW         4           48 7.662709   1.273498 6.017055 1.776186e-09
## 
## $fstat
##                  F Num. DoF Denom. DoF            p
## LATE      29.46647        3         44 2.650711e-10
## Half-BW   16.81713        3         20 2.166598e-05
## Double-BW 29.46647        3         44 2.650711e-10
## 
## attr(,"class")
## [1] "summary.RD"
plot(linearpackagedRDD)