b. Get the Carpenter and Dobkin and data:

# Load the foreign package
library(foreign)

# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")

My_Data$Over_21 <- 0  # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1  # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable

c. Reproduce Figure 3 of the paper:

# Load the foreign package
library(foreign)

# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
## Part c) Reproduce Figure 3 of the paper
library(ggplot2)
ggplot(My_Data, aes(agecell)) + 
  geom_point(aes(y=all, shape="19")) +
  geom_point(aes(y=internal, shape="24")) +
  geom_point(aes(y=external, shape="22")) +
  geom_line(aes(y=allfitted)) + 
  geom_line(aes(y=internalfitted)) +
  geom_line(aes(y=externalfitted)) +
  geom_vline(xintercept = 21, color="red", alpha=10, linetype=1)

## d. Simplest regression-discontinuity design

We seek to run this regressgion: $Y_{i}= + X_i + D_i + _{i} $ where \(Y_{i}\) is the outcome variable (all deaths), \(X_i\) is the assignment variable (age cells) and \(D_i\) is a dummy variable that is equal to 1 if \(age >=21\).

# Load the foreign package
library(foreign)
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")

My_Data$Over_21 <- 0  # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1  # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable

# Regression for part d 
simple_rdd <- lm(all~agecell + Over_21, data=My_Data)
# creating results table
library(stargazer)
stargazer(simple_rdd, type="text",
          no.space=TRUE, keep.stat = c("n","rsq"),
          title = "SIMPLE RDD REGRESSION",
           dep.var.labels = "All Deaths")
## 
## SIMPLE RDD REGRESSION
## ========================================
##                  Dependent variable:    
##              ---------------------------
##                      All Deaths         
## ----------------------------------------
## agecell                -0.975           
##                        (0.632)          
## Over_21               7.663***          
##                        (1.440)          
## Constant             112.310***         
##                       (12.668)          
## ----------------------------------------
## Observations             48             
## R2                      0.595           
## ========================================
## Note:        *p<0.1; **p<0.05; ***p<0.01

f.Allow more reflexibility to your data

We seek to run this regressgion: $Y_{i}= + X_i + D_i + (X_i D_i) + _{i} $

# Load the foreign package
library(foreign)

# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")

My_Data$Over_21 <- 0  # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1  # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable

flexible_rdd <- lm(all~ agecell + Over_21 + agecell*Over_21, data=My_Data)
# creating results table
library(stargazer)
stargazer(flexible_rdd, type="text",
          no.space=TRUE, keep.stat = c("n","rsq"),
          title = "FLEXIBLE RDD REGRESSION",
          dep.var.labels = "All Deaths")
## 
## FLEXIBLE RDD REGRESSION
## ===========================================
##                     Dependent variable:    
##                 ---------------------------
##                         All Deaths         
## -------------------------------------------
## agecell                    0.827           
##                           (0.819)          
## Over_21                  83.333***         
##                          (24.357)          
## agecell:Over_21          -3.603***         
##                           (1.158)          
## Constant                 76.251***         
##                          (16.396)          
## -------------------------------------------
## Observations                48             
## R2                         0.668           
## ===========================================
## Note:           *p<0.1; **p<0.05; ***p<0.01

h.Compare to pre-packaged RDD:

# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")

My_Data$Over_21 <- 0  # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1  # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable

# part h)
library(rdd)
pre_package_rdd <- RDestimate(all~agecell,
                     data=My_Data,
                     cutpoint=21)
summary(pre_package_rdd)
## 
## Call:
## RDestimate(formula = all ~ agecell, data = My_Data, cutpoint = 21)
## 
## Type:
## sharp 
## 
## Estimates:
##            Bandwidth  Observations  Estimate  Std. Error  z value  Pr(>|z|) 
## LATE       1.6561     40            9.001     1.480       6.080    1.199e-09
## Half-BW    0.8281     20            9.579     1.914       5.004    5.609e-07
## Double-BW  3.3123     48            7.953     1.278       6.223    4.882e-10
##               
## LATE       ***
## Half-BW    ***
## Double-BW  ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## F-statistics:
##            F      Num. DoF  Denom. DoF  p        
## LATE       33.08  3         36          3.799e-10
## Half-BW    29.05  3         16          2.078e-06
## Double-BW  32.54  3         44          6.129e-11
# Plot the output
plot(pre_package_rdd)
title(sub="Plot of non-linear RDD results (using RDestimate) ", xlab="Age",ylab="Mortality rate from all causes (per 100,000)")                     

h.Compare to pre-packaged RDD:

# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")

My_Data$Over_21 <- 0  # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1  # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable

# part i)
packaged_linear_rdd=RDestimate(all ~ agecell, data=My_Data, cutpoint=21, kernel="rectangular", bw=2)
summary(packaged_linear_rdd)
## 
## Call:
## RDestimate(formula = all ~ agecell, data = My_Data, cutpoint = 21, 
##     bw = 2, kernel = "rectangular")
## 
## Type:
## sharp 
## 
## Estimates:
##            Bandwidth  Observations  Estimate  Std. Error  z value  Pr(>|z|) 
## LATE       2          48            7.663     1.273       6.017    1.776e-09
## Half-BW    1          24            9.753     1.902       5.128    2.929e-07
## Double-BW  4          48            7.663     1.273       6.017    1.776e-09
##               
## LATE       ***
## Half-BW    ***
## Double-BW  ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## F-statistics:
##            F      Num. DoF  Denom. DoF  p        
## LATE       29.47  3         44          2.651e-10
## Half-BW    16.82  3         20          2.167e-05
## Double-BW  29.47  3         44          2.651e-10
# Plot the output
plot(packaged_linear_rdd)
title(sub="Plot of linear RDD results (using RDestimate) ", xlab="Age",ylab="Mortality rate from all causes (per 100,000)")