# Load the foreign package
library(foreign)
# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
My_Data$Over_21 <- 0 # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1 # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable
# Load the foreign package
library(foreign)
# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
## Part c) Reproduce Figure 3 of the paper
library(ggplot2)
ggplot(My_Data, aes(agecell)) +
geom_point(aes(y=all, shape="19")) +
geom_point(aes(y=internal, shape="24")) +
geom_point(aes(y=external, shape="22")) +
geom_line(aes(y=allfitted)) +
geom_line(aes(y=internalfitted)) +
geom_line(aes(y=externalfitted)) +
geom_vline(xintercept = 21, color="red", alpha=10, linetype=1)
## d. Simplest regression-discontinuity design
We seek to run this regressgion: $Y_{i}= + X_i + D_i + _{i} $ where \(Y_{i}\) is the outcome variable (all deaths), \(X_i\) is the assignment variable (age cells) and \(D_i\) is a dummy variable that is equal to 1 if \(age >=21\).
# Load the foreign package
library(foreign)
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
My_Data$Over_21 <- 0 # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1 # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable
# Regression for part d
simple_rdd <- lm(all~agecell + Over_21, data=My_Data)
# creating results table
library(stargazer)
stargazer(simple_rdd, type="text",
no.space=TRUE, keep.stat = c("n","rsq"),
title = "SIMPLE RDD REGRESSION",
dep.var.labels = "All Deaths")
##
## SIMPLE RDD REGRESSION
## ========================================
## Dependent variable:
## ---------------------------
## All Deaths
## ----------------------------------------
## agecell -0.975
## (0.632)
## Over_21 7.663***
## (1.440)
## Constant 112.310***
## (12.668)
## ----------------------------------------
## Observations 48
## R2 0.595
## ========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
We seek to run this regressgion: $Y_{i}= + X_i + D_i + (X_i D_i) + _{i} $
# Load the foreign package
library(foreign)
# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
My_Data$Over_21 <- 0 # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1 # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable
flexible_rdd <- lm(all~ agecell + Over_21 + agecell*Over_21, data=My_Data)
# creating results table
library(stargazer)
stargazer(flexible_rdd, type="text",
no.space=TRUE, keep.stat = c("n","rsq"),
title = "FLEXIBLE RDD REGRESSION",
dep.var.labels = "All Deaths")
##
## FLEXIBLE RDD REGRESSION
## ===========================================
## Dependent variable:
## ---------------------------
## All Deaths
## -------------------------------------------
## agecell 0.827
## (0.819)
## Over_21 83.333***
## (24.357)
## agecell:Over_21 -3.603***
## (1.158)
## Constant 76.251***
## (16.396)
## -------------------------------------------
## Observations 48
## R2 0.668
## ===========================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
My_Data$Over_21 <- 0 # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1 # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable
# part h)
library(rdd)
pre_package_rdd <- RDestimate(all~agecell,
data=My_Data,
cutpoint=21)
summary(pre_package_rdd)
##
## Call:
## RDestimate(formula = all ~ agecell, data = My_Data, cutpoint = 21)
##
## Type:
## sharp
##
## Estimates:
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 1.6561 40 9.001 1.480 6.080 1.199e-09
## Half-BW 0.8281 20 9.579 1.914 5.004 5.609e-07
## Double-BW 3.3123 48 7.953 1.278 6.223 4.882e-10
##
## LATE ***
## Half-BW ***
## Double-BW ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## F-statistics:
## F Num. DoF Denom. DoF p
## LATE 33.08 3 36 3.799e-10
## Half-BW 29.05 3 16 2.078e-06
## Double-BW 32.54 3 44 6.129e-11
# Plot the output
plot(pre_package_rdd)
title(sub="Plot of non-linear RDD results (using RDestimate) ", xlab="Age",ylab="Mortality rate from all causes (per 100,000)")
# Load Stata data into R
My_Data <- read.dta("/Users/godwinnutsugah/Dropbox/AAEE-UGA/AAEC 8610/HW/HW06/AEJfigs.dta")
My_Data$Over_21 <- 0 # create new variable and set default value to 0
My_Data$Over_21[My_Data$agecell >= 21] <- 1 # set values to 1 if agecell >= 21
My_Data2 <- as.data.frame(filter(My_Data, is.na(all)==FALSE)) # creating a new data set with no NA in all variable
# part i)
packaged_linear_rdd=RDestimate(all ~ agecell, data=My_Data, cutpoint=21, kernel="rectangular", bw=2)
summary(packaged_linear_rdd)
##
## Call:
## RDestimate(formula = all ~ agecell, data = My_Data, cutpoint = 21,
## bw = 2, kernel = "rectangular")
##
## Type:
## sharp
##
## Estimates:
## Bandwidth Observations Estimate Std. Error z value Pr(>|z|)
## LATE 2 48 7.663 1.273 6.017 1.776e-09
## Half-BW 1 24 9.753 1.902 5.128 2.929e-07
## Double-BW 4 48 7.663 1.273 6.017 1.776e-09
##
## LATE ***
## Half-BW ***
## Double-BW ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## F-statistics:
## F Num. DoF Denom. DoF p
## LATE 29.47 3 44 2.651e-10
## Half-BW 16.82 3 20 2.167e-05
## Double-BW 29.47 3 44 2.651e-10
# Plot the output
plot(packaged_linear_rdd)
title(sub="Plot of linear RDD results (using RDestimate) ", xlab="Age",ylab="Mortality rate from all causes (per 100,000)")