R. You will be asked
to combine the difference-in-differences approach with other methods
(synthetic control, regression discontinuity design). You will also
practice machine learning and event studies.Rpubs using your temporary account.RPubs link of your work and submit it on
Canvas.RPubs link last has copied the others. So, timely
submissions are important. Own your work. I can randomly ask your
R script and .Rmd files for double-checking purposes. As a
standard practice, work in a script file before making your code chunks
in the .Rmd file. Your .Rmd file and Rpubs submission page
MUST show the code used to produce any of the outputs you present in
your answers.Academic integrity is the pursuit of scholarly activity in an open, honest and responsible manner. Academic integrity is a basic guiding principle for all academic activity at The Pennsylvania State University, and all members of the University community are expected to act in accordance with this principle. Consistent with this expectation, the University’s Code of Conduct states that all students should act with personal integrity, respect other students’ dignity, rights and property, and help create and maintain an environment in which all can succeed through the fruits of their efforts.
Academic integrity includes a commitment by all members of the University community not to engage in or tolerate acts of falsification, misrepresentation or deception. Such acts of dishonesty violate the fundamental ethical principles of the University community and compromise the worth of work completed by others.
# Load packages
library(pacman)
p_load(devtools, synthdid, did, DRDID, gtrendsR, glmnet, causalweight, nnls, grf,
rdrobust, rdd, RDHonest, tidyverse, lubridate, usmap, gridExtra, stringr, readxl,
reshape2, scales, broom, data.table, ggplot2, stargazer, modelsummary,
foreign, ggthemes, ggforce, ggridges, latex2exp, viridis, extrafont,
kableExtra, snakecase, janitor, tableone, lfe, fixest, multcomp, etable)
##################### Q4. Regression Discontinuity Design ####################
######### i.Conventional and Robust RDD (Sharp Design) ######################
library(rdrobust)
data(rdrobust_RDsenate)
library(dplyr)
rdrobust_RDsenate <- rdrobust_RDsenate %>%
mutate(treat = ifelse(margin > 0, 1, 0))
### a. plot the outcome against margin
install.packages("rdrobust") # Run this only if it's not installed
## Warning: package 'rdrobust' is in use and will not be installed
library(rdrobust)
# If you have rdplot installed
rdplot(y = rdrobust_RDsenate$vote, x = rdrobust_RDsenate$margin,
title = "Binned Averages of Vote by Margin",
x.label = "Margin",
y.label = "Vote Share",
c = 0)
########## (b) Optimal bandwidth ################
### Conventional and robust inference results
rd_result <- rdrobust(y = rdrobust_RDsenate$vote,
x = rdrobust_RDsenate$margin,all= TRUE)
summary(rd_result)
## Sharp RD estimates using local polynomial regression.
##
## Number of Obs. 1297
## BW type mserd
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 595 702
## Eff. Number of Obs. 360 323
## Order est. (p) 1 1
## Order bias (q) 2 2
## BW est. (h) 17.754 17.754
## BW bias (b) 28.028 28.028
## rho (h/b) 0.633 0.633
## Unique Obs. 595 665
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional 7.414 1.459 5.083 0.000 [4.555 , 10.273]
## Bias-Corrected 7.507 1.459 5.146 0.000 [4.647 , 10.366]
## Robust 7.507 1.741 4.311 0.000 [4.094 , 10.919]
## =============================================================================
### conventional results without rdrobust
rdrobust_RDsenate$D_X <- rdrobust_RDsenate$treat * rdrobust_RDsenate$margin
# Run the regression
model1 <- lm(vote ~ margin + treat + D_X, data = rdrobust_RDsenate)
library(stargazer)
# Generate formatted regression table
stargazer(model1, type = "text",
title = "Regression Results: Vote on Margin, Treat, and Interaction Term",
digits = 3)
##
## Regression Results: Vote on Margin, Treat, and Interaction Term
## ===============================================
## Dependent variable:
## ---------------------------
## vote
## -----------------------------------------------
## margin 0.216***
## (0.028)
##
## treat 6.044***
## (0.942)
##
## D_X 0.170***
## (0.032)
##
## Constant 44.904***
## (0.699)
##
## -----------------------------------------------
## Observations 1,297
## R2 0.587
## Adjusted R2 0.586
## Residual Std. Error 11.654 (df = 1293)
## F Statistic 613.586*** (df = 3; 1293)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
####### (c) In the running variable continuous at the threshold #######
### Plot a histogram of density of running variable
summary(rdrobust_RDsenate$margin)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -100.000 -12.206 2.166 7.171 22.766 100.000
library(ggplot2)
# Plot histogram for the density and number of observations
ggplot(rdrobust_RDsenate, aes(x = margin)) +
geom_histogram(aes(y = ..density..), bins = 30, fill = "blue", color = "black", alpha = 0.7) +
labs(
title = "Density and Number of Observations by Margin (Running Variable)",
x = "Margin (Running Variable)",
y = "Density"
) +
theme_minimal() +
theme(
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
plot.title = element_text(hjust = 0.5, size = 14)
)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
### Use the DCdensity command to test for a discontinuity
library(rdd)
# Run the DCdensity command for the running variable 'margin' at the threshold 0
result <- DCdensity(rdrobust_RDsenate$margin, c=0)
# View the result
print(result)
## [1] 0.3897849
####### ii. Conventional and Robust RDD (Fuzzy design)
library(rdd)
install.packages("RDHonest")
## Warning: package 'RDHonest' is in use and will not be installed
library(RDHonest)
data(rcp)
library(dplyr)
rcp <- rcp %>%
mutate(treat = ifelse(retired == TRUE, 1, 0))
### (a) create a plot of treatment against running variable, and plot of outcome
### against the running variable
library(ggplot2)
ggplot(rcp, aes(x = elig_year, y = treat)) +
geom_point(alpha = 0.4) +
geom_smooth(method = "glm", method.args = list(family = "binomial"),
se = FALSE, color = "blue") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
labs(
title = "Treatment Assignment vs. Running Variable",
x = "Eligibility Year (elig_year)",
y = "Treatment (treat)"
) +
theme_minimal() +
ylim(0, 1) # Optional: explicitly set y-axis limits
## `geom_smooth()` using formula = 'y ~ x'
### plot the outcome variable against the running variable
ggplot(rcp, aes(x = elig_year, y = cn)) +
geom_point(aes(color = treat), alpha = 0.6) +
geom_smooth(method = "loess", se = FALSE, color = "blue") +
labs(
title = "Outcome Variable vs Running Variable in Fuzzy RDD",
x = "Running Variable",
y = "Outcome Variable",
color = "Treatment Probability"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#### (b) Conventional and robust inference with rdrobust
library(rdrobust)
rd_result_fuzzy <- rdrobust(y = rcp$cn,
x = rcp$elig_year,
all = TRUE,
fuzzy = rcp$treat)
## Warning in rdrobust(y = rcp$cn, x = rcp$elig_year, all = TRUE, fuzzy =
## rcp$treat): Mass points detected in the running variable.
summary(rd_result_fuzzy)
## Fuzzy RD estimates using local polynomial regression.
##
## Number of Obs. 30006
## BW type mserd
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 16556 13450
## Eff. Number of Obs. 1599 2078
## Order est. (p) 1 1
## Order bias (q) 2 2
## BW est. (h) 4.950 4.950
## BW bias (b) 15.002 15.002
## rho (h/b) 0.330 0.330
## Unique Obs. 39 49
##
## First-stage estimates.
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional 0.313 0.039 7.936 0.000 [0.235 , 0.390]
## Bias-Corrected 0.293 0.039 7.436 0.000 [0.216 , 0.370]
## Robust 0.293 0.041 7.094 0.000 [0.212 , 0.374]
## =============================================================================
##
## Treatment effect estimates.
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional -5603.339 3072.345 -1.824 0.068[-11625.025 , 418.346]
## Bias-Corrected -5913.127 3072.345 -1.925 0.054[-11934.812 , 108.559]
## Robust -5913.127 3219.043 -1.837 0.066[-12222.336 , 396.082]
## =============================================================================
### (c) Reproduce the conventional inference results without rdrobust
install.packages("AER")
## Warning: package 'AER' is in use and will not be installed
library(AER)
# Create instrument: 1 if individual is pension-eligible
rcp$above <- ifelse(rcp$elig_year >= 0, 1, 0)
# Optionally limit to bandwidth window (local analysis)
bw <- 5
rcp_bw <- subset(rcp, abs(elig_year) <= bw)
iv_model <- ivreg(cn ~ treat + elig_year | above + elig_year, data = rcp_bw)
summary(iv_model)
##
## Call:
## ivreg(formula = cn ~ treat + elig_year | above + elig_year, data = rcp_bw)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17455 -6503 -2158 3944 122783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20671.75 886.19 23.327 <0.0000000000000002 ***
## treat -3957.89 2156.80 -1.835 0.0666 .
## elig_year 99.14 165.81 0.598 0.5499
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10540 on 5015 degrees of freedom
## Multiple R-Squared: 0.03554, Adjusted R-squared: 0.03515
## Wald test: 11.97 on 2 and 5015 DF, p-value: 0.000006523
library(stargazer)
stargazer(iv_model, type = "text",
title = "Fuzzy RD IV Estimation of Retirement on Consumption",
dep.var.labels = "Household Expenditure (EUR/year)",
covariate.labels = c("Retired", "Age Relative to Eligibility"),
omit.stat = c("f", "ser"),
digits = 3)
##
## Fuzzy RD IV Estimation of Retirement on Consumption
## ============================================================
## Dependent variable:
## --------------------------------
## Household Expenditure (EUR/year)
## ------------------------------------------------------------
## Retired -3,957.895*
## (2,156.796)
##
## Age Relative to Eligibility 99.139
## (165.814)
##
## Constant 20,671.750***
## (886.190)
##
## ------------------------------------------------------------
## Observations 5,018
## R2 0.036
## Adjusted R2 0.035
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
######################### iii. RDD and Diff-in-Diff ###########################