#Loading required packages
library(prettydoc) #For the theme used in this document
library(tidyverse) #Required for renaming
library(stargazer) #Nice tables
library(reshape2) #Required for reshaping the data
library(dplyr)
library(tidyr)
Part 1: Reading and questions
Briefly answer these questions:
a. What is the causal link the paper is trying to reveal?
This paper aims to estimate the causal effect of an increase in the minimum wage on employment in fast-food restaurants.
b. What would be the ideal experiment to test this causal link?
In an ideal experiment, I would randomly assign minimum wages across the fast-food restaurants.
c. What is the identification strategy?
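This paper uses a difference-in-differences strategy: it compares the change in employment at New Jersey restaurants (the treatment group) with the change at Pennsylvania restaurants (the control group) to estimate the effect of the minimum wage increase on employment.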
d. What are the assumptions / threats to this identification strategy?
The identifying assumption is that the outcome would have evolved in parallel in the treatment and control groups had there been no minimum wage change (the parallel-trends assumption).
Part 2: Replication Analysis
a. Load data from Card and Krueger AER 1994
# Switch the session into the homework folder and read the Card & Krueger
# (1994) fast-food data set from it.
# NOTE(review): the folder is an absolute, machine-specific path; the script
# only runs as-is where this directory exists.
hw_dir <- "D:/UGA Coursework/Second Year/AAEC 8610/HWs/HW5"
setwd(hw_dir)
df <- read.csv(file = "CardKrueger1994_fastfood.csv")
# Preview the first rows to confirm the file was read as expected.
head(df)
## id state emptot emptot2 demp chain bk kfc roys wendys wage_st wage_st2
## 1 46 0 40.50 24.0 -16.50 1 1 0 0 0 NA 4.30
## 2 49 0 13.75 11.5 -2.25 2 0 1 0 0 NA 4.45
## 3 506 0 8.50 10.5 2.00 2 0 1 0 0 NA 5.00
## 4 56 0 34.00 20.0 -14.00 4 0 0 0 1 5.0 5.25
## 5 61 0 24.00 35.5 11.50 4 0 0 0 1 5.5 4.75
## 6 62 0 20.50 NA NA 4 0 0 0 1 5.0 NA
b. Verify that the data is correct
# Per-state check table: share of restaurants in each chain and mean
# full-time-equivalent (FTE) employment in each survey wave.
summary_table <- df %>%
  group_by(state) %>%
  summarise(
    BurgerKing = mean(bk),
    KFC = mean(kfc),
    Roys = mean(roys),
    Wendys = mean(wendys),
    # Employment has missing values, so drop them before averaging.
    FTE_W1 = mean(emptot, na.rm = TRUE),
    FTE_W2 = mean(emptot2, na.rm = TRUE)
  ) %>%
  # Label the state codes while `state` is still a column: 0 becomes "PA",
  # every other value "NJ". After the transpose it is the first row.
  mutate(state = ifelse(state == "0", "PA", "NJ")) %>%
  # One column per state. Because `state` is now text, the transposed result
  # is a character matrix.
  t()
summary_table
## [,1] [,2]
## state "PA" "NJ"
## BurgerKing "0.4430380" "0.4108761"
## KFC "0.1518987" "0.2054381"
## Roys "0.2151899" "0.2477341"
## Wendys "0.1898734" "0.1359517"
## FTE_W1 "23.33117" "20.43941"
## FTE_W2 "21.16558" "21.02743"
c. Use a “first-differenced” OLS to obtain their Diff-in-diff estimator
# First-differenced OLS: regress each restaurant's change in FTE employment
# between the two waves on the state dummy; the slope on `state` is the
# difference-in-differences estimate.
df[["first_diff_emp"]] <- with(df, emptot2 - emptot)
reg <- lm(first_diff_emp ~ state, data = df)
stargazer(
  reg,
  title = "First Diff OLSRegression Results",
  type = "text",
  align = TRUE
)
##
## First Diff OLSRegression Results
## ===============================================
## Dependent variable:
## ---------------------------
## first_diff_emp
## -----------------------------------------------
## state 2.750**
## (1.154)
##
## Constant -2.283**
## (1.036)
##
## -----------------------------------------------
## Observations 384
## R2 0.015
## Adjusted R2 0.012
## Residual Std. Error 8.968 (df = 382)
## F Statistic 5.675** (df = 1; 382)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Part 3: Alternative ways of running DiD
d. What would be the equation of a standard “difference in difference” regression?
In this case, our regression would look like:
\(Y_{ist} = \alpha + \gamma NJ_s + \lambda \text{Post}_t + \delta \left(NJ_s \times \text{Post}_t\right) + \varepsilon_{ist}\)
where \(Y_{ist}\) is fast-food employment at restaurant i in state s and period t; \(NJ_s\) and \(\text{Post}_t\) are dummy variables equal to 1 for restaurants in NJ and for observations in November, respectively. The coefficient \(\delta\) on the interaction term is the difference-in-differences estimate.
e. Compute the difference-in-differences estimator “by hand”
# Difference-in-differences "by hand" from the four state-by-wave mean FTEs.
# After transposing, the rows are state, FTE_W1, FTE_W2 and there is one
# column per state in sorted order (column 1 = state 0, column 2 = state 1).
DID_mat <- df %>%
  group_by(state) %>%
  summarise(
    FTE_W1 = mean(emptot, na.rm = TRUE),
    FTE_W2 = mean(emptot2, na.rm = TRUE)
  ) %>%
  t() %>%
  as.matrix()
# Wave-1 minus wave-2 mean employment within each state column.
drop_state0 <- DID_mat[2, 1] - DID_mat[3, 1]
drop_state1 <- DID_mat[2, 2] - DID_mat[3, 2]
# (state-0 drop) - (state-1 drop), which equals the state-1 change minus the
# state-0 change between waves.
DID_hand <- drop_state0 - drop_state1
DID_hand
## FTE_W1
## 2.753606
f. Run the regression you wrote up in part d
# Standard difference-in-differences regression on the stacked (long) panel:
#   emptot = a + g * state + l * time + d * (state x time) + e
# where d, the coefficient on `stateXtime`, is the DiD estimate.

# Stack the two survey waves so each restaurant contributes one row per wave.
# The wave-specific columns are selected by name rather than by position, so
# the reshape does not depend on the column order of `df`.
dfNEW <- reshape(
  data = df,
  idvar = c("id", "state"),
  varying = list(c("emptot", "emptot2"), c("wage_st", "wage_st2")),
  v.names = c("emptot", "wagetot"),
  times = c(0, 1),  # time = 0 for the first wave, 1 for the second
  direction = "long"
)

# `state` is already the treatment dummy (0 = PA, 1 = NJ), so it is used as
# is. Recoding it as 1 - state would make PA the "treated" group and flip the
# sign of the interaction coefficient relative to the by-hand and
# first-difference estimates.
# NOTE(review): any regression table printed from the earlier, reversed
# coding is stale; re-run this chunk to refresh it.
dfNEW$stateXtime <- dfNEW$state * dfNEW$time
DID_reg <- lm(emptot ~ stateXtime + state + time, data = dfNEW)
# Only the first coefficient (the interaction) gets a custom label.
stargazer(DID_reg, type = "text", title = "DID Results",
          align = TRUE, dep.var.labels = "Employment",
          covariate.labels = c("Treatment"))
##
## DID Results
## ===============================================
## Dependent variable:
## ---------------------------
## Employment
## -----------------------------------------------
## Treatment -2.754
## (1.688)
##
## state 2.892**
## (1.194)
##
## time 0.588
## (0.744)
##
## Constant 20.439***
## (0.525)
##
## -----------------------------------------------
## Observations 794
## R2 0.007
## Adjusted R2 0.004
## Residual Std. Error 9.406 (df = 790)
## F Statistic 1.964 (df = 3; 790)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01