#Libraries

# Load packages
library(pacman)
p_load(Matching, Jmisc, lmtest, sandwich, kdensity, haven, boot, 
       cobalt, Matchit, Zelig, estimatr, cem, tidyverse, 
       lubridate, usmap, gridExtra, stringr, readxl, plot3D,  
       cowplot, reshape2, scales, broom, data.table, ggplot2, stargazer,  
       foreign, ggthemes, ggforce, ggridges, latex2exp, viridis, extrafont, 
       kableExtra, snakecase, janitor)
library(fixest)
## 
## Attaching package: 'fixest'
## The following object is masked from 'package:scales':
## 
##     pvalue
## The following object is masked from 'package:Jmisc':
## 
##     demean
library(multiwayvcov)
library(lmtest)
library(stargazer)
library(modelsummary)
library(causalweight)
## Loading required package: ranger
library(fixest)

Load Data

library(wooldridge) 
## Warning: package 'wooldridge' was built under R version 4.4.3
## 
## Attaching package: 'wooldridge'
## The following object is masked from 'package:MASS':
## 
##     cement
# Load `kielmc` data
data(kielmc)
attach(kielmc)

as.data.frame(table(nearinc, y81))
##   nearinc y81 Freq
## 1       0   0  123
## 2       1   0   56
## 3       0   1  102
## 4       1   1   40
kielmc <- kielmc
summary(rprice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   26000   59000   82000   83721  100230  300000
length(unique(cbd)) # number of clusters
## [1] 34

#PROBLEM 1

Part i . Without Covariates

# 2x2 Difference-in-Differences Model
did_model <- feols(rprice ~ nearinc * y81, data = kielmc, cluster = ~cbd)

# Display results
modelsummary(did_model, stars = TRUE)
(1)
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 82517.228***
(3221.924)
nearinc -18824.370*
(7796.294)
y81 18790.286***
(5154.864)
nearinc × y81 -11863.903+
(6621.818)
Num.Obs. 321
R2 0.174
R2 Adj. 0.166
AIC 7538.5
BIC 7553.5
RMSE 30053.88
Std.Errors by: cbd

Results suggest that the price of a house near to incinerator is approximately $11,863 lower than those that are not.

Part ii. With Covariates

# 2x2 Difference-in-Differences Model with Covariates
did_model1 <- feols(rprice ~ nearinc * y81 + rooms + baths + age + agesq + larea + lland, 
                       data = kielmc, cluster = ~cbd)

# 2x2 Difference-in-Differences Model with Covariates
did_model2 <- feols(rprice ~ nearinc * y81 + rooms + baths + age + area + land, 
                       data = kielmc, cluster = ~cbd)

# Create a comparative table with model estimates
modelsummary(list("Model 1" = did_model1, "Model 2" = did_model2),
             stars = TRUE,  # Display significance stars
             output = "markdown")  # Display in a clean table format
Model 1 Model 2
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) -279726.300** -15131.031
(93485.448) (11348.423)
nearinc 15554.707 4877.285
(10224.753) (7465.639)
y81 16672.713*** 13792.808**
(3714.861) (3839.912)
rooms 2978.963* 4211.879*
(1401.381) (1731.493)
baths 7458.220** 11103.272***
(2595.273) (2504.100)
age -616.245*** -190.563**
(128.447) (53.182)
agesq 3.053***
(0.776)
larea 32815.339***
(8487.841)
lland 7194.257
(4660.224)
nearinc × y81 -17545.704** -11692.428*
(5821.574) (4884.688)
area 17.937**
(5.653)
land 0.118
(0.111)
Num.Obs. 321 321
R2 0.637 0.636
R2 Adj. 0.627 0.626
AIC 7286.3 7285.8
BIC 7324.1 7319.7
RMSE 19917.40 19961.94
Std.Errors by: cbd by: cbd

Treatment effect with non-log covariate: -17545.704; treatment effect with logged covaraite: -11692.428. The magnitude is smaller with logged variables.

covariates <- as.matrix(kielmc$rooms, kielmc$baths, kielmc$age, kielmc$agesq, kielmc$larea, kielmc$lland)

## IPW-DiD model with cluster bootstrap standard errors
did3_ipw <- didweight(kielmc$rprice,     # Outcome variable
                     kielmc$nearinc,    # Treatment indicator
                     kielmc$y81,        # Time indicator
                     x = as.matrix(kielmc[, covariates]),  # Covariates matrix
                     boot = 500,            # Number of bootstrap repetitions
                     cluster = kielmc$cbd)  # Cluster variable

# 3. Results
treatment_effect <- as.numeric(did3_ipw$effect)
se <- as.numeric(did3_ipw$se)
pval <- as.numeric(did3_ipw$pvalue)

# Create a clean results table with rounded values
ipw_results <- data.frame(
  Metric = c("TE", "SE", "P-Value"),
  Value = c(
    round(did3_ipw$effect, 2),
    round(did3_ipw$se, 2),
    round(did3_ipw$pvalue, 4)
  )
)


## Display results 
kable(ipw_results, caption = "IPW DiD Results")
IPW DiD Results
Metric Value
TE 17687.7300
SE 34992.7100
P-Value 0.6132

Treatment effect estimate is similar to the first model but is insignificant with IPW. However, IPW helps weighting the groups so that they are more comparable and ensures similar distribution of covariates, making it the most reliable in theory.

Part iii. TWFE

Equation 1 is TWFE, where as equation 2 is standard DID. If there are reasons to believe treatment effects are heterogeneous, DID is better since TWFE average out the differences between counties. If assumes homogeneous TE, TWFE is preferred for its robustness against county-specific unobservables.

# Friend's data
i <- c(1, 1, 2, 2, 3, 3, 4, 4)
t <- c(1, 2, 1, 2, 1, 2, 1, 2)
P_t <- c(0, 1, 0, 1, 0, 1, 0, 1)
D_it <- c(0, 0, 0, 0, 0, 1, 0, 1)
D_i <- c(0, 0, 0, 0, 1, 1, 1, 1)
Y_it <- c(6, 7, 10, 9, 8, 14, 8, 12)

df <- data.frame(i=i, t=t, P_t=P_t, D_it=D_it, D_i=D_i, Y_it=Y_it)
df
##   i t P_t D_it D_i Y_it
## 1 1 1   0    0   0    6
## 2 1 2   1    0   0    7
## 3 2 1   0    0   0   10
## 4 2 2   1    0   0    9
## 5 3 1   0    0   1    8
## 6 3 2   1    1   1   14
## 7 4 1   0    0   1    8
## 8 4 2   1    1   1   12
twfe_model <- feols(Y_it ~ D_it | i + t, data = df, cluster = ~i)

#Equation 2
did_model <- feols(Y_it ~ D_i + P_t + D_i:P_t, data = df, cluster = ~i)

#Equation 3
rsa_model <- feols(Y_it ~ D_i:P_t | i + t, data = df, cluster = ~i)

#Compare Results
modelsummary(
  list("TWFE" = twfe_model, 
       "DiD" = did_model, 
       "Roth et al" = rsa_model),
  stars = TRUE, 
  gof_omit = "IC|R2|Log|F",
  output = "markdown"
)
TWFE DiD Roth et al
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
D_it 5.000*
(1.366)
(Intercept) 8.000*
(2.160)
D_i 0.000
(2.160)
P_t 0.000
(1.080)
D_i × P_t 5.000* 5.000*
(1.528) (1.366)
Num.Obs. 8 8 8
RMSE 0.50 1.22 0.50
Std.Errors by: i by: i by: i

All methods give the same TE estimates with slightly different SE. TWFE and DID are essentially the same with 2 groups and 2 periods, which is why both estimates and SE are the same.

PROBLEM 2

# Load `mpdta` data
library(did)
## Warning: package 'did' was built under R version 4.4.3
data(mpdta)
table(mpdta$treat, mpdta$year)
##    
##     2003 2004 2005 2006 2007
##   0  309  309  309  309  309
##   1  191  191  191  191  191
#table(JC$trainy2)
table(JC$assignment, JC$trainy1)
##    
##        0    1
##   0 1809 1854
##   1  857 4720
length(unique(mpdta$first.treat)) # cohorts or groups (incl. never-treated)
## [1] 4
table(mpdta$first.treat, mpdta$year)
##       
##        2003 2004 2005 2006 2007
##   0     309  309  309  309  309
##   2004   20   20   20   20   20
##   2006   40   40   40   40   40
##   2007  131  131  131  131  131
summary(mpdta$first.treat[mpdta$treat==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0
summary(mpdta$first.treat[mpdta$treat==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2004    2006    2007    2006    2007    2007
summary(mpdta$lemp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.099   4.868   5.697   5.773   6.684  10.443

Part i.

mpdta <- mpdta %>%
  dplyr::rename(fips = countyreal) %>%
  mutate(treat = as.factor(treat))
# Map 1:  Treatment status
plot_treat <- plot_usmap(data = mpdta%>% na.omit(), values = "treat", regions = "counties") +
  scale_fill_manual(values = c("#FFFFBF", "darkred"), name = "Treatment Status") +
  labs(title = "Treatment Status") +
  theme_minimal()
# Map 2: First Treatment Year
mpdta$first.treat <- as.factor(mpdta$first.treat)

plot_first_treat <- plot_usmap(data = mpdta %>% na.omit(), values = "first.treat", regions = "counties") +
  scale_fill_viridis_d(name = "First Treatment Year") +  
  labs(title = "First Year of Treatment") +
  theme_minimal()


# Map Together
library(gridExtra)
grid.arrange(plot_treat, plot_first_treat, ncol = 1)  

mpdta$first.treat <- as.numeric(mpdta$first.treat)

mpdta_treated <- mpdta %>%
  filter(first.treat != 0)

# Compute mean log employment for each county by first treatment year
lemp_summary <- mpdta_treated %>%
  group_by(year, first.treat) %>%
  dplyr::summarise(mean_lemp = mean(lemp, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
plot_list <- list()

# Loop over each first.treat year (2004, 2006, 2007)
for (ft in c(2004, 2006, 2007)) {
  
  df_ft <- filter(mpdta, first.treat == ft)
  
  # Generate the county-level map
  p <- plot_usmap(data = df_ft, values = "lemp", regions = "counties") +
    scale_fill_distiller(palette = "Spectral", name = "Log Employment", direction = 1) +
    labs(title = paste("Log Employment by First Treatment Year:", ft)) +
    theme_minimal()
  
  # Store plot in list
  plot_list[[as.character(ft)]] <- p
}

grid.arrange(grobs = plot_list, ncol = 3)

Part ii.

data(mpdta)

# Estimate ATT using `att_gt`
att <- att_gt(yname = "lemp",            # Outcome variable
                      tname = "year",           # Time variable
                      idname = "countyreal",    # County identifier
                      gname = "first.treat",    # First treatment year (cohort)
                      xformla = ~lpop,         # Covariate (log population)
                      data = mpdta,           
                      est_method = "reg",       # Outcome regression estimation
                      control_group = "nevertreated",  # Never-treated as control group
                      clustervars = "countyreal")  # Cluster SEs at county level
#Print Results
summary(att)
## 
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal", 
##     gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated", 
##     clustervars = "countyreal", est_method = "reg")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##   2004 2004  -0.0149     0.0239       -0.0801      0.0503  
##   2004 2005  -0.0770     0.0301       -0.1591      0.0051  
##   2004 2006  -0.1411     0.0351       -0.2371     -0.0451 *
##   2004 2007  -0.1075     0.0355       -0.2046     -0.0105 *
##   2006 2004  -0.0021     0.0223       -0.0629      0.0588  
##   2006 2005  -0.0070     0.0189       -0.0586      0.0446  
##   2006 2006   0.0008     0.0200       -0.0537      0.0553  
##   2006 2007  -0.0415     0.0200       -0.0962      0.0131  
##   2007 2004   0.0264     0.0135       -0.0106      0.0634  
##   2007 2005  -0.0048     0.0151       -0.0460      0.0365  
##   2007 2006  -0.0285     0.0175       -0.0762      0.0192  
##   2007 2007  -0.0288     0.0160       -0.0724      0.0148  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0.23116
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Outcome Regression

The results show group that got treated in 2004 has relatively larger, negative and significant TE in years 2006 and 2007, suggesting they actually experienced a decline in the log of employment soon after the treatment.

att_ipw <- att_gt(yname = "lemp",          # Outcome variable
                  tname = "year",          # Time variable
                  idname = "countyreal",   # County identifier (for clustering)
                  gname = "first.treat",   # First treatment year (cohort)
                  xformla = ~ lpop,        # Covariate (log population)
                  data = mpdta,          
                  est_method = "ipw",      # Inverse Probability Weighting (IPW)
                  control_group = "nevertreated", # Never-treated as control
                  clustervars = "countyreal") # Cluster SEs at county level

#Print Results
summary(att_ipw)
## 
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal", 
##     gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated", 
##     clustervars = "countyreal", est_method = "ipw")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##   2004 2004  -0.0145     0.0234       -0.0788      0.0497  
##   2004 2005  -0.0764     0.0301       -0.1594      0.0065  
##   2004 2006  -0.1405     0.0382       -0.2456     -0.0353 *
##   2004 2007  -0.1069     0.0314       -0.1933     -0.0206 *
##   2006 2004  -0.0009     0.0219       -0.0612      0.0595  
##   2006 2005  -0.0064     0.0195       -0.0599      0.0472  
##   2006 2006   0.0012     0.0213       -0.0573      0.0597  
##   2006 2007  -0.0413     0.0201       -0.0966      0.0140  
##   2007 2004   0.0266     0.0129       -0.0090      0.0622  
##   2007 2005  -0.0047     0.0143       -0.0441      0.0347  
##   2007 2006  -0.0283     0.0184       -0.0790      0.0223  
##   2007 2007  -0.0289     0.0164       -0.0739      0.0161  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0.23604
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Inverse Probability Weighting

Similar results.

# Estimate ATT using the doubly robust (DR) estimator
att_dr <- att_gt(yname = "lemp",           # Outcome variable (log employment)
                 tname = "year",           # Time variable
                 idname = "countyreal",    # County identifier (for clustering)
                 gname = "first.treat",    # First treatment year (cohort)
                 xformla = ~ lpop,         # Covariate (log population)
                 data = mpdta,           
                 est_method = "dr",        # Doubly robust (DR) estimation
                 control_group = "nevertreated",  # Never-treated as control
                 clustervars = "countyreal") # Cluster SEs at county level

#Print Results
summary(att_dr)
## 
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal", 
##     gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated", 
##     clustervars = "countyreal", est_method = "dr")
## 
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna.  "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015> 
## 
## Group-Time Average Treatment Effects:
##  Group Time ATT(g,t) Std. Error [95% Simult.  Conf. Band]  
##   2004 2004  -0.0145     0.0228       -0.0745      0.0455  
##   2004 2005  -0.0764     0.0290       -0.1526     -0.0003 *
##   2004 2006  -0.1404     0.0398       -0.2449     -0.0359 *
##   2004 2007  -0.1069     0.0342       -0.1968     -0.0170 *
##   2006 2004  -0.0005     0.0228       -0.0605      0.0595  
##   2006 2005  -0.0062     0.0197       -0.0581      0.0457  
##   2006 2006   0.0010     0.0204       -0.0527      0.0547  
##   2006 2007  -0.0413     0.0208       -0.0959      0.0133  
##   2007 2004   0.0267     0.0143       -0.0110      0.0644  
##   2007 2005  -0.0046     0.0155       -0.0453      0.0361  
##   2007 2006  -0.0284     0.0187       -0.0775      0.0206  
##   2007 2007  -0.0288     0.0172       -0.0739      0.0164  
## ---
## Signif. codes: `*' confidence band does not cover 0
## 
## P-value for pre-test of parallel trends assumption:  0.23267
## Control Group:  Never Treated,  Anticipation Periods:  0
## Estimation Method:  Doubly Robust

IPW yields the highest decline for group treated in 2004 in 2006 and 2007. Map for 2004 was darkest, the estimate was likely adjusted downwar because of hte decline in 2006 and 2007.

install.packages("staggered")
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4:
##   cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4/PACKAGES'
## package 'staggered' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'staggered'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\00LOCK\staggered\libs\x64\staggered.dll
## to
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\staggered\libs\x64\staggered.dll:
## Permission denied
## Warning: restored 'staggered'
## 
## The downloaded binary packages are in
##  C:\Users\zxl5576\AppData\Local\Temp\RtmpcHdslt\downloaded_packages
install.packages("DRDID")
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4:
##   cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4/PACKAGES'
## package 'DRDID' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'DRDID'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\00LOCK\DRDID\libs\x64\DRDID.dll
## to
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\DRDID\libs\x64\DRDID.dll:
## Permission denied
## Warning: restored 'DRDID'
## 
## The downloaded binary packages are in
##  C:\Users\zxl5576\AppData\Local\Temp\RtmpcHdslt\downloaded_packages
install.packages("fastdid")
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4:
##   cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4/PACKAGES'
## package 'fastdid' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\zxl5576\AppData\Local\Temp\RtmpcHdslt\downloaded_packages
library(staggered)
## Warning: package 'staggered' was built under R version 4.4.3
library(DRDID)
## Warning: package 'DRDID' was built under R version 4.4.3
library(fastdid)
## Warning: package 'fastdid' was built under R version 4.4.3
data(mpdta)
summary(mpdta)
##       year        countyreal         lpop              lemp       
##  Min.   :2003   Min.   : 8001   Min.   :0.06579   Min.   : 1.099  
##  1st Qu.:2004   1st Qu.:19113   1st Qu.:2.39762   1st Qu.: 4.868  
##  Median :2005   Median :31048   Median :3.25780   Median : 5.697  
##  Mean   :2005   Mean   :32211   Mean   :3.31291   Mean   : 5.773  
##  3rd Qu.:2006   3rd Qu.:46111   3rd Qu.:4.09790   3rd Qu.: 6.684  
##  Max.   :2007   Max.   :55137   Max.   :7.70477   Max.   :10.443  
##   first.treat         treat      
##  Min.   :   0.0   Min.   :0.000  
##  1st Qu.:   0.0   1st Qu.:0.000  
##  Median :   0.0   Median :0.000  
##  Mean   : 766.5   Mean   :0.382  
##  3rd Qu.:2007.0   3rd Qu.:1.000  
##  Max.   :2007.0   Max.   :1.000
library(staggered)

mpdta_treated <- mpdta %>%
  filter(first.treat > 0)

# Check for NAs
summary(mpdta_treated[c("countyreal", "year", "first.treat", "lemp")])
##    countyreal         year       first.treat        lemp       
##  Min.   : 8001   Min.   :2003   Min.   :2004   Min.   : 1.609  
##  1st Qu.:24027   1st Qu.:2004   1st Qu.:2006   1st Qu.: 5.097  
##  Median :29101   Median :2005   Median :2007   Median : 5.940  
##  Mean   :30007   Mean   :2005   Mean   :2006   Mean   : 6.003  
##  3rd Qu.:37123   3rd Qu.:2006   3rd Qu.:2007   3rd Qu.: 6.903  
##  Max.   :55137   Max.   :2007   Max.   :2007   Max.   :10.002
# Confirm panel structure
mpdta_treated %>% count(countyreal) %>% arrange(n) %>% head()
##   countyreal n
## 1       8001 5
## 2       8019 5
## 3       8023 5
## 4       8029 5
## 5       8041 5
## 6       8063 5
results_cohort <- staggered(
  df = mpdta_treated,
  i = "countyreal",
  t = "year",
  g = "first.treat",
  y = "lemp",
  estimand = "cohort"  # Cohort-specific ATTs
)

summary(results_cohort)
##     estimate              se            se_neyman     
##  Min.   :-0.01904   Min.   :0.01629   Min.   :0.0165  
##  1st Qu.:-0.01904   1st Qu.:0.01629   1st Qu.:0.0165  
##  Median :-0.01904   Median :0.01629   Median :0.0165  
##  Mean   :-0.01904   Mean   :0.01629   Mean   :0.0165  
##  3rd Qu.:-0.01904   3rd Qu.:0.01629   3rd Qu.:0.0165  
##  Max.   :-0.01904   Max.   :0.01629   Max.   :0.0165

Something is wrong here, not sure why the results are all the same.

library(did2s)
## Warning: package 'did2s' was built under R version 4.4.3
## did2s (v1.0.2). For more information on the methodology, visit <https://www.kylebutts.github.io/did2s>
## 
## To cite did2s in publications use:
## 
##   Butts, Kyle (2021).  did2s: Two-Stage Difference-in-Differences
##   Following Gardner (2021). R package version 1.0.2.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {did2s: Two-Stage Difference-in-Differences Following Gardner (2021)},
##     author = {Kyle Butts},
##     year = {2021},
##     url = {https://github.com/kylebutts/did2s/},
##   }
data(mpdta)

# Run Model
model_did2s <- did2s(
  mpdta,
  yname = "lemp",
  first_stage = ~ lpop | year,
  second_stage = ~ treat * first.treat,
  treatment = "treat",
  cluster_var = "countyreal"
)
## Running Two-stage Difference-in-Differences
##  - first stage formula `~ lpop | year`
##  - second stage formula `~ treat * first.treat`
##  - The indicator variable that denotes when treatment is on is `treat`
##  - Standard errors will be clustered by `countyreal`
# Summary
summary(model_did2s)
## OLS estimation, Dep. Var.: lemp
## Observations: 2,500
## Standard-errors: Custom 
##               Estimate Std. Error  t value   Pr(>|t|)    
## treat       239.106955  57.691193  4.14460 3.5174e-05 ***
## first.treat  -0.119164   0.028758 -4.14374 3.5305e-05 ***
## ... 1 variable was removed because of collinearity (treat:first.treat)
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.544414   Adj. R2: 0.015059