#Libraries
# Load packages
library(pacman)
p_load(Matching, Jmisc, lmtest, sandwich, kdensity, haven, boot,
cobalt, Matchit, Zelig, estimatr, cem, tidyverse,
lubridate, usmap, gridExtra, stringr, readxl, plot3D,
cowplot, reshape2, scales, broom, data.table, ggplot2, stargazer,
foreign, ggthemes, ggforce, ggridges, latex2exp, viridis, extrafont,
kableExtra, snakecase, janitor)
library(fixest)
##
## Attaching package: 'fixest'
## The following object is masked from 'package:scales':
##
## pvalue
## The following object is masked from 'package:Jmisc':
##
## demean
library(multiwayvcov)
library(lmtest)
library(stargazer)
library(modelsummary)
library(causalweight)
## Loading required package: ranger
library(fixest)
library(wooldridge)
## Warning: package 'wooldridge' was built under R version 4.4.3
##
## Attaching package: 'wooldridge'
## The following object is masked from 'package:MASS':
##
## cement
# Load `kielmc` data
data(kielmc)
attach(kielmc)
as.data.frame(table(nearinc, y81))
## nearinc y81 Freq
## 1 0 0 123
## 2 1 0 56
## 3 0 1 102
## 4 1 1 40
kielmc <- kielmc
summary(rprice)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 26000 59000 82000 83721 100230 300000
length(unique(cbd)) # number of clusters
## [1] 34
#PROBLEM 1
# 2x2 Difference-in-Differences Model
did_model <- feols(rprice ~ nearinc * y81, data = kielmc, cluster = ~cbd)
# Display results
modelsummary(did_model, stars = TRUE)
(1) | |
---|---|
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | |
(Intercept) | 82517.228*** |
(3221.924) | |
nearinc | -18824.370* |
(7796.294) | |
y81 | 18790.286*** |
(5154.864) | |
nearinc × y81 | -11863.903+ |
(6621.818) | |
Num.Obs. | 321 |
R2 | 0.174 |
R2 Adj. | 0.166 |
AIC | 7538.5 |
BIC | 7553.5 |
RMSE | 30053.88 |
Std.Errors | by: cbd |
Results suggest that the price of a house near to incinerator is approximately $11,863 lower than those that are not.
# 2x2 Difference-in-Differences Model with Covariates
did_model1 <- feols(rprice ~ nearinc * y81 + rooms + baths + age + agesq + larea + lland,
data = kielmc, cluster = ~cbd)
# 2x2 Difference-in-Differences Model with Covariates
did_model2 <- feols(rprice ~ nearinc * y81 + rooms + baths + age + area + land,
data = kielmc, cluster = ~cbd)
# Create a comparative table with model estimates
modelsummary(list("Model 1" = did_model1, "Model 2" = did_model2),
stars = TRUE, # Display significance stars
output = "markdown") # Display in a clean table format
Model 1 | Model 2 | |
---|---|---|
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
(Intercept) | -279726.300** | -15131.031 |
(93485.448) | (11348.423) | |
nearinc | 15554.707 | 4877.285 |
(10224.753) | (7465.639) | |
y81 | 16672.713*** | 13792.808** |
(3714.861) | (3839.912) | |
rooms | 2978.963* | 4211.879* |
(1401.381) | (1731.493) | |
baths | 7458.220** | 11103.272*** |
(2595.273) | (2504.100) | |
age | -616.245*** | -190.563** |
(128.447) | (53.182) | |
agesq | 3.053*** | |
(0.776) | ||
larea | 32815.339*** | |
(8487.841) | ||
lland | 7194.257 | |
(4660.224) | ||
nearinc × y81 | -17545.704** | -11692.428* |
(5821.574) | (4884.688) | |
area | 17.937** | |
(5.653) | ||
land | 0.118 | |
(0.111) | ||
Num.Obs. | 321 | 321 |
R2 | 0.637 | 0.636 |
R2 Adj. | 0.627 | 0.626 |
AIC | 7286.3 | 7285.8 |
BIC | 7324.1 | 7319.7 |
RMSE | 19917.40 | 19961.94 |
Std.Errors | by: cbd | by: cbd |
Treatment effect with non-log covariate: -17545.704; treatment effect with logged covaraite: -11692.428. The magnitude is smaller with logged variables.
covariates <- as.matrix(kielmc$rooms, kielmc$baths, kielmc$age, kielmc$agesq, kielmc$larea, kielmc$lland)
## IPW-DiD model with cluster bootstrap standard errors
did3_ipw <- didweight(kielmc$rprice, # Outcome variable
kielmc$nearinc, # Treatment indicator
kielmc$y81, # Time indicator
x = as.matrix(kielmc[, covariates]), # Covariates matrix
boot = 500, # Number of bootstrap repetitions
cluster = kielmc$cbd) # Cluster variable
# 3. Results
treatment_effect <- as.numeric(did3_ipw$effect)
se <- as.numeric(did3_ipw$se)
pval <- as.numeric(did3_ipw$pvalue)
# Create a clean results table with rounded values
ipw_results <- data.frame(
Metric = c("TE", "SE", "P-Value"),
Value = c(
round(did3_ipw$effect, 2),
round(did3_ipw$se, 2),
round(did3_ipw$pvalue, 4)
)
)
## Display results
kable(ipw_results, caption = "IPW DiD Results")
Metric | Value |
---|---|
TE | 17687.7300 |
SE | 34992.7100 |
P-Value | 0.6132 |
Treatment effect estimate is similar to the first model but is insignificant with IPW. However, IPW helps weighting the groups so that they are more comparable and ensures similar distribution of covariates, making it the most reliable in theory.
Equation 1 is TWFE, where as equation 2 is standard DID. If there are reasons to believe treatment effects are heterogeneous, DID is better since TWFE average out the differences between counties. If assumes homogeneous TE, TWFE is preferred for its robustness against county-specific unobservables.
# Friend's data
i <- c(1, 1, 2, 2, 3, 3, 4, 4)
t <- c(1, 2, 1, 2, 1, 2, 1, 2)
P_t <- c(0, 1, 0, 1, 0, 1, 0, 1)
D_it <- c(0, 0, 0, 0, 0, 1, 0, 1)
D_i <- c(0, 0, 0, 0, 1, 1, 1, 1)
Y_it <- c(6, 7, 10, 9, 8, 14, 8, 12)
df <- data.frame(i=i, t=t, P_t=P_t, D_it=D_it, D_i=D_i, Y_it=Y_it)
df
## i t P_t D_it D_i Y_it
## 1 1 1 0 0 0 6
## 2 1 2 1 0 0 7
## 3 2 1 0 0 0 10
## 4 2 2 1 0 0 9
## 5 3 1 0 0 1 8
## 6 3 2 1 1 1 14
## 7 4 1 0 0 1 8
## 8 4 2 1 1 1 12
twfe_model <- feols(Y_it ~ D_it | i + t, data = df, cluster = ~i)
#Equation 2
did_model <- feols(Y_it ~ D_i + P_t + D_i:P_t, data = df, cluster = ~i)
#Equation 3
rsa_model <- feols(Y_it ~ D_i:P_t | i + t, data = df, cluster = ~i)
#Compare Results
modelsummary(
list("TWFE" = twfe_model,
"DiD" = did_model,
"Roth et al" = rsa_model),
stars = TRUE,
gof_omit = "IC|R2|Log|F",
output = "markdown"
)
TWFE | DiD | Roth et al | |
---|---|---|---|
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | |||
D_it | 5.000* | ||
(1.366) | |||
(Intercept) | 8.000* | ||
(2.160) | |||
D_i | 0.000 | ||
(2.160) | |||
P_t | 0.000 | ||
(1.080) | |||
D_i × P_t | 5.000* | 5.000* | |
(1.528) | (1.366) | ||
Num.Obs. | 8 | 8 | 8 |
RMSE | 0.50 | 1.22 | 0.50 |
Std.Errors | by: i | by: i | by: i |
All methods give the same TE estimates with slightly different SE. TWFE and DID are essentially the same with 2 groups and 2 periods, which is why both estimates and SE are the same.
# Load `mpdta` data
library(did)
## Warning: package 'did' was built under R version 4.4.3
data(mpdta)
table(mpdta$treat, mpdta$year)
##
## 2003 2004 2005 2006 2007
## 0 309 309 309 309 309
## 1 191 191 191 191 191
#table(JC$trainy2)
table(JC$assignment, JC$trainy1)
##
## 0 1
## 0 1809 1854
## 1 857 4720
length(unique(mpdta$first.treat)) # cohorts or groups (incl. never-treated)
## [1] 4
table(mpdta$first.treat, mpdta$year)
##
## 2003 2004 2005 2006 2007
## 0 309 309 309 309 309
## 2004 20 20 20 20 20
## 2006 40 40 40 40 40
## 2007 131 131 131 131 131
summary(mpdta$first.treat[mpdta$treat==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
summary(mpdta$first.treat[mpdta$treat==1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2004 2006 2007 2006 2007 2007
summary(mpdta$lemp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.099 4.868 5.697 5.773 6.684 10.443
mpdta <- mpdta %>%
dplyr::rename(fips = countyreal) %>%
mutate(treat = as.factor(treat))
# Map 1: Treatment status
plot_treat <- plot_usmap(data = mpdta%>% na.omit(), values = "treat", regions = "counties") +
scale_fill_manual(values = c("#FFFFBF", "darkred"), name = "Treatment Status") +
labs(title = "Treatment Status") +
theme_minimal()
# Map 2: First Treatment Year
mpdta$first.treat <- as.factor(mpdta$first.treat)
plot_first_treat <- plot_usmap(data = mpdta %>% na.omit(), values = "first.treat", regions = "counties") +
scale_fill_viridis_d(name = "First Treatment Year") +
labs(title = "First Year of Treatment") +
theme_minimal()
# Map Together
library(gridExtra)
grid.arrange(plot_treat, plot_first_treat, ncol = 1)
mpdta$first.treat <- as.numeric(mpdta$first.treat)
mpdta_treated <- mpdta %>%
filter(first.treat != 0)
# Compute mean log employment for each county by first treatment year
lemp_summary <- mpdta_treated %>%
group_by(year, first.treat) %>%
dplyr::summarise(mean_lemp = mean(lemp, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
plot_list <- list()
# Loop over each first.treat year (2004, 2006, 2007)
for (ft in c(2004, 2006, 2007)) {
df_ft <- filter(mpdta, first.treat == ft)
# Generate the county-level map
p <- plot_usmap(data = df_ft, values = "lemp", regions = "counties") +
scale_fill_distiller(palette = "Spectral", name = "Log Employment", direction = 1) +
labs(title = paste("Log Employment by First Treatment Year:", ft)) +
theme_minimal()
# Store plot in list
plot_list[[as.character(ft)]] <- p
}
grid.arrange(grobs = plot_list, ncol = 3)
data(mpdta)
# Estimate ATT using `att_gt`
att <- att_gt(yname = "lemp", # Outcome variable
tname = "year", # Time variable
idname = "countyreal", # County identifier
gname = "first.treat", # First treatment year (cohort)
xformla = ~lpop, # Covariate (log population)
data = mpdta,
est_method = "reg", # Outcome regression estimation
control_group = "nevertreated", # Never-treated as control group
clustervars = "countyreal") # Cluster SEs at county level
#Print Results
summary(att)
##
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal",
## gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated",
## clustervars = "countyreal", est_method = "reg")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2004 2004 -0.0149 0.0239 -0.0801 0.0503
## 2004 2005 -0.0770 0.0301 -0.1591 0.0051
## 2004 2006 -0.1411 0.0351 -0.2371 -0.0451 *
## 2004 2007 -0.1075 0.0355 -0.2046 -0.0105 *
## 2006 2004 -0.0021 0.0223 -0.0629 0.0588
## 2006 2005 -0.0070 0.0189 -0.0586 0.0446
## 2006 2006 0.0008 0.0200 -0.0537 0.0553
## 2006 2007 -0.0415 0.0200 -0.0962 0.0131
## 2007 2004 0.0264 0.0135 -0.0106 0.0634
## 2007 2005 -0.0048 0.0151 -0.0460 0.0365
## 2007 2006 -0.0285 0.0175 -0.0762 0.0192
## 2007 2007 -0.0288 0.0160 -0.0724 0.0148
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0.23116
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Outcome Regression
The results show group that got treated in 2004 has relatively larger, negative and significant TE in years 2006 and 2007, suggesting they actually experienced a decline in the log of employment soon after the treatment.
att_ipw <- att_gt(yname = "lemp", # Outcome variable
tname = "year", # Time variable
idname = "countyreal", # County identifier (for clustering)
gname = "first.treat", # First treatment year (cohort)
xformla = ~ lpop, # Covariate (log population)
data = mpdta,
est_method = "ipw", # Inverse Probability Weighting (IPW)
control_group = "nevertreated", # Never-treated as control
clustervars = "countyreal") # Cluster SEs at county level
#Print Results
summary(att_ipw)
##
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal",
## gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated",
## clustervars = "countyreal", est_method = "ipw")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2004 2004 -0.0145 0.0234 -0.0788 0.0497
## 2004 2005 -0.0764 0.0301 -0.1594 0.0065
## 2004 2006 -0.1405 0.0382 -0.2456 -0.0353 *
## 2004 2007 -0.1069 0.0314 -0.1933 -0.0206 *
## 2006 2004 -0.0009 0.0219 -0.0612 0.0595
## 2006 2005 -0.0064 0.0195 -0.0599 0.0472
## 2006 2006 0.0012 0.0213 -0.0573 0.0597
## 2006 2007 -0.0413 0.0201 -0.0966 0.0140
## 2007 2004 0.0266 0.0129 -0.0090 0.0622
## 2007 2005 -0.0047 0.0143 -0.0441 0.0347
## 2007 2006 -0.0283 0.0184 -0.0790 0.0223
## 2007 2007 -0.0289 0.0164 -0.0739 0.0161
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0.23604
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Inverse Probability Weighting
Similar results.
# Estimate ATT using the doubly robust (DR) estimator
att_dr <- att_gt(yname = "lemp", # Outcome variable (log employment)
tname = "year", # Time variable
idname = "countyreal", # County identifier (for clustering)
gname = "first.treat", # First treatment year (cohort)
xformla = ~ lpop, # Covariate (log population)
data = mpdta,
est_method = "dr", # Doubly robust (DR) estimation
control_group = "nevertreated", # Never-treated as control
clustervars = "countyreal") # Cluster SEs at county level
#Print Results
summary(att_dr)
##
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal",
## gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated",
## clustervars = "countyreal", est_method = "dr")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2004 2004 -0.0145 0.0228 -0.0745 0.0455
## 2004 2005 -0.0764 0.0290 -0.1526 -0.0003 *
## 2004 2006 -0.1404 0.0398 -0.2449 -0.0359 *
## 2004 2007 -0.1069 0.0342 -0.1968 -0.0170 *
## 2006 2004 -0.0005 0.0228 -0.0605 0.0595
## 2006 2005 -0.0062 0.0197 -0.0581 0.0457
## 2006 2006 0.0010 0.0204 -0.0527 0.0547
## 2006 2007 -0.0413 0.0208 -0.0959 0.0133
## 2007 2004 0.0267 0.0143 -0.0110 0.0644
## 2007 2005 -0.0046 0.0155 -0.0453 0.0361
## 2007 2006 -0.0284 0.0187 -0.0775 0.0206
## 2007 2007 -0.0288 0.0172 -0.0739 0.0164
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0.23267
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Doubly Robust
IPW yields the highest decline for group treated in 2004 in 2006 and 2007. Map for 2004 was darkest, the estimate was likely adjusted downwar because of hte decline in 2006 and 2007.
install.packages("staggered")
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4:
## cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4/PACKAGES'
## package 'staggered' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'staggered'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\00LOCK\staggered\libs\x64\staggered.dll
## to
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\staggered\libs\x64\staggered.dll:
## Permission denied
## Warning: restored 'staggered'
##
## The downloaded binary packages are in
## C:\Users\zxl5576\AppData\Local\Temp\RtmpcHdslt\downloaded_packages
install.packages("DRDID")
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4:
## cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4/PACKAGES'
## package 'DRDID' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'DRDID'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\00LOCK\DRDID\libs\x64\DRDID.dll
## to
## C:\Users\zxl5576\AppData\Local\Programs\R\R-4.4.1\library\DRDID\libs\x64\DRDID.dll:
## Permission denied
## Warning: restored 'DRDID'
##
## The downloaded binary packages are in
## C:\Users\zxl5576\AppData\Local\Temp\RtmpcHdslt\downloaded_packages
install.packages("fastdid")
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4:
## cannot open URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.4/PACKAGES'
## package 'fastdid' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\zxl5576\AppData\Local\Temp\RtmpcHdslt\downloaded_packages
library(staggered)
## Warning: package 'staggered' was built under R version 4.4.3
library(DRDID)
## Warning: package 'DRDID' was built under R version 4.4.3
library(fastdid)
## Warning: package 'fastdid' was built under R version 4.4.3
data(mpdta)
summary(mpdta)
## year countyreal lpop lemp
## Min. :2003 Min. : 8001 Min. :0.06579 Min. : 1.099
## 1st Qu.:2004 1st Qu.:19113 1st Qu.:2.39762 1st Qu.: 4.868
## Median :2005 Median :31048 Median :3.25780 Median : 5.697
## Mean :2005 Mean :32211 Mean :3.31291 Mean : 5.773
## 3rd Qu.:2006 3rd Qu.:46111 3rd Qu.:4.09790 3rd Qu.: 6.684
## Max. :2007 Max. :55137 Max. :7.70477 Max. :10.443
## first.treat treat
## Min. : 0.0 Min. :0.000
## 1st Qu.: 0.0 1st Qu.:0.000
## Median : 0.0 Median :0.000
## Mean : 766.5 Mean :0.382
## 3rd Qu.:2007.0 3rd Qu.:1.000
## Max. :2007.0 Max. :1.000
library(staggered)
mpdta_treated <- mpdta %>%
filter(first.treat > 0)
# Check for NAs
summary(mpdta_treated[c("countyreal", "year", "first.treat", "lemp")])
## countyreal year first.treat lemp
## Min. : 8001 Min. :2003 Min. :2004 Min. : 1.609
## 1st Qu.:24027 1st Qu.:2004 1st Qu.:2006 1st Qu.: 5.097
## Median :29101 Median :2005 Median :2007 Median : 5.940
## Mean :30007 Mean :2005 Mean :2006 Mean : 6.003
## 3rd Qu.:37123 3rd Qu.:2006 3rd Qu.:2007 3rd Qu.: 6.903
## Max. :55137 Max. :2007 Max. :2007 Max. :10.002
# Confirm panel structure
mpdta_treated %>% count(countyreal) %>% arrange(n) %>% head()
## countyreal n
## 1 8001 5
## 2 8019 5
## 3 8023 5
## 4 8029 5
## 5 8041 5
## 6 8063 5
results_cohort <- staggered(
df = mpdta_treated,
i = "countyreal",
t = "year",
g = "first.treat",
y = "lemp",
estimand = "cohort" # Cohort-specific ATTs
)
summary(results_cohort)
## estimate se se_neyman
## Min. :-0.01904 Min. :0.01629 Min. :0.0165
## 1st Qu.:-0.01904 1st Qu.:0.01629 1st Qu.:0.0165
## Median :-0.01904 Median :0.01629 Median :0.0165
## Mean :-0.01904 Mean :0.01629 Mean :0.0165
## 3rd Qu.:-0.01904 3rd Qu.:0.01629 3rd Qu.:0.0165
## Max. :-0.01904 Max. :0.01629 Max. :0.0165
Something is wrong here, not sure why the results are all the same.
library(did2s)
## Warning: package 'did2s' was built under R version 4.4.3
## did2s (v1.0.2). For more information on the methodology, visit <https://www.kylebutts.github.io/did2s>
##
## To cite did2s in publications use:
##
## Butts, Kyle (2021). did2s: Two-Stage Difference-in-Differences
## Following Gardner (2021). R package version 1.0.2.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {did2s: Two-Stage Difference-in-Differences Following Gardner (2021)},
## author = {Kyle Butts},
## year = {2021},
## url = {https://github.com/kylebutts/did2s/},
## }
data(mpdta)
# Run Model
model_did2s <- did2s(
mpdta,
yname = "lemp",
first_stage = ~ lpop | year,
second_stage = ~ treat * first.treat,
treatment = "treat",
cluster_var = "countyreal"
)
## Running Two-stage Difference-in-Differences
## - first stage formula `~ lpop | year`
## - second stage formula `~ treat * first.treat`
## - The indicator variable that denotes when treatment is on is `treat`
## - Standard errors will be clustered by `countyreal`
# Summary
summary(model_did2s)
## OLS estimation, Dep. Var.: lemp
## Observations: 2,500
## Standard-errors: Custom
## Estimate Std. Error t value Pr(>|t|)
## treat 239.106955 57.691193 4.14460 3.5174e-05 ***
## first.treat -0.119164 0.028758 -4.14374 3.5305e-05 ***
## ... 1 variable was removed because of collinearity (treat:first.treat)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.544414 Adj. R2: 0.015059