PROBLEM 1. Canonical Difference-in-Differences.
# Load "kielmc" data
data(kielmc)
# NOTE(review): attach() places the columns of kielmc on the search path.
# The table() calls below and the later didweight() calls refer to bare
# column names (nearinc, y81, lrprice, ...) and therefore depend on this line.
attach(kielmc)
# Observation counts by nearinc.
table(nearinc)
## nearinc
## 0 1
## 225 96
# Observation counts by y81.
table(y81)
## y81
## 0 1
## 179 142
# Cell counts for every nearinc-by-y81 combination, as a data frame.
as.data.frame(table(nearinc, y81))
## nearinc y81 Freq
## 1 0 0 123
## 2 1 0 56
## 3 0 1 102
## 4 1 1 40
i. Without covariates.
# Model 1: regress lrprice on nearinc, y81 and y81nrinc (presumably the
# nearinc x y81 interaction, i.e. the DiD term) with no further covariates.
m1 <- lm(
  lrprice ~ nearinc + y81 + y81nrinc,
  data = kielmc
)

# Covariance matrix clustered on kielmc$cbd, then the coefficient table
# that uses it for the standard errors.
vcov_cbd <- cluster.vcov(model = m1, cluster = kielmc$cbd)
coeftest1 <- coeftest(m1, vcov. = vcov_cbd)

stargazer(
  coeftest1,
  title = "Regression Model",
  column.labels = "Model 1",
  digits = 3,
  type = "text"
)
##
## Regression Model
## ====================================
## Dependent variable:
## ---------------------------
##
## Model 1
## ------------------------------------
## nearinc -0.340***
## (0.076)
##
## y81 0.193***
## (0.056)
##
## y81nrinc -0.063
## (0.091)
##
## Constant 11.285***
## (0.038)
##
## ====================================
## ====================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# INTERPRETATION: Houses near the facility were already about 34.0% cheaper than other houses before it was built (significant at the 0.01 level), and the 3-year natural price growth is about 19.3% (significant at the 0.01 level). The construction of the facility is associated with only a further 6.3% decrease in nearby house prices (statistically insignificant), which is much smaller than the pre-existing price gap. We suspect that prices already incorporated an anticipation effect: builders who knew about the plan to build the facility may have chosen not to build high-quality houses there.
ii. With covariates.
# Model 2: DiD terms plus covariates, with age entering as a quadratic and
# area/land entering through larea/lland.
m2 <- lm(
  lrprice ~ nearinc + y81 + y81nrinc +
    cbd + rooms + baths + age + agesq + larea + lland,
  data = kielmc
)
vcov_cbd <- cluster.vcov(model = m2, cluster = kielmc$cbd)
coeftest2 <- coeftest(m2, vcov. = vcov_cbd)

# Model 3: same DiD terms, but age enters linearly and area/land enter as
# area/land instead of larea/lland.
m3 <- lm(
  lrprice ~ nearinc + y81 + y81nrinc +
    cbd + rooms + baths + age + area + land,
  data = kielmc
)
vcov_cbd <- cluster.vcov(model = m3, cluster = kielmc$cbd)
coeftest3 <- coeftest(m3, vcov. = vcov_cbd)

# Side-by-side table of both specifications with cbd-clustered standard errors.
stargazer(
  coeftest2, coeftest3,
  title = "Comparison of Regression Models",
  column.labels = c("Model 2", "Model 3"),
  digits = 3,
  type = "text"
)
##
## Comparison of Regression Models
## =====================================
## Dependent variable:
## ----------------------------
##
## Model 2 Model 3
## (1) (2)
## -------------------------------------
## nearinc 0.012 -0.076
## (0.068) (0.060)
##
## y81 0.155*** 0.127***
## (0.038) (0.040)
##
## y81nrinc -0.127** -0.057
## (0.054) (0.052)
##
## cbd -0.00001*** -0.00000
## (0.00000) (0.00000)
##
## rooms 0.048*** 0.063***
## (0.015) (0.018)
##
## baths 0.093*** 0.146***
## (0.028) (0.026)
##
## age -0.008*** -0.003***
## (0.002) (0.001)
##
## agesq 0.00004***
## (0.00001)
##
## larea 0.346***
## (0.070)
##
## lland 0.102***
## (0.031)
##
## area 0.0002***
## (0.00005)
##
## land 0.00000
## (0.00000)
##
## Constant 7.196*** 10.137***
## (0.642) (0.103)
##
## =====================================
## =====================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Same specification as Model 2, refitted next to the weighting estimator.
m4 <- lm(lrprice ~ nearinc + y81 + y81nrinc + cbd + rooms + baths + age + agesq + larea + lland, data = kielmc)
# Weighting-based DiD estimate of the effect of nearinc on lrprice.
# `x` has to carry one column per covariate. The previous call passed
# `rooms + baths + larea + lland`, which R evaluates as a single element-wise
# sum, so only one (meaningless) summed covariate was controlled for.
# cbind() passes the four covariates separately. with() resolves the column
# names inside kielmc, so the call does not rely on attach(kielmc).
# Author's note: cbd and agesq are not included in x because including them
# resulted in an error.
# NOTE: the output recorded below was produced with the summed covariate and
# must be regenerated; results also vary between runs because of the bootstrap.
with(
  kielmc,
  didweight(
    y = lrprice, d = nearinc, t = y81,
    x = cbind(rooms, baths, larea, lland),
    trim = 0.05, cluster = cbd, boot = 1999
  )
)
## $effect
## [1] -0.03169364
##
## $se
## [1] 0.1150804
##
## $pvalue
## [1] 0.7830057
##
## $ntrimmed
## [1] 0
# Same specification as Model 3, refitted next to the weighting estimator.
m5 <- lm(lrprice ~ nearinc + y81 + y81nrinc + cbd + rooms + baths + age + area + land, data = kielmc)
# Weighting-based DiD estimate of the effect of nearinc on lrprice.
# `x` has to carry one column per covariate. The previous call passed
# `rooms + baths`, which R evaluates as a single element-wise sum, so the two
# covariates were collapsed into one. cbind() passes them separately. with()
# resolves the column names inside kielmc, so the call does not rely on
# attach(kielmc).
# Author's note: cbd, area, and land are not included in x because including
# them resulted in an error.
# NOTE: the output recorded below was produced with the summed covariate and
# must be regenerated; results also vary between runs because of the bootstrap.
with(
  kielmc,
  didweight(
    y = lrprice, d = nearinc, t = y81,
    x = cbind(rooms, baths),
    trim = 0.05, cluster = cbd, boot = 1999
  )
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## $effect
## [1] -0.02762012
##
## $se
## [1] 0.09395419
##
## $pvalue
## [1] 0.7687775
##
## $ntrimmed
## [1] 0
# INTERPRETATION: After controlling for an appropriate set of covariates (Model 2), the treatment effect becomes statistically significant (at the 0.05 level): the facility decreased housing prices by about 12.7%. This partially supports our suggestion that builders may have anticipated the treatment and chosen not to build large, high-quality houses in that area.
iii. Two-Way Fixed Effects.
# Friend's data
# Friend's data: four units (i), each observed in two periods (t).
# The data frame is built in one step so that no temporary globals are
# created (a global named `t` would mask base::t() until removed).
df <- data.frame(
  i    = c(1, 1, 2, 2, 3, 3, 4, 4),
  t    = c(1, 2, 1, 2, 1, 2, 1, 2),
  P_t  = c(0, 1, 0, 1, 0, 1, 0, 1),   # equals 1 when t == 2
  D_it = c(0, 0, 0, 0, 0, 1, 0, 1),   # equals 1 for units 3-4 when t == 2
  D_i  = c(0, 0, 0, 0, 1, 1, 1, 1),   # equals 1 for units 3-4 in both periods
  Y_it = c(6, 7, 10, 9, 8, 14, 8, 12)
)
# Interaction of D_i and P_t, entered explicitly in specifications 2 and 3.
df$D_i_P_t <- df$D_i * df$P_t

# Friend 1: D_it with unit and period fixed effects.
m_Friend1 <- feols(Y_it ~ D_it | i + t, data = df)
# Friend 2: D_i, P_t and their interaction, no fixed effects.
m_Friend2 <- feols(Y_it ~ D_i + P_t + D_i_P_t, data = df)
# Friend 3: the interaction term with unit and period fixed effects.
m_Friend3 <- feols(Y_it ~ D_i_P_t | i + t, data = df)

# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
tab <- etable(m_Friend1, m_Friend2, m_Friend3, se.below = TRUE)
tab
## m_Fri..1 m_Friend2 m_Fri..3
## Dependent Var.: Y_it Y_it Y_it
##
## D_it 5.000*
## (1.366)
## Constant 8.000**
## (1.225)
## D_i 3.55e-15
## (1.732)
## P_t 3.55e-15
## (1.732)
## D_i_P_t 5 5.000*
## (2.449) (1.366)
## Fixed-Effects: -------- --------- --------
## i Yes No Yes
## t Yes No Yes
## _______________ ________ _________ ________
## S.E. type by: i IID by: i
## Observations 8 8 8
## R2 0.95960 0.75758 0.95960
## Within R2 0.86207 -- 0.86207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
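# DISCUSSION: All three specifications return the same point estimate (5.000), so all three estimators are unbiased here, but Friend 1's and Friend 3's estimators are more efficient: their standard error is 1.366, compared with 2.449 for Friend 2. Comparing and contrasting the regressions in the table, we conclude that the standard errors should be clustered at the county level (unit i) for efficiency purposes.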
PROBLEM 2. Difference-in-Differences with Multiple Periods of
Treatment.
# Load "mpdta" data
library(did)
data(mpdta)
# Observation counts by treat (rows) and year (columns).
table(mpdta$treat, mpdta$year)
##
## 2003 2004 2005 2006 2007
## 0 309 309 309 309 309
## 1 191 191 191 191 191
# Number of distinct first.treat values.
length(unique(mpdta$first.treat)) # cohorts or groups (incl. never-treated)
## [1] 4
# Observation counts by first.treat (rows) and year (columns).
table(mpdta$first.treat, mpdta$year)
##
## 2003 2004 2005 2006 2007
## 0 309 309 309 309 309
## 2004 20 20 20 20 20
## 2006 40 40 40 40 40
## 2007 131 131 131 131 131
# Distribution of first.treat among rows with treat == 0 (all zeros below).
summary(mpdta$first.treat[mpdta$treat == 0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 0 0 0
# Distribution of first.treat among rows with treat == 1 (2004 to 2007 below).
summary(mpdta$first.treat[mpdta$treat == 1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2004 2006 2007 2006 2007 2007
# Distribution of the outcome lemp; its range motivates the common fill
# limits c(0, 12) used in the maps further down.
summary(mpdta$lemp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.099 4.868 5.697 5.773 6.684 10.443
i. Descriptive county-level maps.
# One row per county (the 2003 cross-section). plot_usmap() is given a `fips`
# column copied from countyreal, and the two grouping variables are converted
# to factors before being used as fill values.
mappingdata <- mutate(
  mpdta[mpdta$year == 2003, ],
  fips = countyreal,
  treat = factor(treat),
  first.treat = factor(first.treat)
)

# Map 1: fill by treat.
plot_usmap(data = mappingdata, values = "treat", color = "black") +
  labs(title = "Treated")

# Map 2: fill by first.treat.
plot_usmap(data = mappingdata, values = "first.treat", color = "black") +
  labs(title = "The first year in which the county introduced a minimum wage")

# plot_usmap() is given a `fips` column copied from countyreal.
mpdta$fips <- mpdta$countyreal

# Draw one county-level map of lemp for a single cohort in a single year.
#
# data:   panel data with columns year, first.treat, fips and lemp
# cohort: value of first.treat to keep (0 is labelled "Nevertreated")
# year:   value of year to keep
# Returns the plot object; the caller prints it.
plot_lemp_map <- function(data, cohort, year) {
  cohort_label <- if (cohort == 0) "Nevertreated" else paste("Group", cohort)
  map_data <- data[data$year == year & data$first.treat == cohort, ]
  plot_usmap(data = map_data, values = "lemp", color = "black") +
    labs(title = paste0(
      "Log of employments (", cohort_label, " in the year of ", year, ")"
    )) +
    # A common fill scale keeps the maps comparable with one another.
    scale_fill_continuous(limits = c(0, 12))
}

# One map per year x cohort, in the same order as before: years ascending,
# and within each year the never-treated group followed by the cohorts in
# ascending order. The years and cohorts are taken from the data rather than
# hard-coded. print() is required because plots are not auto-printed inside
# a loop.
for (map_year in sort(unique(mpdta$year))) {
  for (map_cohort in sort(unique(mpdta$first.treat))) {
    print(plot_lemp_map(mpdta, cohort = map_cohort, year = map_year))
  }
}

ii. Cohort-year-specific average treatment effects on the
treated.
# Group-time average treatment effects with est_method = "reg", using the
# never-treated counties as the comparison group and lpop as the covariate.
# Arguments are listed in the order shown in the printed Call.
m1 <- att_gt(
  yname         = "lemp",
  tname         = "year",
  idname        = "countyreal",
  gname         = "first.treat",
  xformla       = ~ lpop,
  data          = mpdta,
  control_group = "nevertreated",
  clustervars   = "countyreal",
  est_method    = "reg"
)
m1
##
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal",
## gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated",
## clustervars = "countyreal", est_method = "reg")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2004 2004 -0.0149 0.0224 -0.0774 0.0476
## 2004 2005 -0.0770 0.0288 -0.1572 0.0032
## 2004 2006 -0.1411 0.0370 -0.2441 -0.0381 *
## 2004 2007 -0.1075 0.0359 -0.2076 -0.0075 *
## 2006 2004 -0.0021 0.0223 -0.0643 0.0601
## 2006 2005 -0.0070 0.0185 -0.0585 0.0446
## 2006 2006 0.0008 0.0197 -0.0540 0.0555
## 2006 2007 -0.0415 0.0191 -0.0947 0.0116
## 2007 2004 0.0264 0.0138 -0.0119 0.0647
## 2007 2005 -0.0048 0.0165 -0.0506 0.0411
## 2007 2006 -0.0285 0.0184 -0.0798 0.0228
## 2007 2007 -0.0288 0.0155 -0.0721 0.0145
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0.23116
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Outcome Regression
# INTERPRETATION: The treatment effects are statistically significant only for Group 2004 in the years 2006 and 2007, which suggests that the effect of the treatment does not appear immediately but with a delay.
# Same group-time ATT specification as before, but with est_method = "ipw".
# Arguments are listed in the order shown in the printed Call.
m2 <- att_gt(
  yname         = "lemp",
  tname         = "year",
  idname        = "countyreal",
  gname         = "first.treat",
  xformla       = ~ lpop,
  data          = mpdta,
  control_group = "nevertreated",
  clustervars   = "countyreal",
  est_method    = "ipw"
)
m2
##
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal",
## gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated",
## clustervars = "countyreal", est_method = "ipw")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2004 2004 -0.0145 0.0238 -0.0766 0.0475
## 2004 2005 -0.0764 0.0295 -0.1533 0.0004
## 2004 2006 -0.1405 0.0399 -0.2445 -0.0365 *
## 2004 2007 -0.1069 0.0331 -0.1932 -0.0206 *
## 2006 2004 -0.0009 0.0228 -0.0604 0.0586
## 2006 2005 -0.0064 0.0192 -0.0566 0.0438
## 2006 2006 0.0012 0.0215 -0.0550 0.0574
## 2006 2007 -0.0413 0.0200 -0.0934 0.0108
## 2007 2004 0.0266 0.0140 -0.0098 0.0629
## 2007 2005 -0.0047 0.0169 -0.0487 0.0394
## 2007 2006 -0.0283 0.0184 -0.0764 0.0198
## 2007 2007 -0.0289 0.0172 -0.0737 0.0159
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0.23604
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Inverse Probability Weighting
# RESULT: The qualitative results (statistical significance) do not change: only Group 2004 in the years 2006 and 2007 has significant treatment effects.
# Same group-time ATT specification once more, now with est_method = "dr".
# Arguments are listed in the order shown in the printed Call.
m3 <- att_gt(
  yname         = "lemp",
  tname         = "year",
  idname        = "countyreal",
  gname         = "first.treat",
  xformla       = ~ lpop,
  data          = mpdta,
  control_group = "nevertreated",
  clustervars   = "countyreal",
  est_method    = "dr"
)
m3
##
## Call:
## att_gt(yname = "lemp", tname = "year", idname = "countyreal",
## gname = "first.treat", xformla = ~lpop, data = mpdta, control_group = "nevertreated",
## clustervars = "countyreal", est_method = "dr")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
## Group-Time Average Treatment Effects:
## Group Time ATT(g,t) Std. Error [95% Simult. Conf. Band]
## 2004 2004 -0.0145 0.0244 -0.0790 0.0500
## 2004 2005 -0.0764 0.0303 -0.1565 0.0037
## 2004 2006 -0.1404 0.0365 -0.2368 -0.0441 *
## 2004 2007 -0.1069 0.0319 -0.1913 -0.0225 *
## 2006 2004 -0.0005 0.0235 -0.0626 0.0617
## 2006 2005 -0.0062 0.0199 -0.0589 0.0465
## 2006 2006 0.0010 0.0200 -0.0518 0.0538
## 2006 2007 -0.0413 0.0198 -0.0935 0.0110
## 2007 2004 0.0267 0.0144 -0.0113 0.0648
## 2007 2005 -0.0046 0.0162 -0.0474 0.0382
## 2007 2006 -0.0284 0.0187 -0.0779 0.0210
## 2007 2007 -0.0288 0.0169 -0.0734 0.0158
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## P-value for pre-test of parallel trends assumption: 0.23267
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Doubly Robust
# RESULT: The qualitative results (statistical significances) do not change, either.
# Plot the group-time estimates stored in m3 (the est_method = "dr" fit),
# with the y-axis limited to [-0.25, 0.1].
ggdid(m3, ylim = c(-.25, .1))

iii. Cohort-specific and aggregate average treatment effects on the
treated.
# Aggregate the group-time estimates in m3 into a single overall ATT
# (type = "simple").
aggte(m3, type = "simple")
##
## Call:
## aggte(MP = m3, type = "simple")
##
## Reference: Callaway, Brantly and Pedro H.C. Sant'Anna. "Difference-in-Differences with Multiple Time Periods." Journal of Econometrics, Vol. 225, No. 2, pp. 200-230, 2021. <https://doi.org/10.1016/j.jeconom.2020.12.001>, <https://arxiv.org/abs/1803.09015>
##
##
## ATT Std. Error [ 95% Conf. Int.]
## -0.0418 0.011 -0.0633 -0.0203 *
##
##
## ---
## Signif. codes: `*' confidence band does not cover 0
##
## Control Group: Never Treated, Anticipation Periods: 0
## Estimation Method: Doubly Robust
# INTERPRETATION: The aggregate average treatment effect on the treated is -0.0418 (significant at the 0.05 level): an effective minimum wage reduces employment by about 4.18%, and the reduction is statistically significant.
# New package 1: didimputation
# `warn.conflicts` is spelled out in full; the abbreviated `warn =` only
# worked through partial argument matching.
library(didimputation, warn.conflicts = FALSE)
## Warning: package 'didimputation' was built under R version 4.4.3
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## The following objects are masked from 'package:zoo':
##
## yearmon, yearqtr
# Imputation-based estimate of the overall treatment effect on lemp.
m_newpackage1 <- did_imputation(
  data = mpdta,
  yname = "lemp",
  gname = "first.treat",
  tname = "year",
  idname = "countyreal"
)
# Print the estimate table itself. summary() on this one-row table only
# reports the quantiles of a single number (see the recorded output below,
# where every quantile of `estimate` is -0.04771), which hides the layout of
# the result.
m_newpackage1
## lhs term estimate std.error
## Length:1 Length:1 Min. :-0.04771 Min. :0.01322
## Class :character Class :character 1st Qu.:-0.04771 1st Qu.:0.01322
## Mode :character Mode :character Median :-0.04771 Median :0.01322
## Mean :-0.04771 Mean :0.01322
## 3rd Qu.:-0.04771 3rd Qu.:0.01322
## Max. :-0.04771 Max. :0.01322
## conf.low conf.high
## Min. :-0.07363 Min. :-0.02179
## 1st Qu.:-0.07363 1st Qu.:-0.02179
## Median :-0.07363 Median :-0.02179
## Mean :-0.07363 Mean :-0.02179
## 3rd Qu.:-0.07363 3rd Qu.:-0.02179
## Max. :-0.07363 Max. :-0.02179
# New package 2: feols
# Post-treatment indicator: 1 for ever-treated counties from their first
# treatment year onwards. The comparison previously read
# `first.treat >= year`, which flags the years up to and including adoption
# instead. For the 2007 cohort that made the indicator equal to 1 in every
# sample year, so `treated2007` was absorbed by the county fixed effect and
# dropped as collinear.
mpdta$treated <- ifelse(mpdta$treat == 1 & mpdta$year >= mpdta$first.treat, 1, 0)
# Cohort-specific post-treatment indicators.
mpdta$treated2004 <- ifelse(mpdta$treated == 1 & mpdta$first.treat == 2004, 1, 0)
mpdta$treated2006 <- ifelse(mpdta$treated == 1 & mpdta$first.treat == 2006, 1, 0)
mpdta$treated2007 <- ifelse(mpdta$treated == 1 & mpdta$first.treat == 2007, 1, 0)
# Two-way fixed effects regression with one treatment coefficient per cohort.
# NOTE: the recorded output below reports lpop as removed for collinearity
# with the fixed effects. That output was produced with the old indicator
# and must be regenerated.
m_newpackage2 <- feols(lemp ~ treated2004 + treated2006 + treated2007 + lpop | countyreal + year, data = mpdta)
## The variables 'treated2007' and 'lpop' have been removed because of collinearity (see $collin.var).
# Coefficient table of the two-way fixed effects fit (the output below shows
# standard errors clustered by countyreal).
summary(m_newpackage2)
## OLS estimation, Dep. Var.: lemp
## Observations: 2,500
## Fixed-effects: countyreal: 500, year: 5
## Standard-errors: Clustered (countyreal)
## Estimate Std. Error t value Pr(>|t|)
## treated2004 0.092507 0.026617 3.47553 0.0005543 ***
## treated2006 0.027545 0.020169 1.36569 0.1726516
## ... 2 variables were removed because of collinearity (treated2007 and lpop)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.124135 Adj. R2: 0.991513
## Within R2: 0.005582
# New package 3: staggered
# `warn.conflicts` is spelled out in full; the abbreviated `warn =` only
# worked through partial argument matching.
library(staggered, warn.conflicts = FALSE)
## Warning: package 'staggered' was built under R version 4.4.3
# NOTE(review): mpdta codes never-treated counties as first.treat == 0.
# Confirm against the staggered documentation how never-treated units must be
# coded in `g`; the recorded estimate (-0.3664) is far from the other
# estimators' values (about -0.04), which suggests the 0 coding is not being
# read as "never treated". TODO confirm.
m_newpackage3 <- staggered(
  df = mpdta,
  i = "countyreal",
  t = "year",
  g = "first.treat",
  y = "lemp",
  estimand = "cohort"
)
# Print the result table itself. summary() on this one-row table only reports
# the quantiles of a single number (see the recorded output below).
m_newpackage3
## estimate se se_neyman
## Min. :-0.3664 Min. :0.1132 Min. :0.1132
## 1st Qu.:-0.3664 1st Qu.:0.1132 1st Qu.:0.1132
## Median :-0.3664 Median :0.1132 Median :0.1132
## Mean :-0.3664 Mean :0.1132 Mean :0.1132
## 3rd Qu.:-0.3664 3rd Qu.:0.1132 3rd Qu.:0.1132
## Max. :-0.3664 Max. :0.1132 Max. :0.1132