The variables were collected from two main data sources. For the Venture Capital (VC) data we used Venture Express. The original data start in 1960 and contain all the investments made by VC firms in start-ups. We aggregated the data at the MSA level, creating the following variables:
vc_sum = Sum of all the investments made in all the companies in the same MSA
Investors = Total number of investment firms in the same MSA
sum_round_numbers = Total number of investment rounds in the same MSA
num_comp_per_aggreg = Number of invested companies aggregated in the same MSA
treated = 1 if the MSA is treated
post = 1 if the year is equal to or later than the year of treatment
years_to_treat = -1000 if never treated; otherwise the number of periods relative to the year of treatment (negative before treatment)
year_treated = 1000 if never treated; otherwise the year of treatment, counted in years starting from the first year in the dataset (2002)
round_year = years starting from 2002
year = index of the year (1 for 2002, 18 for 2019), i.e. a simple count of the years
The second data source was the media closures data.
We manually checked all the data and converted them from county level to MSA level; in doing so we had to drop 4 data points that had no matching MSA. We use media closures as the exogenous shock to analyse whether any of the VC variables are affected by them.
# Preprocessing ----
# Bind the treatment indicators, population and GDP side by side.
a <- cbind(
  news_MSA_VC$post,
  news_MSA_VC$treated,
  news_MSA_VC$pop,
  news_MSA_VC$GDP
)
# Per-capita GDP: GDP divided by population, row by row.
news_MSA_VC$GDP_per <- news_MSA_VC$GDP / news_MSA_VC$pop
library(knitr) # for making table (optional)
library(fastDummies) # main package
## Warning: package 'fastDummies' was built under R version 4.1.3
## Making dummy variables for a SPECIFIC categorical variable ##
#news_MSA_VC <- fastDummies::dummy_cols(news_MSA_VC, select_columns = "MSA") # create dummy variables for MSA
#news_MSA_VC <- fastDummies::dummy_cols(news_MSA_VC, select_columns = "round_year") # create dummy variables for year
We analyzed several variables, but we dropped the ones that showed a clearly spurious correlation.
The 3 analyses below are significant, and the 2 additional non-significant variables are mentioned at the end.
# Difference-in-differences on log VC investment: post x treated interaction,
# treatment indicator and per-capita GDP. Three specifications are fitted in
# turn; each assignment replaces `fit`, so only the last one is summarised.

# 1) Pooled model.
fit <- plm(
  log(vc_sum) ~ I(post * treated) + treated + GDP_per,
  data = news_MSA_VC,
  index = c("MSA", "round_year"),
  model = "pooling",
  effect = "individual"
)
#options(max.print=1000000)
# 2) Within model with individual (MSA) effects.
fit <- plm(
  log(vc_sum) ~ I(post * treated) + treated + GDP_per,
  data = news_MSA_VC,
  index = c("MSA", "round_year"),
  model = "within",
  effect = "individual"
)
# 3) Within model with two-way (MSA and round_year) effects. The summary that
#    follows reports no coefficient for `treated`.
fit <- plm(
  log(vc_sum) ~ I(post * treated) + treated + GDP_per,
  data = news_MSA_VC,
  index = c("MSA", "round_year"),
  model = "within",
  effect = "twoways"
)
summary(fit)
## Twoways effects Within Model
##
## Call:
## plm(formula = log(vc_sum) ~ I(post * treated) + treated + GDP_per,
## data = news_MSA_VC, effect = "twoways", model = "within",
## index = c("MSA", "round_year"))
##
## Balanced Panel: n = 236, T = 18, N = 4248
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -16.49656 -1.74869 -0.18943 0.92269 17.71184
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## I(post * treated) -0.243263 0.523300 -0.4649 0.642055
## GDP_per -0.055900 0.020546 -2.7207 0.006542 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 94558
## Residual Sum of Squares: 94377
## R-Squared: 0.001912
## Adj. R-Squared: -0.061578
## F-statistic: 3.82462 on 2 and 3993 DF, p-value: 0.021907
# "Naive" TWFE DiD (note that the time to treatment for the never treated is -1000)
# (by using ref = c(-1, -1000) we exclude the period just before the treatment and
# the never treated)
# Event-study TWFE: one dummy per value of years_to_treat (reference levels
# -1 and -1000), per-capita GDP as control, MSA and year fixed effects.
res_twfe <- feols(
  vc_sum ~ i(years_to_treat, ref = c(-1, -1000)) + GDP_per | MSA + year,
  news_MSA_VC
)
# Standard errors clustered two-way.
summary(res_twfe, vcov = "twoway")
## OLS estimation, Dep. Var.: vc_sum
## Observations: 4,248
## Fixed-effects: MSA: 236, year: 18
## Standard-errors: Clustered (MSA & year)
## Estimate Std. Error t value Pr(>|t|)
## years_to_treat::-17 -325257610 192460949 -1.689993 0.109282
## years_to_treat::-16 -339368911 196785540 -1.724562 0.102740
## years_to_treat::-15 -314322033 179783751 -1.748334 0.098440 .
## years_to_treat::-14 -668111544 595991135 -1.121009 0.277870
## years_to_treat::-13 -392516008 275510044 -1.424689 0.172347
## years_to_treat::-12 -313193681 193325399 -1.620034 0.123625
## years_to_treat::-11 -425176002 218690392 -1.944192 0.068612 .
## years_to_treat::-10 -364726659 194839902 -1.871930 0.078524 .
## years_to_treat::-9 -634898995 352727396 -1.799971 0.089637 .
## years_to_treat::-8 -553119259 341278050 -1.620729 0.123475
## years_to_treat::-7 -296150115 349677581 -0.846923 0.408803
## years_to_treat::-6 -313333164 213419127 -1.468159 0.160321
## years_to_treat::-5 -297795848 225524018 -1.320462 0.204183
## years_to_treat::-4 -143717313 204207553 -0.703781 0.491098
## years_to_treat::-3 -176211186 129333190 -1.362459 0.190832
## years_to_treat::-2 -85548694 65594555 -1.304204 0.209547
## years_to_treat::0 124564011 118416564 1.051914 0.307563
## years_to_treat::1 530241608 378330366 1.401531 0.179048
## years_to_treat::2 349867433 284328294 1.230505 0.235263
## years_to_treat::3 814474419 537105252 1.516415 0.147789
## years_to_treat::4 911758057 699423943 1.303584 0.209754
## years_to_treat::5 1043704935 678206219 1.538920 0.142229
## years_to_treat::6 1255697171 927402175 1.353994 0.193465
## years_to_treat::7 1811456461 1493695675 1.212735 0.241815
## years_to_treat::8 2487700319 2167261188 1.147854 0.266926
## years_to_treat::9 554558261 463206601 1.197216 0.247651
## years_to_treat::10 578144145 467225509 1.237399 0.232759
## years_to_treat::11 816201323 546804978 1.492674 0.153849
## years_to_treat::12 963252457 543765128 1.771449 0.094410 .
## years_to_treat::13 750764156 611040969 1.228664 0.235936
## years_to_treat::14 958416380 673141182 1.423797 0.172601
## GDP_per 48423416 24911889 1.943787 0.068664 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 633,309,507.0 Adj. R2: 0.654397
## Within R2: 0.144399
# Sun and Abraham (2020) estimator: sunab(cohort, period) takes the treatment
# cohort (year_treated) and the period (year).
res_sa20 <- feols(
  vc_sum ~ sunab(year_treated, year) + GDP_per | MSA + year,
  news_MSA_VC
) # specifying the cluster here doesn't change anything
# Standard errors clustered two-way, as for the TWFE model.
summary(res_sa20, vcov = "twoway")
## Variance contained negative values in the diagonal and was 'fixed' (a la Cameron, Gelbach & Miller 2011).
## OLS estimation, Dep. Var.: vc_sum
## Observations: 4,248
## Fixed-effects: MSA: 236, year: 18
## Standard-errors: Clustered (MSA & year)
## Estimate Std. Error t value Pr(>|t|)
## year::-17 6762088.4 55417504 0.122021 9.0431e-01
## year::-16 155323.6 64296798 0.002416 9.9810e-01
## year::-15 -10580001.3 40729384 -0.259763 7.9817e-01
## year::-14 -516763850.8 64588736 -8.000835 3.6447e-07 ***
## year::-13 -245998949.2 33079668 -7.436560 9.7222e-07 ***
## year::-12 -123533409.4 65612633 -1.882769 7.6961e-02 .
## year::-11 -237698565.9 44539637 -5.336787 5.4502e-05 ***
## year::-10 -244149218.2 66201250 -3.687985 1.8248e-03 **
## year::-9 -356056678.7 193248949 -1.842477 8.2916e-02 .
## year::-8 -278262083.1 206804258 -1.345534 1.9613e-01
## year::-7 -149734192.0 160625615 -0.932194 3.6429e-01
## year::-6 -213568372.8 132827590 -1.607862 1.2628e-01
## year::-5 -221037647.1 104328450 -2.118671 4.9148e-02 *
## year::-4 -97037291.3 90224615 -1.075508 2.9718e-01
## year::-3 -171092670.4 60376201 -2.833777 1.1460e-02 *
## year::-2 -85792707.4 102954478 -0.833307 4.1623e-01
## year::0 125685084.4 75327860 1.668507 1.1353e-01
## year::1 523026895.9 134469053 3.889571 1.1781e-03 **
## year::2 338551600.6 90735099 3.731209 1.6613e-03 **
## year::3 779769425.2 339184353 2.298955 3.4459e-02 *
## year::4 891523062.8 530039312 1.681994 1.1085e-01
## year::5 994344518.2 562826015 1.766700 9.5226e-02 .
## year::6 1066863223.7 703836935 1.515782 1.4795e-01
## year::7 1636418188.0 1056079929 1.549521 1.3967e-01
## year::8 2346784406.6 1428712419 1.642587 1.1884e-01
## year::9 -22375578.9 42777043 -0.523074 6.0767e-01
## year::10 4769657.4 53502373 0.089149 9.3001e-01
## year::11 74847284.9 39386587 1.900324 7.4487e-02 .
## year::12 297931186.3 19821022 15.031071 2.9961e-11 ***
## year::13 -17657917.6 54543848 -0.323738 7.5009e-01
## year::14 71339868.6 72927882 0.978225 3.4168e-01
## GDP_per 35895167.5 15666551 2.291198 3.4996e-02 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 491,383,681.9 Adj. R2: 0.782443
## Within R2: 0.484913
# Overlay the event-study coefficients of both estimators on a single plot.
iplot(
  list(res_twfe, res_sa20),
  sep = 0.5
)
legend(
  "topleft",
  col = c(1, 2),
  pch = c(20, 17),
  legend = c("TWFE", "Sun & Abraham (2020)")
)
### Aggregated ATT
# The full ATT: the Sun & Abraham estimates collapsed into one coefficient.
summary(res_sa20, agg = "att")
## OLS estimation, Dep. Var.: vc_sum
## Observations: 4,248
## Fixed-effects: MSA: 236, year: 18
## Standard-errors: Clustered (MSA)
## Estimate Std. Error t value Pr(>|t|)
## ATT 689288892 363394733 1.89680 0.0590800 .
## GDP_per 35895168 13261352 2.70675 0.0072931 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 491,383,681.9 Adj. R2: 0.782443
## Within R2: 0.484913
# Full disaggregation (you could have used summary instead of etable)
#head(etable(res_sa20, agg = FALSE), 100)
#summary(res_sa20, agg = FALSE)
# Pooled DiD with a single combined fixed effect, treated^year_treated.
# `treated` does not vary within that fixed effect, which is why the note
# below reports it as removed for collinearity.
est_comb <- feols(
  vc_sum ~ I(post * treated) + treated + GDP_per | treated^year_treated,
  news_MSA_VC
)
## The variable 'treated' has been removed because of collinearity (see $collin.var).
# NOTE(review): only one fixed effect is present, and the printed summary shows
# standard errors clustered on it alone; confirm "twoway" is what is intended.
summary(est_comb, vcov = "twoway")
## OLS estimation, Dep. Var.: vc_sum
## Observations: 4,248
## Fixed-effects: treated^year_treated: 13
## Standard-errors: Clustered (treated^year_treated)
## Estimate Std. Error t value Pr(>|t|)
## I(post * treated) 984310610 634091637 1.55232 0.146550
## GDP_per 31401262 14850525 2.11449 0.056082 .
## ... 1 variable was removed because of collinearity (treated)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 871,593,539.8 Adj. R2: 0.387156
## Within R2: 0.170276
#fixef(est_comb)[[1]]
# DiD interaction term: 1 only for treated MSAs in post-treatment periods.
news_MSA_VC["postxtreatment"] <- news_MSA_VC$post * news_MSA_VC$treated

# Same specification (interaction + per-capita GDP, MSA and round_year fixed
# effects) estimated with four different estimators.
res_fepois_vc <- fepois(
  vc_sum ~ postxtreatment + GDP_per | MSA + round_year,
  data = news_MSA_VC
)
res_feols_vc <- feols(
  vc_sum ~ postxtreatment + GDP_per | MSA + round_year,
  news_MSA_VC
)
# feglm() is called without a family argument.
res_feglm_vc <- feglm(
  vc_sum ~ postxtreatment + GDP_per | MSA + round_year,
  data = news_MSA_VC
)
res_fenegbin_vc <- fenegbin(
  vc_sum ~ postxtreatment + GDP_per | MSA + round_year,
  data = news_MSA_VC
)

# Column headers follow the order of the models. The second model is OLS and
# the fourth is negative binomial; they were previously labelled "Poisson"
# and "binomial".
etable(
  res_fepois_vc, res_feols_vc, res_feglm_vc, res_fenegbin_vc,
  cluster = "MSA",
  headers = c("Poisson", "OLS", "GLM", "Negative Binomial")
)
## res_fepois_vc res_feols_vc
## Poisson Poisson
## Dependent Var.: vc_sum vc_sum
##
## postxtreatment 0.6394*** (0.1897) 980,836,291.3 (610,024,338.4)
## GDP_per 0.0030 (0.0049) 49,266,092.6. (25,395,424.5)
## Fixed-Effects: ------------------ -----------------------------
## MSA Yes Yes
## round_year Yes Yes
## _______________ __________________ _____________________________
## Family Poisson OLS
## S.E.: Clustered by: MSA by: MSA
## Observations 4,248 4,248
## Squared Cor. 0.91234 0.66500
## Pseudo R2 0.91717 0.02457
## BIC 3.62e+11 186,531.2
## Over-dispersion -- --
##
## res_feglm_vc res_fenegbin_vc
## GLM binomial
## Dependent Var.: vc_sum vc_sum
##
## postxtreatment 980,836,291.3 (610,024,338.4) 0.3148 (0.3527)
## GDP_per 49,266,092.6. (25,395,424.5) -0.0087 (0.0154)
## Fixed-Effects: ----------------------------- ----------------
## MSA Yes Yes
## round_year Yes Yes
## _______________ _____________________________ ________________
## Family gaussian("identity") Neg. Bin.
## S.E.: Clustered by: MSA by: MSA
## Observations 4,248 4,248
## Squared Cor. 0.66500 0.74756
## Pseudo R2 0.02457 0.05354
## BIC 186,533.2 94,106.6
## Over-dispersion -- 0.14325
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# DiD interaction term: post x treated (reassigning it is idempotent).
news_MSA_VC["postxtreatment"] <- news_MSA_VC$post * news_MSA_VC$treated

# All three models below are Poisson fixed-effects regressions on count
# outcomes, so each object is named after its dependent variable rather than
# after an estimator it does not use.
res_pois_companies <- fepois(
  num_comp_per_aggreg ~ postxtreatment + GDP_per | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
res_pois_investors <- fepois(
  Investors ~ postxtreatment + GDP_per | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
res_pois_rounds <- fepois(
  sum_round_numbers ~ postxtreatment + GDP_per | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
#summary(res_sa20, cluster = "MSA")
etable(
  res_pois_companies, res_pois_investors, res_pois_rounds,
  cluster = "MSA",
  headers = c("Poisson", "Poisson", "Poisson")
)
## res_fepois res_fenegbin res_feols
## Poisson Poisson Poisson
## Dependent Var.: num_comp_per_aggreg Investors sum_round_numbers
##
## postxtreatment 0.1539 (0.1243) 0.3209. (0.1759) 0.0298 (0.1207)
## GDP_per -0.0082** (0.0026) -0.0064. (0.0036) -0.0144*** (0.0019)
## Fixed-Effects: ------------------- ----------------- -------------------
## MSA Yes Yes Yes
## year Yes Yes Yes
## _______________ ___________________ _________________ ___________________
## S.E.: Clustered by: MSA by: MSA by: MSA
## Observations 3,924 3,924 3,924
## Squared Cor. 0.97967 0.96534 0.97559
## Pseudo R2 0.94451 0.96775 0.96640
## BIC 14,868.1 28,291.7 38,976.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Number of investment rounds regressed on the DiD interaction alone (no GDP
# control), with MSA and year fixed effects, under three estimators.
res_fepois <- fepois(
  sum_round_numbers ~ postxtreatment | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
res_fenegbin <- fenegbin(
  sum_round_numbers ~ postxtreatment | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
res_feols <- feols(
  sum_round_numbers ~ postxtreatment | MSA + year,
  data = news_MSA_VC
)
#summary(res_sa20, cluster = "MSA")
# Side-by-side table, standard errors clustered by MSA.
etable(
  res_fepois, res_fenegbin, res_feols,
  cluster = "MSA",
  headers = c("Poisson", "Negative Binomial", "Gaussian")
)
## res_fepois res_fenegbin res_feols
## Poisson Negative Binomial Gaussian
## Dependent Var.: sum_round_numbers sum_round_numbers sum_round_numbers
##
## postxtreatment 0.0910 (0.1423) -0.0227 (0.1580) 105.7 (65.43)
## Fixed-Effects: ----------------- ----------------- -----------------
## MSA Yes Yes Yes
## year Yes Yes Yes
## _______________ _________________ _________________ _________________
## Family Poisson Neg. Bin. OLS
## S.E.: Clustered by: MSA by: MSA by: MSA
## Observations 3,924 3,924 4,248
## Squared Cor. 0.95627 0.88151 0.92286
## Pseudo R2 0.96258 0.24652 0.18419
## BIC 43,175.0 23,110.1 50,328.6
## Over-dispersion -- 1.7935 --
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Number of investors regressed on the DiD interaction alone (no GDP control),
# with MSA and year fixed effects, under three estimators.
res_fepois <- fepois(
  Investors ~ postxtreatment | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
res_fenegbin <- fenegbin(
  Investors ~ postxtreatment | MSA + year,
  data = news_MSA_VC
)
## NOTE: 18/0 fixed-effects (324 observations) removed because of only 0 outcomes.
res_feols <- feols(
  Investors ~ postxtreatment | MSA + year,
  data = news_MSA_VC
)
#summary(res_sa20, cluster = "MSA")
# Side-by-side table, standard errors clustered by MSA.
etable(
  res_fepois, res_fenegbin, res_feols,
  cluster = "MSA",
  headers = c("Poisson", "Negative Binomial", "Gaussian")
)
## res_fepois res_fenegbin res_feols
## Poisson Negative Binomial Gaussian
## Dependent Var.: Investors Investors Investors
##
## postxtreatment 0.3598* (0.1775) 0.0576 (0.1140) 78.66 (63.71)
## Fixed-Effects: ---------------- --------------- -------------
## MSA Yes Yes Yes
## year Yes Yes Yes
## _______________ ________________ _______________ _____________
## Family Poisson Neg. Bin. OLS
## S.E.: Clustered by: MSA by: MSA by: MSA
## Observations 3,924 3,924 4,248
## Squared Cor. 0.96295 0.93662 0.91541
## Pseudo R2 0.96701 0.29096 0.18387
## BIC 28,892.4 20,485.6 48,693.5
## Over-dispersion -- 3.6591 --
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The numbers changed after we dropped some MSAs due to the merge process with POP and GDP
library("MatchIt")
## Warning: package 'MatchIt' was built under R version 4.1.3
#m.out0 <- matchit(treated ~ vc_sum , data=news_MSA_VC,index=c("MSA", "round_year"),
# method = NULL, distance = "glm")
#summary(m.out0)
# All covariates at once.
# method = NULL with distance = "glm": the balance summary that follows is for
# the full, unmatched sample.
# NOTE(review): `index` is not a documented matchit() argument; confirm it has
# any effect here.
m.PSM <- matchit(
  treated ~ vc_sum + Investors + sum_round_numbers + num_comp_per_aggreg + GDP_per,
  data = news_MSA_VC,
  index = c("MSA", "round_year"),
  method = NULL,
  distance = "glm"
)
summary(m.PSM)
##
## Call:
## matchit(formula = treated ~ vc_sum + Investors + sum_round_numbers +
## num_comp_per_aggreg + GDP_per, data = news_MSA_VC, method = NULL,
## distance = "glm", index = c("MSA", "round_year"))
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.1682 0.1030 0.3317 19.3641
## vc_sum 821651853.5641 112237125.6032 0.2432 29.1936
## Investors 175.4915 23.6225 0.3075 19.9411
## sum_round_numbers 231.6004 37.7368 0.3240 14.9439
## num_comp_per_aggreg 50.5000 8.1161 0.3217 18.1540
## GDP_per 49.3726 46.0226 0.2761 1.0568
## eCDF Mean eCDF Max
## distance 0.1048 0.1538
## vc_sum 0.1932 0.2554
## Investors 0.1128 0.2640
## sum_round_numbers 0.1210 0.2540
## num_comp_per_aggreg 0.1017 0.2665
## GDP_per 0.0825 0.1468
##
##
## Sample Sizes:
## Control Treated
## All 3780 468
## Matched 3780 468
## Unmatched 0 0
## Discarded 0 0
# QQ plots of each covariate, treated vs. control, for the propensity-score fit.
plot(
  m.PSM,
  type = "qq",
  interactive = FALSE,
  which.xs = c(
    "vc_sum", "Investors", "sum_round_numbers",
    "num_comp_per_aggreg", "GDP_per"
  )
)
# Data set returned by match.data() for the propensity-score object. It was
# previously stored as `m.dataCem`, a name reused for the CEM matched data
# later in the script, so it gets its own name here.
m.dataPSM <- match.data(m.PSM)
# Plot of the balance summary.
plot(summary(m.PSM))
### Coarsened Exact Matching (method = "cem")
# The `distance` argument is not used with method = "cem" (matchit() warned
# that the previous `distance = "mahvars"` was ignored), so it is not passed.
# NOTE(review): `index` is not a documented matchit() argument; confirm it has
# any effect here.
m.Cem <- matchit(
  treated ~ vc_sum + GDP_per,
  data = news_MSA_VC,
  index = c("MSA", "round_year"),
  method = "cem"
)
## Warning: The argument 'distance' is not used with method = "cem" and will be
## ignored.
#summary(m.Cem)
# Matched sample from the CEM fit; the regression below uses its `weights`
# column.
m.dataCem <- match.data(m.Cem)
# Weighted OLS of VC investment on treatment with MSA and year dummies.
# `index` is not an lm() argument (lm() warned that it was disregarded), so it
# is no longer passed; the fitted model is unchanged.
fit.cem <- lm(
  vc_sum ~ treated + factor(MSA) + factor(round_year),
  data = m.dataCem,
  weights = weights
)
## Warning: In lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok,
## ...) :
## extra argument 'index' will be disregarded
#coeftest(fit.cem, vcov. = vcovCL, cluster = ~subclass)
# Nearest-neighbour matching on a glm-estimated distance, using the four VC
# outcome variables as covariates (no GDP_per in this specification).
# NOTE(review): `index` is not a documented matchit() argument; confirm it has
# any effect here.
m.nearest <- matchit(
  treated ~ vc_sum + Investors + sum_round_numbers + num_comp_per_aggreg,
  data = news_MSA_VC,
  index = c("MSA", "round_year"),
  method = "nearest",
  distance = "glm"
)
summary(m.nearest)
##
## Call:
## matchit(formula = treated ~ vc_sum + Investors + sum_round_numbers +
## num_comp_per_aggreg, data = news_MSA_VC, method = "nearest",
## distance = "glm", index = c("MSA", "round_year"))
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.1654 0.1033 0.3289 16.3627
## vc_sum 821651853.5641 112237125.6032 0.2432 29.1936
## Investors 175.4915 23.6225 0.3075 19.9411
## sum_round_numbers 231.6004 37.7368 0.3240 14.9439
## num_comp_per_aggreg 50.5000 8.1161 0.3217 18.1540
## eCDF Mean eCDF Max
## distance 0.1837 0.2680
## vc_sum 0.1932 0.2554
## Investors 0.1128 0.2640
## sum_round_numbers 0.1210 0.2540
## num_comp_per_aggreg 0.1017 0.2665
##
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.1654 0.1397 0.1363 2.5713
## vc_sum 821651853.5641 418678120.6581 0.1382 5.9291
## Investors 175.4915 98.0470 0.1568 2.9993
## sum_round_numbers 231.6004 147.5085 0.1405 2.4760
## num_comp_per_aggreg 50.5000 31.0235 0.1478 2.9573
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.0020 0.0577 0.1366
## vc_sum 0.0254 0.0491 0.1903
## Investors 0.0335 0.0769 0.1874
## sum_round_numbers 0.0216 0.0662 0.1906
## num_comp_per_aggreg 0.0294 0.0662 0.1703
##
## Sample Sizes:
## Control Treated
## All 3780 468
## Matched 468 468
## Unmatched 3312 0
## Discarded 0 0