Data (State level)

pa <- subset(csp, state == 'PA' | state == 'SC')

pa$closure_date[pa$round_year < pa$closure_date] <- 0
pa$closure_date[pa$closure_date == '2013']<- 1
pa$closure_date[pa$state == 'SC' & pa$round_year < '2013' ]<- 0
pa$closure_date[pa$state == 'SC' & pa$round_year >= '2013' ]<- 1

pa$state2[pa$state == 'SC']<- 0
pa$state2[pa$state == 'PA']<- 1

Basic Analysis (State)

fit.pa <- lm(log(vc_sum) ~ state2 + closure_date + I(closure_date*state2) , data=pa)
summary(fit.pa)
## 
## Call:
## lm(formula = log(vc_sum) ~ state2 + closure_date + I(closure_date * 
##     state2), data = pa)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.085  -9.661   4.380   8.096  10.547 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 9.661      1.207   8.001 1.04e-12 ***
## state2                      2.424      1.708   1.419   0.1585    
## closure_date                7.798      3.535   2.206   0.0294 *  
## I(closure_date * state2)    1.039      4.999   0.208   0.8357    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.79 on 116 degrees of freedom
## Multiple R-squared:  0.1051, Adjusted R-squared:  0.08199 
## F-statistic: 4.543 on 3 and 116 DF,  p-value: 0.004766

The effect is insignificant and equals to 1.039

MSA level data

MSA <- read.csv("../data/news_vc_msa_missing.csv",fileEncoding = 'UTF-8')
MSA_NY_VT <- subset(MSA, MSA == 'Burlington, VT' | MSA == 'Buffalo-Niagara Falls, NY')

MSA_NY_VT$closure_date[MSA_NY_VT$round_year < MSA_NY_VT$closure_date] <- 0
MSA_NY_VT$closure_date[MSA_NY_VT$MSA == 'Burlington, VT' & MSA_NY_VT$round_year < '2015']<- 0
MSA_NY_VT$closure_date[MSA_NY_VT$MSA == 'Burlington, VT' & MSA_NY_VT$round_year >= '2015']<- 1
MSA_NY_VT$closure_date[MSA_NY_VT$closure_date == '2015']<- 1
MSA_NY_VT$MSA_binary[MSA_NY_VT$MSA == 'Burlington, VT']<- 0
MSA_NY_VT$MSA_binary[MSA_NY_VT$MSA == 'Buffalo-Niagara Falls, NY']<- 1

#Basic Visualization
ggplot(MSA_NY_VT, mapping = aes(x = log(vc_sum)))+ geom_histogram()+
  labs(y= "Frequency", x = "Dollars Invested", title = "Between two MSAs")+
  scale_x_continuous(labels = trans_format("exp", format = round))+
  facet_wrap(vars(MSA_binary))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(MSA_NY_VT, mapping = aes(x= MSA_binary, y = vc_sum))+
  stat_summary(geom = "pointrange", fun.data = "mean_se", fun.args = list(mult = 1.96 ))+ facet_wrap(vars(closure_date))

Almost no difference here

Basic Analysis MSA level

fit <- plm(log(vc_sum)~ MSA_binary + closure_date + I(closure_date*MSA_binary), data=MSA_NY_VT,index=c("MSA", "round_year"), model="pooling", effect="individual")
summary(fit)
## Pooling Model
## 
## Call:
## plm(formula = log(vc_sum) ~ MSA_binary + closure_date + I(closure_date * 
##     MSA_binary), data = MSA_NY_VT, effect = "individual", model = "pooling", 
##     index = c("MSA", "round_year"))
## 
## Unbalanced Panel: n = 2, T = 31-37, N = 68
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -4.23962 -1.11322  0.21215  1.25746  3.30642 
## 
## Coefficients:
##                              Estimate Std. Error t-value Pr(>|t|)    
## (Intercept)                  8.441544   0.365048 23.1245  < 2e-16 ***
## MSA_binary                   0.046575   0.489764  0.0951  0.92453    
## closure_date                 1.791430   0.768214  2.3319  0.02286 *  
## I(closure_date * MSA_binary) 0.675760   1.074083  0.6292  0.53149    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    256.83
## Residual Sum of Squares: 204.69
## R-Squared:      0.20301
## Adj. R-Squared: 0.16566
## F-statistic: 5.43419 on 3 and 64 DF, p-value: 0.0021583

Fixed Effect and diff-in-diff

fit <- plm(log(vc_sum)~ MSA_binary + closure_date + I(closure_date*MSA_binary), data=MSA_NY_VT,index=c("MSA", "round_year"), model="within", effect="individual")
summary(fit)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = log(vc_sum) ~ MSA_binary + closure_date + I(closure_date * 
##     MSA_binary), data = MSA_NY_VT, effect = "individual", model = "within", 
##     index = c("MSA", "round_year"))
## 
## Unbalanced Panel: n = 2, T = 31-37, N = 68
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -4.23962 -1.11322  0.21215  1.25746  3.30642 
## 
## Coefficients:
##                              Estimate Std. Error t-value Pr(>|t|)  
## closure_date                  1.79143    0.76821  2.3319  0.02286 *
## I(closure_date * MSA_binary)  0.67576    1.07408  0.6292  0.53149  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    256.63
## Residual Sum of Squares: 204.69
## R-Squared:      0.20239
## Adj. R-Squared: 0.16501
## F-statistic: 8.12005 on 2 and 64 DF, p-value: 0.00071983

Manual diff-in-diff (hard check)

diffs <- MSA_NY_VT %>% 
  group_by(closure_date, MSA_binary) %>% 
  summarize(mean_vc = mean(log(vc_sum)),
            # Calculate average with regular duration too, just for fun
            mean_vc_raw = mean(vc_sum))
## `summarise()` has grouped output by 'closure_date'. You can override using the `.groups` argument.
diffs
## # A tibble: 4 x 4
## # Groups:   closure_date [2]
##   closure_date MSA_binary mean_vc mean_vc_raw
##          <dbl>      <dbl>   <dbl>       <dbl>
## 1            0          0    8.44      10764.
## 2            0          1    8.49      18935.
## 3            1          0   10.2      100866.
## 4            1          1   11.0      111178.
before_treatment <- diffs %>% 
  filter(closure_date == 0, MSA_binary == 1 ) %>% 
  pull(mean_vc)

after_treatment <- diffs %>% 
  filter(closure_date == 1, MSA_binary == 1 ) %>% 
  pull(mean_vc)

before_control <- diffs %>% 
  filter(closure_date == 0, MSA_binary == 0 ) %>% 
  pull(mean_vc)

after_control <- diffs %>% 
  filter(closure_date == 1, MSA_binary == 0 ) %>% 
  pull(mean_vc)

diff_treatment_before_after <- after_treatment - before_treatment
diff_control_before_after <- after_control - before_control
diff_diff <- diff_treatment_before_after - diff_control_before_after

Some nice visualization (high biased though)

ggplot(diffs, aes(x = as.factor(closure_date), 
                  y = mean_vc, 
                  color = as.factor(MSA_binary))) + 
  geom_point() +
  geom_line(aes(group = as.factor(MSA_binary))) +
  # If you use these lines you'll get some extra annotation lines and
  # labels. The annotate() function lets you put stuff on a ggplot that's not
  # part of a dataset. Normally with geom_line, geom_point, etc., you have to
  # plot data that is in columns. With annotate() you can specify your own x and
  # y values.
  annotate(geom = "segment", x = "0", xend = "1",
           y = before_treatment, yend = after_treatment - diff_diff,
           linetype = "dashed", color = "grey50") +
  annotate(geom = "segment", x = "1", xend = "1",
           y = after_treatment, yend = after_treatment - diff_diff,
           linetype = "dotted", color = "blue") +
  annotate(geom = "label", x = "1", y = after_treatment - (diff_diff / 2), 
           label = "newspaper closures effect", size = 3)