Introduction

This is the preliminary study of COVID-19 Vaccines and Economic Recovery in Ecuador. Through empirical models, I aim to determine if there is any sign of a causal effect of vaccination in economic recovery. I will study this phenonemon from 2020 to 2021 in Ecuador. I try to implement a difference-in-differences design using several variables I created as treatments.

Data wrangling

Load all preliminary stuff:

# Load libraries
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sandwich)
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.1.3
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(modelsummary)
## Warning: package 'modelsummary' was built under R version 4.1.3
library(fwildclusterboot)
## Warning: package 'fwildclusterboot' was built under R version 4.1.3
library(fixest)
library(plm)
## Warning: package 'plm' was built under R version 4.1.3
## 
## Attaching package: 'plm'
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
library(estimatr)
## Warning: package 'estimatr' was built under R version 4.1.3
library(psych)
## Warning: package 'psych' was built under R version 4.1.3
## 
## Attaching package: 'psych'
## The following object is masked from 'package:modelsummary':
## 
##     SD
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.1.3
library(corrplot)
## corrplot 0.92 loaded
library(stats)
# Load the data
df<-read.csv('../data/clean_data.csv')

I need to wrangle the data before I can start running models.

# Create province factors (for the clustered std. errors)

df$prov<-as.factor(df$province)

# Create post dummy variable 

df$post<-ifelse(df$post == 'post',1,0)

# Create the antivax factor variable

df$antivax<-as.factor(df$antivax)

# Convert the month factor to a factor

df$month<-as.factor(df$month)

# Create a year factor

df$year<-as.factor(df$year)

# Conservate a year numeric variable

df$year_n<-as.numeric(df$year)

# Change the reference level to PICHINCHA

df$prov<-relevel(df$prov, 'PICHINCHA')

I will also create a panel data frame with the plm package

panel_df<-pdata.frame(df, index = 24)
pdim(panel_df)
## Balanced Panel: n = 24, T = 24, N = 576

Exploratory analysis

Graphs

Correlograms

I want to plot a simple correlation matrix for my variables, see what’s what.

# First create a subset of the data so that the correlation matrix makes sense

df_cor<- df %>% 
         select(buss, vax_leastone, vax_complete, deaths, excessd, excessd_rate, total_cases, case_rate, new_cases,
                new_case_rate, taxes, taxpayers, sales, month_number, transit_acc, v_deaths, year_n, 
                robbery, tax_pc, sales_pc_k, job_change, m_retail, m_grocery, m_parks, m_transit, m_workplace, 
                m_residential)

cor_table<-cor(df_cor,
               use = 'complete.obs') 

corrplot(cor_table,
         method = 'square',
         title = 'Correlogram')

Now I’d like to separate the data by year, then draw the same corrplots to see if there is any difference.

For 2020:

df_20<-subset(df, df$year == 2020)

df_20_cor <- df_20 %>% 
             select(buss, deaths, excessd, excessd_rate, total_cases, case_rate, new_cases,
                new_case_rate, taxes, taxpayers, sales, month_number, transit_acc, v_deaths,
                robbery, tax_pc, sales_pc_k, job_change, m_retail, m_grocery, m_parks, m_transit, m_workplace, 
                m_residential)

cor_table_20<-cor(df_20_cor,
               use = 'complete.obs') 

corrplot(cor_table_20,
         method = 'square',
         title = 'Correlogram')

For 2021:

df_21<-subset(df, df$year == 2021)

df_21_cor <- df_21 %>% 
             select(buss, deaths, vax_leastone, vax_complete, excessd, excessd_rate, total_cases, case_rate, new_cases,
                new_case_rate, taxes, taxpayers, sales, month_number, transit_acc, v_deaths,
                robbery, tax_pc, sales_pc_k, job_change, m_retail, m_grocery, m_parks, m_transit, m_workplace, 
                m_residential)

cor_table_21<-cor(df_21_cor,
               use = 'complete.obs') 

corrplot(cor_table_21,
         method = 'square',
         title = 'Correlogram')

# Fixed FX Models with vaccination rate treatment

For this section, I try to establish a differences in differences design using the vaccination rate as my treatment variable.

Run a model which includes the most basic variables. Also run the bootstrapping.

# Model
easy<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete | prov + month,
            cluster = ~ prov,
            data = df)

# Bootstrapping test
easy_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + month + prov, data = df),
                    clustid = 'prov',
                    param = 'post:vax_leastone',
                    B = 9999)
summary(easy_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + month + prov, data = df), 
##     clustid = "prov", param = "post:vax_leastone", B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 576
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Add the google workplace mobility trend

easy_g<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace| prov + month,
            cluster = ~ prov,
            data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(easy_g)
## OLS estimation, Dep. Var.: buss
## Observations: 528 
## Fixed-effects: prov: 24,  month: 12
## Standard-errors: Clustered (prov) 
##                     Estimate Std. Error  t value   Pr(>|t|)    
## post              -59.222826  37.583688 -1.57576 1.2874e-01    
## vax_leastone       -0.397713   0.145396 -2.73539 1.1791e-02 *  
## deaths             -0.032692   0.007833 -4.17388 3.6503e-04 ***
## year2021           11.244422   6.870433  1.63664 1.1532e-01    
## v_deaths            0.665070   0.082858  8.02662 4.0514e-08 ***
## vax_complete        0.103996   0.091052  1.14217 2.6514e-01    
## m_workplace         1.062721   0.743885  1.42861 1.6656e-01    
## post:vax_leastone   0.824555   0.507050  1.62618 1.1754e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 33.2     Adj. R2: 0.909515
##              Within R2: 0.325871
# Bootstrapping test
easyg_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + month + prov + 
                       m_workplace, data = df),
                    clustid = 'prov',
                    param = 'post:vax_leastone',
                    B = 9999)
summary(easyg_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + month + prov + m_workplace, 
##     data = df), clustid = "prov", param = "post:vax_leastone", 
##     B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 528
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Now consider a model with some more variables.

complex_g<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + robbery + 
                 transit_acc + job_change + tax_pc + sales_pc_k| prov + month,
                 cluster = ~ prov,
                 data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(complex_g)
## OLS estimation, Dep. Var.: buss
## Observations: 528 
## Fixed-effects: prov: 24,  month: 12
## Standard-errors: Clustered (prov) 
##                     Estimate Std. Error   t value   Pr(>|t|)    
## post              -49.351079  25.900838 -1.905385 0.06930653 .  
## vax_leastone       -0.438601   0.158900 -2.760228 0.01114071 *  
## deaths             -0.028866   0.006524 -4.424869 0.00019524 ***
## year2021           15.954292   7.890628  2.021929 0.05496624 .  
## v_deaths            0.419577   0.102646  4.087628 0.00045245 ***
## vax_complete        0.161515   0.108851  1.483819 0.15143262    
## m_workplace         0.857805   0.668856  1.282496 0.21244431    
## robbery             0.130904   0.057410  2.280153 0.03219164 *  
## transit_acc         0.028918   0.314615  0.091915 0.92756136    
## job_change         -0.000963   0.001915 -0.502653 0.61998726    
## tax_pc             -0.032938   0.033958 -0.969957 0.34215817    
## sales_pc_k          0.006537   0.013408  0.487533 0.63049565    
## post:vax_leastone   0.699511   0.349951  1.998881 0.05757368 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 31.1     Adj. R2: 0.919734
##              Within R2: 0.408172
complexg_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + robbery + 
                           transit_acc + job_change + tax_pc + sales_pc_k + prov + month,
                           data = df),
                        clustid = 'prov',
                        param = 'post:vax_leastone',
                        B = 9999)
summary(complexg_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + m_workplace + robbery + 
##     transit_acc + job_change + tax_pc + sales_pc_k + prov + month, 
##     data = df), clustid = "prov", param = "post:vax_leastone", 
##     B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 528
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Add the contagion data to see what happens

complex_g_cases<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + robbery + 
                       transit_acc + job_change + tax_pc + sales_pc_k + total_cases| prov + month,
                 cluster = ~ prov,
                 data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(complex_g_cases)
## OLS estimation, Dep. Var.: buss
## Observations: 528 
## Fixed-effects: prov: 24,  month: 12
## Standard-errors: Clustered (prov) 
##                    Estimate Std. Error   t value   Pr(>|t|)    
## post              -6.966022  15.892820 -0.438313 6.6525e-01    
## vax_leastone      -0.577637   0.265781 -2.173358 4.0301e-02 *  
## deaths            -0.026079   0.007101 -3.672780 1.2631e-03 ** 
## year2021          12.307630   7.616962  1.615819 1.1977e-01    
## v_deaths           0.424558   0.114854  3.696517 1.1915e-03 ** 
## vax_complete       0.161602   0.102385  1.578368 1.2814e-01    
## m_workplace        0.240887   0.234353  1.027881 3.1469e-01    
## robbery            0.087062   0.055943  1.556272 1.3330e-01    
## transit_acc        0.024398   0.320965  0.076015 9.4006e-01    
## job_change        -0.002813   0.000688 -4.089478 4.5037e-04 ***
## tax_pc            -0.029253   0.029019 -1.008059 3.2391e-01    
## sales_pc_k         0.006453   0.011697  0.551718 5.8646e-01    
## total_cases        0.001394   0.000073 19.087074 1.3356e-15 ***
## post:vax_leastone  0.187568   0.192643  0.973658 3.4036e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 24.7     Adj. R2: 0.949489
##              Within R2: 0.628343
complexgcase_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + 
                               + robbery + transit_acc + job_change + tax_pc + sales_pc_k + prov + month,
                               data = df),
                               clustid = 'prov',
                            param = 'post:vax_leastone',
                            B = 9999)
summary(complexg_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + m_workplace + robbery + 
##     transit_acc + job_change + tax_pc + sales_pc_k + prov + month, 
##     data = df), clustid = "prov", param = "post:vax_leastone", 
##     B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 528
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Replaces the total cases with new cases

complex_newc<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + robbery + 
                       transit_acc + job_change + tax_pc + sales_pc_k + new_cases| prov + month,
                 cluster = ~ prov,
                 data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(complex_newc)
## OLS estimation, Dep. Var.: buss
## Observations: 528 
## Fixed-effects: prov: 24,  month: 12
## Standard-errors: Clustered (prov) 
##                     Estimate Std. Error    t value   Pr(>|t|)    
## post              -39.491855  28.199999  -1.400420 1.7473e-01    
## vax_leastone       -0.099877   0.085545  -1.167537 2.5496e-01    
## deaths             -0.039417   0.003011 -13.090009 3.8247e-12 ***
## year2021            3.846905   6.874196   0.559615 5.8115e-01    
## v_deaths            0.654225   0.077467   8.445198 1.6729e-08 ***
## vax_complete        0.083940   0.075562   1.110882 2.7810e-01    
## m_workplace         0.557996   0.504120   1.106870 2.7979e-01    
## robbery             0.091007   0.064712   1.406346 1.7299e-01    
## transit_acc         0.167553   0.379207   0.441850 6.6272e-01    
## job_change         -0.000741   0.002443  -0.303449 7.6428e-01    
## tax_pc             -0.034958   0.034049  -1.026708 3.1524e-01    
## sales_pc_k          0.002569   0.010797   0.237923 8.1405e-01    
## new_cases           0.013984   0.001111  12.586399 8.4794e-12 ***
## post:vax_leastone   0.604973   0.450898   1.341708 1.9279e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 26.6     Adj. R2: 0.941294
##              Within R2: 0.568042
complex_newc_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + 
                               + robbery + transit_acc + job_change + tax_pc + sales_pc_k + new_cases + prov + month,
                               data = df),
                            clustid = 'prov',
                            param = 'post:vax_leastone',
                            B = 9999)
summary(complex_newc_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + m_workplace + +robbery + 
##     transit_acc + job_change + tax_pc + sales_pc_k + new_cases + 
##     prov + month, data = df), clustid = "prov", param = "post:vax_leastone", 
##     B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 528
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Replace with total case rates

complex_c1<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + robbery + 
                       transit_acc + job_change + tax_pc + sales_pc_k + case_rate| prov + month,
                 cluster = ~ prov,
                 data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(complex_c1)
## OLS estimation, Dep. Var.: buss
## Observations: 528 
## Fixed-effects: prov: 24,  month: 12
## Standard-errors: Clustered (prov) 
##                     Estimate Std. Error   t value   Pr(>|t|)    
## post               -9.293379  29.642811 -0.313512 7.5672e-01    
## vax_leastone       -0.569742   0.266495 -2.137905 4.3377e-02 *  
## deaths             -0.030176   0.005250 -5.747477 7.4545e-06 ***
## year2021          -16.465918  35.700225 -0.461227 6.4897e-01    
## v_deaths            0.453461   0.132783  3.415045 2.3705e-03 ** 
## vax_complete        0.140101   0.117343  1.193942 2.4467e-01    
## m_workplace         0.951692   0.658680  1.444847 1.6199e-01    
## robbery             0.123458   0.049629  2.487628 2.0547e-02 *  
## transit_acc         0.030302   0.315506  0.096042 9.2432e-01    
## job_change         -0.000858   0.001463 -0.586671 5.6314e-01    
## tax_pc             -0.032224   0.032465 -0.992572 3.3125e-01    
## sales_pc_k          0.001046   0.009783  0.106894 9.1580e-01    
## case_rate          15.559659  15.535095  1.001581 3.2697e-01    
## post:vax_leastone   0.313038   0.273591  1.144183 2.6432e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 30.3     Adj. R2: 0.923656
##              Within R2: 0.438263
complex_c1_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + 
                               + robbery + transit_acc + job_change + tax_pc + sales_pc_k + case_rate + prov + month,
                               data = df),
                            clustid = 'prov',
                            param = 'post:vax_leastone',
                            B = 9999)
summary(complex_c1_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + m_workplace + +robbery + 
##     transit_acc + job_change + tax_pc + sales_pc_k + case_rate + 
##     prov + month, data = df), clustid = "prov", param = "post:vax_leastone", 
##     B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 528
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Replace with new case rates.

complex_c1_new<-feols(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + robbery + 
                       transit_acc + job_change + tax_pc + sales_pc_k + new_case_rate| prov + month,
                 cluster = ~ prov,
                 data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(complex_c1_new)
## OLS estimation, Dep. Var.: buss
## Observations: 528 
## Fixed-effects: prov: 24,  month: 12
## Standard-errors: Clustered (prov) 
##                     Estimate Std. Error   t value   Pr(>|t|)    
## post              -44.142282  21.766730 -2.027970 0.05430043 .  
## vax_leastone       -0.225483   0.152385 -1.479689 0.15252409    
## deaths             -0.030135   0.005486 -5.492946 0.00001385 ***
## year2021            8.077152  11.020444  0.732924 0.47100898    
## v_deaths            0.411773   0.103089  3.994334 0.00057051 ***
## vax_complete        0.150223   0.102724  1.462391 0.15716512    
## m_workplace         0.829482   0.638723  1.298657 0.20693205    
## robbery             0.126770   0.055849  2.269850 0.03290356 *  
## transit_acc         0.041600   0.312223  0.133240 0.89516283    
## job_change         -0.000979   0.001961 -0.499522 0.62215677    
## tax_pc             -0.031787   0.032889 -0.966488 0.34385330    
## sales_pc_k          0.001234   0.009261  0.133212 0.89518467    
## new_case_rate      44.453050  34.622905  1.283920 0.21195385    
## post:vax_leastone   0.585183   0.275988  2.120319 0.04497957 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 30.8     Adj. R2: 0.921284
##              Within R2: 0.420804
complex_c1new_boot<-boottest(lm(buss ~ post*vax_leastone + deaths + year + v_deaths + vax_complete + m_workplace + 
                               + robbery + transit_acc + job_change + tax_pc + sales_pc_k + new_case_rate + prov + month,
                               data = df),
                            clustid = 'prov',
                            param = 'post:vax_leastone',
                            B = 9999)
summary(complex_c1new_boot)
## boottest.lm(object = lm(buss ~ post * vax_leastone + deaths + 
##     year + v_deaths + vax_complete + m_workplace + +robbery + 
##     transit_acc + job_change + tax_pc + sales_pc_k + new_case_rate + 
##     prov + month, data = df), clustid = "prov", param = "post:vax_leastone", 
##     B = 9999)
##  
##  Hypothesis: 1*post:vax_leastone = 0
##  Observations: 528
##  Bootstr. Iter: 9999
##  Bootstr. Type: rademacher
##  Clustering: 1-way
##  Confidence Sets: 95%
##  Number of Clusters: 24
## 

Show a table with all of the models that were estimated:

modelsummary(list(easy, easy_g, complex_g, complex_g_cases, complex_newc, complex_c1,complex_c1_new),
             stars = c('*' = .1, '**'=0.05, '***' = .01))
Model 1 Model 2 Model 3 Model 4 Model 5 Model 6 Model 7
post -62.169 -59.223 -49.351* -6.966 -39.492 -9.293 -44.142*
(43.025) (37.584) (25.901) (15.893) (28.200) (29.643) (21.767)
vax_leastone -0.313** -0.398** -0.439** -0.578** -0.100 -0.570** -0.225
(0.123) (0.145) (0.159) (0.266) (0.086) (0.266) (0.152)
deaths -0.032*** -0.033*** -0.029*** -0.026*** -0.039*** -0.030*** -0.030***
(0.007) (0.008) (0.007) (0.007) (0.003) (0.005) (0.005)
year2021 27.858** 11.244 15.954* 12.308 3.847 -16.466 8.077
(12.233) (6.870) (7.891) (7.617) (6.874) (35.700) (11.020)
v_deaths 0.807*** 0.665*** 0.420*** 0.425*** 0.654*** 0.453*** 0.412***
(0.047) (0.083) (0.103) (0.115) (0.077) (0.133) (0.103)
vax_complete 0.111 0.104 0.162 0.162 0.084 0.140 0.150
(0.102) (0.091) (0.109) (0.102) (0.076) (0.117) (0.103)
post × vax_leastone 0.812 0.825 0.700* 0.188 0.605 0.313 0.585**
(0.554) (0.507) (0.350) (0.193) (0.451) (0.274) (0.276)
m_workplace 1.063 0.858 0.241 0.558 0.952 0.829
(0.744) (0.669) (0.234) (0.504) (0.659) (0.639)
robbery 0.131** 0.087 0.091 0.123** 0.127**
(0.057) (0.056) (0.065) (0.050) (0.056)
transit_acc 0.029 0.024 0.168 0.030 0.042
(0.315) (0.321) (0.379) (0.316) (0.312)
job_change -0.001 -0.003*** -0.001 -0.001 -0.001
(0.002) (0.001) (0.002) (0.001) (0.002)
tax_pc -0.033 -0.029 -0.035 -0.032 -0.032
(0.034) (0.029) (0.034) (0.032) (0.033)
sales_pc_k 0.007 0.006 0.003 0.001 0.001
(0.013) (0.012) (0.011) (0.010) (0.009)
total_cases 0.001***
(0.000)
new_cases 0.014***
(0.001)
case_rate 15.560
(15.535)
new_case_rate 44.453
(34.623)
Num.Obs. 576 528 528 528 528 528 528
R2 0.906 0.917 0.927 0.954 0.947 0.931 0.928
R2 Adj. 0.899 0.910 0.920 0.949 0.941 0.924 0.921
R2 Within 0.264 0.326 0.408 0.628 0.568 0.438 0.421
R2 Pseudo
AIC 5794.8 5283.6 5224.9 4981.2 5060.6 5199.3 5215.5
BIC 5977.8 5467.2 5429.8 5190.4 5269.8 5408.5 5424.7
Log.Lik. -2855.405 -2598.809 -2564.435 -2441.610 -2481.304 -2550.659 -2558.739
Std.Errors by: prov by: prov by: prov by: prov by: prov by: prov by: prov
FE: prov X X X X X X X
FE: month X X X X X X X
* p < 0.1, ** p < 0.05, *** p < 0.01

Now a table with all of the bootstrap stuff:

modelsummary(list(easy_boot, easyg_boot, complexg_boot, complexgcase_boot, complex_newc_boot, 
                  complex_c1_boot, complex_c1new_boot), 
             estimate = "{estimate} ({p.value})", 
             statistic = "[{conf.low}, {conf.high}]")
Model 1 Model 2 Model 3 Model 4 Model 5 Model 6 Model 7
1*post × vax_leastone = 0 0.812 (0.021) 0.825 (0.003) 0.700 (0.002) 0.700 (0.002) 0.605 (0.052) 0.313 (0.245) 0.585 (0.017)
[0.058, 2.289] [0.145, 2.208] [0.174, 1.605] [0.174, 1.618] [-0.003, 1.728] [-0.276, 0.823] [0.089, 1.238]
Num.Obs. 576 528 528 528 528 528 528
R2 0.906 0.917 0.927 0.927 0.947 0.931 0.928
R2 Adj. 0.899 0.910 0.920 0.920 0.941 0.924 0.921
AIC 5796.8 5285.6 5226.9 5226.9 5062.6 5201.3 5217.5
BIC 5984.1 5473.5 5436.1 5436.1 5276.1 5414.8 5430.9
Log.Lik. -2855.405 -2598.809 -2564.435 -2564.435 -2481.304 -2550.659 -2558.739

Fixed FX Models with antivax-provax dummy as treatment and ln(1+x) bussiness creation

Here, I use an antivax-provax dummy as my treatment variable. I’d like to see if provinces which I’ve labelled as “anti-vax” have a tendency to create lesser businesses. I’ve based this indicator in the number of cases per capita reported for sicknesses that could have been otherwise prevented by a vaccine (Hep-B, parotiditis, etc.). I use TWFE for this.

Data Wrangling

I will change the way I use my dependent variable and apply a ln(1+x) transformation. Also, in order to not have a collinearity problem, I can’t estimate the treatment effect, as it is a combination of several province dummies. What I will do is simply include the period and treatment interaction.

# Create the log transformed variable

df$lbuss<-log(df$buss+1) 

# Create the interaction

df$did<-df$post* df$antivax_dic

Models

Now I estimate a very simple model just considering the DiD estimator and corresponding FX dummies. Clustering is done by province and months now. I do the wild bootstrap test on the DiD estimator as well.

First, a simple model simply considering the DiD estimator, the period dummy and the corresponding fixed effects (not shown). Also, compare it with one that uses the interactions between year and month.

simple_twfe<-feols(lbuss ~ post + did | prov + month,
                   data = df,
                   vcov = ~ prov + month)

boot_twfe<-boottest(simple_twfe,
                    clustid= c('prov','month') ,
                    param = 'did',
                    B = 9999,
                    seed = 1) 

simple_twfe_int<-feols(lbuss ~ post + did | prov^month,
                       data = df,
                       vcov = ~ prov + month)

simple_twfe_int_lm<-lm(lbuss ~ post + did + prov*month, 
                       data =df)

boot_twfe_int<-boottest(simple_twfe_int_lm,
                    clustid= c('prov','month') ,
                    param = 'did',
                    B = 9999,
                    seed = 1) 

A more complex one adding the most obvious variables, and an important month times province dummy

asked_twfe<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year | prov^month,
                  cluster = ~ prov + month,
                  data = df)
summary(asked_twfe)
## OLS estimation, Dep. Var.: lbuss
## Observations: 576 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##               Estimate Std. Error   t value  Pr(>|t|)    
## post         -0.097087   0.231514 -0.419358 0.6830272    
## did          -0.150728   0.059165 -2.547567 0.0271123 *  
## vax_leastone -0.009508   0.006487 -1.465840 0.1706854    
## vax_complete  0.001047   0.002574  0.406900 0.6918869    
## excessd      -0.000419   0.000056 -7.454530 0.0000127 ***
## year2021      0.982761   0.350975  2.800093 0.0172719 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.4297     Adj. R2: 0.811775
##                Within R2: 0.43257
asked_twfe_lm<-lm(lbuss ~ post + did + prov*month + vax_leastone + vax_complete + excessd + year,
                  data = df)

boot_asasked<-boottest(asked_twfe_lm,
                       clustid = c('prov', 'month'),
                       param = 'did',
                       B = 9999,
                       seed = 1)

Add the google mobility data to these models.

# With workplace
asked_twfe_g<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_workplace| prov^month,
                    cluster = ~ prov + month,
                    data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
summary(asked_twfe_g)
## OLS estimation, Dep. Var.: lbuss
## Observations: 528 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##               Estimate Std. Error   t value   Pr(>|t|)    
## post         -0.022377   0.191752 -0.116696 9.0920e-01    
## did           0.020190   0.088346  0.228534 8.2342e-01    
## vax_leastone -0.013291   0.003135 -4.238947 1.3913e-03 ** 
## vax_complete  0.001201   0.002518  0.477005 6.4269e-01    
## excessd      -0.000220   0.000028 -7.733325 9.0035e-06 ***
## year2021      0.272785   0.199341  1.368435 1.9848e-01    
## m_workplace   0.047301   0.007005  6.752416 3.1472e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.304879     Adj. R2: 0.896937
##                  Within R2: 0.724328
asked_twfeg_lm<-lm(lbuss ~ post + did + prov*month + vax_leastone + vax_complete + excessd + year + m_workplace,
                    data = df)

boot_g<-boottest(asked_twfeg_lm,
                  clustid = c('prov', 'month'),
                  param = 'did',
                  B = 9999,
                  seed = 1)

# With parks

asked_twfe_g1<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_parks| prov^month,
                    cluster = ~ prov + month,
                    data = df)
## NOTE: 48 observations removed because of NA values (RHS: 48).
## Variance contained negative values in the diagonal and was 'fixed' (a la Cameron, Gelbach & Miller 2011).
summary(asked_twfe_g1)
## OLS estimation, Dep. Var.: lbuss
## Observations: 528 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##               Estimate Std. Error   t value   Pr(>|t|)    
## post          0.249830   0.199703  1.251010 2.3688e-01    
## did          -0.263004   0.093488 -2.813230 1.6871e-02 *  
## vax_leastone -0.022115   0.005801 -3.812308 2.8812e-03 ** 
## vax_complete  0.002287   0.002680  0.853361 4.1166e-01    
## excessd      -0.000339   0.000038 -8.867956 2.4217e-06 ***
## year2021      0.978223   0.235471  4.154324 1.6047e-03 ** 
## m_parks       0.015990   0.007399  2.161088 5.3603e-02 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.347317     Adj. R2: 0.866248
##                  Within R2: 0.642242
asked_twfeg1_lm<-lm(lbuss ~ post + did + prov*month + vax_leastone + vax_complete + excessd + year + m_parks,
                    data = df)

boot_g1<-boottest(asked_twfeg1_lm,
                  clustid = c('prov', 'month'),
                  param = 'did',
                  B = 9999,
                  seed = 1)

# With residential

asked_twfe_g2<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential| prov^month,
                    cluster = ~ prov + month,
                    data = df)
## NOTE: 3 observations removed because of NA values (RHS: 3).
## Variance contained negative values in the diagonal and was 'fixed' (a la Cameron, Gelbach & Miller 2011).
summary(asked_twfe_g2)
## OLS estimation, Dep. Var.: lbuss
## Observations: 573 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##                Estimate Std. Error   t value   Pr(>|t|)    
## post          -0.025856   0.255876 -0.101050 9.2133e-01    
## did           -0.263705   0.030699 -8.590047 3.2998e-06 ***
## vax_leastone  -0.014538   0.006022 -2.414124 3.4363e-02 *  
## vax_complete   0.001134   0.003055  0.371339 7.1744e-01    
## excessd       -0.000322   0.000061 -5.271708 2.6362e-04 ***
## year2021       0.959524   0.258914  3.705950 3.4655e-03 ** 
## m_residential -0.037256   0.014650 -2.543147 2.7326e-02 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.389704     Adj. R2: 0.844154
##                  Within R2: 0.534957
asked_twfeg2_lm<-lm(lbuss ~ post + did + prov*month + vax_leastone + vax_complete + excessd + year + m_residential,
                    data = df)

boot_g2<-boottest(asked_twfeg2_lm,
                  clustid = c('prov', 'month'),
                  param = 'did',
                  B = 9999,
                  seed = 1)

# With all of them mixed

asked_twfe_g3<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential + m_parks
                     + m_workplace| prov^month,
                    cluster = ~ prov + month,
                    data = df)
## NOTE: 49 observations removed because of NA values (RHS: 49).
summary(asked_twfe_g3)
## OLS estimation, Dep. Var.: lbuss
## Observations: 527 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##                Estimate Std. Error   t value   Pr(>|t|)    
## post           0.027361   0.214467  0.127576 9.0079e-01    
## did           -0.043690   0.054239 -0.805520 4.3759e-01    
## vax_leastone  -0.014775   0.003518 -4.199904 1.4858e-03 ** 
## vax_complete   0.001076   0.002567  0.418933 6.8333e-01    
## excessd       -0.000217   0.000025 -8.776820 2.6781e-06 ***
## year2021       0.148831   0.238755  0.623365 5.4575e-01    
## m_residential -0.008765   0.007391 -1.185815 2.6068e-01    
## m_parks        0.005637   0.003981  1.416048 1.8445e-01    
## m_workplace    0.042350   0.006224  6.804189 2.9374e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.301643     Adj. R2: 0.898184
##                  Within R2: 0.73036
asked_twfe_g3_lm<-lm(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential + m_parks
                     + m_workplace +prov*month,
                     data = df)

boot_g3<-boottest(asked_twfe_g3_lm,
                  clustid = c('prov','month'),
                  param = 'did',
                  B = 9999,
                  seed = 1)

Add some more variables to a few more models

asked_twfe_g4<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential + m_parks
                     + m_workplace + case_rate| prov^month,
                    cluster = ~ prov + month,
                    data = df)
## NOTE: 49 observations removed because of NA values (RHS: 49).
summary(asked_twfe_g4)
## OLS estimation, Dep. Var.: lbuss
## Observations: 527 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##                Estimate Std. Error   t value   Pr(>|t|)    
## post           0.152598   0.183256  0.832705 0.42272226    
## did           -0.024661   0.032052 -0.769406 0.45785596    
## vax_leastone  -0.016838   0.003216 -5.236402 0.00027836 ***
## vax_complete   0.000677   0.002259  0.299736 0.76996678    
## excessd       -0.000235   0.000061 -3.877468 0.00257457 ** 
## year2021      -0.113634   0.266121 -0.427002 0.67761584    
## m_residential -0.011087   0.007750 -1.430665 0.18031436    
## m_parks        0.006076   0.003904  1.556507 0.14787288    
## m_workplace    0.041404   0.005109  8.103428 0.00000578 ***
## case_rate      0.143402   0.075847  1.890678 0.08528466 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.295792     Adj. R2: 0.901668
##                  Within R2: 0.74072
asked_twfe_g4_lm<-lm(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential + m_parks
                     + m_workplace + case_rate + prov*month,
                     data = df)


boot_g4<-boottest(asked_twfe_g4_lm,
                       clustid = c('prov','month'),
                       param = 'did',
                       B = 9999,
                       seed = 1)

asked_twfe_g5<-feols(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential + m_parks
                     + m_workplace + new_case_rate| prov^month,
                    cluster = ~ prov + month,
                    data = df)
## NOTE: 49 observations removed because of NA values (RHS: 49).
summary(asked_twfe_g5)
## OLS estimation, Dep. Var.: lbuss
## Observations: 527 
## Fixed-effects: prov^month: 288
## Standard-errors: Clustered (prov & month) 
##                Estimate Std. Error   t value   Pr(>|t|)    
## post           0.031745   0.184507  0.172054 8.6652e-01    
## did           -0.106339   0.052182 -2.037840 6.6350e-02 .  
## vax_leastone  -0.012069   0.003239 -3.726324 3.3448e-03 ** 
## vax_complete   0.000774   0.002541  0.304586 7.6637e-01    
## excessd       -0.000264   0.000032 -8.311623 4.5339e-06 ***
## year2021       0.053987   0.235335  0.229405 8.2276e-01    
## m_residential -0.009966   0.008040 -1.239558 2.4093e-01    
## m_parks        0.007388   0.003827  1.930448 7.9726e-02 .  
## m_workplace    0.037689   0.001768 21.313383 2.6984e-10 ***
## new_case_rate  0.902430   0.395956  2.279118 4.3604e-02 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.294579     Adj. R2: 0.902473
##                  Within R2: 0.742843
asked_twfe_g5_lm<-lm(lbuss ~ post + did + vax_leastone + vax_complete + excessd + year + m_residential + m_parks
                     + m_workplace + new_case_rate + prov*month,
                     data = df)

boot_g5<-boottest(asked_twfe_g5_lm,
                       clustid = c('prov','month'),
                       param = 'did',
                       B = 9999,
                       seed = 1)

Now compare all of them

modelsummary(list(simple_twfe,
                  simple_twfe_int,
                  asked_twfe,
                  asked_twfe_g,
                  asked_twfe_g1,
                  asked_twfe_g2,
                  asked_twfe_g3,
                  asked_twfe_g4,
                  asked_twfe_g5), 
             stars = c('*' = .1, '**'=0.05, '***' = .01))
Model 1 Model 2 Model 3 Model 4 Model 5 Model 6 Model 7 Model 8 Model 9
post 0.271*** 0.224*** -0.097 -0.022 0.250 -0.026 0.027 0.153 0.032
(0.060) (0.057) (0.232) (0.192) (0.200) (0.256) (0.214) (0.183) (0.185)
did -0.254*** -0.129* -0.151** 0.020 -0.263** -0.264*** -0.044 -0.025 -0.106*
(0.072) (0.060) (0.059) (0.088) (0.093) (0.031) (0.054) (0.032) (0.052)
vax_leastone -0.010 -0.013*** -0.022*** -0.015** -0.015*** -0.017*** -0.012***
(0.006) (0.003) (0.006) (0.006) (0.004) (0.003) (0.003)
vax_complete 0.001 0.001 0.002 0.001 0.001 0.001 0.001
(0.003) (0.003) (0.003) (0.003) (0.003) (0.002) (0.003)
excessd 0.000*** 0.000*** 0.000*** 0.000*** 0.000*** 0.000*** 0.000***
(0.000) (0.000) (0.000) (0.000) (0.000) (0.000) (0.000)
year2021 0.983** 0.273 0.978*** 0.960*** 0.149 -0.114 0.054
(0.351) (0.199) (0.235) (0.259) (0.239) (0.266) (0.235)
m_workplace 0.047*** 0.042*** 0.041*** 0.038***
(0.007) (0.006) (0.005) (0.002)
m_parks 0.016* 0.006 0.006 0.007*
(0.007) (0.004) (0.004) (0.004)
m_residential -0.037** -0.009 -0.011 -0.010
(0.015) (0.007) (0.008) (0.008)
case_rate 0.143*
(0.076)
new_case_rate 0.902**
(0.396)
Num.Obs. 576 576 576 528 528 573 527 527 527
R2 0.799 0.839 0.908 0.954 0.941 0.924 0.955 0.957 0.958
R2 Adj. 0.786 0.676 0.812 0.897 0.866 0.844 0.898 0.902 0.902
R2 Within 0.011 0.009 0.433 0.724 0.642 0.535 0.730 0.741 0.743
R2 Pseudo
AIC 1183.1 1562.8 1249.6 834.0 971.7 1136.1 826.3 807.7 803.4
BIC 1344.3 2826.1 2530.3 2093.4 2231.0 2419.7 2093.7 2079.3 2075.0
Log.Lik. -554.552 -491.408 -330.779 -122.020 -190.831 -273.074 -116.166 -105.842 -103.677
Std.Errors by: prov & month by: prov & month by: prov & month by: prov & month by: prov & month by: prov & month by: prov & month by: prov & month by: prov & month
FE: prov^month X X X X X X X X
FE: prov X
FE: month X
* p < 0.1, ** p < 0.05, *** p < 0.01
modelsummary(list(boot_twfe,
                  boot_twfe_int,
                  boot_asasked,
                  boot_g,
                  boot_g1,
                  boot_g2,
                  boot_g3,
                  boot_g4,
                  boot_g5),
             estimate = "{estimate} ({p.value})", 
             statistic = "[{conf.low}, {conf.high}]")
Model 1 Model 2 Model 3 Model 4 Model 5 Model 6 Model 7 Model 8 Model 9
1*did = 0 -0.254 (0.031) -0.129 (0.154) -0.151 (0.096) 0.020 (0.840) -0.263 (0.074) -0.264 (0.009) -0.044 (0.512) -0.025 (0.593) -0.106 (0.148)
[-0.461, -0.042] [-0.350, 0.098] [-0.360, 0.055] [-0.280, 0.306] [-0.580, 0.048] [-0.387, -0.144] [-0.229, 0.142] [-0.150, 0.101] [-0.278, 0.066]
Num.Obs. 576 576 576 528 528 573 527 527 527
R2 0.799 0.839 0.908 0.954 0.941 0.924 0.955 0.957 0.958
R2 Adj. 0.786 0.676 0.812 0.897 0.866 0.844 0.898 0.902 0.902
R2 Within 0.011
R2 Pseudo
AIC 1183.1 1564.8 1251.6 836.0 973.7 1138.1 828.3 809.7 805.4
BIC 1344.3 2832.4 2536.6 2099.7 2237.3 2426.0 2100.0 2085.6 2081.2
Log.Lik. -554.552 -491.408 -330.779 -122.020 -190.831 -273.074 -116.166 -105.842 -103.677

Defining a treatment variable

In this section I’d like to change things a little bit by using a k-means algorithm to define my treatment. I’ll use a reduced, “yearly” dataset of provinces with some data on health-related stuff.

# Load the data first
df_prov<-read.xlsx('../data/province_data.xlsx')

# Change the row names so the k-means stuff works
rownames(df_prov)<-df_prov$province

# Delete the province variable

df_prov$province<-NULL

# Normalize the data

df_prov.Z<-scale(df_prov)

# Now do the k-means with 3 clusters and just three variables

df_prov.Z_reduced<-as.data.frame(df_prov.Z) %>% select(diseases_pc, avg_covaxrate, population)

set.seed(1)
kmeans_prov<-kmeans(df_prov.Z_reduced, 3)
print(kmeans_prov)
## K-means clustering with 3 clusters of sizes 6, 16, 2
## 
## Cluster means:
##   diseases_pc avg_covaxrate population
## 1   1.4541894   -0.48488130 -0.5144259
## 2  -0.4949548    0.17637143 -0.1840603
## 3  -0.4029301    0.04367247  3.0157604
## 
## Clustering vector:
##                          AZUAY                        BOLIVAR 
##                              2                              2 
##                          CAÑAR                         CARCHI 
##                              2                              2 
##                     CHIMBORAZO                       COTOPAXI 
##                              2                              1 
##                         EL ORO                     ESMERALDAS 
##                              2                              2 
##                      GALAPAGOS                         GUAYAS 
##                              2                              3 
##                       IMBABURA                           LOJA 
##                              2                              2 
##                       LOS RIOS                         MANABI 
##                              2                              2 
##                MORONA SANTIAGO                           NAPO 
##                              1                              1 
##                       ORELLANA                        PASTAZA 
##                              1                              1 
##                      PICHINCHA                    SANTA ELENA 
##                              3                              2 
## SANTO DOMINGO DE LOS TSACHILAS                      SUCUMBIOS 
##                              2                              2 
##                     TUNGURAHUA               ZAMORA CHINCHIPE 
##                              2                              1 
## 
## Within cluster sum of squares by cluster:
## [1]  5.073078 23.022023  1.740848
##  (between_SS / total_SS =  56.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Now paste the clusters in the province database

df_prov$cluster <- kmeans_prov$cluster

# Now create a treatment dummy

df_prov$treatment<-ifelse(df_prov$cluster == 1, 1,0)

# Append that information to a new dataframe

# First, return the province names

df_prov$province<-rownames(df_prov)

# Now, create a df with just the treatment variable

df_prov1<-df_prov %>% select(province, treatment)

# Now, the new dataframe

df_new<-df

df_new<-left_join(df_new, df_prov1, by = 'province')

# Create the did variable

df_new$did<-df_new$post*df_new$treatment

# Write a csv file

write.csv(df_new, 
          file ='df_new.csv',
          na = '')

We have that some other provinces will fit the treatment as they have relatively low population, high disease rate and relatively low average vaccination rates.

Update: Stata says this treatment doesn’t work (check my GitHub).