This script attempts to replciate the ivregress command in Stata using the ivreg command from the AER package in R, with and without weights.

TODO: Margins :)

Set up data

rm(list = ls())
data(ai_data, package = 'aiHelper')
df <- ai_data

set.seed(100)
df <- ai_data[sample(1:5000), ]

Make endogenous regressor

df$assign_treat <- df$treat

# Probability of getting treatment increases with age and is higher if female
df$pr_treat <- plogis(
  df$age / sd(df$age) + 
    ifelse(df$gender == 'female', 0.1, 0))

# ggplot2::qplot(df$pr_treat, binwidth = 0.01)

# One-way non-compliance
df$actual_treat <- df$assign_treat
df$actual_treat[df$assign_treat == 'treat'] <- rbinom(
  n = sum(df$assign_treat == 'treat'), size = 1, 
  prob = df$pr_treat[df$assign_treat == 'treat']
)
df$actual_treat <- aiHelper::ai_recode(
  df$actual_treat,
  control = 0, treat = 1,
  verbose = F
)

aiHelper::ai_table(df$assign_treat, df$actual_treat)

##          
##           control treat <NA>
##   control    2495     0    0
##   treat       284  2221    0
##   <NA>          0     0    0

df$treat <- NULL
df$pr_treat <- NULL

Comparison function

compare <- function(s, r, vcov_fun = vcov){
  s$model <- s$model[! s$model$se %in% c('.', ''), ]
  # Put intercept at the end
  s$model <- s$model[c(nrow(s$model), 1:(nrow(s$model) - 1)), ]
  s_coef <- as.numeric(s$model$b)
  s_se <- as.numeric(s$model$se)
  
  r_coef <- r$coefficients
  start <- Sys.time()
  r_se <- sqrt(diag(summary(r, vcov = vcov_fun)[['vcov']]))
  stop <- Sys.time()
  cat("\nTime to calculate vcov:", round(stop - start, 3), 'seconds')
  
  knitr::kable(data.frame(
    r_coef, s_coef, 
    diff_coef = paste0(round(100 * (s_coef - r_coef) / s_coef, 3), '%'), 
    r_var = r_se^2, s_var = s_se^2, 
    diff_var = paste0(round(100 * (s_se - r_se) / s_se, 3), '%'))
  )
}

Models without weights

 s <- aiEstimation::mod_marg(
  paste('ivregress 2sls ag_fav age partisanship_score i.gender i.race', 
        '(i.actual_treat = i.assign_treat)'),
  list(pred = 'margins i.actual_treat'),
  df = df
)

r <- AER::ivreg(ag_fav ~ actual_treat + age + partisanship_score + gender + race | 
                  age + partisanship_score + gender + race + assign_treat,
                data = df)

Stata

writeLines(s$output)

## . ivregress 2sls ag_fav age partisanship_score i.gender i.race (i.actual_treat 
## > = i.assign_treat)
## 
## Instrumental variables (2SLS) regression               Number of obs =    5000
##                                                        Wald chi2(7)  =  480.30
##                                                        Prob > chi2   =  0.0000
##                                                        R-squared     =  0.0833
##                                                        Root MSE      =  .47846
## 
## ------------------------------------------------------------------------------
##       ag_fav |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
##       treat  |   .1603467   .0152625    10.51   0.000     .1304327    .1902606
##          age |   .0054434   .0003514    15.49   0.000     .0047547    .0061321
## partisansh~e |   .1990048    .023346     8.52   0.000     .1532474    .2447622
##              |
##       gender |
##        male  |  -.0364066   .0135404    -2.69   0.007    -.0629453    -.009868
##              |
##         race |
##      latinx  |  -.0086425   .0190332    -0.45   0.650     -.045947     .028662
##       other  |  -.0532068   .0190133    -2.80   0.005    -.0904722   -.0159413
##       white  |     -.0541   .0190799    -2.84   0.005    -.0914959   -.0167041
##              |
##        _cons |   .1449032   .0250693     5.78   0.000     .0957683    .1940382
## ------------------------------------------------------------------------------
## Instrumented:  2.actual_treat
## Instruments:   age partisanship_score 2.gender 2.race 3.race 4.race
##                2.assign_treat
## 
## . estout . using mod1.txt, cells("b se t p") stats(N) replace
## (note: file mod1.txt not found)
## (output written to mod1.txt)
## 
## . estimates store t1
## 
## . margins i.actual_treat
## 
## Predictive margins                                Number of obs   =       5000
## Model VCE    : Unadjusted
## 
## Expression   : Linear prediction, predict()
## 
## ------------------------------------------------------------------------------
##              |            Delta-method
##              |     Margin   Std. Err.      z    P>|z|     [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
##     control  |    .445374   .0095785    46.50   0.000     .4266004    .4641476
##       treat  |   .6057207    .010851    55.82   0.000      .584453    .6269883
## ------------------------------------------------------------------------------
## 
## . quietly estadd margins i.actual_treat, replace
## 
## . estout e(margins_table) using pred.txt, replace
## (note: file pred.txt not found)
## (output written to pred.txt)
## 
## . estimates restore t1
## (results t1 are active now)
## 
## . table ag_fav
## 
## ----------------------
##    ag_fav |      Freq.
## ----------+-----------
##         0 |      2,417
##         1 |      2,583
## ----------------------
## 
## end of do-file

Really magic correction

v <- function(object, ...){
  vcov(object) * object$df.residual / nobs(object)
}

compare(s, r, vcov = v)

## 
## Time to calculate vcov: 0.004 seconds

	r_coef	s_coef	diff_coef	r_var	s_var	diff_var
(Intercept)	0.1449032	0.1449032	0%	0.0006285	0.0006285	0%
actual_treattreat	0.1603467	0.1603467	0%	0.0002329	0.0002329	0%
age	0.0054434	0.0054434	0%	0.0000001	0.0000001	0.001%
partisanship_score	0.1990048	0.1990048	0%	0.0005450	0.0005450	0%
gendermale	-0.0364066	-0.0364066	0%	0.0001833	0.0001833	0%
racelatinx	-0.0086425	-0.0086425	0%	0.0003623	0.0003623	0%
raceother	-0.0532068	-0.0532068	0%	0.0003615	0.0003615	0%
racewhite	-0.0541000	-0.0541000	0%	0.0003640	0.0003640	0%

Model with weights

df$wgt <- runif(nrow(df))

s <- aiEstimation::mod_marg(
  paste('ivregress 2sls ag_fav age partisanship_score i.gender i.race',
        '(i.actual_treat = i.assign_treat)',
        '[pweight = wgt]'),
  list(pred = 'margins i.actual_treat'),
  df = df
)

r <- AER::ivreg(ag_fav ~ actual_treat + age + partisanship_score + gender + race | 
                  age + partisanship_score + gender + race + assign_treat,
                weights = wgt,
                data = df)

Stata

writeLines(s$output)

## . ivregress 2sls ag_fav age partisanship_score i.gender i.race (i.actual_treat 
## > = i.assign_treat) [pweight = wgt]
## (sum of wgt is   2.4915e+03)
## 
## Instrumental variables (2SLS) regression               Number of obs =    5000
##                                                        Wald chi2(7)  =  380.89
##                                                        Prob > chi2   =  0.0000
##                                                        R-squared     =  0.0786
##                                                        Root MSE      =  .47969
## 
## ------------------------------------------------------------------------------
##              |               Robust
##       ag_fav |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
##       treat  |   .1623005   .0177673     9.13   0.000     .1274772    .1971238
##          age |   .0054265   .0004024    13.49   0.000     .0046379    .0062151
## partisansh~e |   .1721051   .0269923     6.38   0.000     .1192011     .225009
##              |
##       gender |
##        male  |  -.0370628   .0156482    -2.37   0.018    -.0677327   -.0063929
##              |
##         race |
##      latinx  |  -.0183145   .0219604    -0.83   0.404     -.061356     .024727
##       other  |   -.055443   .0217227    -2.55   0.011    -.0980188   -.0128672
##       white  |  -.0482655   .0222419    -2.17   0.030    -.0918588   -.0046722
##              |
##        _cons |   .1597342   .0284024     5.62   0.000     .1040665     .215402
## ------------------------------------------------------------------------------
## Instrumented:  2.actual_treat
## Instruments:   age partisanship_score 2.gender 2.race 3.race 4.race
##                2.assign_treat
## 
## . estout . using mod1.txt, cells("b se t p") stats(N) replace
## (note: file mod1.txt not found)
## (output written to mod1.txt)
## 
## . estimates store t1
## 
## . margins i.actual_treat
## 
## Predictive margins                                Number of obs   =       5000
## Model VCE    : Robust
## 
## Expression   : Linear prediction, predict()
## 
## ------------------------------------------------------------------------------
##              |            Delta-method
##              |     Margin   Std. Err.      z    P>|z|     [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
##     control  |   .4442876   .0110731    40.12   0.000     .4225847    .4659904
##       treat  |   .6065881   .0126437    47.98   0.000     .5818069    .6313693
## ------------------------------------------------------------------------------
## 
## . quietly estadd margins i.actual_treat, replace
## 
## . estout e(margins_table) using pred.txt, replace
## (note: file pred.txt not found)
## (output written to pred.txt)
## 
## . estimates restore t1
## (results t1 are active now)
## 
## . table ag_fav
## 
## ----------------------
##    ag_fav |      Freq.
## ----------+-----------
##         0 |      2,417
##         1 |      2,583
## ----------------------
## 
## end of do-file

HC0 correction

v <- function(x) sandwich::vcovHC(x, type = 'HC0')
compare(s, r, vcov = v)

## 
## Time to calculate vcov: 2.249 seconds

	r_coef	s_coef	diff_coef	r_var	s_var	diff_var
(Intercept)	0.1597342	0.1597342	0%	0.0008067	0.0008067	0%
actual_treattreat	0.1623005	0.1623005	0%	0.0003157	0.0003157	0%
age	0.0054265	0.0054265	-0.001%	0.0000002	0.0000002	0.012%
partisanship_score	0.1721051	0.1721051	0%	0.0007286	0.0007286	0%
gendermale	-0.0370628	-0.0370628	0%	0.0002449	0.0002449	0%
racelatinx	-0.0183145	-0.0183145	0%	0.0004823	0.0004823	0%
raceother	-0.0554430	-0.0554430	0%	0.0004719	0.0004719	0%
racewhite	-0.0482655	-0.0482655	0%	0.0004947	0.0004947	0%

summary(r, vcov = v)

## 
## Call:
## AER::ivreg(formula = ag_fav ~ actual_treat + age + partisanship_score + 
##     gender + race | age + partisanship_score + gender + race + 
##     assign_treat, data = df, weights = wgt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.90774 -0.29480  0.05547  0.28664  0.78354 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.1597342  0.0284024   5.624 1.97e-08 ***
## actual_treattreat   0.1623005  0.0177673   9.135  < 2e-16 ***
## age                 0.0054265  0.0004024  13.487  < 2e-16 ***
## partisanship_score  0.1721051  0.0269923   6.376 1.98e-10 ***
## gendermale         -0.0370628  0.0156482  -2.368   0.0179 *  
## racelatinx         -0.0183145  0.0219604  -0.834   0.4043    
## raceother          -0.0554430  0.0217227  -2.552   0.0107 *  
## racewhite          -0.0482655  0.0222419  -2.170   0.0301 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3389 on 4992 degrees of freedom
## Multiple R-Squared: 0.07864, Adjusted R-squared: 0.07735 
## Wald test: 54.41 on 7 and 4992 DF,  p-value: < 2.2e-16

Replicate 2SLS from Stata in R

Nat

2017-07-16 19:44:33

Set up data

Make endogenous regressor

Comparison function

Models without weights

Stata

Really magic correction

Model with weights

Stata

HC0 correction