This script attempts to replciate the ivregress
command in Stata using the ivreg
command from the AER
package in R, with and without weights.
TODO: Margins :)
rm(list = ls())
data(ai_data, package = 'aiHelper')
df <- ai_data
set.seed(100)
df <- ai_data[sample(1:5000), ]
df$assign_treat <- df$treat
# Probability of getting treatment increases with age and is higher if female
df$pr_treat <- plogis(
df$age / sd(df$age) +
ifelse(df$gender == 'female', 0.1, 0))
# ggplot2::qplot(df$pr_treat, binwidth = 0.01)
# One-way non-compliance
df$actual_treat <- df$assign_treat
df$actual_treat[df$assign_treat == 'treat'] <- rbinom(
n = sum(df$assign_treat == 'treat'), size = 1,
prob = df$pr_treat[df$assign_treat == 'treat']
)
df$actual_treat <- aiHelper::ai_recode(
df$actual_treat,
control = 0, treat = 1,
verbose = F
)
aiHelper::ai_table(df$assign_treat, df$actual_treat)
##
## control treat <NA>
## control 2495 0 0
## treat 284 2221 0
## <NA> 0 0 0
df$treat <- NULL
df$pr_treat <- NULL
compare <- function(s, r, vcov_fun = vcov){
s$model <- s$model[! s$model$se %in% c('.', ''), ]
# Put intercept at the end
s$model <- s$model[c(nrow(s$model), 1:(nrow(s$model) - 1)), ]
s_coef <- as.numeric(s$model$b)
s_se <- as.numeric(s$model$se)
r_coef <- r$coefficients
start <- Sys.time()
r_se <- sqrt(diag(summary(r, vcov = vcov_fun)[['vcov']]))
stop <- Sys.time()
cat("\nTime to calculate vcov:", round(stop - start, 3), 'seconds')
knitr::kable(data.frame(
r_coef, s_coef,
diff_coef = paste0(round(100 * (s_coef - r_coef) / s_coef, 3), '%'),
r_var = r_se^2, s_var = s_se^2,
diff_var = paste0(round(100 * (s_se - r_se) / s_se, 3), '%'))
)
}
s <- aiEstimation::mod_marg(
paste('ivregress 2sls ag_fav age partisanship_score i.gender i.race',
'(i.actual_treat = i.assign_treat)'),
list(pred = 'margins i.actual_treat'),
df = df
)
r <- AER::ivreg(ag_fav ~ actual_treat + age + partisanship_score + gender + race |
age + partisanship_score + gender + race + assign_treat,
data = df)
writeLines(s$output)
## . ivregress 2sls ag_fav age partisanship_score i.gender i.race (i.actual_treat
## > = i.assign_treat)
##
## Instrumental variables (2SLS) regression Number of obs = 5000
## Wald chi2(7) = 480.30
## Prob > chi2 = 0.0000
## R-squared = 0.0833
## Root MSE = .47846
##
## ------------------------------------------------------------------------------
## ag_fav | Coef. Std. Err. z P>|z| [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
## treat | .1603467 .0152625 10.51 0.000 .1304327 .1902606
## age | .0054434 .0003514 15.49 0.000 .0047547 .0061321
## partisansh~e | .1990048 .023346 8.52 0.000 .1532474 .2447622
## |
## gender |
## male | -.0364066 .0135404 -2.69 0.007 -.0629453 -.009868
## |
## race |
## latinx | -.0086425 .0190332 -0.45 0.650 -.045947 .028662
## other | -.0532068 .0190133 -2.80 0.005 -.0904722 -.0159413
## white | -.0541 .0190799 -2.84 0.005 -.0914959 -.0167041
## |
## _cons | .1449032 .0250693 5.78 0.000 .0957683 .1940382
## ------------------------------------------------------------------------------
## Instrumented: 2.actual_treat
## Instruments: age partisanship_score 2.gender 2.race 3.race 4.race
## 2.assign_treat
##
## . estout . using mod1.txt, cells("b se t p") stats(N) replace
## (note: file mod1.txt not found)
## (output written to mod1.txt)
##
## . estimates store t1
##
## . margins i.actual_treat
##
## Predictive margins Number of obs = 5000
## Model VCE : Unadjusted
##
## Expression : Linear prediction, predict()
##
## ------------------------------------------------------------------------------
## | Delta-method
## | Margin Std. Err. z P>|z| [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
## control | .445374 .0095785 46.50 0.000 .4266004 .4641476
## treat | .6057207 .010851 55.82 0.000 .584453 .6269883
## ------------------------------------------------------------------------------
##
## . quietly estadd margins i.actual_treat, replace
##
## . estout e(margins_table) using pred.txt, replace
## (note: file pred.txt not found)
## (output written to pred.txt)
##
## . estimates restore t1
## (results t1 are active now)
##
## . table ag_fav
##
## ----------------------
## ag_fav | Freq.
## ----------+-----------
## 0 | 2,417
## 1 | 2,583
## ----------------------
##
## end of do-file
v <- function(object, ...){
vcov(object) * object$df.residual / nobs(object)
}
compare(s, r, vcov = v)
##
## Time to calculate vcov: 0.004 seconds
r_coef | s_coef | diff_coef | r_var | s_var | diff_var | |
---|---|---|---|---|---|---|
(Intercept) | 0.1449032 | 0.1449032 | 0% | 0.0006285 | 0.0006285 | 0% |
actual_treattreat | 0.1603467 | 0.1603467 | 0% | 0.0002329 | 0.0002329 | 0% |
age | 0.0054434 | 0.0054434 | 0% | 0.0000001 | 0.0000001 | 0.001% |
partisanship_score | 0.1990048 | 0.1990048 | 0% | 0.0005450 | 0.0005450 | 0% |
gendermale | -0.0364066 | -0.0364066 | 0% | 0.0001833 | 0.0001833 | 0% |
racelatinx | -0.0086425 | -0.0086425 | 0% | 0.0003623 | 0.0003623 | 0% |
raceother | -0.0532068 | -0.0532068 | 0% | 0.0003615 | 0.0003615 | 0% |
racewhite | -0.0541000 | -0.0541000 | 0% | 0.0003640 | 0.0003640 | 0% |
df$wgt <- runif(nrow(df))
s <- aiEstimation::mod_marg(
paste('ivregress 2sls ag_fav age partisanship_score i.gender i.race',
'(i.actual_treat = i.assign_treat)',
'[pweight = wgt]'),
list(pred = 'margins i.actual_treat'),
df = df
)
r <- AER::ivreg(ag_fav ~ actual_treat + age + partisanship_score + gender + race |
age + partisanship_score + gender + race + assign_treat,
weights = wgt,
data = df)
writeLines(s$output)
## . ivregress 2sls ag_fav age partisanship_score i.gender i.race (i.actual_treat
## > = i.assign_treat) [pweight = wgt]
## (sum of wgt is 2.4915e+03)
##
## Instrumental variables (2SLS) regression Number of obs = 5000
## Wald chi2(7) = 380.89
## Prob > chi2 = 0.0000
## R-squared = 0.0786
## Root MSE = .47969
##
## ------------------------------------------------------------------------------
## | Robust
## ag_fav | Coef. Std. Err. z P>|z| [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
## treat | .1623005 .0177673 9.13 0.000 .1274772 .1971238
## age | .0054265 .0004024 13.49 0.000 .0046379 .0062151
## partisansh~e | .1721051 .0269923 6.38 0.000 .1192011 .225009
## |
## gender |
## male | -.0370628 .0156482 -2.37 0.018 -.0677327 -.0063929
## |
## race |
## latinx | -.0183145 .0219604 -0.83 0.404 -.061356 .024727
## other | -.055443 .0217227 -2.55 0.011 -.0980188 -.0128672
## white | -.0482655 .0222419 -2.17 0.030 -.0918588 -.0046722
## |
## _cons | .1597342 .0284024 5.62 0.000 .1040665 .215402
## ------------------------------------------------------------------------------
## Instrumented: 2.actual_treat
## Instruments: age partisanship_score 2.gender 2.race 3.race 4.race
## 2.assign_treat
##
## . estout . using mod1.txt, cells("b se t p") stats(N) replace
## (note: file mod1.txt not found)
## (output written to mod1.txt)
##
## . estimates store t1
##
## . margins i.actual_treat
##
## Predictive margins Number of obs = 5000
## Model VCE : Robust
##
## Expression : Linear prediction, predict()
##
## ------------------------------------------------------------------------------
## | Delta-method
## | Margin Std. Err. z P>|z| [95% Conf. Interval]
## -------------+----------------------------------------------------------------
## actual_treat |
## control | .4442876 .0110731 40.12 0.000 .4225847 .4659904
## treat | .6065881 .0126437 47.98 0.000 .5818069 .6313693
## ------------------------------------------------------------------------------
##
## . quietly estadd margins i.actual_treat, replace
##
## . estout e(margins_table) using pred.txt, replace
## (note: file pred.txt not found)
## (output written to pred.txt)
##
## . estimates restore t1
## (results t1 are active now)
##
## . table ag_fav
##
## ----------------------
## ag_fav | Freq.
## ----------+-----------
## 0 | 2,417
## 1 | 2,583
## ----------------------
##
## end of do-file
v <- function(x) sandwich::vcovHC(x, type = 'HC0')
compare(s, r, vcov = v)
##
## Time to calculate vcov: 2.249 seconds
r_coef | s_coef | diff_coef | r_var | s_var | diff_var | |
---|---|---|---|---|---|---|
(Intercept) | 0.1597342 | 0.1597342 | 0% | 0.0008067 | 0.0008067 | 0% |
actual_treattreat | 0.1623005 | 0.1623005 | 0% | 0.0003157 | 0.0003157 | 0% |
age | 0.0054265 | 0.0054265 | -0.001% | 0.0000002 | 0.0000002 | 0.012% |
partisanship_score | 0.1721051 | 0.1721051 | 0% | 0.0007286 | 0.0007286 | 0% |
gendermale | -0.0370628 | -0.0370628 | 0% | 0.0002449 | 0.0002449 | 0% |
racelatinx | -0.0183145 | -0.0183145 | 0% | 0.0004823 | 0.0004823 | 0% |
raceother | -0.0554430 | -0.0554430 | 0% | 0.0004719 | 0.0004719 | 0% |
racewhite | -0.0482655 | -0.0482655 | 0% | 0.0004947 | 0.0004947 | 0% |
summary(r, vcov = v)
##
## Call:
## AER::ivreg(formula = ag_fav ~ actual_treat + age + partisanship_score +
## gender + race | age + partisanship_score + gender + race +
## assign_treat, data = df, weights = wgt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.90774 -0.29480 0.05547 0.28664 0.78354
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1597342 0.0284024 5.624 1.97e-08 ***
## actual_treattreat 0.1623005 0.0177673 9.135 < 2e-16 ***
## age 0.0054265 0.0004024 13.487 < 2e-16 ***
## partisanship_score 0.1721051 0.0269923 6.376 1.98e-10 ***
## gendermale -0.0370628 0.0156482 -2.368 0.0179 *
## racelatinx -0.0183145 0.0219604 -0.834 0.4043
## raceother -0.0554430 0.0217227 -2.552 0.0107 *
## racewhite -0.0482655 0.0222419 -2.170 0.0301 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3389 on 4992 degrees of freedom
## Multiple R-Squared: 0.07864, Adjusted R-squared: 0.07735
## Wald test: 54.41 on 7 and 4992 DF, p-value: < 2.2e-16