IMM

knitr::opts_chunk$set(echo = TRUE)
library(AER)

## Warning: package 'AER' was built under R version 4.3.3

## Loading required package: car

## Loading required package: carData

## Loading required package: lmtest

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: sandwich

## Loading required package: survival

library(ggplot2)
library(ggthemes)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Create the above data set 1000 times for visualization

sampleSize <- 1000
myList = list()
set.seed(12345)
for (i in 1L:1000L) {
  omVar <- rnorm(sampleSize)
  z1 <- rnorm(sampleSize)
  z2 <- rnorm(sampleSize)
  x2 <- rnorm(sampleSize)
  x1 <- z1 + z2 + omVar
  y <- 2 + 3*x1 + 1*x2 + 3*omVar + 10*rnorm(sampleSize)

  myList[[i]] <- data.frame(y = y,
                            x1 = x1,
                            x2 = x2,
                            z1 = z1,
                            z2 = z2,
                            sample = i)
  rm(omVar, z1, z2, x1, x2, y)
}

myData <- data.table::rbindlist(myList)

Graphs

OLS

m1 = data.frame(Intercept = NA,
                x1 = NA,
                x2 = NA)
for (i in 1L:1000L) {
  m1 <- rbind(m1, lm(y ~ x1 + x2, subset(myData,sample == i))$coefficients)
}
m1 = m1[-1,]

ZKHL

m2 <- data.frame(Intercept = NA,
                 x1_1 = NA,
                 x2 = NA)
for (i in 1L:1000L) {
  myData$x1_1[myData$sample == i] <- lm(x1~z1 + x2, subset(myData,sample == i))$residuals
  m2 <- rbind(m2, lm(y ~ x1_1 + x2, subset(myData,sample == i))$coefficients)
}
m2 <- m2[-1,]

2SLS

m3 <- data.frame(Intercept = NA,
                 x1_2 = NA,
                 x2 = NA)
for (i in 1L:1000L) {
  myData$x1_2[myData$sample == i] <- lm(x1~z1 + x2, subset(myData,sample == i))$fitted.values
  m3 <- rbind(m3, lm(y ~ x1_2 + x2, subset(myData,sample == i))$coefficients)
}
m3 <- m3[-1,]

Three plots together for comparison

m1$Model <- 'OLS'
m2$Model <- 'ZKHL'
m3$Model <- '2SLS'

mMeans <- rbind(m1, setNames(m2, names(m1)), setNames(m3,names(m1))) %>%
  mutate(Model2 = factor(Model, levels = c('OLS','ZKHL','2SLS'))) %>%
  group_by(Model2) %>%
  summarise(x1 = mean(x1))

rbind(m1,setNames(m2, names(m1)),setNames(m3,names(m1))) %>%
  mutate(Model2 = factor(Model, levels = c('OLS','ZKHL','2SLS'))) %>%
  ggplot(aes(x1)) +
  geom_histogram(bins = 40) +
  geom_vline(aes(xintercept = x1, color = Model2), data = mMeans) +
  theme_fivethirtyeight() +
  facet_wrap(~Model2, nrow = 3) +
  theme(legend.position = 'none')

Reporting in the paper

summary(lm(y ~ x1 + x2, subset(myData,sample == 1)))

## 
## Call:
## lm(formula = y ~ x1 + x2, data = subset(myData, sample == 1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.116  -7.103   0.418   6.977  34.033 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0868     0.3351   6.227 6.98e-10 ***
## x1            3.7352     0.1945  19.202  < 2e-16 ***
## x2            1.2846     0.3521   3.649 0.000277 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.6 on 997 degrees of freedom
## Multiple R-squared:  0.275,  Adjusted R-squared:  0.2735 
## F-statistic: 189.1 on 2 and 997 DF,  p-value: < 2.2e-16

Model with ZKHL method

myData$x1_1[myData$sample == 1] <- lm(x1~z1 + x2, subset(myData,sample == 1))$residuals
summary(lm(y ~ x1_1 + x2, subset(myData,sample == 1)))

## 
## Call:
## lm(formula = y ~ x1_1 + x2, data = subset(myData, sample == 1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.372  -7.525   0.097   7.036  37.858 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0538     0.3438   5.974 3.22e-09 ***
## x1_1          4.3416     0.2505  17.335  < 2e-16 ***
## x2            1.0807     0.3611   2.993  0.00283 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.87 on 997 degrees of freedom
## Multiple R-squared:  0.2369, Adjusted R-squared:  0.2353 
## F-statistic: 154.7 on 2 and 997 DF,  p-value: < 2.2e-16

Model with correct method

myData$x1_2[myData$sample == 1] <- lm(x1~z1 + x2, subset(myData,sample == 1))$fitted.values
summary(lm(y ~ x1_2 + x2, subset(myData,sample == 1)))

## 
## Call:
## lm(formula = y ~ x1_2 + x2, data = subset(myData, sample == 1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.083  -8.018  -0.482   7.393  37.011 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0774     0.3821   5.437 6.83e-08 ***
## x1_2          2.6805     0.3671   7.302 5.81e-13 ***
## x2            1.2270     0.4018   3.054  0.00232 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.08 on 997 degrees of freedom
## Multiple R-squared:  0.05727,    Adjusted R-squared:  0.05538 
## F-statistic: 30.28 on 2 and 997 DF,  p-value: 1.707e-13

2SLS using `ivreg` function

ivM1 <- ivreg(y ~ x1 + x2|x2 + z1, data = subset(myData,sample == 1))

summary(ivM1, diagnostics = TRUE)

## 
## Call:
## ivreg(formula = y ~ x1 + x2 | x2 + z1, data = subset(myData, 
##     sample == 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -39.9030  -7.1996   0.3698   6.9697  33.9642 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0774     0.3400   6.110 1.43e-09 ***
## x1            2.6805     0.3267   8.206 7.02e-16 ***
## x2            1.2270     0.3575   3.432 0.000624 ***
## 
## Diagnostic tests:
##                  df1 df2 statistic p-value    
## Weak instruments   1 997    573.24 < 2e-16 ***
## Wu-Hausman         1 996     17.18 3.7e-05 ***
## Sargan             0  NA        NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.75 on 997 degrees of freedom
## Multiple R-Squared: 0.2536,  Adjusted R-squared: 0.2521 
## Wald test: 38.25 on 2 and 997 DF,  p-value: < 2.2e-16

What if we have more than one instrument for `x1`?

ivM2 <- ivreg(y ~ x1 + x2|x2 + z1 + z2, data = subset(myData,sample == 1))
summary(ivM2, diagnostics = TRUE)

## 
## Call:
## ivreg(formula = y ~ x1 + x2 | x2 + z1 + z2, data = subset(myData, 
##     sample == 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -39.9440  -7.1656   0.3717   6.9970  33.9605 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0770     0.3405   6.099 1.52e-09 ***
## x1            2.6254     0.2424  10.830  < 2e-16 ***
## x2            1.2240     0.3579   3.420 0.000651 ***
## 
## Diagnostic tests:
##                  df1 df2 statistic  p-value    
## Weak instruments   2 996   988.150  < 2e-16 ***
## Wu-Hausman         1 996    68.988 3.21e-16 ***
## Sargan             1  NA     0.063    0.802    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.77 on 997 degrees of freedom
## Multiple R-Squared: 0.2513,  Adjusted R-squared: 0.2498 
## Wald test: 63.21 on 2 and 997 DF,  p-value: < 2.2e-16

Control function

summary(lm(y ~ x1 + x1_1 + x2, subset(myData,sample == 1)))

## 
## Call:
## lm(formula = y ~ x1 + x1_1 + x2, data = subset(myData, sample == 
##     1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.552  -7.083   0.448   6.630  33.311 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.0774     0.3324   6.249  6.1e-10 ***
## x1            2.6805     0.3194   8.393  < 2e-16 ***
## x1_1          1.6611     0.4008   4.144  3.7e-05 ***
## x2            1.2270     0.3495   3.510 0.000467 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.51 on 996 degrees of freedom
## Multiple R-squared:  0.2873, Adjusted R-squared:  0.2851 
## F-statistic: 133.8 on 3 and 996 DF,  p-value: < 2.2e-16

IMM

Ashwin Malshé

7/30/2018

Create the above data set 1000 times for visualization

Graphs

OLS

ZKHL

2SLS

Three plots together for comparison

Reporting in the paper

Model with ZKHL method

Model with correct method

2SLS using `ivreg` function

What if we have more than one instrument for `x1`?

Control function

IMM

Ashwin Malshé

7/30/2018

Create the above data set 1000 times for visualization

Graphs

OLS

ZKHL

2SLS

Three plots together for comparison

Reporting in the paper

Model with ZKHL method

Model with correct method

2SLS using ivreg function

What if we have more than one instrument for x1?

Control function

2SLS using `ivreg` function

What if we have more than one instrument for `x1`?