library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
# Load the CigarettesSW data set from the AER package and print the name of
# the object that was loaded.
print(data("CigarettesSW", package = "AER"))
## [1] "CigarettesSW"
# Derive the log-transformed variables used in the regressions below.
CigarettesSW$lquant <- log(CigarettesSW$packs)
CigarettesSW$lprice <- log(CigarettesSW$price)
CigarettesSW$lincome <- log(CigarettesSW$income)

# Instrument: the difference between the `taxs` and `tax` columns.
CigarettesSW$tdiff <- CigarettesSW$taxs - CigarettesSW$tax

# IV regression of log quantity on log price; the variable after `|` (tdiff)
# is the instrument for lprice.
iv1 <- ivreg(lquant ~ lprice | tdiff, data = CigarettesSW)
summary(iv1)
##
## Call:
## ivreg(formula = lquant ~ lprice | tdiff, data = CigarettesSW)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.596376 -0.105963 -0.009352 0.102347 0.525601
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.68478 0.42253 18.187 < 2e-16 ***
## lprice -0.61385 0.08579 -7.155 1.81e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1847 on 94 degrees of freedom
## Multiple R-Squared: 0.4314, Adjusted R-squared: 0.4254
## Wald test: 51.19 on 1 and 94 DF, p-value: 1.811e-10
Here tdiff is the instrumental variable for lprice.
Compare this to an ordinary linear model (OLS) in which tdiff enters as a regular regressor instead of as an instrument:
# OLS benchmark: regress log quantity on log price, with tdiff entered as an
# ordinary regressor rather than as an instrument.
iv1_1 <- lm(
  lquant ~ lprice + tdiff,
  data = CigarettesSW
)
summary(iv1_1)
##
## Call:
## lm(formula = lquant ~ lprice + tdiff, data = CigarettesSW)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61852 -0.08462 -0.01197 0.08937 0.53732
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.888186 0.416150 16.552 < 2e-16 ***
## lprice -0.442058 0.089510 -4.939 3.45e-06 ***
## tdiff -0.008625 0.006183 -1.395 0.166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1821 on 93 degrees of freedom
## Multiple R-squared: 0.4531, Adjusted R-squared: 0.4413
## F-statistic: 38.52 on 2 and 93 DF, p-value: 6.503e-13
Note that we can have more than one instrument, and we can also include exogenous control variables.
`ivreg(Y ~ X + W | W + Z, ...)`
Let Y be the outcome (dependent) variable of interest (lquant), X be the endogenous variable (lprice), W be any exogenous regressors, not including instruments, and Z be the instruments (tdiff).
Important note: Endogenous variables (X) can only appear before the vertical line; instruments (Z) can only appear after the vertical line; exogenous regressors that are not instruments (W) must appear both before and after the vertical line.
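Let’s add an exogenous regressor: log income (lincome, computed above as log(income); note that this is not per capita income unless income is first divided by population). Let’s also add another instrument: the real tax on cigarettes, tax/cpi.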
# IV regression with lincome as an exogenous control (listed on both sides of
# `|`) and two instruments for lprice: tdiff and I(tax/cpi).
iv2 <- ivreg(
  lquant ~ lprice + lincome | lincome + tdiff + I(tax / cpi),
  data = CigarettesSW
)
# Summarise using the `sandwich` covariance estimator and request the
# instrument diagnostic tests.
summary(iv2, vcov = sandwich, diagnostics = TRUE)
##
## Call:
## ivreg(formula = lquant ~ lprice + lincome | lincome + tdiff +
## I(tax/cpi), data = CigarettesSW)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58551 -0.10768 -0.01146 0.11466 0.52787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.86424 0.40173 19.576 < 2e-16 ***
## lprice -0.65683 0.07015 -9.363 4.55e-15 ***
## lincome 0.00179 0.01875 0.095 0.924
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 2 92 77.914 <2e-16 ***
## Wu-Hausman 1 92 6.480 0.0126 *
## Sargan 1 NA 1.726 0.1889
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1879 on 93 degrees of freedom
## Multiple R-Squared: 0.4178, Adjusted R-squared: 0.4053
## Wald test: 48.49 on 2 and 93 DF, p-value: 3.749e-15
Diagnostic tests:
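Weak instruments: The null hypothesis is essentially that we have weak instruments, so a rejection (as here, p < 2e-16) means the instruments are sufficiently correlated with the endogenous regressor.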
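Wu-Hausman: This tests the consistency of the OLS estimates under the assumption that the IV is consistent. A rejection (as here, p = 0.0126) indicates that OLS is not consistent, i.e. endogeneity is present and the IV estimates should be preferred.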
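Sargan: This tests the overidentifying restrictions, which is only possible when there are more instruments than endogenous regressors; the null hypothesis is that all instruments are valid. If the null is rejected, it means that at least one of our instruments is invalid, and possibly all of them. Here the null is not rejected (p = 0.1889).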