library(wooldridge)
data("meap00_01", package = "wooldridge")
head(meap00_01)
## dcode bcode math4 read4 lunch enroll exppp lenroll lexppp
## 1 1010 4937 83.3 77.8 40.60 468 5870.673 6.148468 8.677725
## 2 3010 790 85.7 60.0 12.75 251 4824.836 5.525453 8.481532
## 3 3010 1403 77.3 59.1 17.08 439 4358.772 6.084499 8.379946
## 4 3010 4056 85.2 67.0 23.17 561 4701.396 6.329721 8.455615
## 5 3020 922 91.8 67.7 25.57 442 4311.984 6.091310 8.369153
## 6 3020 2864 87.7 80.0 27.30 381 4507.331 5.942800 8.413461
str(meap00_01)
## 'data.frame': 1692 obs. of 9 variables:
## $ dcode : num 1010 3010 3010 3010 3020 3020 3020 3030 3030 3030 ...
## $ bcode : int 4937 790 1403 4056 922 2864 4851 881 2748 4469 ...
## $ math4 : num 83.3 85.7 77.3 85.2 91.8 ...
## $ read4 : num 77.8 60 59.1 67 67.7 ...
## $ lunch : num 40.6 12.8 17.1 23.2 25.6 ...
## $ enroll : int 468 251 439 561 442 381 274 326 273 285 ...
## $ exppp : num 5871 4825 4359 4701 4312 ...
## $ lenroll: num 6.15 5.53 6.08 6.33 6.09 ...
## $ lexppp : num 8.68 8.48 8.38 8.46 8.37 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
#(i)
library(sandwich)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
model <- lm(math4 ~ lunch + lenroll + lexppp, data = meap00_01)
summary(model)
##
## Call:
## lm(formula = math4 ~ lunch + lenroll + lexppp, data = meap00_01)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.326 -8.758 0.914 9.164 51.416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.93246 19.96170 4.605 4.42e-06 ***
## lunch -0.44874 0.01464 -30.648 < 2e-16 ***
## lenroll -5.39915 0.94041 -5.741 1.11e-08 ***
## lexppp 3.52474 2.09785 1.680 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3718
## F-statistic: 334.6 on 3 and 1688 DF, p-value: < 2.2e-16
coeftest(model, vcov = vcovHC(model, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.932462 23.087101 3.9820 7.124e-05 ***
## lunch -0.448743 0.016584 -27.0583 < 2.2e-16 ***
## lenroll -5.399152 1.131175 -4.7730 1.971e-06 ***
## lexppp 3.524744 2.353736 1.4975 0.1344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(ii)
residuals <- resid(model)
squared_residuals <- residuals^2
white_test <- lm(squared_residuals ~ lunch + lenroll + lexppp + I(lunch^2) + I(lenroll^2) + I(lexppp^2), data = meap00_01)
#F-test for heteroskedasticity
anova(white_test)
## Analysis of Variance Table
##
## Response: squared_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## lunch 1 36754420 36754420 280.1021 < 2.2e-16 ***
## lenroll 1 8213 8213 0.0626 0.8024736
## lexppp 1 1578 1578 0.0120 0.9126907
## I(lunch^2) 1 470701 470701 3.5872 0.0583985 .
## I(lenroll^2) 1 1564795 1564795 11.9252 0.0005675 ***
## I(lexppp^2) 1 607896 607896 4.6327 0.0315094 *
## Residuals 1685 221102259 131218
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(iii)
log_u_squared <- log(squared_residuals)
variance_model <- lm(log_u_squared ~ lunch + lenroll + lexppp, data = meap00_01)
weights <- exp(fitted(variance_model))
wls_model <- lm(math4 ~ lunch + lenroll + lexppp, data = meap00_01, weights = 1 / weights)
summary(wls_model)
##
## Call:
## lm(formula = math4 ~ lunch + lenroll + lexppp, data = meap00_01,
## weights = 1/weights)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -9.3305 -1.1915 0.1297 1.3735 3.7803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.13394 16.76071 3.409 0.000668 ***
## lunch -0.45148 0.01499 -30.120 < 2e-16 ***
## lenroll -3.14144 0.86138 -3.647 0.000273 ***
## lexppp 6.04688 1.69751 3.562 0.000378 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.888 on 1688 degrees of freedom
## Multiple R-squared: 0.3505, Adjusted R-squared: 0.3494
## F-statistic: 303.7 on 3 and 1688 DF, p-value: < 2.2e-16
#(iv)
coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.133943 18.552854 3.0795 0.0021068 **
## lunch -0.451482 0.014271 -31.6353 < 2.2e-16 ***
## lenroll -3.141439 1.017973 -3.0860 0.0020618 **
## lexppp 6.046880 1.804487 3.3510 0.0008229 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(v)
ols_coef <- summary(model)$coefficients["lexppp", ]
wls_coef <- summary(wls_model)$coefficients["lexppp", ]
ols_coef
## Estimate Std. Error t value Pr(>|t|)
## 3.52474429 2.09784596 1.68017307 0.09310868
wls_coef
## Estimate Std. Error t value Pr(>|t|)
## 6.0468796465 1.6975069157 3.5622120833 0.0003779527