library(wooldridge)
data("meap00_01", package = "wooldridge")
head(meap00_01)
##   dcode bcode math4 read4 lunch enroll    exppp  lenroll   lexppp
## 1  1010  4937  83.3  77.8 40.60    468 5870.673 6.148468 8.677725
## 2  3010   790  85.7  60.0 12.75    251 4824.836 5.525453 8.481532
## 3  3010  1403  77.3  59.1 17.08    439 4358.772 6.084499 8.379946
## 4  3010  4056  85.2  67.0 23.17    561 4701.396 6.329721 8.455615
## 5  3020   922  91.8  67.7 25.57    442 4311.984 6.091310 8.369153
## 6  3020  2864  87.7  80.0 27.30    381 4507.331 5.942800 8.413461
str(meap00_01)
## 'data.frame':    1692 obs. of  9 variables:
##  $ dcode  : num  1010 3010 3010 3010 3020 3020 3020 3030 3030 3030 ...
##  $ bcode  : int  4937 790 1403 4056 922 2864 4851 881 2748 4469 ...
##  $ math4  : num  83.3 85.7 77.3 85.2 91.8 ...
##  $ read4  : num  77.8 60 59.1 67 67.7 ...
##  $ lunch  : num  40.6 12.8 17.1 23.2 25.6 ...
##  $ enroll : int  468 251 439 561 442 381 274 326 273 285 ...
##  $ exppp  : num  5871 4825 4359 4701 4312 ...
##  $ lenroll: num  6.15 5.53 6.08 6.33 6.09 ...
##  $ lexppp : num  8.68 8.48 8.38 8.46 8.37 ...
##  - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
#(i)
library(sandwich)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
model <- lm(math4 ~ lunch + lenroll + lexppp, data = meap00_01)
summary(model)
## 
## Call:
## lm(formula = math4 ~ lunch + lenroll + lexppp, data = meap00_01)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.326  -8.758   0.914   9.164  51.416 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 91.93246   19.96170   4.605 4.42e-06 ***
## lunch       -0.44874    0.01464 -30.648  < 2e-16 ***
## lenroll     -5.39915    0.94041  -5.741 1.11e-08 ***
## lexppp       3.52474    2.09785   1.680   0.0931 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared:  0.3729, Adjusted R-squared:  0.3718 
## F-statistic: 334.6 on 3 and 1688 DF,  p-value: < 2.2e-16
coeftest(model, vcov = vcovHC(model, type = "HC1"))
## 
## t test of coefficients:
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 91.932462  23.087101   3.9820 7.124e-05 ***
## lunch       -0.448743   0.016584 -27.0583 < 2.2e-16 ***
## lenroll     -5.399152   1.131175  -4.7730 1.971e-06 ***
## lexppp       3.524744   2.353736   1.4975    0.1344    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(ii)
residuals <- resid(model)
squared_residuals <- residuals^2
white_test <- lm(squared_residuals ~ lunch + lenroll + lexppp + I(lunch^2) + I(lenroll^2) + I(lexppp^2), data = meap00_01)
#F-test for heteroskedasticity
anova(white_test)
## Analysis of Variance Table
## 
## Response: squared_residuals
##                Df    Sum Sq  Mean Sq  F value    Pr(>F)    
## lunch           1  36754420 36754420 280.1021 < 2.2e-16 ***
## lenroll         1      8213     8213   0.0626 0.8024736    
## lexppp          1      1578     1578   0.0120 0.9126907    
## I(lunch^2)      1    470701   470701   3.5872 0.0583985 .  
## I(lenroll^2)    1   1564795  1564795  11.9252 0.0005675 ***
## I(lexppp^2)     1    607896   607896   4.6327 0.0315094 *  
## Residuals    1685 221102259   131218                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(iii)
log_u_squared <- log(squared_residuals)
variance_model <- lm(log_u_squared ~ lunch + lenroll + lexppp, data = meap00_01)
weights <- exp(fitted(variance_model))
wls_model <- lm(math4 ~ lunch + lenroll + lexppp, data = meap00_01, weights = 1 / weights)
summary(wls_model)
## 
## Call:
## lm(formula = math4 ~ lunch + lenroll + lexppp, data = meap00_01, 
##     weights = 1/weights)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3305 -1.1915  0.1297  1.3735  3.7803 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 57.13394   16.76071   3.409 0.000668 ***
## lunch       -0.45148    0.01499 -30.120  < 2e-16 ***
## lenroll     -3.14144    0.86138  -3.647 0.000273 ***
## lexppp       6.04688    1.69751   3.562 0.000378 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.888 on 1688 degrees of freedom
## Multiple R-squared:  0.3505, Adjusted R-squared:  0.3494 
## F-statistic: 303.7 on 3 and 1688 DF,  p-value: < 2.2e-16
#(iv)
coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC1"))
## 
## t test of coefficients:
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 57.133943  18.552854   3.0795 0.0021068 ** 
## lunch       -0.451482   0.014271 -31.6353 < 2.2e-16 ***
## lenroll     -3.141439   1.017973  -3.0860 0.0020618 ** 
## lexppp       6.046880   1.804487   3.3510 0.0008229 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(v)
ols_coef <- summary(model)$coefficients["lexppp", ]
wls_coef <- summary(wls_model)$coefficients["lexppp", ]
ols_coef
##   Estimate Std. Error    t value   Pr(>|t|) 
## 3.52474429 2.09784596 1.68017307 0.09310868
wls_coef
##     Estimate   Std. Error      t value     Pr(>|t|) 
## 6.0468796465 1.6975069157 3.5622120833 0.0003779527