library(wooldridge)
data("meap00_01", package = "wooldridge")
head(meap00_01)
## dcode bcode math4 read4 lunch enroll exppp lenroll lexppp
## 1 1010 4937 83.3 77.8 40.60 468 5870.673 6.148468 8.677725
## 2 3010 790 85.7 60.0 12.75 251 4824.836 5.525453 8.481532
## 3 3010 1403 77.3 59.1 17.08 439 4358.772 6.084499 8.379946
## 4 3010 4056 85.2 67.0 23.17 561 4701.396 6.329721 8.455615
## 5 3020 922 91.8 67.7 25.57 442 4311.984 6.091310 8.369153
## 6 3020 2864 87.7 80.0 27.30 381 4507.331 5.942800 8.413461
str(meap00_01)
## 'data.frame': 1692 obs. of 9 variables:
## $ dcode : num 1010 3010 3010 3010 3020 3020 3020 3030 3030 3030 ...
## $ bcode : int 4937 790 1403 4056 922 2864 4851 881 2748 4469 ...
## $ math4 : num 83.3 85.7 77.3 85.2 91.8 ...
## $ read4 : num 77.8 60 59.1 67 67.7 ...
## $ lunch : num 40.6 12.8 17.1 23.2 25.6 ...
## $ enroll : int 468 251 439 561 442 381 274 326 273 285 ...
## $ exppp : num 5871 4825 4359 4701 4312 ...
## $ lenroll: num 6.15 5.53 6.08 6.33 6.09 ...
## $ lexppp : num 8.68 8.48 8.38 8.46 8.37 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
#(i)
library(sandwich)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
model <- lm(math4 ~ lunch + lenroll + lexppp, data = meap00_01)
summary(model)
##
## Call:
## lm(formula = math4 ~ lunch + lenroll + lexppp, data = meap00_01)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.326 -8.758 0.914 9.164 51.416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.93246 19.96170 4.605 4.42e-06 ***
## lunch -0.44874 0.01464 -30.648 < 2e-16 ***
## lenroll -5.39915 0.94041 -5.741 1.11e-08 ***
## lexppp 3.52474 2.09785 1.680 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3718
## F-statistic: 334.6 on 3 and 1688 DF, p-value: < 2.2e-16
coeftest(model, vcov = vcovHC(model, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.932462 23.087101 3.9820 7.124e-05 ***
## lunch -0.448743 0.016584 -27.0583 < 2.2e-16 ***
## lenroll -5.399152 1.131175 -4.7730 1.971e-06 ***
## lexppp 3.524744 2.353736 1.4975 0.1344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Robust SEs are generally larger than the usual SEs, which is expected when heteroskedasticity is present.Thelexppp coefficient is particularly affected, as it becomes statistically insignificant with robust SEs, highlighting the importance of accounting for heteroskedasticity in inference.
#(ii)
residuals <- resid(model)
squared_residuals <- residuals^2
white_test <- lm(squared_residuals ~ lunch + lenroll + lexppp + I(lunch^2) + I(lenroll^2) + I(lexppp^2), data = meap00_01)
#F-test for heteroskedasticity
anova(white_test)
## Analysis of Variance Table
##
## Response: squared_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## lunch 1 36754420 36754420 280.1021 < 2.2e-16 ***
## lenroll 1 8213 8213 0.0626 0.8024736
## lexppp 1 1578 1578 0.0120 0.9126907
## I(lunch^2) 1 470701 470701 3.5872 0.0583985 .
## I(lenroll^2) 1 1564795 1564795 11.9252 0.0005675 ***
## I(lexppp^2) 1 607896 607896 4.6327 0.0315094 *
## Residuals 1685 221102259 131218
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#the F-statistic for the test for heteroskedasticity is significant with very low p-values for key terms like lunch, lenroll^2, lexppp^2, confirming the presence of heteroskedasticity in the data.
#(iii)
log_u_squared <- log(squared_residuals)
variance_model <- lm(log_u_squared ~ lunch + lenroll + lexppp, data = meap00_01)
weights <- exp(fitted(variance_model))
wls_model <- lm(math4 ~ lunch + lenroll + lexppp, data = meap00_01, weights = 1 / weights)
summary(wls_model)
##
## Call:
## lm(formula = math4 ~ lunch + lenroll + lexppp, data = meap00_01,
## weights = 1/weights)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -9.3305 -1.1915 0.1297 1.3735 3.7803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.13394 16.76071 3.409 0.000668 ***
## lunch -0.45148 0.01499 -30.120 < 2e-16 ***
## lenroll -3.14144 0.86138 -3.647 0.000273 ***
## lexppp 6.04688 1.69751 3.562 0.000378 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.888 on 1688 degrees of freedom
## Multiple R-squared: 0.3505, Adjusted R-squared: 0.3494
## F-statistic: 303.7 on 3 and 1688 DF, p-value: < 2.2e-16
#There are notable differences between the OLS and WLS estimates. The intercept and lenroll coefficients change substantially, indicating that heteroskedasticity was impacting the OLS estimates. The lexppp coefficient increases, while lunch remains nearly unchanged. The substantial changes in certain coefficients (intercept, lenroll, lexppp) suggest that heteroskedasticity was influencing the OLS results. The WLS model adjusts for this, leading to more reliable coefficient estimates, particularly for variables like lenroll and lexppp.
#(iv)
coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.133943 18.552854 3.0795 0.0021068 **
## lunch -0.451482 0.014271 -31.6353 < 2.2e-16 ***
## lenroll -3.141439 1.017973 -3.0860 0.0020618 **
## lexppp 6.046880 1.804487 3.3510 0.0008229 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#There are small to moderate differences between the usual WLS standard errors and the robust standard errors: Larger differences are observed for the Intercept, lenroll, and lexppp, where the robust standard errors are higher. In the other hand, the lunch coefficient shows a slightly smaller robust standard error.
#(v)
ols_coef <- summary(model)$coefficients["lexppp", ]
wls_coef <- summary(wls_model)$coefficients["lexppp", ]
ols_coef
## Estimate Std. Error t value Pr(>|t|)
## 3.52474429 2.09784596 1.68017307 0.09310868
wls_coef
## Estimate Std. Error t value Pr(>|t|)
## 6.0468796465 1.6975069157 3.5622120833 0.0003779527
#The WLS standard error (1.6975) is smaller than the OLS standard error (2.0978) for the lexppp coefficient. So, the WLS model appears to be more precise for estimating the effect of spending (lexppp) on math4. This improvement in precision likely arises from the WLS model's adjustment for heteroskedasticity, ensuring that variability in the data does not inflate the uncertainty of the estimate.