# Load the package
library(wooldridge)
##Part1
# Load the MEAP00 dataset
data <- wooldridge::meap00_01
library(sandwich)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Estimate the OLS model
ols_model <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = data)
# Usual standard errors
summary(ols_model)
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.326 -8.758 0.914 9.164 51.416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.93240 19.96170 4.605 4.42e-06 ***
## lunch -0.44874 0.01464 -30.648 < 2e-16 ***
## log(enroll) -5.39915 0.94041 -5.741 1.11e-08 ***
## log(exppp) 3.52475 2.09785 1.680 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3718
## F-statistic: 334.6 on 3 and 1688 DF, p-value: < 2.2e-16
robust_se <- coeftest(ols_model, vcov = vcovHC(ols_model, type = "HC0"))
robust_se
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.932404 23.059797 3.9867 6.986e-05 ***
## lunch -0.448743 0.016565 -27.0903 < 2.2e-16 ***
## log(enroll) -5.399152 1.129837 -4.7787 1.917e-06 ***
## log(exppp) 3.524751 2.350953 1.4993 0.134
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Part (ii): White Test for Heteroskedasticity
# Obtain residuals and squared residuals
residuals_squared <- residuals(ols_model)^2
# Auxiliary regression for White test
aux_model <- lm(residuals_squared ~ lunch + log(enroll) + log(exppp) +
I(lunch^2) + I(log(enroll)^2) + I(log(exppp)^2) +
lunch:log(enroll) + lunch:log(exppp) + log(enroll):log(exppp), data = data)
# F-test statistic and p-value
F_stat <- summary(aux_model)$fstatistic
p_value <- pf(F_stat[1], F_stat[2], F_stat[3], lower.tail = FALSE)
list(F_statistic = F_stat[1], p_value = p_value)
## $F_statistic
## value
## 34.63394
##
## $p_value
## value
## 1.857979e-56
##Part (iii): Fitted Values and WLS Estimation
# Obtain the log of squared residuals
log_residuals_squared <- log(residuals_squared)
# Fit the auxiliary regression to obtain weights
aux_reg <- lm(log_residuals_squared ~ math4 + I(math4^2), data = data)
log_fitted <- predict(aux_reg)
h_hat <- exp(log_fitted)
# WLS estimation using weights
wls_model <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = data, weights = 1 / h_hat)
# Compare WLS and OLS coefficients
summary(ols_model)
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.326 -8.758 0.914 9.164 51.416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.93240 19.96170 4.605 4.42e-06 ***
## lunch -0.44874 0.01464 -30.648 < 2e-16 ***
## log(enroll) -5.39915 0.94041 -5.741 1.11e-08 ***
## log(exppp) 3.52475 2.09785 1.680 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3718
## F-statistic: 334.6 on 3 and 1688 DF, p-value: < 2.2e-16
##Part (iv): WLS Standard Errors (Mis-specification Check)
# Robust standard errors for WLS
robust_se_wls <- coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC0"))
# Compare usual WLS and robust WLS standard errors
summary(wls_model)
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data,
## weights = 1/h_hat)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -4.1285 -1.4897 0.1499 1.3713 4.6178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.34591 15.05791 3.211 0.001349 **
## lunch -0.25518 0.01182 -21.596 < 2e-16 ***
## log(enroll) -1.65287 0.71742 -2.304 0.021348 *
## log(exppp) 5.36162 1.56806 3.419 0.000643 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.706 on 1688 degrees of freedom
## Multiple R-squared: 0.2179, Adjusted R-squared: 0.2165
## F-statistic: 156.8 on 3 and 1688 DF, p-value: < 2.2e-16
robust_se_wls
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.345914 15.180620 3.1847 0.0014754 **
## lunch -0.255175 0.011944 -21.3649 < 2.2e-16 ***
## log(enroll) -1.652872 0.701273 -2.3570 0.0185389 *
## log(exppp) 5.361620 1.586354 3.3798 0.0007418 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Part (v): Precision of OLS vs. WLS
# Compare standard errors of OLS and WLS
ols_se <- sqrt(diag(vcov(ols_model)))
wls_se <- sqrt(diag(vcov(wls_model)))
comparison <- data.frame(
Variable = names(ols_se),
OLS_SE = ols_se,
WLS_SE = wls_se
)
comparison
## Variable OLS_SE WLS_SE
## (Intercept) (Intercept) 19.96169801 15.05790988
## lunch lunch 0.01464203 0.01181573
## log(enroll) log(enroll) 0.94041163 0.71741850
## log(exppp) log(exppp) 2.09784606 1.56806479