# Load the package
library(wooldridge)
library(sandwich)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
##Part1
# Load the MEAP00 dataset
data <- wooldridge::meap00_01

# Estimate the OLS model
ols_model <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = data)

# Usual standard errors
summary(ols_model)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.326  -8.758   0.914   9.164  51.416 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 91.93240   19.96170   4.605 4.42e-06 ***
## lunch       -0.44874    0.01464 -30.648  < 2e-16 ***
## log(enroll) -5.39915    0.94041  -5.741 1.11e-08 ***
## log(exppp)   3.52475    2.09785   1.680   0.0931 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared:  0.3729, Adjusted R-squared:  0.3718 
## F-statistic: 334.6 on 3 and 1688 DF,  p-value: < 2.2e-16
robust_se <- coeftest(ols_model, vcov = vcovHC(ols_model, type = "HC0"))
robust_se
## 
## t test of coefficients:
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 91.932404  23.059797   3.9867 6.986e-05 ***
## lunch       -0.448743   0.016565 -27.0903 < 2.2e-16 ***
## log(enroll) -5.399152   1.129837  -4.7787 1.917e-06 ***
## log(exppp)   3.524751   2.350953   1.4993     0.134    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Part (ii): White Test for Heteroskedasticity
# Obtain residuals and squared residuals
residuals_squared <- residuals(ols_model)^2

# Auxiliary regression for White test
aux_model <- lm(residuals_squared ~ lunch + log(enroll) + log(exppp) + 
                I(lunch^2) + I(log(enroll)^2) + I(log(exppp)^2) + 
                lunch:log(enroll) + lunch:log(exppp) + log(enroll):log(exppp), data = data)

# F-test statistic and p-value
F_stat <- summary(aux_model)$fstatistic
p_value <- pf(F_stat[1], F_stat[2], F_stat[3], lower.tail = FALSE)
list(F_statistic = F_stat[1], p_value = p_value)
## $F_statistic
##    value 
## 34.63394 
## 
## $p_value
##        value 
## 1.857979e-56
##Part (iii): Fitted Values and WLS Estimation
# Obtain the log of squared residuals
log_residuals_squared <- log(residuals_squared)

# Fit the auxiliary regression to obtain weights
aux_reg <- lm(log_residuals_squared ~ math4 + I(math4^2), data = data)
log_fitted <- predict(aux_reg)
h_hat <- exp(log_fitted)

# WLS estimation using weights
wls_model <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = data, weights = 1 / h_hat)

# Compare WLS and OLS coefficients
summary(ols_model)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.326  -8.758   0.914   9.164  51.416 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 91.93240   19.96170   4.605 4.42e-06 ***
## lunch       -0.44874    0.01464 -30.648  < 2e-16 ***
## log(enroll) -5.39915    0.94041  -5.741 1.11e-08 ***
## log(exppp)   3.52475    2.09785   1.680   0.0931 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared:  0.3729, Adjusted R-squared:  0.3718 
## F-statistic: 334.6 on 3 and 1688 DF,  p-value: < 2.2e-16
##Part (iv): WLS Standard Errors (Mis-specification Check)
# Robust standard errors for WLS
robust_se_wls <- coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC0"))

# Compare usual WLS and robust WLS standard errors
summary(wls_model)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data, 
##     weights = 1/h_hat)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1285 -1.4897  0.1499  1.3713  4.6178 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 48.34591   15.05791   3.211 0.001349 ** 
## lunch       -0.25518    0.01182 -21.596  < 2e-16 ***
## log(enroll) -1.65287    0.71742  -2.304 0.021348 *  
## log(exppp)   5.36162    1.56806   3.419 0.000643 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.706 on 1688 degrees of freedom
## Multiple R-squared:  0.2179, Adjusted R-squared:  0.2165 
## F-statistic: 156.8 on 3 and 1688 DF,  p-value: < 2.2e-16
robust_se_wls
## 
## t test of coefficients:
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 48.345914  15.180620   3.1847 0.0014754 ** 
## lunch       -0.255175   0.011944 -21.3649 < 2.2e-16 ***
## log(enroll) -1.652872   0.701273  -2.3570 0.0185389 *  
## log(exppp)   5.361620   1.586354   3.3798 0.0007418 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Part (v): Precision of OLS vs. WLS
# Compare standard errors of OLS and WLS
ols_se <- sqrt(diag(vcov(ols_model)))
wls_se <- sqrt(diag(vcov(wls_model)))
comparison <- data.frame(
  Variable = names(ols_se),
  OLS_SE = ols_se,
  WLS_SE = wls_se
)
comparison
##                Variable      OLS_SE      WLS_SE
## (Intercept) (Intercept) 19.96169801 15.05790988
## lunch             lunch  0.01464203  0.01181573
## log(enroll) log(enroll)  0.94041163  0.71741850
## log(exppp)   log(exppp)  2.09784606  1.56806479