library(dplyr)
library(wooldridge)
library(sandwich)
library(lmtest)
data("meap00_01")

(i)

model_ols <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = meap00_01)
summary(model_ols)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = meap00_01)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -56.326  -8.758   0.914   9.164  51.416 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 91.93240   19.96170   4.605 4.42e-06 ***
## lunch       -0.44874    0.01464 -30.648  < 2e-16 ***
## log(enroll) -5.39915    0.94041  -5.741 1.11e-08 ***
## log(exppp)   3.52475    2.09785   1.680   0.0931 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared:  0.3729, Adjusted R-squared:  0.3718 
## F-statistic: 334.6 on 3 and 1688 DF,  p-value: < 2.2e-16
# Robust standard errors
coeftest(model_ols, vcov = vcovHC(model_ols, type = "HC1"))
## 
## t test of coefficients:
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 91.932404  23.087103   3.9820 7.125e-05 ***
## lunch       -0.448743   0.016584 -27.0583 < 2.2e-16 ***
## log(enroll) -5.399152   1.131175  -4.7730 1.971e-06 ***
## log(exppp)   3.524751   2.353737   1.4975    0.1344    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(ii)

bptest(model_ols, ~ fitted(model_ols)^2, data = meap00_01)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_ols
## BP = 227.12, df = 1, p-value < 2.2e-16

The F test value of 227.12 with a p-value < 2.2e-16 indicates strong evidence of heteroskedasticity in the residuals of the OLS model. This suggests that the variance of the errors is not constant, and robust standard errors should be used for reliable inference.

(iii)

meap00_01$residuals <- residuals(model_ols)
meap00_01$log_resid2 <- log(meap00_01$residuals^2)
# Fit auxiliary regression
aux_model <- lm(log_resid2 ~ fitted(model_ols) + I(fitted(model_ols)^2), data = meap00_01)
summary(aux_model)
## 
## Call:
## lm(formula = log_resid2 ~ fitted(model_ols) + I(fitted(model_ols)^2), 
##     data = meap00_01)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.2227  -0.9116   0.4911   1.5356   4.8347 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)  
## (Intercept)             4.2075441  1.9202238   2.191   0.0286 *
## fitted(model_ols)       0.0596369  0.0561472   1.062   0.2883  
## I(fitted(model_ols)^2) -0.0008415  0.0004015  -2.096   0.0363 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.224 on 1689 degrees of freedom
## Multiple R-squared:  0.08763,    Adjusted R-squared:  0.08655 
## F-statistic: 81.11 on 2 and 1689 DF,  p-value: < 2.2e-16
meap00_01$g_hat <- fitted(aux_model)
meap00_01$weights <- exp(meap00_01$g_hat)
# Run WLS regression
model_wls <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = meap00_01, weights = 1 / meap00_01$weights)
summary(model_wls)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = meap00_01, 
##     weights = 1/meap00_01$weights)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7122  -1.1914   0.1403   1.3741   3.9151 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 50.47815   16.51032   3.057 0.002268 ** 
## lunch       -0.44859    0.01461 -30.697  < 2e-16 ***
## log(enroll) -2.64728    0.83594  -3.167 0.001569 ** 
## log(exppp)   6.47420    1.68588   3.840 0.000127 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.9 on 1688 degrees of freedom
## Multiple R-squared:   0.36,  Adjusted R-squared:  0.3588 
## F-statistic: 316.5 on 3 and 1688 DF,  p-value: < 2.2e-16

(iv)

# Robust standard errors for WLS
coeftest(model_wls, vcov = vcovHC(model_wls, type = "HC1"))
## 
## t test of coefficients:
## 
##              Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept) 50.478152  18.925179   2.6672 0.0077206 ** 
## lunch       -0.448592   0.014254 -31.4711 < 2.2e-16 ***
## log(enroll) -2.647282   1.054608  -2.5102 0.0121590 *  
## log(exppp)   6.474200   1.812868   3.5712 0.0003652 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The changes in standard errors are minimal, and the results remain consistent: lunch and log(enroll) remain highly significant, while log(exppp) retains its positive and significant effect.

(v)

# Coefficients comparison
summary(model_ols)$coefficients
##               Estimate  Std. Error    t value      Pr(>|t|)
## (Intercept) 91.9324043 19.96169801   4.605440  4.424155e-06
## lunch       -0.4487434  0.01464203 -30.647632 2.240954e-164
## log(enroll) -5.3991517  0.94041163  -5.741264  1.112079e-08
## log(exppp)   3.5247506  2.09784606   1.680176  9.310811e-02
summary(model_wls)$coefficients
##               Estimate  Std. Error    t value      Pr(>|t|)
## (Intercept) 50.4781522 16.51031594   3.057370  2.267937e-03
## lunch       -0.4485915  0.01461347 -30.697120 8.446615e-165
## log(enroll) -2.6472816  0.83593620  -3.166846  1.568566e-03
## log(exppp)   6.4741999  1.68587676   3.840257  1.274465e-04

WLS appears more precise, particularly in estimating the effect of log(exppp), where the significance improves compared to OLS.