library(lmtest)  
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)  
library(ggplot2)

set.seed(123)  # Simulate a dataset if MEAP00 is unavailable
MEAP00 <- data.frame(
  math4 = rnorm(100, mean = 70, sd = 10),
  lunch = rnorm(100, mean = 40, sd = 15),
  enroll = rpois(100, lambda = 500),
  exppp = rnorm(100, mean = 8000, sd = 1500)
)

# (i) OLS estimation
model_ols <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00)
summary(model_ols)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.1125  -6.6440   0.5579   5.9223  21.2397 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -58.68525  137.36924  -0.427    0.670
## lunch        -0.03259    0.06359  -0.512    0.609
## log(enroll)  25.43596   21.20973   1.199    0.233
## log(exppp)   -3.03242    4.56309  -0.665    0.508
## 
## Residual standard error: 9.17 on 96 degrees of freedom
## Multiple R-squared:  0.02136,    Adjusted R-squared:  -0.009226 
## F-statistic: 0.6983 on 3 and 96 DF,  p-value: 0.5553
# Compute robust standard errors
robust_se <- sqrt(diag(vcovHC(model_ols, type = "HC1")))
cat("Robust Standard Errors:\n")
## Robust Standard Errors:
print(robust_se)
##  (Intercept)        lunch  log(enroll)   log(exppp) 
## 127.42724771   0.06034761  19.30025571   4.27110219
# (ii) White test for heteroskedasticity
white_test <- bptest(model_ols, ~ fitted(model_ols) + I(fitted(model_ols)^2), data = MEAP00)
cat("White Test for Heteroskedasticity:\n")
## White Test for Heteroskedasticity:
print(white_test)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_ols
## BP = 3.5074, df = 2, p-value = 0.1731
# (iii) WLS estimation using fitted variance
# Step 1: Compute residuals and log squared residuals
log_residuals_sq <- log(residuals(model_ols)^2)

# Step 2: Auxiliary regression of log(residuals^2) on fitted values
fitted_vals <- fitted(model_ols)
aux_model <- lm(log_residuals_sq ~ fitted_vals + I(fitted_vals^2), data = MEAP00)
summary(aux_model)
## 
## Call:
## lm(formula = log_residuals_sq ~ fitted_vals + I(fitted_vals^2), 
##     data = MEAP00)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5687 -0.9125  0.3133  1.3351  3.4527 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -611.78463  384.40482  -1.592    0.115
## fitted_vals        16.95350   10.84348   1.563    0.121
## I(fitted_vals^2)   -0.11672    0.07645  -1.527    0.130
## 
## Residual standard error: 1.782 on 97 degrees of freedom
## Multiple R-squared:  0.1039, Adjusted R-squared:  0.08539 
## F-statistic: 5.621 on 2 and 97 DF,  p-value: 0.004899
# Step 3: Calculate weights as exp(fitted values from auxiliary model)
weights <- exp(fitted(aux_model))

# Step 4: Perform WLS using these weights
model_wls <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00, weights = 1 / weights)
summary(model_wls)
## 
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00, 
##     weights = 1/weights)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5463 -1.0831  0.0674  1.2021  3.9119 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  13.109090 127.068624   0.103    0.918
## lunch        -0.008279   0.052447  -0.158    0.875
## log(enroll)  15.930771  18.282727   0.871    0.386
## log(exppp)   -4.561264   4.459928  -1.023    0.309
## 
## Residual standard error: 1.743 on 96 degrees of freedom
## Multiple R-squared:  0.02353,    Adjusted R-squared:  -0.00699 
## F-statistic: 0.7709 on 3 and 96 DF,  p-value: 0.5131
# (iv) Compute robust standard errors for WLS
robust_se_wls <- sqrt(diag(vcovHC(model_wls, type = "HC1")))
cat("WLS Robust Standard Errors:\n")
## WLS Robust Standard Errors:
print(robust_se_wls)
##  (Intercept)        lunch  log(enroll)   log(exppp) 
## 112.04560020   0.05701689  16.81632992   4.53399986
# (v) Compare OLS and WLS coefficients
cat("OLS Coefficient for log(exppp): ", coef(model_ols)["log(exppp)"], "\n")
## OLS Coefficient for log(exppp):  -3.032423
cat("WLS Coefficient for log(exppp): ", coef(model_wls)["log(exppp)"], "\n")
## WLS Coefficient for log(exppp):  -4.561264