# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest) # For Breusch-Pagan/White's test
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich) # For robust standard errors
# Simulate a dataset similar to MEAP00
set.seed(123) # For reproducibility
n <- 500 # Number of observations
data <- data.frame(
math4 = rnorm(n, mean = 70, sd = 10), # Simulated test scores
lunch = rbinom(n, 1, 0.4), # Binary variable: free/reduced lunch
enroll = runif(n, min = 50, max = 1500), # Number of enrolled students
exppp = runif(n, min = 5000, max = 15000) # Expenditure per pupil
)
# (i) Estimate the model and compare standard errors
model_ols <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = data)
# Print the usual standard errors
cat("OLS Model Summary:\n")
## OLS Model Summary:
print(summary(model_ols))
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.709 -6.232 -0.037 6.446 31.608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.1178 13.1904 4.254 2.51e-05 ***
## lunch 0.2242 0.8993 0.249 0.803
## log(enroll) 0.5850 0.5655 1.034 0.301
## log(exppp) 1.1340 1.3785 0.823 0.411
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.74 on 496 degrees of freedom
## Multiple R-squared: 0.003581, Adjusted R-squared: -0.002445
## F-statistic: 0.5942 on 3 and 496 DF, p-value: 0.619
# Calculate robust standard errors using White's correction
robust_se <- coeftest(model_ols, vcov = vcovHC(model_ols, type = "HC1"))
cat("\nRobust Standard Errors:\n")
##
## Robust Standard Errors:
print(robust_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.11781 13.69623 4.0973 4.882e-05 ***
## lunch 0.22424 0.90038 0.2490 0.8034
## log(enroll) 0.58497 0.55936 1.0458 0.2962
## log(exppp) 1.13395 1.42590 0.7953 0.4268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# (ii) Apply White's test for heteroskedasticity
white_test <- bptest(model_ols, ~ fitted(model_ols) + I(fitted(model_ols)^2), data = data)
cat("\nWhite's Test for Heteroskedasticity:\n")
##
## White's Test for Heteroskedasticity:
print(white_test)
##
## studentized Breusch-Pagan test
##
## data: model_ols
## BP = 1.4902, df = 2, p-value = 0.4747
# (iii) WLS Estimation
# Step 1: Calculate log of squared residuals from OLS
log_u2 <- log(resid(model_ols)^2)
# Step 2: Fit auxiliary regression of log(u^2) on predictors
aux_model <- lm(log_u2 ~ math4 + I(math4^2), data = data)
# Step 3: Obtain fitted values (g-hat) and exponentiate to get weights (h-hat)
data$h_hat <- exp(fitted(aux_model))
# Step 4: Perform WLS regression using weights
model_wls <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = data, weights = 1/data$h_hat)
# Print WLS model summary
cat("\nWLS Model Summary:\n")
##
## WLS Model Summary:
print(summary(model_wls))
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = data,
## weights = 1/data$h_hat)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -2.01720 -1.38088 -0.05323 1.36806 2.10045
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.85928 7.39208 9.721 <2e-16 ***
## lunch 0.92662 0.49634 1.867 0.0625 .
## log(enroll) -0.04093 0.30980 -0.132 0.8949
## log(exppp) -0.18185 0.77204 -0.236 0.8139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.393 on 496 degrees of freedom
## Multiple R-squared: 0.007107, Adjusted R-squared: 0.001101
## F-statistic: 1.183 on 3 and 496 DF, p-value: 0.3155
# (iv) Compare WLS standard errors with robust standard errors
# Calculate robust standard errors for WLS
robust_se_wls <- coeftest(model_wls, vcov = vcovHC(model_wls, type = "HC1"))
cat("\nWLS Robust Standard Errors:\n")
##
## WLS Robust Standard Errors:
print(robust_se_wls)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.85928 6.67950 10.7582 < 2e-16 ***
## lunch 0.92662 0.44418 2.0861 0.03748 *
## log(enroll) -0.04093 0.27366 -0.1496 0.88117
## log(exppp) -0.18185 0.70256 -0.2588 0.79586
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# (v) Compare precision of OLS and WLS for 'log(exppp)'
cat("\nStandard Errors for 'log(exppp)':\n")
##
## Standard Errors for 'log(exppp)':
cat("OLS Standard Error:", summary(model_ols)$coefficients["log(exppp)", 2], "\n")
## OLS Standard Error: 1.378495
cat("WLS Standard Error:", summary(model_wls)$coefficients["log(exppp)", 2], "\n")
## WLS Standard Error: 0.7720417