# Load necessary libraries
install.packages("wooldridge") # Install wooldridge package if not already installed
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("sandwich") # Install sandwich package for robust SE
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lmtest") # Install lmtest package for heteroskedasticity tests
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
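# Optional sketch (not part of the run above): a guarded install pattern that only
# installs a package when it is actually missing, so re-knitting does not reinstall
# everything. The object name pkgs is just for illustration.
pkgs <- c("wooldridge", "sandwich", "lmtest")
for (pkg in pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
}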
library(wooldridge) # Access datasets from the wooldridge package
library(sandwich) # For robust standard errors
library(lmtest) # For heteroskedasticity tests
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# STEP 1: Load the MEAP00_01 dataset from the wooldridge package
data("meap00_01", package = "wooldridge") # Load the dataset
MEAP00_1 <- meap00_01 # Assign it to a variable for clarity
# Inspect the dataset to verify it's loaded correctly
str(MEAP00_1) # Check column names and structure
## 'data.frame': 1692 obs. of 9 variables:
## $ dcode : num 1010 3010 3010 3010 3020 3020 3020 3030 3030 3030 ...
## $ bcode : int 4937 790 1403 4056 922 2864 4851 881 2748 4469 ...
## $ math4 : num 83.3 85.7 77.3 85.2 91.8 ...
## $ read4 : num 77.8 60 59.1 67 67.7 ...
## $ lunch : num 40.6 12.8 17.1 23.2 25.6 ...
## $ enroll : int 468 251 439 561 442 381 274 326 273 285 ...
## $ exppp : num 5871 4825 4359 4701 4312 ...
## $ lenroll: num 6.15 5.53 6.08 6.33 6.09 ...
## $ lexppp : num 8.68 8.48 8.38 8.46 8.37 ...
## - attr(*, "time.stamp")= chr "25 Jun 2011 23:03"
summary(MEAP00_1) # Basic summary statistics
## dcode bcode math4 read4
## Min. : 1010 Min. : 1 Min. : 0.00 Min. : 0.00
## 1st Qu.:33215 1st Qu.:1307 1st Qu.: 62.45 1st Qu.: 49.55
## Median :56010 Median :2680 Median : 76.90 Median : 63.30
## Mean :53210 Mean :3163 Mean : 72.69 Mean : 60.69
## 3rd Qu.:78030 3rd Qu.:4551 3rd Qu.: 87.22 3rd Qu.: 74.12
## Max. :83010 Max. :8723 Max. :100.00 Max. :100.00
## lunch enroll exppp lenroll
## Min. : 0.00 Min. : 62.0 Min. : 1268 Min. :4.127
## 1st Qu.: 16.50 1st Qu.: 287.0 1st Qu.: 4595 1st Qu.:5.659
## Median : 34.89 Median : 376.5 Median : 5144 Median :5.931
## Mean : 38.99 Mean : 398.6 Mean : 5264 Mean :5.906
## 3rd Qu.: 58.62 3rd Qu.: 480.0 3rd Qu.: 5817 3rd Qu.:6.174
## Max. :100.00 Max. :1496.0 Max. :11958 Max. :7.311
## lexppp
## Min. :7.145
## 1st Qu.:8.433
## Median :8.546
## Mean :8.551
## 3rd Qu.:8.668
## Max. :9.389
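# Optional sketch: pairwise correlations among the outcome and the (logged)
# regressors as a quick pre-regression check; column names are taken from the
# str() output above.
cor(MEAP00_1[, c("math4", "lunch", "lenroll", "lexppp")])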
# STEP 2: Estimate the model using OLS
model <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00_1)
# Output the summary of the OLS regression
print("OLS Regression Summary:")
## [1] "OLS Regression Summary:"
summary(model)
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.326 -8.758 0.914 9.164 51.416
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.93240 19.96170 4.605 4.42e-06 ***
## lunch -0.44874 0.01464 -30.648 < 2e-16 ***
## log(enroll) -5.39915 0.94041 -5.741 1.11e-08 ***
## log(exppp) 3.52475 2.09785 1.680 0.0931 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.3 on 1688 degrees of freedom
## Multiple R-squared: 0.3729, Adjusted R-squared: 0.3718
## F-statistic: 334.6 on 3 and 1688 DF, p-value: < 2.2e-16
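# From the summary above, the fitted OLS equation is approximately:
#   math4_hat = 91.93 - 0.449*lunch - 5.40*log(enroll) + 3.52*log(exppp)
# Optional sketch: 95% confidence intervals for the OLS coefficients
# (these use the usual, non-robust standard errors).
confint(model, level = 0.95)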
# STEP 3: Compare standard errors (usual vs. robust)
# Usual standard errors are in the summary above; now compute robust standard errors
robust_se <- coeftest(model, vcov = vcovHC(model, type = "HC1"))
print("Robust Standard Errors:")
## [1] "Robust Standard Errors:"
print(robust_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.932404 23.087103 3.9820 7.125e-05 ***
## lunch -0.448743 0.016584 -27.0583 < 2.2e-16 ***
## log(enroll) -5.399152 1.131175 -4.7730 1.971e-06 ***
## log(exppp) 3.524751 2.353737 1.4975 0.1344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
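# Optional sketch: put the usual and HC1-robust standard errors side by side;
# the robust column should match the "Std. Error" column printed just above.
# The object name se_compare is only for illustration.
se_compare <- cbind(
  Usual_SE  = sqrt(diag(vcov(model))),
  Robust_SE = sqrt(diag(vcovHC(model, type = "HC1")))
)
print(se_compare)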
# STEP 4: Conduct the special form of the White test for heteroskedasticity
# (squared residuals regressed on the fitted values and their squares)
white_test <- bptest(model, ~ fitted(model) + I(fitted(model)^2))
print("White Test for Heteroskedasticity:")
## [1] "White Test for Heteroskedasticity:"
print(white_test)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 229.78, df = 2, p-value < 2.2e-16
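# Optional sketch: the full White test, using all squares and cross products of
# the regressors instead of the fitted-value shortcut used above. The object
# name white_full is only for illustration.
white_full <- bptest(
  model,
  ~ lunch * log(enroll) + lunch * log(exppp) + log(enroll) * log(exppp) +
    I(lunch^2) + I(log(enroll)^2) + I(log(exppp)^2),
  data = MEAP00_1
)
print(white_full)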
# STEP 5: Perform WLS estimation
# Log of squared residuals
log_u2 <- log(residuals(model)^2)
# Auxiliary regression: regress log(u^2) on the fitted values and their squares
aux_model <- lm(log_u2 ~ fitted(model) + I(fitted(model)^2))
# Estimated variance function: h_hat = exp(fitted values of the auxiliary regression)
h_hat <- exp(predict(aux_model))
# Use 1/h_hat as the weights in the WLS regression
MEAP00_1$weights <- 1 / h_hat
wls_model <- lm(math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00_1, weights = MEAP00_1$weights)
# Output the summary of the WLS regression
print("WLS Regression Summary:")
## [1] "WLS Regression Summary:"
summary(wls_model)
##
## Call:
## lm(formula = math4 ~ lunch + log(enroll) + log(exppp), data = MEAP00_1,
## weights = MEAP00_1$weights)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -10.7122 -1.1914 0.1403 1.3741 3.9151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.47815 16.51032 3.057 0.002268 **
## lunch -0.44859 0.01461 -30.697 < 2e-16 ***
## log(enroll) -2.64728 0.83594 -3.167 0.001569 **
## log(exppp) 6.47420 1.68588 3.840 0.000127 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.9 on 1688 degrees of freedom
## Multiple R-squared: 0.36, Adjusted R-squared: 0.3588
## F-statistic: 316.5 on 3 and 1688 DF, p-value: < 2.2e-16
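# Optional sketch (informal check): regress the squared *weighted* residuals on
# the WLS fitted values and their squares; a small R-squared suggests the
# estimated variance function absorbed most of the heteroskedasticity.
# The names wres2 and check_aux are only for illustration.
wres2 <- weighted.residuals(wls_model)^2
check_aux <- lm(wres2 ~ fitted(wls_model) + I(fitted(wls_model)^2))
summary(check_aux)$r.squared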
# STEP 6: Compare OLS and WLS coefficients
ols_coefficients <- summary(model)$coefficients
wls_coefficients <- summary(wls_model)$coefficients
comparison <- data.frame(
Coefficient = rownames(ols_coefficients),
OLS_Estimate = ols_coefficients[, "Estimate"],
WLS_Estimate = wls_coefficients[, "Estimate"],
OLS_StdError = ols_coefficients[, "Std. Error"],
WLS_StdError = wls_coefficients[, "Std. Error"]
)
print("Comparison of OLS and WLS Coefficients:")
## [1] "Comparison of OLS and WLS Coefficients:"
print(comparison)
## Coefficient OLS_Estimate WLS_Estimate OLS_StdError WLS_StdError
## (Intercept) (Intercept) 91.9324043 50.4781522 19.96169801 16.51031594
## lunch lunch -0.4487434 -0.4485915 0.01464203 0.01461347
## log(enroll) log(enroll) -5.3991517 -2.6472816 0.94041163 0.83593620
## log(exppp) log(exppp) 3.5247506 6.4741999 2.09784606 1.68587676
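# Optional sketch: how far each WLS estimate moved from its OLS counterpart,
# measured in OLS standard-error units. Large movements can hint that the
# conditional mean itself may be misspecified rather than just heteroskedastic.
comparison$Change_in_SE_units <-
  (comparison$WLS_Estimate - comparison$OLS_Estimate) / comparison$OLS_StdError
print(comparison)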
# STEP 7: Robust standard errors for WLS
wls_robust_se <- coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC1"))
print("Robust Standard Errors for WLS:")
## [1] "Robust Standard Errors for WLS:"
print(wls_robust_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.478152 18.925179 2.6672 0.0077206 **
## lunch -0.448592 0.014254 -31.4711 < 2.2e-16 ***
## log(enroll) -2.647282 1.054608 -2.5102 0.0121590 *
## log(exppp) 6.474200 1.812868 3.5712 0.0003652 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# STEP 8: Conclude whether OLS or WLS is more precise
precision_comparison <- data.frame(
Variable = rownames(comparison),
OLS_SE = comparison$OLS_StdError,
WLS_SE = comparison$WLS_StdError
)
print("Precision Comparison (OLS vs. WLS Standard Errors):")
## [1] "Precision Comparison (OLS vs. WLS Standard Errors):"
print(precision_comparison)
## Variable OLS_SE WLS_SE
## 1 (Intercept) 19.96169801 16.51031594
## 2 lunch 0.01464203 0.01461347
## 3 log(enroll) 0.94041163 0.83593620
## 4 log(exppp) 2.09784606 1.68587676
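# Optional sketch: ratio of WLS to OLS standard errors; values below 1 favor WLS.
# In the printout above the WLS standard error is smaller for every coefficient,
# so WLS appears to be the more precise estimator for this model.
precision_comparison$WLS_to_OLS_Ratio <-
  precision_comparison$WLS_SE / precision_comparison$OLS_SE
print(precision_comparison)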