# Install and load necessary libraries
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lmtest")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("sandwich")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
# Load the data (replace with your actual file path or dataset)
# Example: data <- read.csv("meap00.csv")
# Placeholder dataset for testing
data <- data.frame(
lunch = c(30, 40, 50), # Replace with actual data
enroll = c(300, 400, 500),
exppp = c(5000, 6000, 7000),
math4 = c(70, 80, 90)
)
# (i) OLS regression
data <- data %>%
mutate(log_enroll = log(enroll), log_exppp = log(exppp)) # Create log variables
ols_model <- lm(math4 ~ lunch + log_enroll + log_exppp, data = data)
cat("OLS Regression Summary:\n")
## OLS Regression Summary:
print(summary(ols_model))
##
## Call:
## lm(formula = math4 ~ lunch + log_enroll + log_exppp, data = data)
##
## Residuals:
## ALL 3 residuals are 0: no residual degrees of freedom!
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.000e+01 NaN NaN NaN
## lunch 1.000e+00 NaN NaN NaN
## log_enroll -6.742e-14 NaN NaN NaN
## log_exppp NA NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 2 and 0 DF, p-value: NA
# Robust standard errors
robust_se <- coeftest(ols_model, vcov = vcovHC(ols_model, type = "HC3"))
## Warning in meatHC(x, type = type, omega = omega): HC3 covariances are
## numerically unstable for hat values close to 1 (and undefined if exactly 1) as
## for observation(s) 1, 2, 3
cat("\nOLS with Robust Standard Errors:\n")
##
## OLS with Robust Standard Errors:
print(robust_se)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.000e+01 NaN NaN NaN
## lunch 1.000e+00 NaN NaN NaN
## log_enroll -6.742e-14 NaN NaN NaN
# (ii) White test for heteroskedasticity
data$residuals_sq <- residuals(ols_model)^2
white_test <- lm(residuals_sq ~ math4 + I(math4^2), data = data)
cat("\nWhite Test Summary:\n")
##
## White Test Summary:
print(summary(white_test))
##
## Call:
## lm(formula = residuals_sq ~ math4 + I(math4^2), data = data)
##
## Residuals:
## ALL 3 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0 NaN NaN NaN
## math4 0 NaN NaN NaN
## I(math4^2) 0 NaN NaN NaN
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: NaN, Adjusted R-squared: NaN
## F-statistic: NaN on 2 and 0 DF, p-value: NA
# F-test value
f_test_value <- summary(white_test)$fstatistic[1]
cat("\nWhite Test F-value:\n", f_test_value, "\n")
##
## White Test F-value:
## NaN
# (iii) Weighted Least Squares (WLS)
# Fix log transformation for residuals squared
data$residuals_sq <- residuals(ols_model)^2
# Add a small constant to avoid issues with log(0)
data$log_residuals_sq <- log(data$residuals_sq + 1e-6)
# Auxiliary regression for weights
aux_model <- lm(log_residuals_sq ~ math4 + I(math4^2), data = data)
# Calculate weights from the fitted values of the auxiliary regression
data$weights <- exp(fitted(aux_model))
# Perform Weighted Least Squares (WLS) regression
wls_model <- lm(math4 ~ lunch + log_enroll + log_exppp, data = data, weights = 1 / data$weights)
# Output the WLS results
cat("\nWLS Regression Summary:\n")
##
## WLS Regression Summary:
print(summary(wls_model))
##
## Call:
## lm(formula = math4 ~ lunch + log_enroll + log_exppp, data = data,
## weights = 1/data$weights)
##
## Residuals:
## ALL 3 residuals are 0: no residual degrees of freedom!
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40 NaN NaN NaN
## lunch 1 NaN NaN NaN
## log_enroll 0 NaN NaN NaN
## log_exppp NA NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 2 and 0 DF, p-value: NA