# Install and load necessary libraries
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("lmtest")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("sandwich")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)

# Load the data (replace with your actual file path or dataset)
# Example: data <- read.csv("meap00.csv")
# Placeholder dataset for testing only: with just 3 observations and 4
# coefficients, every regression below is saturated (zero residual degrees of
# freedom), so the printed output shows NaN standard errors and should not be
# interpreted. Swap in the real MEAP data before drawing any conclusions.
data <- data.frame(
  lunch = c(30, 40, 50),    # Replace with actual data
  enroll = c(300, 400, 500),
  exppp = c(5000, 6000, 7000),
  math4 = c(70, 80, 90)
)
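
# With the real data in hand, the placeholder above can be replaced by one of
# the lines below. The file name and the wooldridge-package dataset name are
# assumptions; adjust them to whatever your copy of the MEAP data is called.
# data <- read_csv("meap00.csv")          # readr reader loaded with tidyverse
# data <- wooldridge::meap00_01           # dataset name assumed; requires the wooldridge package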

# (i) OLS regression
data <- data %>%
  mutate(log_enroll = log(enroll), log_exppp = log(exppp)) # Create log variables

ols_model <- lm(math4 ~ lunch + log_enroll + log_exppp, data = data)
cat("OLS Regression Summary:\n")
## OLS Regression Summary:
print(summary(ols_model))
## 
## Call:
## lm(formula = math4 ~ lunch + log_enroll + log_exppp, data = data)
## 
## Residuals:
## ALL 3 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  4.000e+01        NaN     NaN      NaN
## lunch        1.000e+00        NaN     NaN      NaN
## log_enroll  -6.742e-14        NaN     NaN      NaN
## log_exppp           NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 2 and 0 DF,  p-value: NA
# Heteroskedasticity-robust (HC3) standard errors
robust_se <- coeftest(ols_model, vcov = vcovHC(ols_model, type = "HC3"))
## Warning in meatHC(x, type = type, omega = omega): HC3 covariances are
## numerically unstable for hat values close to 1 (and undefined if exactly 1) as
## for observation(s) 1, 2, 3
cat("\nOLS with Robust Standard Errors:\n")
## 
## OLS with Robust Standard Errors:
print(robust_se)
## 
## z test of coefficients:
## 
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)  4.000e+01        NaN     NaN      NaN
## lunch        1.000e+00        NaN     NaN      NaN
## log_enroll  -6.742e-14        NaN     NaN      NaN
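
# It is often useful to report the usual and the robust standard errors side
# by side; with the placeholder data both columns are NaN, but with the real
# data the comparison shows how much heteroskedasticity matters. A minimal
# sketch using base R plus sandwich:
ok <- !is.na(coef(ols_model))  # drop aliased coefficients (an artifact of the placeholder data)
se_compare <- data.frame(
  estimate  = coef(ols_model)[ok],
  usual_se  = sqrt(diag(vcov(ols_model))),
  robust_se = sqrt(diag(vcovHC(ols_model, type = "HC3")))
)
print(se_compare)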
# (ii) White test for heteroskedasticity (special form): regress the squared
# OLS residuals on the fitted values and their square, not on math4 itself
data$residuals_sq <- residuals(ols_model)^2
data$fitted_vals <- fitted(ols_model)
white_test <- lm(residuals_sq ~ fitted_vals + I(fitted_vals^2), data = data)
cat("\nWhite Test Summary:\n")
## 
## White Test Summary:
print(summary(white_test))
## 
## Call:
## lm(formula = residuals_sq ~ fitted_vals + I(fitted_vals^2), data = data)
## 
## Residuals:
## ALL 3 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)             0        NaN     NaN      NaN
## fitted_vals             0        NaN     NaN      NaN
## I(fitted_vals^2)        0        NaN     NaN      NaN
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:    NaN,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 2 and 0 DF,  p-value: NA
# Overall F statistic from the White test regression
f_test_value <- summary(white_test)$fstatistic[1]
cat("\nWhite Test F-value:\n", f_test_value, "\n")
## 
## White Test F-value:
##  NaN
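
# The same special-form White test can be run directly with lmtest::bptest(),
# which also returns the LM statistic and p-value on the real data (with the
# degenerate placeholder data the statistic is undefined). A sketch:
white_bp <- bptest(ols_model,
                   ~ fitted(ols_model) + I(fitted(ols_model)^2),
                   data = data)
print(white_bp)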
# (iii) Weighted Least Squares (WLS)
# Squared OLS residuals (recomputed so this section can run on its own)
data$residuals_sq <- residuals(ols_model)^2

# Add a small constant so log() is defined even when a residual is numerically
# zero, as happens with the saturated placeholder fit
data$log_residuals_sq <- log(data$residuals_sq + 1e-6)

# Auxiliary regression for the variance function: log(u-hat^2) on the fitted
# values and their square (the feasible-GLS recipe), not on observed math4
aux_model <- lm(log_residuals_sq ~ fitted_vals + I(fitted_vals^2), data = data)

# Estimated variance function h-hat = exp(fitted values of the auxiliary
# regression); the WLS weights used below are 1 / h-hat
data$weights <- exp(fitted(aux_model))

# Perform Weighted Least Squares (WLS) regression
wls_model <- lm(math4 ~ lunch + log_enroll + log_exppp, data = data, weights = 1 / data$weights)

# Output the WLS results
cat("\nWLS Regression Summary:\n")
## 
## WLS Regression Summary:
print(summary(wls_model))
## 
## Call:
## lm(formula = math4 ~ lunch + log_enroll + log_exppp, data = data, 
##     weights = 1/data$weights)
## 
## Residuals:
## ALL 3 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (1 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)       40        NaN     NaN      NaN
## lunch              1        NaN     NaN      NaN
## log_enroll         0        NaN     NaN      NaN
## log_exppp         NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 2 and 0 DF,  p-value: NA
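
# A natural follow-up is to pair the WLS estimates with heteroskedasticity-
# robust standard errors, in case the variance function behind the weights is
# misspecified (with the placeholder data these are NaN as well). A sketch:
wls_robust <- coeftest(wls_model, vcov = vcovHC(wls_model, type = "HC3"))
cat("\nWLS with Robust Standard Errors:\n")
print(wls_robust)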