library(wooldridge)
library(lmtest) 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
# Load and Subset Data for the Year 1988
data("jtrain", package = "wooldridge")
jtrain_1988 <- subset(jtrain, year == 1988)

# Check for missing values in relevant columns (scrap, grant, and lscrap_1)
jtrain_1988 <- na.omit(jtrain_1988[, c("scrap", "grant", "lscrap_1")])

# Ensure that the columns exist
if (!all(c("scrap", "grant", "lscrap_1") %in% names(jtrain_1988))) {
  stop("The required columns ('scrap', 'grant', 'lscrap_1') are not present in the dataset.")
}

# STEP 1: Simple Regression Model
# Model: log(scrap) = β0 + β1 * grant + u
model1 <- lm(log(scrap) ~ grant, data = jtrain_1988)
cat("\nSimple Regression Model Summary (log(scrap) ~ grant):\n")
## 
## Simple Regression Model Summary (log(scrap) ~ grant):
summary(model1)
## 
## Call:
## lm(formula = log(scrap) ~ grant, data = jtrain_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4043 -0.9536 -0.0465  0.9636  2.8103 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.4085     0.2406   1.698   0.0954 .
## grant         0.0566     0.4056   0.140   0.8895  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.423 on 52 degrees of freedom
## Multiple R-squared:  0.0003744,  Adjusted R-squared:  -0.01885 
## F-statistic: 0.01948 on 1 and 52 DF,  p-value: 0.8895
# STEP 2: Adding log(scrap87) as an explanatory variable
# Assuming "lscrap_1" corresponds to log(scrap87)
model2 <- lm(log(scrap) ~ grant + lscrap_1, data = jtrain_1988)
cat("\nRegression Model Summary (log(scrap) ~ grant + log(scrap87)):\n")
## 
## Regression Model Summary (log(scrap) ~ grant + log(scrap87)):
summary(model2)
## 
## Call:
## lm(formula = log(scrap) ~ grant + lscrap_1, data = jtrain_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9146 -0.1763  0.0057  0.2308  1.5991 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.02124    0.08910   0.238   0.8126    
## grant       -0.25397    0.14703  -1.727   0.0902 .  
## lscrap_1     0.83116    0.04444  18.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5127 on 51 degrees of freedom
## Multiple R-squared:  0.8728, Adjusted R-squared:  0.8678 
## F-statistic: 174.9 on 2 and 51 DF,  p-value: < 2.2e-16
# STEP 3: Interpret the coefficient of 'grant' and test for significance
grant_coefficient <- summary(model2)$coefficients["grant", ]
cat("\nCoefficient for 'grant':\n", grant_coefficient, "\n")
## 
## Coefficient for 'grant':
##  -0.2539697 0.1470311 -1.727319 0.0901627
# One-sided test for grant coefficient: H0: β_grant >= 0 vs H1: β_grant < 0
t_stat <- grant_coefficient[3]
p_value_one_sided <- pt(t_stat, df = model2$df.residual, lower.tail = TRUE)
cat("\nOne-sided p-value for β_grant < 0:\n", p_value_one_sided, "\n")
## 
## One-sided p-value for β_grant < 0:
##  0.04508135
# STEP 4: Two-sided test for log(scrap87)
log_scrap87_coefficient <- summary(model2)$coefficients["lscrap_1", ]
p_value_two_sided <- log_scrap87_coefficient[4]
cat("\nTwo-sided p-value for log(scrap87):\n", p_value_two_sided, "\n")
## 
## Two-sided p-value for log(scrap87):
##  1.756555e-24
# STEP 5: Re-estimate models with heteroskedasticity-robust standard errors
cat("\nHeteroskedasticity-Robust Standard Errors:\n")
## 
## Heteroskedasticity-Robust Standard Errors:
cat("\nRobust Coefficients for Model 1:\n")
## 
## Robust Coefficients for Model 1:
coeftest(model1, vcov = vcovHC(model1, type = "HC1"))
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.40853    0.26293  1.5538   0.1263
## grant        0.05660    0.37084  0.1526   0.8793
cat("\nRobust Coefficients for Model 2:\n")
## 
## Robust Coefficients for Model 2:
coeftest(model2, vcov = vcovHC(model2, type = "HC1"))
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  0.021237   0.099845  0.2127   0.83241    
## grant       -0.253970   0.146373 -1.7351   0.08876 .  
## lscrap_1     0.831161   0.073541 11.3021 1.688e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1