# Install and load required packages
if (!require(wooldridge)) install.packages("wooldridge")
## Loading required package: wooldridge
if (!require(lmtest)) install.packages("lmtest")
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
if (!require(sandwich)) install.packages("sandwich")
## Loading required package: sandwich
library(wooldridge)
library(lmtest)
library(sandwich)

# Load the JTRAIN dataset
data("jtrain", package = "wooldridge")
# Filter data for the year 1988
jtrain_1988 <- subset(jtrain, year == 1988)
# Inspect the structure of the dataset to confirm variables
str(jtrain_1988)
## 'data.frame':    157 obs. of  30 variables:
##  $ year    : int  1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 ...
##  $ fcode   : num  410032 410440 410495 410500 410501 ...
##  $ employ  : int  131 13 25 155 NA NA 16 20 47 16 ...
##  $ sales   : num  43000000 1970000 110000 19659000 8000000 ...
##  $ avgsal  : num  37000 11000 18720 14287 NA ...
##  $ scrap   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ rework  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tothrs  : int  8 12 50 0 0 0 0 0 14 100 ...
##  $ union   : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ grant   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ d89     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ d88     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ totrain : int  50 13 10 0 20 0 0 0 3 3 ...
##  $ hrsemp  : num  3.05 12 20 0 NA ...
##  $ lscrap  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lemploy : num  4.88 2.56 3.22 5.04 NA ...
##  $ lsales  : num  17.6 14.5 11.6 16.8 15.9 ...
##  $ lrework : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ lhrsemp : num  1.4 2.56 3.04 0 NA ...
##  $ lscrap_1: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ grant_1 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ clscrap : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ cgrant  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ clemploy: num  0.27 0.08 0.223 -0.255 NA ...
##  $ clsales : num  -0.0889 0.2333 -1.9196 -0.1887 0.2877 ...
##  $ lavgsal : num  10.52 9.31 9.84 9.57 NA ...
##  $ clavgsal: num  0.0556 0.0465 0.0572 0.0398 NA ...
##  $ cgrant_1: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ chrsemp : num  -8.95 0 -17.5 0 NA ...
##  $ clhrsemp: num  -1.165 0 -0.606 0 NA ...
# Part (ii): Simple regression model log(scrap) ~ grant
cat("\nRunning simple regression model: log(scrap) ~ grant...\n")
## 
## Running simple regression model: log(scrap) ~ grant...
model_simple <- lm(log(scrap) ~ grant, data = jtrain_1988)
summary_simple <- summary(model_simple)

# Display the regression summary
cat("\nRegression Summary:\n")
## 
## Regression Summary:
## 
## Regression Summary:
print(summary_simple)
## 
## Call:
## lm(formula = log(scrap) ~ grant, data = jtrain_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4043 -0.9536 -0.0465  0.9636  2.8103 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.4085     0.2406   1.698   0.0954 .
## grant         0.0566     0.4056   0.140   0.8895  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.423 on 52 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.0003744,  Adjusted R-squared:  -0.01885 
## F-statistic: 0.01948 on 1 and 52 DF,  p-value: 0.8895
# Extract key results
cat("\nEffect of grant on log(scrap):\n")
## 
## Effect of grant on log(scrap):
cat("Coefficient (grant): ", coef(summary_simple)["grant", "Estimate"], "\n")
## Coefficient (grant):  0.05660037
cat("Standard Error: ", coef(summary_simple)["grant", "Std. Error"], "\n")
## Standard Error:  0.4055519
cat("p-value: ", coef(summary_simple)["grant", "Pr(>|t|)"], "\n")
## p-value:  0.8895438
# Check for significance
if (coef(summary_simple)["grant", "Pr(>|t|)"] < 0.05) {
  cat("The effect of receiving a grant on log(scrap) is statistically significant at the 5% level.\n")
} else {
  cat("The effect of receiving a grant on log(scrap) is NOT statistically significant at the 5% level.\n")
}
## The effect of receiving a grant on log(scrap) is NOT statistically significant at the 5% level.
# Part (v): Heteroskedasticity-robust standard errors
cat("\nUsing heteroskedasticity-robust standard errors:\n")
## 
## Using heteroskedasticity-robust standard errors:
# Interpretation of the robust results
robust_se <- coeftest(model_simple, vcov = vcovHC(model_simple, type = "HC1"))
print(robust_se)
## 
## t test of coefficients:
## 
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.40853    0.26293  1.5538   0.1263
## grant        0.05660    0.37084  0.1526   0.8793
cat("\nInterpretation using robust standard errors:\n")
## 
## Interpretation using robust standard errors:
cat("Robust Coefficient (grant): ", robust_se["grant", "Estimate"], "\n")
## Robust Coefficient (grant):  0.05660037
cat("Robust Standard Error: ", robust_se["grant", "Std. Error"], "\n")
## Robust Standard Error:  0.3708355
cat("Robust p-value: ", robust_se["grant", "Pr(>|t|)"], "\n")
## Robust p-value:  0.8792813
if (robust_se["grant", "Pr(>|t|)"] < 0.05) {
  cat("The effect of receiving a grant on log(scrap) remains statistically significant (robust).\n")
} else {
  cat("The effect of receiving a grant on log(scrap) is NOT statistically significant with robust standard errors.\n")
}
## The effect of receiving a grant on log(scrap) is NOT statistically significant with robust standard errors.