# Install and load required packages
if (!require(wooldridge)) install.packages("wooldridge")
## Loading required package: wooldridge
if (!require(lmtest)) install.packages("lmtest")
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
if (!require(sandwich)) install.packages("sandwich")
## Loading required package: sandwich
library(wooldridge)
library(lmtest)
library(sandwich)
# Load the JTRAIN dataset
data("jtrain", package = "wooldridge")
# Filter data for the year 1988
jtrain_1988 <- subset(jtrain, year == 1988)
# Inspect the structure of the dataset to confirm variables
str(jtrain_1988)
## 'data.frame': 157 obs. of 30 variables:
## $ year : int 1988 1988 1988 1988 1988 1988 1988 1988 1988 1988 ...
## $ fcode : num 410032 410440 410495 410500 410501 ...
## $ employ : int 131 13 25 155 NA NA 16 20 47 16 ...
## $ sales : num 43000000 1970000 110000 19659000 8000000 ...
## $ avgsal : num 37000 11000 18720 14287 NA ...
## $ scrap : num NA NA NA NA NA NA NA NA NA NA ...
## $ rework : num NA NA NA NA NA NA NA NA NA NA ...
## $ tothrs : int 8 12 50 0 0 0 0 0 14 100 ...
## $ union : int 0 0 0 0 0 0 0 1 0 0 ...
## $ grant : int 0 0 0 0 0 0 0 0 0 0 ...
## $ d89 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ d88 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ totrain : int 50 13 10 0 20 0 0 0 3 3 ...
## $ hrsemp : num 3.05 12 20 0 NA ...
## $ lscrap : num NA NA NA NA NA NA NA NA NA NA ...
## $ lemploy : num 4.88 2.56 3.22 5.04 NA ...
## $ lsales : num 17.6 14.5 11.6 16.8 15.9 ...
## $ lrework : num NA NA NA NA NA NA NA NA NA NA ...
## $ lhrsemp : num 1.4 2.56 3.04 0 NA ...
## $ lscrap_1: num NA NA NA NA NA NA NA NA NA NA ...
## $ grant_1 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ clscrap : num NA NA NA NA NA NA NA NA NA NA ...
## $ cgrant : int 0 0 0 0 0 0 0 0 0 0 ...
## $ clemploy: num 0.27 0.08 0.223 -0.255 NA ...
## $ clsales : num -0.0889 0.2333 -1.9196 -0.1887 0.2877 ...
## $ lavgsal : num 10.52 9.31 9.84 9.57 NA ...
## $ clavgsal: num 0.0556 0.0465 0.0572 0.0398 NA ...
## $ cgrant_1: int 0 0 0 0 0 0 0 0 0 0 ...
## $ chrsemp : num -8.95 0 -17.5 0 NA ...
## $ clhrsemp: num -1.165 0 -0.606 0 NA ...
# Part (ii): Simple regression model log(scrap) ~ grant
cat("\nRunning simple regression model: log(scrap) ~ grant...\n")
##
## Running simple regression model: log(scrap) ~ grant...
model_simple <- lm(log(scrap) ~ grant, data = jtrain_1988)
summary_simple <- summary(model_simple)
# Display the regression summary
cat("\nRegression Summary:\n")
##
## Regression Summary:
##
## Regression Summary:
print(summary_simple)
##
## Call:
## lm(formula = log(scrap) ~ grant, data = jtrain_1988)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4043 -0.9536 -0.0465 0.9636 2.8103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4085 0.2406 1.698 0.0954 .
## grant 0.0566 0.4056 0.140 0.8895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.423 on 52 degrees of freedom
## (103 observations deleted due to missingness)
## Multiple R-squared: 0.0003744, Adjusted R-squared: -0.01885
## F-statistic: 0.01948 on 1 and 52 DF, p-value: 0.8895
# Extract key results
cat("\nEffect of grant on log(scrap):\n")
##
## Effect of grant on log(scrap):
cat("Coefficient (grant): ", coef(summary_simple)["grant", "Estimate"], "\n")
## Coefficient (grant): 0.05660037
cat("Standard Error: ", coef(summary_simple)["grant", "Std. Error"], "\n")
## Standard Error: 0.4055519
cat("p-value: ", coef(summary_simple)["grant", "Pr(>|t|)"], "\n")
## p-value: 0.8895438
# Check for significance
if (coef(summary_simple)["grant", "Pr(>|t|)"] < 0.05) {
cat("The effect of receiving a grant on log(scrap) is statistically significant at the 5% level.\n")
} else {
cat("The effect of receiving a grant on log(scrap) is NOT statistically significant at the 5% level.\n")
}
## The effect of receiving a grant on log(scrap) is NOT statistically significant at the 5% level.
# Part (v): Heteroskedasticity-robust standard errors
cat("\nUsing heteroskedasticity-robust standard errors:\n")
##
## Using heteroskedasticity-robust standard errors:
# Interpretation of the robust results
robust_se <- coeftest(model_simple, vcov = vcovHC(model_simple, type = "HC1"))
print(robust_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.40853 0.26293 1.5538 0.1263
## grant 0.05660 0.37084 0.1526 0.8793
cat("\nInterpretation using robust standard errors:\n")
##
## Interpretation using robust standard errors:
cat("Robust Coefficient (grant): ", robust_se["grant", "Estimate"], "\n")
## Robust Coefficient (grant): 0.05660037
cat("Robust Standard Error: ", robust_se["grant", "Std. Error"], "\n")
## Robust Standard Error: 0.3708355
cat("Robust p-value: ", robust_se["grant", "Pr(>|t|)"], "\n")
## Robust p-value: 0.8792813
if (robust_se["grant", "Pr(>|t|)"] < 0.05) {
cat("The effect of receiving a grant on log(scrap) remains statistically significant (robust).\n")
} else {
cat("The effect of receiving a grant on log(scrap) is NOT statistically significant with robust standard errors.\n")
}
## The effect of receiving a grant on log(scrap) is NOT statistically significant with robust standard errors.