# Load necessary libraries
library(dplyr)       # For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)         # For hypothesis testing
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(sandwich)    # For robust standard errors
library(lmtest)      # For statistical tests
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Simulate a dataset similar to JTRAIN for 1988
set.seed(123)  # For reproducibility
data_1988 <- data.frame(
  scrap = rnorm(54, mean = 50, sd = 10),          # Firm scrap rate
  grant = sample(c(0, 1), 54, replace = TRUE),   # Dummy variable for grant
  scraps7 = rnorm(54, mean = 45, sd = 8)         # Lagged scrap rate
)

# (ii) Simple regression model
model1 <- lm(log(scrap) ~ grant, data = data_1988)
cat("Simple Regression Model:\n")
## Simple Regression Model:
summary(model1)
## 
## Call:
## lm(formula = log(scrap) ~ grant, data = data_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4856 -0.1164 -0.0061  0.1328  0.3745 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.92019    0.03784  103.61   <2e-16 ***
## grant       -0.02232    0.05076   -0.44    0.662    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1854 on 52 degrees of freedom
## Multiple R-squared:  0.003705,   Adjusted R-squared:  -0.01545 
## F-statistic: 0.1934 on 1 and 52 DF,  p-value: 0.6619
# (iii) Adding log(scraps7) as an explanatory variable
model2 <- lm(log(scrap) ~ grant + log(scraps7), data = data_1988)
cat("\nRegression Model with log(scraps7):\n")
## 
## Regression Model with log(scraps7):
summary(model2)
## 
## Call:
## lm(formula = log(scrap) ~ grant + log(scraps7), data = data_1988)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48307 -0.10467 -0.00171  0.13882  0.33969 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.17005    0.61572   5.149 4.26e-06 ***
## grant        -0.01636    0.05076  -0.322    0.749    
## log(scraps7)  0.19704    0.16143   1.221    0.228    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1845 on 51 degrees of freedom
## Multiple R-squared:  0.03198,    Adjusted R-squared:  -0.005978 
## F-statistic: 0.8425 on 2 and 51 DF,  p-value: 0.4365
# Coefficient interpretation and statistical significance test for grant
coefficient_grant <- coef(summary(model2))["grant", ]
significant <- ifelse(coefficient_grant["Pr(>|t|)"] < 0.05, "Yes", "No")
cat("\nIs grant statistically significant at 5% level? ", significant, "\n")
## 
## Is grant statistically significant at 5% level?  No
# (iv) Test if parameter on log(scraps7) is equal to 1 (two-sided test)
hypothesis_test <- linearHypothesis(model2, "log(scraps7) = 1")
p_value <- hypothesis_test$`Pr(>F)`[2]
cat("\nP-value for test log(scraps7) = 1: ", p_value, "\n")
## 
## P-value for test log(scraps7) = 1:  7.82673e-06
# (v) Repeat with heteroskedasticity-robust standard errors
robust_se <- vcovHC(model2, type = "HC")
cat("\nRegression with Robust Standard Errors:\n")
## 
## Regression with Robust Standard Errors:
coeftest(model2, vcov = robust_se)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   3.170050   0.696250  4.5530 3.311e-05 ***
## grant        -0.016356   0.046693 -0.3503    0.7276    
## log(scraps7)  0.197043   0.183686  1.0727    0.2884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test for log(scraps7) with robust standard errors
robust_test <- linearHypothesis(model2, "log(scraps7) = 1", vcov = robust_se)
robust_p_value <- robust_test$`Pr(>F)`[2]
cat("\nP-value for robust test log(scraps7) = 1: ", robust_p_value, "\n")
## 
## P-value for robust test log(scraps7) = 1:  6.087304e-05