# Load libraries
library(wooldridge)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)

# Load the dataset
data("jtrain")

# Step 1: Filter data for 1987 and 1988
# Extract 1987 data for `scrap` and rename it to `scrap87`
jtrain_1987 <- subset(jtrain, year == 1987, select = c(fcode, scrap))
colnames(jtrain_1987)[2] <- "scrap87"

# Extract 1988 data
jtrain_1988 <- subset(jtrain, year == 1988)

# Merge the two datasets by firm code (fcode)
jtrain_1988 <- merge(jtrain_1988, jtrain_1987, by = "fcode", all.x = TRUE)

# Step 2: Model 1 - Simple regression
model1 <- lm(log(scrap) ~ grant, data = jtrain_1988)
summary(model1)
## 
## Call:
## lm(formula = log(scrap) ~ grant, data = jtrain_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4043 -0.9536 -0.0465  0.9636  2.8103 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.4085     0.2406   1.698   0.0954 .
## grant         0.0566     0.4056   0.140   0.8895  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.423 on 52 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.0003744,  Adjusted R-squared:  -0.01885 
## F-statistic: 0.01948 on 1 and 52 DF,  p-value: 0.8895
# Step 3: Model 2 - Add log(scrap87) as a predictor
model2 <- lm(log(scrap) ~ grant + log(scrap87), data = jtrain_1988)
summary(model2)
## 
## Call:
## lm(formula = log(scrap) ~ grant + log(scrap87), data = jtrain_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9146 -0.1763  0.0057  0.2308  1.5991 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.02124    0.08910   0.238   0.8126    
## grant        -0.25397    0.14703  -1.727   0.0902 .  
## log(scrap87)  0.83116    0.04444  18.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5127 on 51 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.8728, Adjusted R-squared:  0.8678 
## F-statistic: 174.9 on 2 and 51 DF,  p-value: < 2.2e-16
# Step 4: Hypothesis test for grant (H1: β_grant < 0)
t_stat_grant <- coef(summary(model2))["grant", "t value"]
p_value_grant <- pt(t_stat_grant, df = model2$df.residual, lower.tail = TRUE)
cat("One-sided p-value for grant:", p_value_grant, "\n")
## One-sided p-value for grant: 0.04508135
# Step 5: Hypothesis test for log(scrap87) (two-sided test)
t_stat_scrap87 <- coef(summary(model2))["log(scrap87)", "t value"]
p_value_scrap87 <- 2 * pt(abs(t_stat_scrap87), df = model2$df.residual, lower.tail = FALSE)
cat("Two-sided p-value for log(scrap87):", p_value_scrap87, "\n")
## Two-sided p-value for log(scrap87): 1.756554e-24
# Step 6: Robust standard errors
robust_se <- vcovHC(model2, type = "HC")
robust_results <- coeftest(model2, vcov = robust_se)
cat("Robust results:\n")
## Robust results:
print(robust_results)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   0.021237   0.097032  0.2189   0.82763    
## grant        -0.253970   0.142249 -1.7854   0.08014 .  
## log(scrap87)  0.831161   0.071469 11.6297 5.862e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare standard and robust results
cat("Comparison of standard vs robust standard errors:\n")
## Comparison of standard vs robust standard errors:
summary(model2)  # Standard errors
## 
## Call:
## lm(formula = log(scrap) ~ grant + log(scrap87), data = jtrain_1988)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9146 -0.1763  0.0057  0.2308  1.5991 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.02124    0.08910   0.238   0.8126    
## grant        -0.25397    0.14703  -1.727   0.0902 .  
## log(scrap87)  0.83116    0.04444  18.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5127 on 51 degrees of freedom
##   (103 observations deleted due to missingness)
## Multiple R-squared:  0.8728, Adjusted R-squared:  0.8678 
## F-statistic: 174.9 on 2 and 51 DF,  p-value: < 2.2e-16
print(robust_results)  # Robust standard errors
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)   0.021237   0.097032  0.2189   0.82763    
## grant        -0.253970   0.142249 -1.7854   0.08014 .  
## log(scrap87)  0.831161   0.071469 11.6297 5.862e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1