# Load libraries
library(wooldridge)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
# Load the dataset
data("jtrain")
# Step 1: Filter data for 1987 and 1988
# Extract 1987 data for `scrap` and rename it to `scrap87`
jtrain_1987 <- subset(jtrain, year == 1987, select = c(fcode, scrap))
colnames(jtrain_1987)[2] <- "scrap87"
# Extract 1988 data
jtrain_1988 <- subset(jtrain, year == 1988)
# Merge the two datasets by firm code (fcode)
jtrain_1988 <- merge(jtrain_1988, jtrain_1987, by = "fcode", all.x = TRUE)
# Step 2: Model 1 - Simple regression
model1 <- lm(log(scrap) ~ grant, data = jtrain_1988)
summary(model1)
##
## Call:
## lm(formula = log(scrap) ~ grant, data = jtrain_1988)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4043 -0.9536 -0.0465 0.9636 2.8103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4085 0.2406 1.698 0.0954 .
## grant 0.0566 0.4056 0.140 0.8895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.423 on 52 degrees of freedom
## (103 observations deleted due to missingness)
## Multiple R-squared: 0.0003744, Adjusted R-squared: -0.01885
## F-statistic: 0.01948 on 1 and 52 DF, p-value: 0.8895
# Step 3: Model 2 - Add log(scrap87) as a predictor
model2 <- lm(log(scrap) ~ grant + log(scrap87), data = jtrain_1988)
summary(model2)
##
## Call:
## lm(formula = log(scrap) ~ grant + log(scrap87), data = jtrain_1988)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9146 -0.1763 0.0057 0.2308 1.5991
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02124 0.08910 0.238 0.8126
## grant -0.25397 0.14703 -1.727 0.0902 .
## log(scrap87) 0.83116 0.04444 18.701 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5127 on 51 degrees of freedom
## (103 observations deleted due to missingness)
## Multiple R-squared: 0.8728, Adjusted R-squared: 0.8678
## F-statistic: 174.9 on 2 and 51 DF, p-value: < 2.2e-16
# Step 4: Hypothesis test for grant (H1: β_grant < 0)
t_stat_grant <- coef(summary(model2))["grant", "t value"]
p_value_grant <- pt(t_stat_grant, df = model2$df.residual, lower.tail = TRUE)
cat("One-sided p-value for grant:", p_value_grant, "\n")
## One-sided p-value for grant: 0.04508135
# Step 5: Hypothesis test for log(scrap87) (two-sided test)
t_stat_scrap87 <- coef(summary(model2))["log(scrap87)", "t value"]
p_value_scrap87 <- 2 * pt(abs(t_stat_scrap87), df = model2$df.residual, lower.tail = FALSE)
cat("Two-sided p-value for log(scrap87):", p_value_scrap87, "\n")
## Two-sided p-value for log(scrap87): 1.756554e-24
# Step 6: Robust standard errors
robust_se <- vcovHC(model2, type = "HC")
robust_results <- coeftest(model2, vcov = robust_se)
cat("Robust results:\n")
## Robust results:
print(robust_results)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.021237 0.097032 0.2189 0.82763
## grant -0.253970 0.142249 -1.7854 0.08014 .
## log(scrap87) 0.831161 0.071469 11.6297 5.862e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare standard and robust results
cat("Comparison of standard vs robust standard errors:\n")
## Comparison of standard vs robust standard errors:
summary(model2) # Standard errors
##
## Call:
## lm(formula = log(scrap) ~ grant + log(scrap87), data = jtrain_1988)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9146 -0.1763 0.0057 0.2308 1.5991
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02124 0.08910 0.238 0.8126
## grant -0.25397 0.14703 -1.727 0.0902 .
## log(scrap87) 0.83116 0.04444 18.701 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5127 on 51 degrees of freedom
## (103 observations deleted due to missingness)
## Multiple R-squared: 0.8728, Adjusted R-squared: 0.8678
## F-statistic: 174.9 on 2 and 51 DF, p-value: < 2.2e-16
print(robust_results) # Robust standard errors
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.021237 0.097032 0.2189 0.82763
## grant -0.253970 0.142249 -1.7854 0.08014 .
## log(scrap87) 0.831161 0.071469 11.6297 5.862e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1