# Load necessary libraries
library(dplyr) # For data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car) # For hypothesis testing
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(sandwich) # For robust standard errors
library(lmtest) # For statistical tests
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# Simulate a dataset similar to JTRAIN for 1988
set.seed(123) # For reproducibility
data_1988 <- data.frame(
scrap = rnorm(54, mean = 50, sd = 10), # Firm scrap rate
grant = sample(c(0, 1), 54, replace = TRUE), # Dummy variable for grant
scraps7 = rnorm(54, mean = 45, sd = 8) # Lagged scrap rate
)
# (ii) Simple regression model
model1 <- lm(log(scrap) ~ grant, data = data_1988)
cat("Simple Regression Model:\n")
## Simple Regression Model:
summary(model1)
##
## Call:
## lm(formula = log(scrap) ~ grant, data = data_1988)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4856 -0.1164 -0.0061 0.1328 0.3745
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.92019 0.03784 103.61 <2e-16 ***
## grant -0.02232 0.05076 -0.44 0.662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1854 on 52 degrees of freedom
## Multiple R-squared: 0.003705, Adjusted R-squared: -0.01545
## F-statistic: 0.1934 on 1 and 52 DF, p-value: 0.6619
# (iii) Adding log(scraps7) as an explanatory variable
model2 <- lm(log(scrap) ~ grant + log(scraps7), data = data_1988)
cat("\nRegression Model with log(scraps7):\n")
##
## Regression Model with log(scraps7):
summary(model2)
##
## Call:
## lm(formula = log(scrap) ~ grant + log(scraps7), data = data_1988)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48307 -0.10467 -0.00171 0.13882 0.33969
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.17005 0.61572 5.149 4.26e-06 ***
## grant -0.01636 0.05076 -0.322 0.749
## log(scraps7) 0.19704 0.16143 1.221 0.228
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1845 on 51 degrees of freedom
## Multiple R-squared: 0.03198, Adjusted R-squared: -0.005978
## F-statistic: 0.8425 on 2 and 51 DF, p-value: 0.4365
# Coefficient interpretation and statistical significance test for grant
coefficient_grant <- coef(summary(model2))["grant", ]
significant <- ifelse(coefficient_grant["Pr(>|t|)"] < 0.05, "Yes", "No")
cat("\nIs grant statistically significant at 5% level? ", significant, "\n")
##
## Is grant statistically significant at 5% level? No
# (iv) Test if parameter on log(scraps7) is equal to 1 (two-sided test)
hypothesis_test <- linearHypothesis(model2, "log(scraps7) = 1")
p_value <- hypothesis_test$`Pr(>F)`[2]
cat("\nP-value for test log(scraps7) = 1: ", p_value, "\n")
##
## P-value for test log(scraps7) = 1: 7.82673e-06
# (v) Repeat with heteroskedasticity-robust standard errors
robust_se <- vcovHC(model2, type = "HC")
cat("\nRegression with Robust Standard Errors:\n")
##
## Regression with Robust Standard Errors:
coeftest(model2, vcov = robust_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.170050 0.696250 4.5530 3.311e-05 ***
## grant -0.016356 0.046693 -0.3503 0.7276
## log(scraps7) 0.197043 0.183686 1.0727 0.2884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test for log(scraps7) with robust standard errors
robust_test <- linearHypothesis(model2, "log(scraps7) = 1", vcov = robust_se)
robust_p_value <- robust_test$`Pr(>F)`[2]
cat("\nP-value for robust test log(scraps7) = 1: ", robust_p_value, "\n")
##
## P-value for robust test log(scraps7) = 1: 6.087304e-05