# Load required libraries
library(car)     # for joint hypothesis testing
## Loading required package: carData
library(wooldridge)  # for WAGE2 dataset

# Load the data
data("wage2")  # Note: dataset name is lowercase in wooldridge package

# (i) Baseline specification: log wage regressed on educ, exper, tenure and
# the married, black, south and urban regressors from wage2.
model1 <- lm(
  formula = log(wage) ~ educ + exper + tenure + married + black + south +
    urban,
  data = wage2
)

# Show the coefficient table and overall fit statistics
summary(model1)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
# Percentage difference in monthly salary between blacks and nonblacks.
# 100 * b would be the usual log-point approximation; the expression below
# applies the exact transformation 100 * (exp(b) - 1) to the black coefficient.
black_coef <- model1$coefficients["black"]
pct_diff <- 100 * (exp(black_coef) - 1)

# (ii) Are the quadratic terms in exper and tenure jointly significant?
# Store the squares as explicit columns so the restrictions can be named.
wage2[["exper2"]] <- wage2[["exper"]]^2
wage2[["tenure2"]] <- wage2[["tenure"]]^2

# Unrestricted model: the part (i) regressors plus the two squared terms
model2 <- lm(
  formula = log(wage) ~ educ + exper + tenure + married + black + south +
    urban + exper2 + tenure2,
  data = wage2
)

# F-test of H0: the coefficients on exper2 and tenure2 are both zero
joint_test <- linearHypothesis(
  model2,
  c("exper2 = 0", "tenure2 = 0")
)
print(joint_test)
## 
## Linear hypothesis test:
## exper2 = 0
## tenure2 = 0
## 
## Model 1: restricted model
## Model 2: log(wage) ~ educ + exper + tenure + married + black + south + 
##     urban + exper2 + tenure2
## 
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    927 123.82                           
## 2    925 123.42  2   0.39756 1.4898  0.226
# (iii) Does the return to education depend on race?
# The educ:black coefficient is the difference in the educ slope for
# observations with black = 1; its t-test is the test of interest.
model3 <- lm(
  formula = log(wage) ~ educ + exper + tenure + married + black + south +
    urban + educ:black,
  data = wage2
)
summary(model3)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban + educ:black, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.97782 -0.21832  0.00475  0.24136  1.23226 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.374817   0.114703  46.859  < 2e-16 ***
## educ         0.067115   0.006428  10.442  < 2e-16 ***
## exper        0.013826   0.003191   4.333 1.63e-05 ***
## tenure       0.011787   0.002453   4.805 1.80e-06 ***
## married      0.198908   0.039047   5.094 4.25e-07 ***
## black        0.094809   0.255399   0.371 0.710561    
## south       -0.089450   0.026277  -3.404 0.000692 ***
## urban        0.183852   0.026955   6.821 1.63e-11 ***
## educ:black  -0.022624   0.020183  -1.121 0.262603    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared:  0.2536, Adjusted R-squared:  0.2471 
## F-statistic: 39.32 on 8 and 926 DF,  p-value: < 2.2e-16
# (iv) Wage differentials across the four marital-status-by-race groups
# One indicator per group (relies on married and black being coded 0/1).
wage2$married_black <- with(wage2, married * black)
wage2$married_nonblack <- with(wage2, married * (1 - black))
wage2$single_black <- with(wage2, (1 - married) * black)
# single_nonblack gets no indicator, so it serves as the base group

model4 <- lm(
  formula = log(wage) ~ educ + exper + tenure + south + urban +
    married_black + married_nonblack + single_black,
  data = wage2
)

# Both coefficients are measured against the same base group, so their
# difference is the log-wage gap between married blacks and married nonblacks.
married_black_coef <- model4$coefficients["married_black"]
married_nonblack_coef <- model4$coefficients["married_nonblack"]
wage_diff <- married_black_coef - married_nonblack_coef
# Convert the log-wage gap into an exact percentage difference
pct_diff_married <- 100 * (exp(wage_diff) - 1)

# Print all results
# Summarises parts (i)-(iv) using objects computed earlier in the script:
# pct_diff, model1, joint_test, model3 and pct_diff_married.
cat("\nResults:\n")
## 
## Results:
cat("\n(i) Black-Nonblack Wage Differential:", round(pct_diff, 2),
    "% (A negative value means blacks earn less)\n")
## 
## (i) Black-Nonblack Wage Differential: -17.17 % (A negative value means blacks earn less)
# format.pval() keeps significant digits; rounding this very small p-value to
# four decimals printed a misleading "p-value = 0".
black_pval <- summary(model1)$coefficients["black", "Pr(>|t|)"]
cat("    Statistical significance of black coefficient: p-value =",
    format.pval(black_pval, digits = 3), "\n")
##     Statistical significance of black coefficient: p-value = 6.84e-07
cat("\n(ii) Joint test of exper2 and tenure2:\n")
## 
## (ii) Joint test of exper2 and tenure2:
# Row 2 of the linearHypothesis table holds the unrestricted-model comparison
cat("     F-statistic =", round(joint_test$F[2], 3), "\n")
##      F-statistic = 1.49
cat("     p-value =", round(joint_test$`Pr(>F)`[2], 4), "\n")
##      p-value = 0.226
cat("\n(iii) Education-Race interaction:\n")
## 
## (iii) Education-Race interaction:
cat("     Coefficient =", round(coef(model3)["educ:black"], 4), "\n")
##      Coefficient = -0.0226
cat("     p-value =",
    round(summary(model3)$coefficients["educ:black", "Pr(>|t|)"], 4), "\n")
##      p-value = 0.2626
cat("\n(iv) Wage differential between married blacks and married nonblacks:",
    round(pct_diff_married, 2), "%\n")
## 
## (iv) Wage differential between married blacks and married nonblacks: -16.43 %

The script above works through the four parts of the exercise. Part (i) reports the estimated wage differential between blacks and nonblacks and its statistical significance. Part (ii) tests whether exper² and tenure² are jointly significant; with F = 1.49 and a p-value of 0.226, they are jointly insignificant even at the 20% level. Part (iii) tests whether the return to education differs by race by including an interaction term between education and race. Part (iv) calculates the wage differential specifically between married blacks and married nonblacks.