library(wooldridge)
library(car)
## Loading required package: carData
# Bring the wage2 data set into the workspace
data("wage2")

# Part (i)
# Baseline specification: regress log(wage) on educ, exper, tenure,
# married, black, south, and urban by OLS
model_1 <- lm(
  formula = log(wage) ~ educ + exper + tenure +
    married + black + south + urban,
  data = wage2
)

# Print the coefficient table (estimates, standard errors, t and p values)
# together with the overall fit statistics
summary(model_1)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98069 -0.21996  0.00707  0.24288  1.22822 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.395497   0.113225  47.653  < 2e-16 ***
## educ         0.065431   0.006250  10.468  < 2e-16 ***
## exper        0.014043   0.003185   4.409 1.16e-05 ***
## tenure       0.011747   0.002453   4.789 1.95e-06 ***
## married      0.199417   0.039050   5.107 3.98e-07 ***
## black       -0.188350   0.037667  -5.000 6.84e-07 ***
## south       -0.090904   0.026249  -3.463 0.000558 ***
## urban        0.183912   0.026958   6.822 1.62e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared:  0.2526, Adjusted R-squared:  0.2469 
## F-statistic: 44.75 on 7 and 927 DF,  p-value: < 2.2e-16
# Part (ii)
# Extend the baseline model with quadratic terms in exper and tenure.
# The squares are stored as columns of wage2 so that they appear in the
# fitted model under the plain names exper_sq and tenure_sq.
wage2[["exper_sq"]] <- wage2[["exper"]]^2
wage2[["tenure_sq"]] <- wage2[["tenure"]]^2

# Re-estimate with the two squared terms appended to the regressor list
model_2 <- lm(
  formula = log(wage) ~ educ + exper + tenure +
    married + black + south + urban +
    exper_sq + tenure_sq,
  data = wage2
)

# Print the coefficient table and fit statistics
summary(model_2)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban + exper_sq + tenure_sq, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98236 -0.21972 -0.00036  0.24078  1.25127 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.3586756  0.1259143  42.558  < 2e-16 ***
## educ         0.0642761  0.0063115  10.184  < 2e-16 ***
## exper        0.0172146  0.0126138   1.365 0.172665    
## tenure       0.0249291  0.0081297   3.066 0.002229 ** 
## married      0.1985470  0.0391103   5.077 4.65e-07 ***
## black       -0.1906636  0.0377011  -5.057 5.13e-07 ***
## south       -0.0912153  0.0262356  -3.477 0.000531 ***
## urban        0.1854241  0.0269585   6.878 1.12e-11 ***
## exper_sq    -0.0001138  0.0005319  -0.214 0.830622    
## tenure_sq   -0.0007964  0.0004710  -1.691 0.091188 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3653 on 925 degrees of freedom
## Multiple R-squared:  0.255,  Adjusted R-squared:  0.2477 
## F-statistic: 35.17 on 9 and 925 DF,  p-value: < 2.2e-16
# Joint test of H0: the coefficients on exper_sq and tenure_sq are both
# zero in model_2 (i.e. the two quadratic terms are jointly insignificant)
linearHypothesis(
  model_2,
  hypothesis.matrix = c("exper_sq = 0", "tenure_sq = 0")
)
## 
## Linear hypothesis test:
## exper_sq = 0
## tenure_sq = 0
## 
## Model 1: restricted model
## Model 2: log(wage) ~ educ + exper + tenure + married + black + south + 
##     urban + exper_sq + tenure_sq
## 
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    927 123.82                           
## 2    925 123.42  2   0.39756 1.4898  0.226
# Part (iii)
# Let the return to education depend on race: build the educ x black
# product as its own column so it enters the model as educ_black
wage2[["educ_black"]] <- with(wage2, educ * black)

# Baseline regressors plus the educ x black interaction; the coefficient
# on educ_black is the difference in the educ slope when black = 1
# (assuming black is coded 0/1)
model_3 <- lm(
  formula = log(wage) ~ educ + exper + tenure +
    married + black + south + urban +
    educ_black,
  data = wage2
)

# Print the coefficient table and fit statistics
summary(model_3)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black + 
##     south + urban + educ_black, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.97782 -0.21832  0.00475  0.24136  1.23226 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.374817   0.114703  46.859  < 2e-16 ***
## educ         0.067115   0.006428  10.442  < 2e-16 ***
## exper        0.013826   0.003191   4.333 1.63e-05 ***
## tenure       0.011787   0.002453   4.805 1.80e-06 ***
## married      0.198908   0.039047   5.094 4.25e-07 ***
## black        0.094809   0.255399   0.371 0.710561    
## south       -0.089450   0.026277  -3.404 0.000692 ***
## urban        0.183852   0.026955   6.821 1.63e-11 ***
## educ_black  -0.022624   0.020183  -1.121 0.262603    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared:  0.2536, Adjusted R-squared:  0.2471 
## F-statistic: 39.32 on 8 and 926 DF,  p-value: < 2.2e-16
# Part (iv)
# Let wages differ across the four married/black combinations by adding
# the married x black product alongside the two main effects
wage2[["married_black"]] <- with(wage2, married * black)

# Same controls as the baseline model, with married, black, and their
# interaction listed last
model_4 <- lm(
  formula = log(wage) ~ educ + exper + tenure + south + urban +
    married + black + married_black,
  data = wage2
)

# Print the coefficient table and fit statistics
summary(model_4)
## 
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + south + urban + 
##     married + black + married_black, data = wage2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98013 -0.21780  0.01057  0.24219  1.22889 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    5.403793   0.114122  47.351  < 2e-16 ***
## educ           0.065475   0.006253  10.471  < 2e-16 ***
## exper          0.014146   0.003191   4.433 1.04e-05 ***
## tenure         0.011663   0.002458   4.745 2.41e-06 ***
## south         -0.091989   0.026321  -3.495 0.000497 ***
## urban          0.184350   0.026978   6.833 1.50e-11 ***
## married        0.188915   0.042878   4.406 1.18e-05 ***
## black         -0.240820   0.096023  -2.508 0.012314 *  
## married_black  0.061354   0.103275   0.594 0.552602    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3656 on 926 degrees of freedom
## Multiple R-squared:  0.2528, Adjusted R-squared:  0.2464 
## F-statistic: 39.17 on 8 and 926 DF,  p-value: < 2.2e-16