#Chapter 7
#C2
library(wooldridge)
# Load the data
data("wage2")
# (i) Estimate the model
# Baseline regression of log(wage) on education, experience, tenure and the
# married / black / south / urban indicators.
model1 <- lm(
  log(wage) ~ educ + exper + tenure + married + black + south + urban,
  data = wage2
)
summary(model1)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98069 -0.21996 0.00707 0.24288 1.22822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.395497 0.113225 47.653 < 2e-16 ***
## educ 0.065431 0.006250 10.468 < 2e-16 ***
## exper 0.014043 0.003185 4.409 1.16e-05 ***
## tenure 0.011747 0.002453 4.789 1.95e-06 ***
## married 0.199417 0.039050 5.107 3.98e-07 ***
## black -0.188350 0.037667 -5.000 6.84e-07 ***
## south -0.090904 0.026249 -3.463 0.000558 ***
## urban 0.183912 0.026958 6.822 1.62e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3655 on 927 degrees of freedom
## Multiple R-squared: 0.2526, Adjusted R-squared: 0.2469
## F-statistic: 44.75 on 7 and 927 DF, p-value: < 2.2e-16
# Report the difference in salary between blacks and nonblacks
# The dependent variable is log(wage), so the coefficient on black is a gap in
# log points, not in salary units. Multiplying by 100 expresses it as an
# approximate percentage difference, holding the other regressors fixed.
coef_black <- coef(model1)["black"]
cat("Approximate difference in monthly salary between blacks and nonblacks:", 100 * coef_black, "percent\n")
## Approximate difference in monthly salary between blacks and nonblacks: -18.83499 percent
# Two-sided t test on the black coefficient at the 5% level.
cat("Is this difference statistically significant?", summary(model1)$coefficients["black", "Pr(>|t|)"] < 0.05, "\n")
## Is this difference statistically significant? TRUE
# (ii) Extend the baseline model with quadratic terms in experience and
# tenure, then test whether the two added terms are jointly significant
# The squares are stored as columns of wage2 so the formula can refer to them
# by name.
wage2[["exper_sq"]] <- wage2[["exper"]]^2
wage2[["tenure_sq"]] <- wage2[["tenure"]]^2
model2 <- lm(
  log(wage) ~ educ + exper + tenure + married + black + south + urban +
    exper_sq + tenure_sq,
  data = wage2
)
summary(model2)
##
## Call:
## lm(formula = log(wage) ~ educ + exper + tenure + married + black +
## south + urban + exper_sq + tenure_sq, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98236 -0.21972 -0.00036 0.24078 1.25127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.3586756 0.1259143 42.558 < 2e-16 ***
## educ 0.0642761 0.0063115 10.184 < 2e-16 ***
## exper 0.0172146 0.0126138 1.365 0.172665
## tenure 0.0249291 0.0081297 3.066 0.002229 **
## married 0.1985470 0.0391103 5.077 4.65e-07 ***
## black -0.1906636 0.0377011 -5.057 5.13e-07 ***
## south -0.0912153 0.0262356 -3.477 0.000531 ***
## urban 0.1854241 0.0269585 6.878 1.12e-11 ***
## exper_sq -0.0001138 0.0005319 -0.214 0.830622
## tenure_sq -0.0007964 0.0004710 -1.691 0.091188 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3653 on 925 degrees of freedom
## Multiple R-squared: 0.255, Adjusted R-squared: 0.2477
## F-statistic: 35.17 on 9 and 925 DF, p-value: < 2.2e-16
# Joint F test of the two quadratic terms: restricted fit (model1) against
# the unrestricted fit (model2)
anova(model1, model2)
## Analysis of Variance Table
##
## Model 1: log(wage) ~ educ + exper + tenure + married + black + south +
## urban
## Model 2: log(wage) ~ educ + exper + tenure + married + black + south +
## urban + exper_sq + tenure_sq
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 927 123.82
## 2 925 123.42 2 0.39756 1.4898 0.226
# (iii) Let the return to education differ by race and test that difference
# educ * black expands to both main effects plus the educ:black interaction.
model3 <- lm(
  log(wage) ~ educ * black + exper + tenure + married + south + urban,
  data = wage2
)
summary(model3)
##
## Call:
## lm(formula = log(wage) ~ educ * black + exper + tenure + married +
## south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97782 -0.21832 0.00475 0.24136 1.23226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.374817 0.114703 46.859 < 2e-16 ***
## educ 0.067115 0.006428 10.442 < 2e-16 ***
## black 0.094809 0.255399 0.371 0.710561
## exper 0.013826 0.003191 4.333 1.63e-05 ***
## tenure 0.011787 0.002453 4.805 1.80e-06 ***
## married 0.198908 0.039047 5.094 4.25e-07 ***
## south -0.089450 0.026277 -3.404 0.000692 ***
## urban 0.183852 0.026955 6.821 1.63e-11 ***
## educ:black -0.022624 0.020183 -1.121 0.262603
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3654 on 926 degrees of freedom
## Multiple R-squared: 0.2536, Adjusted R-squared: 0.2471
## F-statistic: 39.32 on 8 and 926 DF, p-value: < 2.2e-16
# Pull out the educ:black estimate, then compare its p-value with the 5% level
coef_educ_black <- coef(model3)["educ:black"]
cat(
  "Interaction term (return to education depending on race):",
  coef_educ_black,
  "\n"
)
## Interaction term (return to education depending on race): -0.02262361
cat(
  "Is this interaction term statistically significant?",
  coef(summary(model3))["educ:black", "Pr(>|t|)"] < 0.05,
  "\n"
)
## Is this interaction term statistically significant? FALSE
# (iv) Estimate wage differentials across groups: married black, married nonblack, single black, single nonblack
# Build one 0/1 indicator per marital-status-by-race cell.
wage2$married_black <- wage2$married * wage2$black
wage2$married_nonblack <- wage2$married * (1 - wage2$black)
wage2$single_black <- (1 - wage2$married) * wage2$black
# single_nonblack is the base group: the column is still created, but it is
# deliberately left out of the regression below.
wage2$single_nonblack <- (1 - wage2$married) * (1 - wage2$black)
# The four indicators sum to one in every row, so entering all of them
# together with the intercept is perfectly collinear (dummy-variable trap);
# lm() would silently drop one and report it as NA. Omitting single_nonblack
# makes the base group explicit, and each remaining group coefficient is the
# log-wage gap relative to single nonblacks.
model4 <- lm(
  log(wage) ~ married_black + married_nonblack + single_black +
    educ + exper + tenure + south + urban,
  data = wage2
)
summary(model4)
##
## Call:
## lm(formula = log(wage) ~ married_black + married_nonblack + single_black +
## educ + exper + tenure + south + urban, data = wage2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98013 -0.21780 0.01057 0.24219 1.22889
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.403793 0.114122 47.351 < 2e-16 ***
## married_black 0.009448 0.056013 0.169 0.866083
## married_nonblack 0.188915 0.042878 4.406 1.18e-05 ***
## single_black -0.240820 0.096023 -2.508 0.012314 *
## educ 0.065475 0.006253 10.471 < 2e-16 ***
## exper 0.014146 0.003191 4.433 1.04e-05 ***
## tenure 0.011663 0.002458 4.745 2.41e-06 ***
## south -0.091989 0.026321 -3.495 0.000497 ***
## urban 0.184350 0.026978 6.833 1.50e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3656 on 926 degrees of freedom
## Multiple R-squared: 0.2528, Adjusted R-squared: 0.2464
## F-statistic: 39.17 on 8 and 926 DF, p-value: < 2.2e-16
# Calculate the wage differential between married blacks and married nonblacks
# Both coefficients are measured against the same base group, so their
# difference is the married-black vs married-nonblack gap in log points.
coef_married_black <- coef(model4)["married_black"]
coef_married_nonblack <- coef(model4)["married_nonblack"]
wage_differential <- coef_married_black - coef_married_nonblack
cat("Estimated wage differential between married blacks and married nonblacks:", wage_differential, "\n")
## Estimated wage differential between married blacks and married nonblacks: -0.1794663