Code
par(mfrow=c(2,2))
plot(mod)

One might be interested in the association between our skill clusters and salary. Here we perform a hedonic wage regression to estimate the elasticity of salary with respect to five skill aggregates: Analytical, Management, STEM, Social, and Technical skills. A non-exhaustive list of caveats:
# Arrange base-graphics output in a 2x2 grid so the panels drawn by
# plot(mod) share one page. `mod` is defined outside this excerpt.
par(mfrow = c(2, 2))
plot(mod)

# One av_plot_gg() call per skill aggregate (helper defined elsewhere;
# presumably an added-variable plot -- confirm against its definition).
# NOTE(review): there is no call for "log(Social)" -- confirm that is intended.
av_plot_gg(mod, "log(Analytical)", data = tbbl)
av_plot_gg(mod, "log(Management)", data = tbbl)
av_plot_gg(mod, "log(STEM)", data = tbbl)
av_plot_gg(mod, "log(Technical)", data = tbbl)

# Standard errors taken from the diagonal of the HC3 covariance matrix.
robust_se <- sqrt(diag(vcovHC(mod, type = "HC3")))
# Render the regression table as HTML, replacing the model's default standard
# errors with the HC3 ones held in `robust_se`.
stargazer(
  mod,
  type = "html",
  se = list(robust_se),
  title = "Hedonic Regression of Wages on Skill Aggregates (HC3 Standard Errors)",
  dep.var.labels = "Log Salary",
  # Labels are matched to coefficients by position, so keep this order in
  # step with the order of the terms in `mod`.
  covariate.labels = c(
    "Log Analytical Skills", "Log Management Skills",
    "Log STEM Skills", "Log Social Skills", "Log Technical Skills"
  ),
  digits = 3,
  omit.stat = c("f", "ser"),
  no.space = TRUE
)

| Dependent variable: | |
| Log Salary | |
| Log Analytical Skills | 1.864*** |
| (0.292) | |
| Log Management Skills | 0.388*** |
| (0.125) | |
| Log STEM Skills | -0.134** |
| (0.064) | |
| Log Social Skills | -1.064*** |
| (0.165) | |
| Log Technical Skills | 0.004 |
| (0.033) | |
| Constant | 6.816*** |
| (0.603) | |
| Observations | 500 |
| R² | 0.403 |
| Adjusted R² | 0.397 |
| Note: | *p<0.1; **p<0.05; ***p<0.01 |
# Build a wide table with one column per skill, attach the wage data, and
# use "NOC: Occupation Title" as the row names.
all_skills <- skills |>
  select(NOC = NOC2021, name = Skill, value = `Level Score`) |>
  # Scores are shifted by 1 before being logged below (presumably so that a
  # score of 0 does not become -Inf -- confirm the score range).
  mutate(value = value + 1) |>
  pivot_wider(names_from = name, values_from = value) |>
  # Explicit key: NOC is the only non-skill column left on the skills side,
  # so this matches the implicit natural join without the "Joining with" message.
  inner_join(wages, by = "NOC") |>
  unite(noc, NOC, `Occupation Title`, sep = ": ") |>
  column_to_rownames("noc") |>
  na.omit()

response <- "salary"
log_response <- paste0("log(`", response, "`)")

# Every column other than the response is used as a regressor.
indep_vars <- setdiff(names(all_skills), response)

# Backtick-quote each name so columns containing spaces parse inside log().
log_vars <- paste0("log(`", indep_vars, "`)")

# Log-log specification: log(response) on the log of every other column.
fmla <- reformulate(termlabels = log_vars, response = log_response)

# Fit the model.
all_model <- lm(fmla, data = all_skills)
print(fmla)
log(salary) ~ log(`Active Learning`) + log(`Active Listening`) +
log(`Complex Problem Solving`) + log(Coordination) + log(`Critical Thinking`) +
log(`Equipment Maintenance`) + log(`Equipment Selection`) +
log(Installation) + log(Instructing) + log(`Judgment and Decision Making`) +
log(`Learning Strategies`) + log(`Management of Financial Resources`) +
log(`Management of Material Resources`) + log(`Management of Personnel Resources`) +
log(Mathematics) + log(Monitoring) + log(Negotiation) + log(`Operation and Control`) +
log(`Operations Analysis`) + log(`Operations Monitoring`) +
log(Persuasion) + log(Programming) + log(`Quality Control Analysis`) +
log(`Reading Comprehension`) + log(Repairing) + log(Science) +
log(`Service Orientation`) + log(`Social Perceptiveness`) +
log(Speaking) + log(`Systems Analysis`) + log(`Systems Evaluation`) +
log(`Technology Design`) + log(`Time Management`) + log(Troubleshooting) +
log(Writing)
# Arrange base-graphics output in a 2x2 grid for the plot(all_model) panels.
par(mfrow = c(2, 2))
plot(all_model)

# Coefficient table tested against the HC3 covariance matrix, tidied into a
# data frame, with the intercept dropped and each term reduced to the bare
# skill name so it can be matched against `clusters$Skill`.
coef_df <- coeftest(all_model, vcov = vcovHC(all_model, type = "HC3")) |>
  tidy() |>
  filter(term != "(Intercept)") |>
  mutate(
    # Strip the "log(", ")" and backtick wrappers added when building the formula.
    term = str_remove_all(term, "log\\("),
    term = str_remove_all(term, "\\)"),
    term = str_remove_all(term, "`")
  ) |>
  # NOTE(review): full_join keeps unmatched rows from both sides, so a skill
  # present only in `clusters` gets NA estimates -- confirm this is intended.
  full_join(clusters, by = c("term" = "Skill"))
# Dot-and-whisker plot of the estimates in `coef_df`: one point per term,
# ordered by estimate, with horizontal bars spanning one standard error on
# either side, and one facet row per cluster_name.
ggplot(coef_df, aes(x = estimate, y = fct_reorder(term, estimate))) +
  # Reference line at zero.
  geom_vline(xintercept = 0, lwd = 2, color = "white") +
  geom_point() +
  geom_errorbarh(
    aes(xmin = estimate - std.error, xmax = estimate + std.error),
    height = 0.2
  ) +
  labs(
    title = "Skill elasticities from Hedonic Wage Regression",
    x = "Coefficient Estimate (+/-1 HC3 SE)",
    y = NULL
  ) +
  # Free y scales so each facet lists only its own terms.
  facet_wrap(~cluster_name, ncol = 1, scales = "free_y")