Code
# Lay the plots produced by plot(mod) out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(mod)
One might be interested in the association between our skill clusters and salary. Here we perform a hedonic wage regression to estimate the elasticity of salary with respect to five skill aggregates: Analytical, Management, STEM, Social, and Technical skills. A non-exhaustive list of caveats:
# Lay the plots produced by plot(mod) out on a 2 x 2 grid.
par(mfrow=c(2,2))
plot(mod)
# One av_plot_gg() call per log-skill term of `mod`, evaluated against `tbbl`.
# NOTE(review): av_plot_gg is defined outside this section; presumably it draws
# an added-variable (partial regression) plot for the named term -- confirm.
# NOTE(review): there is no call for a log(Social) term here, although the
# regression table lists five skill aggregates -- confirm this is intentional.
av_plot_gg(mod, "log(Analytical)", data = tbbl)
av_plot_gg(mod, "log(Management)", data = tbbl)
av_plot_gg(mod, "log(STEM)", data = tbbl)
av_plot_gg(mod, "log(Technical)", data = tbbl)
# HC3 standard errors for `mod`: square roots of the diagonal of the
# covariance matrix returned by vcovHC(type = "HC3").
robust_se <- sqrt(diag(vcovHC(mod, type = "HC3")))

# HTML regression table for `mod` in which the reported standard errors are
# replaced by the HC3 ones computed above. The F statistic and residual
# standard error rows are omitted.
stargazer(
  mod,
  type = "html",
  se = list(robust_se),
  title = "Hedonic Regression of Wages on Skill Aggregates (HC3 Standard Errors)",
  dep.var.labels = "Log Salary",
  # Labels are applied positionally, so they must follow the order of the
  # terms in `mod` -- TODO confirm against the model formula.
  covariate.labels = c(
    "Log Analytical Skills", "Log Management Skills",
    "Log STEM Skills", "Log Social Skills", "Log Technical Skills"
  ),
  digits = 3,
  omit.stat = c("f", "ser"),
  no.space = TRUE
)
Dependent variable: | |
Log Salary | |
Log Analytical Skills | 1.864*** |
(0.292) | |
Log Management Skills | 0.388*** |
(0.125) | |
Log STEM Skills | -0.134** |
(0.064) | |
Log Social Skills | -1.064*** |
(0.165) | |
Log Technical Skills | 0.004 |
(0.033) | |
Constant | 6.816*** |
(0.603) | |
Observations | 500 |
R2 | 0.403 |
Adjusted R2 | 0.397 |
Note: | *p<0.1; **p<0.05; ***p<0.01 |
# Build the modelling table: skill scores reshaped to one column per skill,
# joined to `wages`, with row names of the form "<NOC>: <Occupation Title>".
all_skills <- skills |>
  # Rename to `name` / `value` so the pivot_wider() defaults pick them up.
  select(NOC = NOC2021, name = Skill, value = `Level Score`) |>
  # Shift every score up by one; presumably this keeps the later log()
  # transform finite for zero scores -- confirm.
  mutate(value = value + 1) |>
  pivot_wider() |>
  # No `by` is given, so the join uses whatever column names the two tables
  # share (presumably just NOC -- verify against `wages`).
  inner_join(wages) |>
  unite(noc, NOC, `Occupation Title`, sep = ": ") |>
  column_to_rownames("noc") |>
  # Drop every row that has a missing value in any column.
  na.omit()
# Name of the response column and its log-transformed formula term.
response <- "salary"
log_response <- paste0("log(`", response, "`)")

# Get independent variables (exclude response): every other column of
# all_skills becomes a regressor.
indep_vars <- setdiff(names(all_skills), response)

# Create formula terms: log(`var name`). The backticks keep column names that
# contain spaces syntactically valid inside the formula.
log_vars <- paste0("log(`", indep_vars, "`)")

# Build the formula
fmla <- reformulate(termlabels = log_vars, response = log_response)

# Fit model: log-log regression of salary on every skill column.
all_model <- lm(fmla, data = all_skills)

print(fmla)
log(salary) ~ log(`Active Learning`) + log(`Active Listening`) +
log(`Complex Problem Solving`) + log(Coordination) + log(`Critical Thinking`) +
log(`Equipment Maintenance`) + log(`Equipment Selection`) +
log(Installation) + log(Instructing) + log(`Judgment and Decision Making`) +
log(`Learning Strategies`) + log(`Management of Financial Resources`) +
log(`Management of Material Resources`) + log(`Management of Personnel Resources`) +
log(Mathematics) + log(Monitoring) + log(Negotiation) + log(`Operation and Control`) +
log(`Operations Analysis`) + log(`Operations Monitoring`) +
log(Persuasion) + log(Programming) + log(`Quality Control Analysis`) +
log(`Reading Comprehension`) + log(Repairing) + log(Science) +
log(`Service Orientation`) + log(`Social Perceptiveness`) +
log(Speaking) + log(`Systems Analysis`) + log(`Systems Evaluation`) +
log(`Technology Design`) + log(`Time Management`) + log(Troubleshooting) +
log(Writing)
# Lay the plots produced by plot(all_model) out on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(all_model)
# Coefficient table for all_model using the HC3 covariance matrix, tidied into
# a data frame with the intercept row removed.
coef_df <- coeftest(all_model, vcov = vcovHC(all_model, type = "HC3")) |>
  tidy() |>
  filter(term != "(Intercept)") |>
  # Strip the "log(", ")" and backtick wrappers so `term` holds the bare skill
  # name and can be matched against clusters$Skill.
  mutate(
    term = str_remove_all(term, "log\\("),
    term = str_remove_all(term, "\\)"),
    term = str_remove_all(term, "`")
  ) |>
  # A full join keeps unmatched rows from both sides, so a skill present in
  # only one table appears with NA in the other table's columns.
  full_join(clusters, by = c("term" = "Skill"))

# Dot-and-whisker plot of the estimates: whiskers span +/- 1 standard error,
# with one facet per cluster_name (presumably a column of `clusters` --
# confirm).
ggplot(coef_df, aes(x = estimate, y = fct_reorder(term, estimate))) +
  geom_vline(xintercept = 0, lwd = 2, color = "white") +
  geom_point() +
  geom_errorbarh(
    aes(xmin = estimate - std.error, xmax = estimate + std.error),
    height = 0.2
  ) +
  labs(
    x = "Coefficient Estimate (+/-1 HC3 SE)",
    y = NULL,
    title = "Skill elasticities from Hedonic Wage Regression"
  ) +
  facet_wrap(~cluster_name, ncol = 1, scales = "free_y")