lmo <- read_csv(here("employment.csv"), skip = 3)|>
janitor::remove_empty()|>
filter(NOC=="#T",
`Geographic Area`=="British Columbia",
Industry!="All industries")|>
select(Industry, lmo_2023=`2023`)|>
arrange(Industry)
lfs <- read_excel(here("Employment for 64 LMO Industries 2000-2023.xlsx"),
sheet = "British Columbia",
skip=3)|>
select(Industry=`Lmo Detailed Industry`, lfs_2023=`2023`)|>
filter(!Industry %in% c("subtotal","unknown","total"))|>
arrange(Industry)
tbbl <- bind_cols(lmo, lfs)|> #would be better to join, but stokes used slightly different names than we did.
select(Industry=Industry...1, lmo_2023, lfs_2023)|>
mutate(diff=lmo_2023-lfs_2023, #the lmo-lfs difference
abs_diff=abs(diff), #only care about magnitude of error
log_abs_diff=log(abs_diff), #reduce the scale of differences
mean_emp = (lmo_2023+lfs_2023)/2, #the base for creating relative difference
rel_diff = diff/mean_emp, #the relative difference
abs_rel_diff =abs(rel_diff), #only care about magnitude of relative difference
log_abs_rel_diff = log(abs_rel_diff), #reduce the scale of relative differences
level_input=max(log_abs_diff)-log_abs_diff+1, #convert to a strictly positive good thing
relative_input=max(log_abs_rel_diff)-log_abs_rel_diff+1, #convert to a strictly positive good thing
accuracy=level_input^.5*relative_input^.5 #Cobb Douglas function
)|>
arrange(accuracy)
plt1 <- ggplot(tbbl, aes(lfs_2023,
lmo_2023,
colour=accuracy,
size=mean_emp,
text=paste("Industry:",
Industry,
"<br> LMO forecast:",
scales::comma(lmo_2023, accuracy = 1),
"<br> LFS estimate:",
scales::comma(lfs_2023, accuracy = 1))
)
)+
geom_abline(slope = 1, intercept = 0)+
geom_point()+
scale_x_continuous(trans="log10", labels = scales::comma)+
scale_y_continuous(trans="log10", labels = scales::comma)+
scale_colour_viridis_c()+
labs(title="LMO forecast vs. LFS estimate",
x="LFS Estimate for 2023",
y="LMO forecast for 2023")
plt1 <- plotly::ggplotly(plt1, tooltip = "text")
plt2 <- ggplot(tbbl, aes(abs_diff,
abs_rel_diff,
colour=accuracy,
size=mean_emp,
text=paste("Industry:",
Industry,
"<br> Relative difference:",
scales::percent(abs_rel_diff, accuracy = .1),
"<br> Difference:",
scales::comma(abs_diff, accuracy = 1))))+
geom_point()+
scale_x_continuous(trans="log10", labels = scales::comma)+
scale_y_continuous(trans="log10", labels = scales::percent)+
scale_colour_viridis_c()+
labs(title="Relative vs. level difference: LFS vs LMO for 2023",
x="Level difference",
y="Relative difference"
)
plt2 <- plotly::ggplotly(plt2, tooltip = "text")
… Or how closely the LMO forecast corresponds with the LFS estimate of employment. It seems reasonable to care about both level differences and relative differences. For convenience I created a composite index using a cobb douglas functional form, where both the level differences and the relative differences are imperfect substitutes in creating “accuracy”, which is mapped into colour in the following plots.
plt1
plt2
plt3 <- ggplot(tbbl, aes(diff, fct_reorder(Industry, diff), fill=accuracy))+
geom_col()+
scale_x_continuous(labels = scales::comma)+
scale_fill_viridis_c()+
labs(title="Level difference for 2023",
x="LMO minus LFS",
y=NULL)
plt4 <- ggplot(tbbl, aes(rel_diff, fct_reorder(Industry, rel_diff), fill=accuracy))+
geom_col()+
scale_x_continuous(labels = scales::percent)+
scale_fill_viridis_c()+
labs(title="Relative difference for 2023",
x="LMO minus LFS over mean",
y=NULL)
plt3+plt4+ plot_layout(guides = "collect")
… starting with the worst.
tbbl|>
select(Industry, lmo_2023, lfs_2023, diff, rel_diff, accuracy)|>
DT::datatable(rownames = FALSE)|>
DT::formatPercentage(columns='rel_diff', digits = 1)|>
DT::formatRound(columns=c("lmo_2023", "lfs_2023","diff"), digits = 0)|>
DT::formatRound(columns = "accuracy", digits = 2)