lmo <- read_csv(here("employment.csv"), skip = 3)|>
  janitor::remove_empty()|>
  filter(NOC=="#T",
         `Geographic Area`=="British Columbia",
         Industry!="All industries")|>
  select(Industry, lmo_2023=`2023`)|>
  arrange(Industry)

lfs <- read_excel(here("Employment for 64 LMO Industries 2000-2023.xlsx"),
                  sheet = "British Columbia",
                  skip=3)|>
  select(Industry=`Lmo Detailed Industry`, lfs_2023=`2023`)|>
  filter(!Industry %in% c("subtotal","unknown","total"))|>
  arrange(Industry)

tbbl <- bind_cols(lmo, lfs)|> #would be better to join, but stokes used slightly different names than we did.
  select(Industry=Industry...1, lmo_2023, lfs_2023)|>
  mutate(diff=lmo_2023-lfs_2023, #the lmo-lfs difference
         abs_diff=abs(diff), #only care about magnitude of error
         log_abs_diff=log(abs_diff), #reduce the scale of differences
         mean_emp = (lmo_2023+lfs_2023)/2, #the base for creating relative difference
         rel_diff = diff/mean_emp, #the relative difference
         abs_rel_diff =abs(rel_diff), #only care about magnitude of relative difference
         log_abs_rel_diff =  log(abs_rel_diff), #reduce the scale of relative differences
         level_input=max(log_abs_diff)-log_abs_diff+1, #convert to a strictly positive good thing
         relative_input=max(log_abs_rel_diff)-log_abs_rel_diff+1, #convert to a strictly positive good thing
         accuracy=level_input^.5*relative_input^.5 #Cobb Douglas function
         )|>
  arrange(accuracy)

plt1 <- ggplot(tbbl, aes(lfs_2023, 
                         lmo_2023,
                         colour=accuracy,
                         size=mean_emp,
                         text=paste("Industry:",
                                    Industry,
                                    "<br> LMO forecast:",
                                    scales::comma(lmo_2023, accuracy = 1),
                                    "<br> LFS estimate:",
                                    scales::comma(lfs_2023, accuracy = 1))
                         )
               )+
  geom_abline(slope = 1, intercept = 0)+
  geom_point()+
  scale_x_continuous(trans="log10", labels = scales::comma)+
  scale_y_continuous(trans="log10", labels = scales::comma)+
  scale_colour_viridis_c()+
  labs(title="LMO forecast vs. LFS estimate",
       x="LFS Estimate for 2023",
       y="LMO forecast for 2023")

plt1 <- plotly::ggplotly(plt1, tooltip = "text")

plt2 <- ggplot(tbbl, aes(abs_diff, 
                 abs_rel_diff,
                 colour=accuracy,
                 size=mean_emp,
                 text=paste("Industry:",
                            Industry,
                            "<br> Relative difference:",
                            scales::percent(abs_rel_diff, accuracy = .1),
                            "<br> Difference:",
                            scales::comma(abs_diff, accuracy = 1))))+
  geom_point()+
  scale_x_continuous(trans="log10", labels = scales::comma)+
  scale_y_continuous(trans="log10", labels = scales::percent)+
  scale_colour_viridis_c()+
  labs(title="Relative vs. level difference: LFS vs LMO for 2023",
       x="Level difference",
       y="Relative difference"
       )

plt2 <- plotly::ggplotly(plt2, tooltip = "text")

… Or how closely the LMO forecast corresponds with the LFS estimate of employment. It seems reasonable to care about both level differences and relative differences. For convenience I created a composite index using a cobb douglas functional form, where both the level differences and the relative differences are imperfect substitutes in creating “accuracy”, which is mapped into colour in the following plots.

plt1
plt2
plt3 <- ggplot(tbbl, aes(diff, fct_reorder(Industry, diff), fill=accuracy))+
  geom_col()+  
  scale_x_continuous(labels = scales::comma)+
  scale_fill_viridis_c()+
  labs(title="Level difference for 2023",
       x="LMO minus LFS",
       y=NULL)

plt4 <- ggplot(tbbl, aes(rel_diff, fct_reorder(Industry, rel_diff), fill=accuracy))+
  geom_col()+  
  scale_x_continuous(labels = scales::percent)+
  scale_fill_viridis_c()+
  labs(title="Relative difference for 2023",
       x="LMO minus LFS over mean",
       y=NULL)

plt3+plt4+ plot_layout(guides = "collect")

Industries organized by accuracy

… starting with the worst.

tbbl|>
  select(Industry, lmo_2023, lfs_2023, diff, rel_diff, accuracy)|>
  DT::datatable(rownames = FALSE)|>
  DT::formatPercentage(columns='rel_diff', digits = 1)|>
  DT::formatRound(columns=c("lmo_2023", "lfs_2023","diff"), digits = 0)|>
  DT::formatRound(columns = "accuracy", digits = 2)