Forecast Simulation

Code
ggr <- function(tbbl){#get growth rate
  tbbl|>
    mutate(growth_rate=value/lag(value))|>
    select(year, growth_rate)|>
    na.omit()
}

glv <- function(tbbl){#get last value
  tbbl$value[tbbl$year==max(tbbl$year)]
}

mk_fcast <- function(last_val, grt_rts){#make the forecast
  lyod <- as.numeric(max(grt_rts$year))
  grt_rts <- grt_rts|>
    mutate(year=as.numeric(year),
           prob=(year-min(year))/((length(year)-1)/2*length(year))) #discrete triangle distribution
  lyod <- max(grt_rts$year)
  rand_grt_rts <- sample(grt_rts$growth_rate, size = 11, replace = TRUE, prob = grt_rts$prob)
  ones <- rep(1,11)
  weights <- seq(1,.5, by=-.05)
  growth_rates <- rand_grt_rts*(weights)+(1-weights)*1
  tibble(year=(lyod+1):(lyod+11), fcast=last_val*cumprod(growth_rates))
}

tbbl <- read_excel("Employment for 64 LMO Industries 2000-2023.xlsx", skip = 3)|>
  cross_join(tibble(sim=1:1000))|>
  pivot_longer(cols = starts_with("2"), names_to = "year")|>
  clean_names()|>
  filter(str_detect(lmo_ind_code, "ind"))|>
  unite(industry, lmo_ind_code, lmo_detailed_industry, sep=": ")|>
  group_by(industry, sim)|>
  nest()|>
  mutate(last_value=map_dbl(data, glv),
         growth_rates=map(data, ggr),
         forecast=map2(last_value, growth_rates, mk_fcast))

historic <- tbbl|>
  ungroup()|>
  select(industry, data)|>
  unnest(data)|>
  distinct()|>
  mutate(year=as.numeric(year),
         industry=str_trunc(industry, 20)
         )

fcasts <- tbbl|>
  select(industry, sim, forecast)|>
  unnest(forecast)|>
  mutate(industry=str_trunc(industry, 20))

ave_fcasts <- fcasts|>
  group_by(industry, year)|>
  summarize(mean_fcast=mean(fcast))

Forecasts based on historic growth rates:

I was interested in what kind of forecasts we would end up with if we

  1. Calculated the annual growth rates for each year and industry between 2000 and 2023.
  2. Drew a random sample of 11 of the growth rates with probabilities from discrete triangle distribution:
Code
tibble(year=2000:2023)|>
  mutate(prob=(year-min(year))/((length(year)-1)/2*length(year)))|> 
  ggplot(aes(year, prob))+
  geom_col()+
  labs(title="The probabilities of drawing growth rate from each year",
       subtitle="The most recent past more informative than distant past")

  1. Calculate weighted average growth rate to dampen the trend:
Code
tibble(year=factor(2024:2034))|>
  mutate(weight=seq(1,.5, by=-.05))|> 
  ggplot(aes(year, weight))+
  geom_col()+
  labs(title="The weight put on the random growth rates drawn.",
       subtitle="Thus as time progresses the forecasts flatten out.")

  1. Apply these weighted growth rates to the 2023 level.
  2. Repeat 2) and 3) 1000 times, for each of the 64 LMO industries.
  3. Plot the results: the simulation mean is in white.

This gives an indication of the uncertainty of the forecasts: the more predictable the series, the tighter the forecast cone.

Code
ggplot()+
  geom_line(data=fcasts, mapping=aes(year, fcast, group=sim), alpha=.1, lwd=.05)+
  geom_line(data=historic, mapping=aes(year, value, group=industry))+
  geom_line(data=ave_fcasts, mapping=aes(year, mean_fcast, group=industry), colour="white")+
  scale_y_continuous(trans="log10", labels = scales::label_number(suffix = " K", scale = 1e-3))+ 
  facet_wrap(~industry, scales = "free_y")+
  labs(x=NULL, y=NULL, title="Forecasts based on historic growth rates")