library(tidyverse)
library(readxl)
library(janitor)
library(patchwork)
library(stargazer)
library(fpp3)
library(ggplotify)
library(ggallin)
#functions------------------

residual_plot <- function(fit, mod, quoted_title){
  fit|>
  select({{  mod  }})|>
  gg_tsresiduals()+
  labs(title=quoted_title)
}

plot_fcasts <- function(var, fcast_tbbl, quoted_title){
 ggplot()+
  geom_line(mapping = aes(year, {{  var  }}), data=training)+
  geom_line(mapping= aes(year, .mean, colour=.model), data=fcast_tbbl|>filter(.model=="lm"))+
  geom_line(mapping= aes(year, .mean, colour=.model), data=fcast_tbbl|>filter(.model=="lm_no_aac"))+
  geom_line(mapping= aes(year, .mean, colour=.model), data=fcast_tbbl|>filter(.model=="arima"))+
  geom_line(mapping= aes(year, .mean, colour=.model), data=fcast_tbbl|>filter(.model=="arima_no_aac"))+ 
  geom_line(mapping= aes(year, .mean, colour=.model), data=fcast_tbbl|>filter(.model=="ets"))+     
  labs(x=NULL, 
       y="Employment",
       title=quoted_title)+
  scale_y_continuous(labels = scales::comma) 
}

get_growth_rate <- function(tbbl){
  tbbl|>
    mutate(growth_rate=value/lag(value)-1)
}

growth_boxplot <- function(tbbl){
  tbbl|>
    group_by(name)|>
    nest()|>
    mutate(data=map(data, get_growth_rate))|>
    unnest(data)|>
    ggplot(aes(x=growth_rate, y=name))+
    geom_boxplot(fill="red", alpha=.25)+
    geom_jitter(alpha=.25)+
    scale_x_continuous(labels = scales::percent, trans=pseudolog10_trans)+
    labs(x="growth rate",
         y="series",
         title="Comparison of annual growth rates"
         )
}
combine_fc <- function(fc, industry){
  temp <- fc|>
    as_tibble()|>
    select(.model, year, .mean)
  
  ave_fcast <- temp|>
    group_by(year)|>
    summarize(.mean=mean(.mean))|>
    mutate(.model="Average")

  bind_rows(temp, ave_fcast) |>
    pivot_wider(id_cols = year, names_from = .model, values_from=.mean)|>
    full_join(training|>select(year, {{  industry  }}))|>
    arrange(year)|>
    pivot_longer(cols=-year)|>
    na.omit()
}
gg_phase <- function(unquoted_var, quoted_title){
  ggplot(training, aes(aac, {{  unquoted_var  }})) +
  geom_path(mapping=aes(colour=year), alpha=.25)+
  geom_text(mapping=aes(label=year, colour=year), size=3)+
  scale_y_continuous(labels = scales::comma)+
  labs(x="Annual Allowable Cut",
       y="Employment",
       title=quoted_title)
}
#the program------------------
industry <- c("Forestry, logging and support activities",
                              "Wood product manufacturing",
                              "Paper manufacturing")

aac <- read_excel("Historic AAC Jan 2023.xlsx", sheet = "Summary",skip=3)|>
  clean_names()|>
  select(year, aac=total)|>
  filter(year %in% 2000:2034)

aac_wide <- aac|>
  mutate(thing="Annual Allowable Cut")|>
  rename(value=aac)

forest_long <- read_excel("Employment for 64 LMO Industries 2000-2023.xlsx", skip = 2)|>
  filter(`Lmo Detailed Industry` %in% industry)|>
  select(-`Lmo Ind Code`)|>
  pivot_longer(cols=-`Lmo Detailed Industry`, names_to = "year", values_to = "employment")|>
  mutate(year=as.numeric(year))

forestry_data <- forest_long|>
  pivot_wider(id_cols = year, 
              names_from = `Lmo Detailed Industry`, 
              values_from = employment)|>
  clean_names()

forest_long <- forest_long|>
  rename(thing=`Lmo Detailed Industry`,
         value=employment
         )

for_first_plot <- bind_rows(aac_wide, forest_long)

all_data <- left_join(aac, forestry_data)|>
  tsibble(index = year)

training <- all_data|>
  na.omit()


plt1 <- gg_phase(forestry_logging_and_support_activities, "Forestry logging and...")
plt2 <- gg_phase(wood_product_manufacturing,"Wood product...")
plt3 <- gg_phase(paper_manufacturing,"Paper manufacturing")

test <- all_data|>
  anti_join(training)


forest_fits <- training|>
  model(lm=TSLM(log(forestry_logging_and_support_activities)~ aac+trend()),
        lm_no_aac=TSLM(log(forestry_logging_and_support_activities)~ trend()),
        arima=ARIMA(log(forestry_logging_and_support_activities)~ 1+aac),
        arima_no_aac=ARIMA(log(forestry_logging_and_support_activities)~ 1),
        ets=ETS(log(forestry_logging_and_support_activities))
        )

wood_fits <- training|>
 model(lm=TSLM(log(wood_product_manufacturing)~ aac+trend()),
        lm_no_aac=TSLM(log(wood_product_manufacturing)~ trend()),
        arima=ARIMA(log(wood_product_manufacturing)~ 1+aac),
        arima_no_aac=ARIMA(log(wood_product_manufacturing)~ 1),
        ets=ETS(log(wood_product_manufacturing))
        )

paper_fits <- training|>
  model(lm=TSLM(log(paper_manufacturing)~ aac+trend()),
        lm_no_aac=TSLM(log(paper_manufacturing)~ trend()),
        arima=ARIMA(log(paper_manufacturing)~ 1+aac),
        arima_no_aac=ARIMA(log(paper_manufacturing)~ 1),
        ets=ETS(log(paper_manufacturing))
        )
 
forest_fc <- forecast(forest_fits, new_data = test)
wood_fc <- forecast(wood_fits, new_data = test)
paper_fc <- forecast(paper_fits, new_data = test)

We are going to forecast employment for 3 LMO forest related industries based on annual allowable cut. Lets take a look at the series:

ggplot(for_first_plot, aes(year, value))+
    annotate(geom = "rect",
             xmin = 2000,
             xmax = 2009,
             ymin = -Inf,
             ymax = +Inf,
             alpha = 0.25) +
  geom_line()+
  labs(x=NULL, y=NULL)+
  scale_y_continuous(labels = scales::comma)+
  facet_wrap(~thing, scales = "free_y")

Note that much of the decline in employment occurred over a period where the annual allowable cut was increasing (2000:2009). An alternative way to see this is to look at the phase diagrams for each of the industries and AAC.

plt1+plt2+plt3+
  plot_layout(guides = 'collect')

Since 2009, employment has stayed somewhat stable in forestry and paper as AAC was decreasing. Only in wood product manufacturing (post 2009) does there seem to be a positive relationship between ACC and employment. Based on the above, I am not optimistic that AAC can be used to predict employment, but lets give it a go.

We fit five models:

  1. lm(log(employment)~aac+trend()
  2. lm(log(employment)~trend()
  3. arima(log(employment)~1+acc)
  4. arima(log(employment)~1)
  5. ets(log(employment))

We fit both the lm and arima models with and without aac to ascertain how much the allowable cut influences the forecasts. Log transforming employment ensures forecasts stay non-negative. Including trend in the linear model protects against spurious regression (if the series are trending). Arima differences the data to make stationary prior to fitting, again to avoid spurious regression.

Forestry logging and support activities

Residual analysis:

residual_plot(forest_fits, lm, "Linear Model with Annual Allowable Cut")

residual_plot(forest_fits, lm_no_aac, "Linear Model, no Annual Allowable Cut")

residual_plot(forest_fits, arima, "Arima Model with Annual Allowable Cut")

residual_plot(forest_fits, arima_no_aac, "Arima Model, no Annual Allowable Cut")

residual_plot(forest_fits, ets, "ETS Model")

Forecasts:

plot_fcasts(forestry_logging_and_support_activities, 
            forest_fc, 
            "Forestry")

Wood product manufacturing

Residual analysis:

residual_plot(wood_fits, lm, "Linear Model with Annual Allowable Cut")

residual_plot(wood_fits, lm_no_aac, "Linear Model, no Annual Allowable Cut")

residual_plot(wood_fits, arima, "Arima Model with Annual Allowable Cut")

residual_plot(wood_fits, arima_no_aac, "Arima Model, no Annual Allowable Cut")

residual_plot(wood_fits, ets, "ETS model")

Forecasts:

plot_fcasts(wood_product_manufacturing, 
            wood_fc, 
            "Wood")

Paper manufacturing

Residual analysis:

residual_plot(paper_fits, lm, "Linear Model with Annual Allowable Cut")

residual_plot(paper_fits, lm_no_aac, "Linear Model, no Annual Allowable Cut")

residual_plot(paper_fits, arima, "Arima Model with Annual Allowable Cut")

residual_plot(paper_fits, arima_no_aac, "Arima Model, no Annual Allowable Cut")

residual_plot(paper_fits, ets, "ETS Model")

Forecasts:

plot_fcasts(paper_manufacturing, 
            paper_fc, 
            "Paper")

Recommendation: average the forecasts

Bates and Granger (1969) demonstrate that averaging across several distinct forecasts typically improves forecast accuracy… but just because you won a Nobel prize doesn’t mean you know what you are talking about, right?

Forestry:

forest <- combine_fc(forest_fc, forestry_logging_and_support_activities)

ggplot(forest, aes(year, value, colour=name))+
  geom_line(lwd=.5)+
  geom_line(data=forest|>filter(name=="Average"), lwd=1.5, show.legend = FALSE)+
  labs(title="Forestry, logging and support activities")

Wood products:

wood <- combine_fc(wood_fc, wood_product_manufacturing)
ggplot(wood, aes(year, value, colour=name))+
  geom_line(lwd=.5)+
  geom_line(data=wood|>filter(name=="Average"), lwd=1.5, show.legend = FALSE)+
  labs(title="Wood product manufacturing")

Paper:

paper <- combine_fc(paper_fc, paper_manufacturing)
ggplot(paper, aes(year, value, colour=name))+
  geom_line(lwd=.5)+
  geom_line(data=paper|>filter(name=="Average"), lwd=1.5, show.legend = FALSE)+
  labs(title="Paper manufacturing")

Compare growth rates:

growth_boxplot(forest)

growth_boxplot(wood)

growth_boxplot(paper)

References

Bates, John M, and Clive WJ Granger. 1969. “The Combination of Forecasts.” Journal of the Operational Research Society 20 (4): 451–68.