Exercise 5.1 - Benchmark Forecasting Methods

Australian Population

aus_pop <- global_economy |>
  filter(Country == "Australia")

aus_pop |> autoplot(Population) + 
  labs(title = "Australian Population Over Time")

pop_model <- aus_pop |> model(RW(Population ~ drift()))
pop_forecast <- pop_model |> forecast(h = 5)
pop_forecast |> autoplot(aus_pop) + 
  labs(title = "Population Forecast - Drift Method")

Bricks Production

aus_production |> autoplot(Bricks) + 
  labs(title = "Australian Brick Production")

# Bricks is missing (NA) after 2005 Q2, so drop the empty tail before fitting
bricks_data <- aus_production |> filter(!is.na(Bricks))

brick_model <- bricks_data |> model(SNAIVE(Bricks))
brick_forecast <- brick_model |> forecast(h = 8)
brick_forecast |> autoplot(bricks_data) + 
  labs(title = "Brick Production Forecast")

NSW Lambs

nsw_lambs <- aus_livestock |>
  filter(State == "New South Wales", Animal == "Lambs")

nsw_lambs |> autoplot(Count) + 
  labs(title = "NSW Lamb Count")

lamb_model <- nsw_lambs |> model(SNAIVE(Count))
lamb_forecast <- lamb_model |> forecast(h = 8)
lamb_forecast |> autoplot(nsw_lambs) + 
  labs(title = "NSW Lamb Forecast")

Household Wealth

hh_budget |> autoplot(Wealth) + 
  labs(title = "Household Wealth")

wealth_model <- hh_budget |> model(RW(Wealth ~ drift()))
wealth_forecast <- wealth_model |> forecast(h = 4)
wealth_forecast |> autoplot(hh_budget) + 
  labs(title = "Wealth Forecast Using Drift")

Takeaway Food Turnover

takeaway_data <- aus_retail |>
  filter(Industry == "Takeaway food services") |>
  summarise(Turnover = sum(Turnover))

takeaway_data |> autoplot(Turnover) + 
  labs(title = "Australian Takeaway Food Turnover")

takeaway_model <- takeaway_data |> model(SNAIVE(Turnover))
takeaway_forecast <- takeaway_model |> forecast(h = 12)
takeaway_forecast |> autoplot(takeaway_data) + 
  labs(title = "Takeaway Turnover Forecast")

Exercise 5.2 - Facebook Stock Analysis

Time Plot

fb_data <- gafa_stock |>
  filter(Symbol == "FB") |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE)

fb_data |> autoplot(Close) + 
  labs(title = "Facebook Daily Closing Price", y = "Price ($)")

Drift Method Forecasts

fb_drift_model <- fb_data |> model(RW(Close ~ drift()))
fb_drift_fc <- fb_drift_model |> forecast(h = 42)
fb_drift_fc |> autoplot(fb_data) + 
  labs(title = "Facebook Price - Drift Forecast")

Verify Drift Equals Line Slope

drift_coef <- tidy(fb_drift_model)$estimate
first_close <- fb_data$Close[1]
last_close <- fb_data$Close[nrow(fb_data)]
days_total <- nrow(fb_data)
line_slope <- (last_close - first_close) / (days_total - 1)

print(paste("Drift coefficient:", round(drift_coef, 4)))
## [1] "Drift coefficient: 0.0608"
print(paste("Manual line slope:", round(line_slope, 4)))
## [1] "Manual line slope: 0.0608"

Compare Benchmark Methods

fb_models <- fb_data |> 
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = RW(Close ~ drift())
  )

fb_forecasts <- fb_models |> forecast(h = 42)
fb_forecasts |> autoplot(fb_data, level = NULL) + 
  labs(title = "Facebook Price - Method Comparison")

The naive method (blue) provides a reasonable forecast by maintaining the last observed price. The mean method (green) forecasts a return to the historical average price, which seems unlikely given the recent upward trend. The drift method (red) assumes continued linear growth at the historical rate.
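For reference, the mean and naive benchmarks extrapolate as

$$\text{Mean: } \hat{y}_{T+h|T} = \bar{y} = \frac{1}{T}\sum_{t=1}^{T} y_t, \qquad \text{Naive: } \hat{y}_{T+h|T} = y_T,$$

while the drift method uses the formula given in the previous section.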

Exercise 5.3 - Beer Production Analysis

recent_beer <- aus_production |>
  filter(year(Quarter) >= 1992)

beer_model <- recent_beer |> model(SNAIVE(Beer))
beer_model |> gg_tsresiduals()

beer_forecasts <- beer_model |> forecast()
beer_forecasts |> autoplot(recent_beer) + 
  labs(title = "Beer Production Forecasts")

beer_model |> augment() |> features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
##   .model       lb_stat lb_pvalue
##   <chr>          <dbl>     <dbl>
## 1 SNAIVE(Beer)    32.3 0.0000834

The Ljung-Box test returns a p-value of 0.0000834, so we reject the null hypothesis that the residuals are white noise: significant autocorrelation remains. The ACF plot agrees, with several spikes extending beyond the confidence bands, confirming that the seasonal naive method isn't capturing all the underlying patterns in the beer data.
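For reference, the lb_stat value reported above is

$$Q^* = T(T+2)\sum_{k=1}^{\ell} \frac{r_k^2}{T-k},$$

where $T$ is the number of observations, $r_k$ is the lag-$k$ autocorrelation of the residuals, and $\ell = 8$ follows the usual convention of $\ell = 2m$ for seasonal data (here $m = 4$ quarters). Under the white-noise null, $Q^*$ approximately follows a $\chi^2_{\ell}$ distribution, which is where the p-value comes from.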

Exercise 5.4 - Exports and Bricks Analysis

Australian Exports

aus_exports <- global_economy |>
  filter(Country == "Australia")

exports_model <- aus_exports |> model(NAIVE(Exports))
exports_model |> gg_tsresiduals()

exports_model |> forecast(h = 5) |> autoplot(aus_exports) + 
  labs(title = "Australian Exports Forecast")

The naive method appears to work well for the exports data: the residuals show no obvious patterns, and the autocorrelations mostly stay within the confidence bounds. For annual data like this, the simple approach seems adequate.
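To back the visual impression with a formal check, the same Ljung-Box test used for the beer series can be applied here. A quick sketch (output not shown), using the conventional lag of 10 for non-seasonal data:

# Formal white-noise check on the naive residuals (lag = 10 for annual data)
exports_model |> augment() |> features(.innov, ljung_box, lag = 10)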

Bricks Analysis

# As in Exercise 5.1, drop the NA tail of the Bricks series before fitting
bricks_complete <- aus_production |> filter(!is.na(Bricks))

bricks_model <- bricks_complete |> model(SNAIVE(Bricks))
bricks_model |> gg_tsresiduals()

bricks_model |> forecast(h = 8) |> autoplot(bricks_complete) + 
  labs(title = "Brick Production Forecast Analysis")

The bricks data show a similar pattern to the beer production data: the residuals display autocorrelation, which suggests the seasonal naive approach isn't fully capturing the structure of the series.
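The same formal check applies here. A sketch (output not shown), with lag = 2m = 8 for quarterly data:

# Ljung-Box test on the seasonal naive residuals for bricks
bricks_model |> augment() |> features(.innov, ljung_box, lag = 8)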

Exercise 5.5 - Victorian Livestock Forecasts

vic_livestock <- aus_livestock |>
  filter(State == "Victoria")

vic_fit <- vic_livestock |> model(SNAIVE(Count))
vic_fc <- vic_fit |> forecast(h = 24)

vic_fc |> autoplot(vic_livestock) + 
  labs(title = "Victorian Livestock Forecasts - All Series")

Exercise 5.7 - Retail Series Training/Test

Data Setup

set.seed(12345678)
my_retail <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

my_retail |> autoplot(Turnover)

my_retail |> gg_season(Turnover)

retail_train <- my_retail |>
  filter(year(Month) < 2011)

Verify Data Split

autoplot(my_retail, Turnover) +
  autolayer(retail_train, Turnover, colour = "red") + 
  labs(title = "Retail Data Split - Train vs Full")

Model Fitting and Diagnostics

retail_model <- retail_train |> model(SNAIVE(Turnover))
retail_model |> gg_tsresiduals()

The ACF plot shows multiple significant spikes, indicating the residuals are far from white noise. The histogram suggests the residuals are roughly normally distributed, but the autocorrelation pattern tells us the seasonal naive method is missing important information in the data.
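A formal test can confirm what the ACF plot shows. A sketch (output not shown), with lag = 2m = 24 for monthly data:

# Ljung-Box test on the seasonal naive residuals for the retail series
retail_model |> augment() |> features(.innov, ljung_box, lag = 24)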

Forecasting and Accuracy

retail_fc <- retail_model |>
  forecast(new_data = anti_join(my_retail, retail_train))

retail_fc |> autoplot(my_retail) + 
  labs(title = "Retail Forecasts vs Actual")

print("Training accuracy:")
## [1] "Training accuracy:"
retail_model |> accuracy()
## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768
print("Test accuracy:")
## [1] "Test accuracy:"
retail_fc |> accuracy(my_retail)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601
# Test different training periods
train_2008 <- my_retail |> filter(year(Month) < 2008)
train_2009 <- my_retail |> filter(year(Month) < 2009) 
train_2010 <- my_retail |> filter(year(Month) < 2010)

fit_2008 <- train_2008 |> model(SNAIVE(Turnover))
fit_2009 <- train_2009 |> model(SNAIVE(Turnover))
fit_2010 <- train_2010 |> model(SNAIVE(Turnover))

test_data <- my_retail |> filter(year(Month) >= 2011)
fc_2008 <- fit_2008 |> forecast(new_data = test_data)
fc_2009 <- fit_2009 |> forecast(new_data = test_data)
fc_2010 <- fit_2010 |> forecast(new_data = test_data)  # evaluate the 2010 fit the same way

fc_2008 |> accuracy(my_retail)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test   1.93  2.31  1.96  13.9  14.2  2.14  1.90 0.513
fc_2009 |> accuracy(my_retail)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test   1.10  1.68  1.28  7.40  9.04  1.40  1.39 0.539

The accuracy comparison shows the expected pattern: performance degrades out of sample, with MASE rising from 1.0 on the training data to 1.36 on the test set and RMSE increasing from 1.21 to 1.55. More striking is how sensitive the results are to the amount of training data: cutting the training data at the start of 2008 gives a test MASE of 2.14, cutting at 2009 gives 1.40, and cutting at 2011 (the original split) gives 1.36. The seasonal naive forecasts improve steadily as the training window extends closer to the test period, so conclusions drawn from a single train/test split should be treated with caution.
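Given this sensitivity to the split point, time series cross-validation would average accuracy over many rolling origins rather than relying on a single split. A minimal sketch using tsibble's stretch_tsibble (the .init and .step values below are illustrative choices; output not shown):

# Each fold extends the training data by 12 months, starting from a
# 10-year initial window; accuracy() averages the errors across folds
retail_cv <- my_retail |>
  stretch_tsibble(.init = 120, .step = 12)

retail_cv |>
  model(SNAIVE(Turnover)) |>
  forecast(h = 12) |>
  accuracy(my_retail)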