library(fpp3)

5.1

For each series below, I picked a simple benchmark forecast from NAIVE, SNAIVE, or RW with drift.

(a) Australian population (annual)

Population is annual, steadily increasing, and has no seasonality, so a random walk with drift is reasonable.

pop_aus <- global_economy |>
  filter(Country == "Australia") |>
  select(Year, Population)

fit_pop <- pop_aus |>
  model(Drift = RW(Population ~ drift()))

fc_pop <- fit_pop |> forecast(h = 10)

autoplot(pop_aus, Population) +
  autolayer(fc_pop) +
  labs(title = "Australian population: RW with drift", y = "Population")

(b) Bricks (quarterly)

Bricks are quarterly and show a repeating seasonal pattern, so seasonal naive makes sense.

bricks <- aus_production |> select(Quarter, Bricks)

fit_bricks <- bricks |> model(SNaive = SNAIVE(Bricks))
fc_bricks  <- fit_bricks |> forecast(h = 8)

autoplot(bricks, Bricks) +
  autolayer(fc_bricks) +
  labs(title = "Bricks: SNAIVE forecasts", y = "Bricks")

(c) NSW lambs (monthly)

This is monthly and seasonal, so seasonal naive again.

nsw_lambs <- aus_livestock |>
  filter(State == "New South Wales", Animal == "Lambs") |>
  select(Month, Count) |>
  rename(Lambs = Count)

fit_lambs <- nsw_lambs |> model(SNaive = SNAIVE(Lambs))
fc_lambs  <- fit_lambs |> forecast(h = "2 years")

autoplot(nsw_lambs, Lambs) +
  autolayer(fc_lambs) +
  labs(title = "NSW lambs: SNAIVE forecasts", y = "Count")

(d) Household wealth (annual)

Annual with trend/level changes and no seasonality → RW with drift.

wealth <- hh_budget |> select(Year, Wealth)

fit_wealth <- wealth |> model(Drift = RW(Wealth ~ drift()))
fc_wealth  <- fit_wealth |> forecast(h = 10)

autoplot(wealth, Wealth) +
  autolayer(fc_wealth) +
  labs(title = "Household wealth: RW with drift", y = "Wealth")

(e) Takeaway food services turnover (monthly)

Monthly retail turnover is seasonal, so I used seasonal naive.

# Aggregate over Australia (summing across states) to get one monthly series
# Using index_by() avoids grouping on the tsibble index column.
takeaway <- aus_retail |>
  filter(Industry == "Takeaway food services") |>
  index_by(Month) |>
  summarise(Turnover = sum(Turnover, na.rm = TRUE))

fit_takeaway <- takeaway |> model(SNaive = SNAIVE(Turnover))
fc_takeaway  <- fit_takeaway |> forecast(h = "2 years")

autoplot(takeaway, Turnover) +
  autolayer(fc_takeaway) +
  labs(title = "Takeaway food services: SNAIVE forecasts", y = "Turnover")

5.2 (Facebook stock, gafa_stock)

The stock data has trading days only (no weekends), so it can be irregular as a daily tsibble. To keep the benchmark methods working cleanly, I create a regular time index t = 1,2,3,... and model Close as a series over t.

fb_raw <- gafa_stock |>
  filter(Symbol == "FB") |>
  select(Date, Close) |>
  as_tibble() |>
  arrange(Date)

# Regular index for modelling
fb <- fb_raw |>
  mutate(t = row_number()) |>
  as_tsibble(index = t)

# Plot the original series by Date
fb_raw |>
  ggplot(aes(x = Date, y = Close)) +
  geom_line() +
  labs(title = "Facebook (FB) closing price", y = "Close")

fit_fb <- fb |> model(
  Drift = RW(Close ~ drift()),
  Naive = NAIVE(Close),
  Mean  = MEAN(Close)
)

fc_fb <- fit_fb |> forecast(h = 30)

# Plot forecasts against the original (by Date) using the matching future dates
future_dates <- fb_raw$Date[(nrow(fb_raw) - 29):nrow(fb_raw)]

# Easiest: show forecasts on the t-scale (still readable)
autoplot(fb, Close) +
  autolayer(fc_fb) +
  labs(title = "FB: Drift vs Naive vs Mean (30 steps ahead)", x = "t (trading day index)", y = "Close")

Drift = extending the first-to-last line (check)

first_y <- fb_raw$Close[1]
last_y  <- fb_raw$Close[nrow(fb_raw)]
n <- nrow(fb_raw)

slope <- (last_y - first_y) / (n - 1)

h <- 30
line_ext <- first_y + slope * (n:(n + h - 1))

drift_fc <- fc_fb |>
  filter(.model == "Drift") |>
  as_tibble() |>
  pull(.mean)

max(abs(drift_fc - line_ext))
## [1] 2.842171e-14

The value above should be extremely close to 0 (small floating point differences are fine).

5.3 (Beer production from 1992)

Fit SNAIVE() to quarterly Australian beer production from 1992, then check residuals and show forecasts.

beer_1992 <- aus_production |>
  filter(year(Quarter) >= 1992) |>
  select(Quarter, Beer)

fit_beer <- beer_1992 |> model(SNaive = SNAIVE(Beer))
fit_beer |> gg_tsresiduals()

fit_beer |>
  forecast(h = 8) |>
  autoplot(beer_1992) +
  labs(title = "Beer production (from 1992): SNAIVE forecasts", y = "Beer")

5.4 (Exports + Bricks)

Repeat 5.3 for: - Australian exports (annual) - Bricks (quarterly)

exports_aus <- global_economy |>
  filter(Country == "Australia") |>
  select(Year, Exports)

fit_exports <- exports_aus |> model(Naive = NAIVE(Exports))
fit_exports |> gg_tsresiduals()

fit_exports |>
  forecast(h = 10) |>
  autoplot(exports_aus) +
  labs(title = "Australian exports: NAIVE forecasts", y = "Exports")

fit_bricks2 <- bricks |> model(SNaive = SNAIVE(Bricks))
fit_bricks2 |> gg_tsresiduals()

fit_bricks2 |>
  forecast(h = 8) |>
  autoplot(bricks) +
  labs(title = "Bricks: SNAIVE forecasts", y = "Bricks")

5.7 (Retail series)

I used one monthly retail series from aus_retail:

myseries <- aus_retail |>
  filter(State == "New South Wales", Industry == "Department stores") |>
  select(Month, Turnover)

myseries |> autoplot(Turnover) +
  labs(title = "Retail turnover: NSW — Department stores", y = "Turnover")

(a) Train on data before 2011 and fit SNAIVE

train_pre2011 <- myseries |> filter(year(Month) < 2011)

fit_retail <- train_pre2011 |> model(SNaive = SNAIVE(Turnover))
fit_retail |> gg_tsresiduals()

(b) Forecast the test period and compute accuracy

# create test set by filtering, rather than anti_join (simpler + safer)
test_2011on <- myseries |> filter(year(Month) >= 2011)

fc_retail <- fit_retail |> forecast(new_data = test_2011on)

fc_retail |> autoplot(myseries) +
  labs(title = "SNAIVE forecasts for test period (2011+)", y = "Turnover")

# Training accuracy
fit_retail |> accuracy()
## # A tibble: 1 × 10
##   .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 SNaive Training  11.2  24.7  19.1  3.24  5.58     1     1 -0.0527
# Test accuracy
fc_retail |> accuracy(myseries)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNaive Test  -2.81  25.5  19.9 -1.02  4.05  1.04  1.03 0.201

(c) Compare different training cutoffs

train_pre2009 <- myseries |> filter(year(Month) < 2009)
train_pre2010 <- myseries |> filter(year(Month) < 2010)

fit_2009 <- train_pre2009 |> model(SNaive = SNAIVE(Turnover))
fit_2010 <- train_pre2010 |> model(SNaive = SNAIVE(Turnover))

test_2009 <- myseries |> filter(year(Month) >= 2009)
test_2010 <- myseries |> filter(year(Month) >= 2010)

acc_2009 <- fit_2009 |> forecast(new_data = test_2009) |> accuracy(myseries)
acc_2010 <- fit_2010 |> forecast(new_data = test_2010) |> accuracy(myseries)

acc_2009
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 SNaive Test   3.99  28.1  22.5 0.773  4.57  1.18  1.13 -0.0826
acc_2010
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNaive Test  -6.50  24.7  19.8 -1.56  4.09  1.04 0.994 0.150

Short written note

With seasonal naive, forecasts are essentially “repeat the same month last year”. Changing the training cutoff doesn’t change the basic forecast rule, but it does change which period is used for evaluation (and whether the training set includes structural breaks). If the series has a level shift, a shorter/more recent training window can sometimes look better on the test set.

Session info

sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] fable_0.5.0       feasts_0.5.0      fabletools_0.6.1  ggtime_0.2.0     
##  [5] tsibbledata_0.4.1 tsibble_1.2.0     ggplot2_4.0.2     lubridate_1.9.5  
##  [9] tidyr_1.3.2       dplyr_1.2.0       tibble_3.3.1      fpp3_1.0.3       
## 
## loaded via a namespace (and not attached):
##  [1] ggdist_3.3.3         rappdirs_0.3.4       sass_0.4.10         
##  [4] utf8_1.2.6           generics_0.1.4       anytime_0.3.12      
##  [7] digest_0.6.39        magrittr_2.0.4       evaluate_1.0.5      
## [10] grid_4.5.2           timechange_0.4.0     RColorBrewer_1.1-3  
## [13] fastmap_1.2.0        jsonlite_2.0.0       purrr_1.2.1         
## [16] scales_1.4.0         jquerylib_0.1.4      cli_3.6.5           
## [19] rlang_1.1.7          crayon_1.5.3         withr_3.0.2         
## [22] cachem_1.1.0         yaml_2.3.12          tools_4.5.2         
## [25] vctrs_0.7.1          R6_2.6.1             lifecycle_1.0.5     
## [28] pkgconfig_2.0.3      progressr_0.18.0     pillar_1.11.1       
## [31] bslib_0.10.0         gtable_0.3.6         glue_1.8.0          
## [34] Rcpp_1.1.1           xfun_0.56            tidyselect_1.2.1    
## [37] rstudioapi_0.18.0    knitr_1.51           farver_2.1.2        
## [40] htmltools_0.5.9      rmarkdown_2.30       labeling_0.4.3      
## [43] compiler_4.5.2       S7_0.2.1             distributional_0.6.0