library(fpp3)
For each series below, I picked a simple benchmark forecast from NAIVE, SNAIVE, or RW with drift.
Population is annual, steadily increasing, and has no seasonality, so a random walk with drift is reasonable.
pop_aus <- global_economy |>
filter(Country == "Australia") |>
select(Year, Population)
fit_pop <- pop_aus |>
model(Drift = RW(Population ~ drift()))
fc_pop <- fit_pop |> forecast(h = 10)
autoplot(pop_aus, Population) +
autolayer(fc_pop) +
labs(title = "Australian population: RW with drift", y = "Population")
Bricks are quarterly and show a repeating seasonal pattern, so seasonal naive makes sense.
bricks <- aus_production |> select(Quarter, Bricks)
fit_bricks <- bricks |> model(SNaive = SNAIVE(Bricks))
fc_bricks <- fit_bricks |> forecast(h = 8)
autoplot(bricks, Bricks) +
autolayer(fc_bricks) +
labs(title = "Bricks: SNAIVE forecasts", y = "Bricks")
This is monthly and seasonal, so seasonal naive again.
nsw_lambs <- aus_livestock |>
filter(State == "New South Wales", Animal == "Lambs") |>
select(Month, Count) |>
rename(Lambs = Count)
fit_lambs <- nsw_lambs |> model(SNaive = SNAIVE(Lambs))
fc_lambs <- fit_lambs |> forecast(h = "2 years")
autoplot(nsw_lambs, Lambs) +
autolayer(fc_lambs) +
labs(title = "NSW lambs: SNAIVE forecasts", y = "Count")
Annual with trend/level changes and no seasonality → RW with drift.
wealth <- hh_budget |> select(Year, Wealth)
fit_wealth <- wealth |> model(Drift = RW(Wealth ~ drift()))
fc_wealth <- fit_wealth |> forecast(h = 10)
autoplot(wealth, Wealth) +
autolayer(fc_wealth) +
labs(title = "Household wealth: RW with drift", y = "Wealth")
Monthly retail turnover is seasonal, so I used seasonal naive.
# Aggregate over Australia (summing across states) to get one monthly series
# Using index_by() avoids grouping on the tsibble index column.
takeaway <- aus_retail |>
filter(Industry == "Takeaway food services") |>
index_by(Month) |>
summarise(Turnover = sum(Turnover, na.rm = TRUE))
fit_takeaway <- takeaway |> model(SNaive = SNAIVE(Turnover))
fc_takeaway <- fit_takeaway |> forecast(h = "2 years")
autoplot(takeaway, Turnover) +
autolayer(fc_takeaway) +
labs(title = "Takeaway food services: SNAIVE forecasts", y = "Turnover")
gafa_stock)The stock data has trading days only (no weekends), so it can be
irregular as a daily tsibble. To keep the benchmark
methods working cleanly, I create a regular time index
t = 1,2,3,... and model Close as a series over
t.
fb_raw <- gafa_stock |>
filter(Symbol == "FB") |>
select(Date, Close) |>
as_tibble() |>
arrange(Date)
# Regular index for modelling
fb <- fb_raw |>
mutate(t = row_number()) |>
as_tsibble(index = t)
# Plot the original series by Date
fb_raw |>
ggplot(aes(x = Date, y = Close)) +
geom_line() +
labs(title = "Facebook (FB) closing price", y = "Close")
fit_fb <- fb |> model(
Drift = RW(Close ~ drift()),
Naive = NAIVE(Close),
Mean = MEAN(Close)
)
fc_fb <- fit_fb |> forecast(h = 30)
# Plot forecasts against the original (by Date) using the matching future dates
future_dates <- fb_raw$Date[(nrow(fb_raw) - 29):nrow(fb_raw)]
# Easiest: show forecasts on the t-scale (still readable)
autoplot(fb, Close) +
autolayer(fc_fb) +
labs(title = "FB: Drift vs Naive vs Mean (30 steps ahead)", x = "t (trading day index)", y = "Close")
first_y <- fb_raw$Close[1]
last_y <- fb_raw$Close[nrow(fb_raw)]
n <- nrow(fb_raw)
slope <- (last_y - first_y) / (n - 1)
h <- 30
line_ext <- first_y + slope * (n:(n + h - 1))
drift_fc <- fc_fb |>
filter(.model == "Drift") |>
as_tibble() |>
pull(.mean)
max(abs(drift_fc - line_ext))
## [1] 2.842171e-14
The value above should be extremely close to 0 (small floating point differences are fine).
Fit SNAIVE() to quarterly Australian beer production
from 1992, then check residuals and show forecasts.
beer_1992 <- aus_production |>
filter(year(Quarter) >= 1992) |>
select(Quarter, Beer)
fit_beer <- beer_1992 |> model(SNaive = SNAIVE(Beer))
fit_beer |> gg_tsresiduals()
fit_beer |>
forecast(h = 8) |>
autoplot(beer_1992) +
labs(title = "Beer production (from 1992): SNAIVE forecasts", y = "Beer")
Repeat 5.3 for: - Australian exports (annual) - Bricks (quarterly)
exports_aus <- global_economy |>
filter(Country == "Australia") |>
select(Year, Exports)
fit_exports <- exports_aus |> model(Naive = NAIVE(Exports))
fit_exports |> gg_tsresiduals()
fit_exports |>
forecast(h = 10) |>
autoplot(exports_aus) +
labs(title = "Australian exports: NAIVE forecasts", y = "Exports")
fit_bricks2 <- bricks |> model(SNaive = SNAIVE(Bricks))
fit_bricks2 |> gg_tsresiduals()
fit_bricks2 |>
forecast(h = 8) |>
autoplot(bricks) +
labs(title = "Bricks: SNAIVE forecasts", y = "Bricks")
I used one monthly retail series from aus_retail:
myseries <- aus_retail |>
filter(State == "New South Wales", Industry == "Department stores") |>
select(Month, Turnover)
myseries |> autoplot(Turnover) +
labs(title = "Retail turnover: NSW — Department stores", y = "Turnover")
train_pre2011 <- myseries |> filter(year(Month) < 2011)
fit_retail <- train_pre2011 |> model(SNaive = SNAIVE(Turnover))
fit_retail |> gg_tsresiduals()
# create test set by filtering, rather than anti_join (simpler + safer)
test_2011on <- myseries |> filter(year(Month) >= 2011)
fc_retail <- fit_retail |> forecast(new_data = test_2011on)
fc_retail |> autoplot(myseries) +
labs(title = "SNAIVE forecasts for test period (2011+)", y = "Turnover")
# Training accuracy
fit_retail |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNaive Training 11.2 24.7 19.1 3.24 5.58 1 1 -0.0527
# Test accuracy
fc_retail |> accuracy(myseries)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNaive Test -2.81 25.5 19.9 -1.02 4.05 1.04 1.03 0.201
train_pre2009 <- myseries |> filter(year(Month) < 2009)
train_pre2010 <- myseries |> filter(year(Month) < 2010)
fit_2009 <- train_pre2009 |> model(SNaive = SNAIVE(Turnover))
fit_2010 <- train_pre2010 |> model(SNaive = SNAIVE(Turnover))
test_2009 <- myseries |> filter(year(Month) >= 2009)
test_2010 <- myseries |> filter(year(Month) >= 2010)
acc_2009 <- fit_2009 |> forecast(new_data = test_2009) |> accuracy(myseries)
acc_2010 <- fit_2010 |> forecast(new_data = test_2010) |> accuracy(myseries)
acc_2009
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNaive Test 3.99 28.1 22.5 0.773 4.57 1.18 1.13 -0.0826
acc_2010
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNaive Test -6.50 24.7 19.8 -1.56 4.09 1.04 0.994 0.150
With seasonal naive, forecasts are essentially “repeat the same month last year”. Changing the training cutoff doesn’t change the basic forecast rule, but it does change which period is used for evaluation (and whether the training set includes structural breaks). If the series has a level shift, a shorter/more recent training window can sometimes look better on the test set.
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] fable_0.5.0 feasts_0.5.0 fabletools_0.6.1 ggtime_0.2.0
## [5] tsibbledata_0.4.1 tsibble_1.2.0 ggplot2_4.0.2 lubridate_1.9.5
## [9] tidyr_1.3.2 dplyr_1.2.0 tibble_3.3.1 fpp3_1.0.3
##
## loaded via a namespace (and not attached):
## [1] ggdist_3.3.3 rappdirs_0.3.4 sass_0.4.10
## [4] utf8_1.2.6 generics_0.1.4 anytime_0.3.12
## [7] digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5
## [10] grid_4.5.2 timechange_0.4.0 RColorBrewer_1.1-3
## [13] fastmap_1.2.0 jsonlite_2.0.0 purrr_1.2.1
## [16] scales_1.4.0 jquerylib_0.1.4 cli_3.6.5
## [19] rlang_1.1.7 crayon_1.5.3 withr_3.0.2
## [22] cachem_1.1.0 yaml_2.3.12 tools_4.5.2
## [25] vctrs_0.7.1 R6_2.6.1 lifecycle_1.0.5
## [28] pkgconfig_2.0.3 progressr_0.18.0 pillar_1.11.1
## [31] bslib_0.10.0 gtable_0.3.6 glue_1.8.0
## [34] Rcpp_1.1.1 xfun_0.56 tidyselect_1.2.1
## [37] rstudioapi_0.18.0 knitr_1.51 farver_2.1.2
## [40] htmltools_0.5.9 rmarkdown_2.30 labeling_0.4.3
## [43] compiler_4.5.2 S7_0.2.1 distributional_0.6.0