Exercise 5.1: Forecasting using NAIVE, SNAIVE, and RW with Drift

install.packages("fable")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("fabletools")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(fable)
## Loading required package: fabletools
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
library(fabletools)
library(tsibble)
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ ggplot2     3.5.1
## ✔ tidyr       1.3.1     ✔ tsibbledata 0.4.1
## ✔ lubridate   1.9.4     ✔ feasts      0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

This series exhibits a strong trend and steady growth, making the Random Walk with Drift (RW(y ~ drift())) the best choice. This method assumes the population will continue increasing at a similar rate.

pop_fit <- global_economy |> 
  filter(Country == "Australia") |> 
  model(RW(Population ~ drift()))

pop_fc <- pop_fit |> forecast()

autoplot(global_economy |> filter(Country == "Australia"), Population) + 
  autolayer(pop_fc, Population, color = "blue") + 
  ggtitle("Forecast of Australian Population")

-Bricks (aus_production)

This series exhibits strong seasonal patterns, so we use the Seasonal Naïve (SNAIVE(y)) method. It assumes that future values will repeat the same seasonal cycle as previous years.

bricks_fit <- aus_production |> 
  model(SNAIVE(Bricks))

bricks_fc <- bricks_fit |> forecast()

autoplot(aus_production, Bricks) + 
  autolayer(bricks_fc, Bricks, color = "red") + 
  ggtitle("Forecast of Bricks Production")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

This series also follows a seasonal pattern due to lambing cycles, making SNAIVE(y) the most appropriate. Household Wealth (hh_budget)

lambs_data <- aus_livestock |> 
  filter(State == "New South Wales", Animal == "Lambs")

lambs_fit <- lambs_data |> model(SNAIVE(Count))

lambs_fc <- lambs_fit |> forecast()

autoplot(lambs_data, Count) + 
  autolayer(lambs_fc, Count, color = "green") + 
  ggtitle("Forecast of NSW Lambs Production") +
  labs(y = "Lambs Count", x = "Month")

wealth_fit <- hh_budget |> 
  model(RW(Wealth ~ drift()))

wealth_fc <- wealth_fit |> forecast()

autoplot(hh_budget, Wealth) + 
  autolayer(wealth_fc, Wealth, color = "purple") + 
  ggtitle("Forecast of Household Wealth")

- Household wealth generally trends upwards but without clear seasonality. RW(y ~ drift()) is the best option since it captures the long-term upward trend.

-Australian Takeaway Food Turnover (aus_retail) This series shows both a trend and seasonality. The SNAIVE(y) method is best, as food sales tend to follow seasonal patterns (e.g., holidays, summer spikes).

takeaway_fit <- aus_retail |> 
  filter(Industry == "Takeaway food services") |> 
  model(SNAIVE(Turnover))

takeaway_fc <- takeaway_fit |> forecast()

autoplot(aus_retail |> filter(Industry == "Takeaway food services"), Turnover) + 
  autolayer(takeaway_fc, Turnover, color = "orange") + 
  ggtitle("Forecast of Australian Takeaway Food Turnover")

Exercise 5.2: Forecasting Facebook Stock Prices (a) Time plot of the series

gafa_stock |> 
  filter(Symbol == "FB") |> 
  autoplot(Close)

The plot shows an overall upward trend with fluctuations.

  1. Forecast using the drift method and plot the results
fb_stock <- gafa_stock |> 
  filter(Symbol == "FB") |> 
  arrange(Date) |>  
  update_tsibble(regular = TRUE)  

fit <- fb_stock |> model(RW(Close ~ drift()))
## Warning: 1 error encountered for RW(Close ~ drift())
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
fc <- fit |> forecast(h = 30)  

autoplot(fb_stock, Close) + 
  autolayer(fc, Close, color = "blue") + 
  ggtitle("Forecast of Facebook Stock Prices Using Random Walk with Drift") +
  labs(y = "Stock Price (USD)", x = "Date")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).

This method extends the trend based on past observations.

  1. Show that the drift method forecasts extend the first-to-last observation trend
fb_stock <- gafa_stock |> 
  filter(Symbol == "FB") |> 
  arrange(Date) |> 
  update_tsibble(regular = TRUE)  
first_last_line <- fb_stock |> 
  summarise(First = first(Close), Last = last(Close), 
            n = n()) 

slope <- (first_last_line$Last - first_last_line$First) / first_last_line$n

autoplot(fb_stock, Close) + 
  geom_abline(intercept = first_last_line$First - slope * 1, 
              slope = slope, 
              color = "red", linetype = "dashed") +
  ggtitle("Facebook Stock Price with First-Last Line") +
  labs(y = "Stock Price (USD)", x = "Date")

This demonstrates that drift forecasts align with the line connecting the first and last points.

  1. Compare with other benchmark methods
fb_stock |> model(NAIVE(Close)) |> forecast()
## Warning: 1 error encountered for NAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
fb_stock |> model(SNAIVE(Close)) |> forecast()
## Warning: 1 error encountered for SNAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

NAIVE: Not suitable due to trend. SNAIVE: Not ideal since stock prices don’t exhibit strict seasonality. RW with Drift: Best choice as it accounts for long-term movement.

Exercise 5.3: Seasonal Naïve Method for Beer Production Extract data from 1992 onward

recent_production <- aus_production |> filter(year(Quarter) >= 1992)
fit <- recent_production |> model(SNAIVE(Beer))
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

fit |> forecast() |> autoplot(recent_production)

If residuals are random, the model captures seasonality well. If residuals show patterns, another model (like ETS or ARIMA) may be better.

Exercise 5.4: Forecasting Australian Exports & Bricks Production Australian Exports (global_economy)

This series has a trend but no strong seasonality → NAIVE(y) is appropriate. Bricks (aus_production)

This series has clear seasonal patterns → SNAIVE(y) is appropriate. Code for fitting and forecasting:

exports_fit <- global_economy |> model(NAIVE(Exports))
## Warning: 17 errors (1 unique) encountered for NAIVE(Exports)
## [17] All observations are missing, a model cannot be estimated without data.
bricks_fit <- aus_production |> model(SNAIVE(Bricks))
exports_fc <- exports_fit |> forecast()
bricks_fc <- bricks_fit |> forecast()

autoplot(global_economy, Exports) + autolayer(exports_fc, Exports)
## Warning: Removed 4420 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 110 rows containing missing values or values outside the scale range
## (`geom_line()`).

autoplot(aus_production, Bricks) + autolayer(bricks_fc, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

Exercise 5.7: Evaluating a Retail Time Series Forecasting Model

  1. Create training dataset before 2011
myseries <- aus_retail |> 
  filter(State == "New South Wales", Industry == "Retail trade")
myseries_train <- myseries |> 
  filter(year(Month) < 2011)
  1. Verify data split with a plot
myseries |> summarise(n = n())
myseries_train |> summarise(n = n())
unique(aus_retail$Industry)
##  [1] "Cafes, restaurants and catering services"                         
##  [2] "Cafes, restaurants and takeaway food services"                    
##  [3] "Clothing retailing"                                               
##  [4] "Clothing, footwear and personal accessory retailing"              
##  [5] "Department stores"                                                
##  [6] "Electrical and electronic goods retailing"                        
##  [7] "Food retailing"                                                   
##  [8] "Footwear and other personal accessory retailing"                  
##  [9] "Furniture, floor coverings, houseware and textile goods retailing"
## [10] "Hardware, building and garden supplies retailing"                 
## [11] "Household goods retailing"                                        
## [12] "Liquor retailing"                                                 
## [13] "Newspaper and book retailing"                                     
## [14] "Other recreational goods retailing"                               
## [15] "Other retailing"                                                  
## [16] "Other retailing n.e.c."                                           
## [17] "Other specialised food retailing"                                 
## [18] "Pharmaceutical, cosmetic and toiletry goods retailing"            
## [19] "Supermarket and grocery stores"                                   
## [20] "Takeaway food services"
myseries <- aus_retail |> 
  filter(State == "New South Wales")
autoplot(myseries, Turnover) + ggtitle("Turnover in NSW Retail Trade")

  1. Fit a seasonal naïve model
myseries_train <- aus_retail |> 
  filter(State == "New South Wales", year(Month) < 2011)

# Check if data exists
myseries_train |> summarise(n = n())
fit <- myseries_train |> model(SNAIVE(Turnover))
fc <- fit |> forecast()
autoplot(myseries_train, Turnover) + autolayer(fc, Turnover)

  1. Check residuals for white noise
fit
fit <- myseries_train |> model(SNAIVE(Turnover))
is_tsibble(myseries_train)
## [1] TRUE
myseries_train <- as_tsibble(myseries_train, index = Month)
  1. Produce forecasts for test data
fc <- fit |> forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
autoplot(myseries, Turnover) + autolayer(fc, Turnover)

  1. Compare accuracy of forecasts
fit |> accuracy()
fc |> accuracy(myseries)

We compare forecast errors (e.g., RMSE, MAE) to assess model performance.

  1. Sensitivity to training data amount
filtered_series <- myseries |> 
  filter(State == "New South Wales", Industry == "Cafes, restaurants and catering services")

filtered_series <- filtered_series |> as_tsibble(index = Month, key = NULL)

train_2009 <- filtered_series |> filter(year(Month) < 2009)
train_2010 <- filtered_series |> filter(year(Month) < 2010)
train_2011 <- filtered_series |> filter(year(Month) < 2011)

fit_2009 <- train_2009 |> model(SNAIVE(Turnover))
fit_2010 <- train_2010 |> model(SNAIVE(Turnover))
fit_2011 <- train_2011 |> model(SNAIVE(Turnover))

test_2009 <- filtered_series |> filter(year(Month) >= 2009)
test_2010 <- filtered_series |> filter(year(Month) >= 2010)
test_2011 <- filtered_series |> filter(year(Month) >= 2011)

fc_2009 <- fit_2009 |> forecast(new_data = test_2009)
fc_2010 <- fit_2010 |> forecast(new_data = test_2010)
fc_2011 <- fit_2011 |> forecast(new_data = test_2011)

accuracy_2009 <- accuracy(fc_2009, test_2009) |> mutate(Training_Data = "Before 2009")
accuracy_2010 <- accuracy(fc_2010, test_2010) |> mutate(Training_Data = "Before 2010")
accuracy_2011 <- accuracy(fc_2011, test_2011) |> mutate(Training_Data = "Before 2011")

accuracy_results <- bind_rows(accuracy_2009, accuracy_2010, accuracy_2011) |> 
  select(Training_Data, RMSE, MAE, MAPE)
print(accuracy_results)
## # A tibble: 3 × 4
##   Training_Data  RMSE   MAE  MAPE
##   <chr>         <dbl> <dbl> <dbl>
## 1 Before 2009    266.  235.  37.0
## 2 Before 2010    221.  192.  29.7
## 3 Before 2011    202.  176.  26.0

Using more training data often improves forecasts but may lead to overfitting. If the time series is stable, a smaller training set may be sufficient.