Exercise 5.1: Forecasting using NAIVE, SNAIVE, and RW with Drift
install.packages("fable")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
install.packages("fabletools")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(fable)
## Loading required package: fabletools
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
library(fabletools)
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ ggplot2 3.5.1
## ✔ tidyr 1.3.1 ✔ tsibbledata 0.4.1
## ✔ lubridate 1.9.4 ✔ feasts 0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
This series exhibits a strong trend and steady growth, making the Random Walk with Drift (RW(y ~ drift())) the best choice. This method assumes the population will continue increasing at a similar rate.
pop_fit <- global_economy |>
filter(Country == "Australia") |>
model(RW(Population ~ drift()))
pop_fc <- pop_fit |> forecast()
autoplot(global_economy |> filter(Country == "Australia"), Population) +
autolayer(pop_fc, Population, color = "blue") +
ggtitle("Forecast of Australian Population")
-Bricks (aus_production)
This series exhibits strong seasonal patterns, so we use the Seasonal Naïve (SNAIVE(y)) method. It assumes that future values will repeat the same seasonal cycle as previous years.
bricks_fit <- aus_production |>
model(SNAIVE(Bricks))
bricks_fc <- bricks_fit |> forecast()
autoplot(aus_production, Bricks) +
autolayer(bricks_fc, Bricks, color = "red") +
ggtitle("Forecast of Bricks Production")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
This series also follows a seasonal pattern due to lambing cycles, making SNAIVE(y) the most appropriate. Household Wealth (hh_budget)
lambs_data <- aus_livestock |>
filter(State == "New South Wales", Animal == "Lambs")
lambs_fit <- lambs_data |> model(SNAIVE(Count))
lambs_fc <- lambs_fit |> forecast()
autoplot(lambs_data, Count) +
autolayer(lambs_fc, Count, color = "green") +
ggtitle("Forecast of NSW Lambs Production") +
labs(y = "Lambs Count", x = "Month")
wealth_fit <- hh_budget |>
model(RW(Wealth ~ drift()))
wealth_fc <- wealth_fit |> forecast()
autoplot(hh_budget, Wealth) +
autolayer(wealth_fc, Wealth, color = "purple") +
ggtitle("Forecast of Household Wealth")
- Household wealth generally trends upwards but without clear
seasonality. RW(y ~ drift()) is the best option since it captures the
long-term upward trend.
-Australian Takeaway Food Turnover (aus_retail) This series shows both a trend and seasonality. The SNAIVE(y) method is best, as food sales tend to follow seasonal patterns (e.g., holidays, summer spikes).
takeaway_fit <- aus_retail |>
filter(Industry == "Takeaway food services") |>
model(SNAIVE(Turnover))
takeaway_fc <- takeaway_fit |> forecast()
autoplot(aus_retail |> filter(Industry == "Takeaway food services"), Turnover) +
autolayer(takeaway_fc, Turnover, color = "orange") +
ggtitle("Forecast of Australian Takeaway Food Turnover")
Exercise 5.2: Forecasting Facebook Stock Prices (a) Time plot of the
series
gafa_stock |>
filter(Symbol == "FB") |>
autoplot(Close)
The plot shows an overall upward trend with fluctuations.
fb_stock <- gafa_stock |>
filter(Symbol == "FB") |>
arrange(Date) |>
update_tsibble(regular = TRUE)
fit <- fb_stock |> model(RW(Close ~ drift()))
## Warning: 1 error encountered for RW(Close ~ drift())
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
fc <- fit |> forecast(h = 30)
autoplot(fb_stock, Close) +
autolayer(fc, Close, color = "blue") +
ggtitle("Forecast of Facebook Stock Prices Using Random Walk with Drift") +
labs(y = "Stock Price (USD)", x = "Date")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).
This method extends the trend based on past observations.
fb_stock <- gafa_stock |>
filter(Symbol == "FB") |>
arrange(Date) |>
update_tsibble(regular = TRUE)
first_last_line <- fb_stock |>
summarise(First = first(Close), Last = last(Close),
n = n())
slope <- (first_last_line$Last - first_last_line$First) / first_last_line$n
autoplot(fb_stock, Close) +
geom_abline(intercept = first_last_line$First - slope * 1,
slope = slope,
color = "red", linetype = "dashed") +
ggtitle("Facebook Stock Price with First-Last Line") +
labs(y = "Stock Price (USD)", x = "Date")
This demonstrates that drift forecasts align with the line connecting
the first and last points.
fb_stock |> model(NAIVE(Close)) |> forecast()
## Warning: 1 error encountered for NAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
fb_stock |> model(SNAIVE(Close)) |> forecast()
## Warning: 1 error encountered for SNAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.
NAIVE: Not suitable due to trend. SNAIVE: Not ideal since stock prices don’t exhibit strict seasonality. RW with Drift: Best choice as it accounts for long-term movement.
Exercise 5.3: Seasonal Naïve Method for Beer Production Extract data from 1992 onward
recent_production <- aus_production |> filter(year(Quarter) >= 1992)
fit <- recent_production |> model(SNAIVE(Beer))
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
fit |> forecast() |> autoplot(recent_production)
If residuals are random, the model captures seasonality well. If
residuals show patterns, another model (like ETS or ARIMA) may be
better.
Exercise 5.4: Forecasting Australian Exports & Bricks Production Australian Exports (global_economy)
This series has a trend but no strong seasonality → NAIVE(y) is appropriate. Bricks (aus_production)
This series has clear seasonal patterns → SNAIVE(y) is appropriate. Code for fitting and forecasting:
exports_fit <- global_economy |> model(NAIVE(Exports))
## Warning: 17 errors (1 unique) encountered for NAIVE(Exports)
## [17] All observations are missing, a model cannot be estimated without data.
bricks_fit <- aus_production |> model(SNAIVE(Bricks))
exports_fc <- exports_fit |> forecast()
bricks_fc <- bricks_fit |> forecast()
autoplot(global_economy, Exports) + autolayer(exports_fc, Exports)
## Warning: Removed 4420 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 110 rows containing missing values or values outside the scale range
## (`geom_line()`).
autoplot(aus_production, Bricks) + autolayer(bricks_fc, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
Exercise 5.7: Evaluating a Retail Time Series Forecasting Model
myseries <- aus_retail |>
filter(State == "New South Wales", Industry == "Retail trade")
myseries_train <- myseries |>
filter(year(Month) < 2011)
myseries |> summarise(n = n())
myseries_train |> summarise(n = n())
unique(aus_retail$Industry)
## [1] "Cafes, restaurants and catering services"
## [2] "Cafes, restaurants and takeaway food services"
## [3] "Clothing retailing"
## [4] "Clothing, footwear and personal accessory retailing"
## [5] "Department stores"
## [6] "Electrical and electronic goods retailing"
## [7] "Food retailing"
## [8] "Footwear and other personal accessory retailing"
## [9] "Furniture, floor coverings, houseware and textile goods retailing"
## [10] "Hardware, building and garden supplies retailing"
## [11] "Household goods retailing"
## [12] "Liquor retailing"
## [13] "Newspaper and book retailing"
## [14] "Other recreational goods retailing"
## [15] "Other retailing"
## [16] "Other retailing n.e.c."
## [17] "Other specialised food retailing"
## [18] "Pharmaceutical, cosmetic and toiletry goods retailing"
## [19] "Supermarket and grocery stores"
## [20] "Takeaway food services"
myseries <- aus_retail |>
filter(State == "New South Wales")
autoplot(myseries, Turnover) + ggtitle("Turnover in NSW Retail Trade")
myseries_train <- aus_retail |>
filter(State == "New South Wales", year(Month) < 2011)
# Check if data exists
myseries_train |> summarise(n = n())
fit <- myseries_train |> model(SNAIVE(Turnover))
fc <- fit |> forecast()
autoplot(myseries_train, Turnover) + autolayer(fc, Turnover)
fit
fit <- myseries_train |> model(SNAIVE(Turnover))
is_tsibble(myseries_train)
## [1] TRUE
myseries_train <- as_tsibble(myseries_train, index = Month)
fc <- fit |> forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
autoplot(myseries, Turnover) + autolayer(fc, Turnover)
fit |> accuracy()
fc |> accuracy(myseries)
We compare forecast errors (e.g., RMSE, MAE) to assess model performance.
filtered_series <- myseries |>
filter(State == "New South Wales", Industry == "Cafes, restaurants and catering services")
filtered_series <- filtered_series |> as_tsibble(index = Month, key = NULL)
train_2009 <- filtered_series |> filter(year(Month) < 2009)
train_2010 <- filtered_series |> filter(year(Month) < 2010)
train_2011 <- filtered_series |> filter(year(Month) < 2011)
fit_2009 <- train_2009 |> model(SNAIVE(Turnover))
fit_2010 <- train_2010 |> model(SNAIVE(Turnover))
fit_2011 <- train_2011 |> model(SNAIVE(Turnover))
test_2009 <- filtered_series |> filter(year(Month) >= 2009)
test_2010 <- filtered_series |> filter(year(Month) >= 2010)
test_2011 <- filtered_series |> filter(year(Month) >= 2011)
fc_2009 <- fit_2009 |> forecast(new_data = test_2009)
fc_2010 <- fit_2010 |> forecast(new_data = test_2010)
fc_2011 <- fit_2011 |> forecast(new_data = test_2011)
accuracy_2009 <- accuracy(fc_2009, test_2009) |> mutate(Training_Data = "Before 2009")
accuracy_2010 <- accuracy(fc_2010, test_2010) |> mutate(Training_Data = "Before 2010")
accuracy_2011 <- accuracy(fc_2011, test_2011) |> mutate(Training_Data = "Before 2011")
accuracy_results <- bind_rows(accuracy_2009, accuracy_2010, accuracy_2011) |>
select(Training_Data, RMSE, MAE, MAPE)
print(accuracy_results)
## # A tibble: 3 × 4
## Training_Data RMSE MAE MAPE
## <chr> <dbl> <dbl> <dbl>
## 1 Before 2009 266. 235. 37.0
## 2 Before 2010 221. 192. 29.7
## 3 Before 2011 202. 176. 26.0
Using more training data often improves forecasts but may lead to overfitting. If the time series is stable, a smaller training set may be sufficient.