library(fpp3)
## Warning: package 'fpp3' was built under R version 4.5.2
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble 3.3.0 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.5.0
## ✔ ggplot2 4.0.0
## Warning: package 'tsibble' was built under R version 4.5.2
## Warning: package 'tsibbledata' was built under R version 4.5.2
## Warning: package 'feasts' was built under R version 4.5.2
## Warning: package 'fabletools' was built under R version 4.5.2
## Warning: package 'fable' was built under R version 4.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.4 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrr)
Produce forecasts for the following series using whichever of
NAIVE(y), SNAIVE(y) or
RW(y~ drift()) is more appropriate in each case:
global_economy)I used a random walk with drift for Australian population because it shows steady long run growth and does not have seasonality in the yearly data.
# filter to Australia and keep the population series
aus_pop <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Population)
# fit a random walk with drift to capture long-run growth
fit_pop <- aus_pop %>%
model(rw_drift = RW(Population ~ drift()))
# forecast
fc_pop <- fit_pop %>%
forecast(h = "10 years")
# plot the historical data and forecast
fc_pop %>%
autoplot(aus_pop) +
labs(
title = "Australia population forecast using RW with drift",
y = "Population"
)
aus_production)I used a seasonal naive model for bricks because the quarterly series has a repeating seasonal pattern.
# keep the Bricks series
bricks <- aus_production %>%
select(Quarter, Bricks) %>%
filter(!is.na(Bricks))
# fit seasonal naive model
fit_bricks <- bricks %>%
model(snaive = SNAIVE(Bricks))
# forecast next 8 quarters
fc_bricks <- fit_bricks %>%
forecast(h = 8)
# plot the results
fc_bricks %>%
autoplot(bricks) +
labs(
title = "Bricks forecast using seasonal naive",
y = "Bricks"
)
aus_livestock)I used a seasonal naive model for NSW lambs because the monthly counts show strong seasonality across years.
# filter to NSW lambs
nsw_lambs <- aus_livestock %>%
filter(State == "New South Wales", Animal == "Lambs") %>%
select(Month, Count)
# fit seasonal naive model
fit_lambs <- nsw_lambs %>%
model(snaive = SNAIVE(Count))
# forecast 24 months
fc_lambs <- fit_lambs %>%
forecast(h = 24)
# plot the results
fc_lambs %>%
autoplot(nsw_lambs) +
labs(
title = "NSW lambs forecast using seasonal naive",
y = "Count"
)
I used a random walk with drift for household wealth because it follows a trend over time and does not show a clear seasonal cycle.
# keep the wealth series
hh_wealth <- hh_budget %>%
filter(Country == "Australia") %>%
select(Year, Wealth)
# fit random walk with drift
fit_wealth <- hh_wealth %>%
model(rw_drift = RW(Wealth ~ drift()))
# forecast 10 years
fc_wealth <- fit_wealth %>%
forecast(h = "10 years")
# plot the results
fc_wealth %>%
autoplot(hh_wealth) +
labs(
title = "Household wealth forecast using RW with drift",
y = "Wealth"
)
aus_retail)I used a seasonal naive model for takeaway food turnover because retail turnover is strongly seasonal at the monthly level.
# filter to takeaway food series
takeaway <- aus_retail %>%
filter(Industry == "Takeaway food services") %>%
index_by(Month) %>%
summarise(Turnover = sum(Turnover, na.rm = TRUE))
# fit seasonal naive model
fit_takeaway <- takeaway %>%
model(snaive = SNAIVE(Turnover))
# forecast next 24 months
fc_takeaway <- fit_takeaway %>%
forecast(h = 24)
# plot
fc_takeaway %>%
autoplot(takeaway) +
labs(
title = "Takeaway food turnover forecast using seasonal naive",
y = "Turnover"
)
Use the Facebook stock price (data set gafa_stock) to do
the following:
# filter the Facebook stock series and keep the adjusted close price
fb <- gafa_stock %>%
filter(Symbol == "FB") %>%
select(Date, Close)
# plot the time series
fb %>%
autoplot(Close) +
labs(
title = "Facebook closing price",
y = "Close Price"
)
# Make the series regular daily
fb_reg <- fb %>%
arrange(Date) %>%
as_tibble() %>%
tidyr::complete(Date = seq(min(Date), max(Date), by = "day")) %>%
tidyr::fill(Close, .direction = "down") %>%
filter(!is.na(Close)) %>%
as_tsibble(index = Date) %>%
update_tsibble(regular = TRUE)
# fit a random walk with drift model
fit_drift <- fb_reg %>%
model(drift = RW(Close ~ drift()))
# forecast the next 30 days
fc_drift <- fit_drift %>%
forecast(h = 30)
# plot forecast with the historical data
fc_drift %>%
autoplot(fb_reg) +
labs(
title = "Facebook forecast using drift method",
y = "Close price")
h <- 30 # forecast horizon
# Step 1: Build regular daily series (same as part b)
fb <- gafa_stock %>%
filter(Symbol == "FB") %>%
select(Date, Close) %>%
arrange(Date) %>%
as_tibble()
fb_reg <- fb %>%
tidyr::complete(Date = seq(min(Date), max(Date), by = "day")) %>%
tidyr::fill(Close, .direction = "down") %>%
filter(!is.na(Close)) %>%
as_tsibble(index = Date) %>%
update_tsibble(regular = TRUE)
# Step 2: Fit drift + forecast
fit_drift <- fb_reg %>%
model(drift = RW(Close ~ drift()))
fc_drift <- fit_drift %>%
forecast(h = h)
# Step 3: One-row summary of first/last (force tibble to avoid grouped output)
fb_summary <- fb_reg %>%
as_tibble() %>%
summarise(
first_date = first(Date),
last_date = last(Date),
first_close = first(Close),
last_close = last(Close),
n = n()
)
fb_summary
# Step 4: Slope of the line between first and last observation
slope <- (fb_summary$last_close - fb_summary$first_close) / (fb_summary$n - 1)
# Step 5: Build an extended line (first date through last date + h)
line_df <- tibble(
Date = seq(fb_summary$first_date, fb_summary$last_date + h, by = "day")
) %>%
mutate(line = fb_summary$first_close + slope * (as.numeric(Date - fb_summary$first_date)))
# Step 6: Plot series + extended first-to-last line
fb_reg %>%
autoplot(Close) +
geom_line(data = line_df, aes(x = Date, y = line), linetype = "dashed", linewidth = 0.9) +
labs(
title = "First-to-last line extended (drift forecast idea)",
y = "Close price"
)
# fit a set of benchmark models
fit_bench <- fb_reg %>%
model(
mean = MEAN(Close),
naive = NAIVE(Close),
drift = RW(Close ~ drift())
)
# forecast 30 days ahead for each model
fc_bench <- fit_bench %>%
forecast(h = 30)
# plot forecasts together
fc_bench %>%
autoplot(fb) +
facet_wrap(~.model, ncol = 1) +
labs(
title = "Benchmark forecasts for Facebook close price", y = "Close price"
)
## Using `Date` as index variable.
# decide train/test sizes
n_total <- nrow(fb_reg)
n_test <- 60
n_train <- n_total - n_test
# split
fb_train <- fb_reg %>% slice_head(n = n_train)
fb_test <- fb_reg %>% slice_tail(n = n_test)
# fit models on training data
fit_train <- fb_train %>%
model(
mean = MEAN(Close),
naive = NAIVE(Close),
drift = RW(Close ~ drift())
)
# forecast length of test set
fc_train <- fit_train %>%
forecast(h = n_test)
# accuracy on test set
fc_train %>%
accuracy(fb_test)
I compared mean, naive, and drift benchmarks using the last 60 observations as a test set. Based on RMSE and MAE, the naive method performs best for this sample, which makes sense because stock prices often behave like a random walk where the most recent value is a strong baseline.
Apply a seasonal naive method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# extract the quarterly beer production data from 1992 onward
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992) %>%
select(Quarter, Beer) %>%
filter(!is.na(Beer))
# fit a seasonal naive model
fit <- recent_production %>%
model(snaive = SNAIVE(Beer))
# check residuals for white-noise behavior
fit %>%
augment() %>%
filter(!is.na(.innov)) %>%
gg_tsdisplay(.innov, plot_type = "partial")
## Warning: `gg_tsdisplay()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsdisplay()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# plot the forecasts
fit %>%
forecast(h = 8) %>%
autoplot(recent_production) +
labs(
title = "Quarterly Australian beer production forecast (SNAIVE), from 1992",
y = "Beer production"
)
I used a seasonal naive model on quarterly beer production from 1992 because the pattern repeats each year by quarter. The forecast basically reuses the same quarter from the previous year, so it captures the main seasonal shape. When I checked the residuals, they were mostly centered around zero, but the ACF still shows a noticeable spike at some lags. That suggests the model is a reasonable baseline, but the residuals are not fully white noise, so a more flexible model could likely improve the fit.
Repeat the previous exercise using the Australian Exports series from
global_economy and the Bricks series from
aus_production. Use whichever of NAIVE() or
SNAIVE() is more appropriate in each case.
# filter australia and keep Exports
aus_exports <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Exports) %>%
filter(!is.na(Exports))
# fit NAIVE model
fit_exports <- aus_exports %>%
model(NAIVE(Exports))
# residual diagnostics
fit_exports %>%
augment() %>%
filter(!is.na(.innov)) %>%
gg_tsdisplay(.innov, plot_type = "partial")
# forecast next 10 years and plot
fit_exports %>%
forecast(h = 10) %>%
autoplot(aus_exports) +
labs(
title = "Australian exports forecast (NAIVE)",
y = "Exports"
)
I used NAIVE because exports are annual and there is no seasonal cycle to repeat. The residual diagnostics look fairly random with no strong pattern, so NAIVE is a reasonable baseline for this series.
# keep Bricks series and remove missing values
bricks <- aus_production %>%
select(Quarter, Bricks) %>%
filter(!is.na(Bricks))
# fit SNAIVE model (quarterly seasonality)
fit_bricks <- bricks %>%
model(SNAIVE(Bricks))
# residual diagnostics
fit_bricks %>%
augment() %>%
filter(!is.na(.innov)) %>%
gg_tsdisplay(.innov, plot_type = "partial")
# forecast next 8 quarters and plot
fit_bricks %>%
forecast(h = 8) %>%
autoplot(bricks) +
labs(
title = "Bricks forecast (SNAIVE)",
y = "Bricks")
I used NAIVE because exports are annual and there is no seasonal cycle to repeat. The residual diagnostics look fairly random with no strong pattern, so NAIVE is a reasonable baseline for this series.
For your retail time series (from Exercise 7 in Section 2.10):
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1)) %>%
select(Month, Turnover) %>%
filter(!is.na(Turnover))
myseries %>%
glimpse()
## Rows: 369
## Columns: 2
## $ Month <mth> 1988 Apr, 1988 May, 1988 Jun, 1988 Jul, 1988 Aug, 1988 Sep, 1…
## $ Turnover <dbl> 25.6, 26.7, 27.7, 28.2, 28.2, 29.4, 28.3, 27.4, 30.9, 26.7, 2…
# training data w/ observations before 2011
myseries_train <- myseries %>%
filter(year(Month) < 2011)
# quick range check
myseries %>%
as_tibble() %>%
summarise(min_month = min(Month), max_month = max(Month), n = n())
myseries_train %>%
as_tibble() %>%
summarise(min_month = min(Month), max_month = max(Month), n = n())
# visual check that the split is correct
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red") +
labs(
title = "Retail Turnover with training portion highlighted",
y = "Turnover")
SNAIVE() applied to
your training data (myseries_train)# fit SNAIVE on training data
fit <- myseries_train %>%
model(snaive = SNAIVE(Turnover))
fit
feasts::gg_tsresiduals(fit)
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).
The residuals are not really white noise. The ACF has several bars outside the bounds, which suggests there is still correlation left after using seasonal naive. The histogram looks roughly centered, but there are some larger residuals, especially near the end, so it is not perfectly normal.
# define test data
myseries_test <- anti_join(myseries, myseries_train, by = "Month")
# forecast over the test period only
fc <- fit %>%
forecast(new_data = myseries_test)
# plot forecasts overlaid on the full series
fc %>%
autoplot(myseries) +
labs(
title = "SNAIVE forecast on test period",
y = "Turnover")
# accuracy on training data
fit %>% accuracy()
# accuracy on test data (compare forecasts to actual)
fc %>% accuracy(myseries)
# compare accuracy under different train/test cutoffs
cutoffs <- c(2007, 2009, 2011, 2013)
acc_by_cutoff <- map_dfr(cutoffs, function(cut) {
train <- myseries %>% filter(year(Month) < cut)
test <- anti_join(myseries, train, by = "Month")
fit_cut <- train %>% model(snaive = SNAIVE(Turnover))
fc_cut <- fit_cut %>% forecast(new_data = test)
fc_cut %>%
accuracy(myseries) %>%
mutate(train_end_year = cut - 1) %>%
select(train_end_year, RMSE, MAE, MAPE)
})
acc_by_cutoff
The accuracy measures change a lot when I change the training cutoff. In my results, RMSE, MAE, and MAPE get larger as I move the cutoff later, which suggests the series level and seasonal size changed over time and the seasonal naive model does not adapt well when the holdout period behaves differently from the earlier history.