Do exercises 5.1, 5.2, 5.3, 5.4 and 5.7 in the Hyndman book.
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
a. Australian Population (global_economy)
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.4.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## Warning: package 'tsibble' was built under R version 4.4.3
## Warning: package 'tsibbledata' was built under R version 4.4.3
## Warning: package 'feasts' was built under R version 4.4.3
## Warning: package 'fabletools' was built under R version 4.4.3
## Warning: package 'fable' was built under R version 4.4.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
pop_aus <- global_economy |> filter(Country == "Australia") |> select(Year, Population)
fit_pop <- pop_aus |> model(rw_drift = RW(Population ~ drift()))
fc_pop <- fit_pop |> forecast(h = 5)
autoplot(pop_aus, Population) + autolayer(fc_pop)
b. Bricks (aus_production)
bricks <- aus_production |> select(Bricks) |> filter(!is.na(Bricks))
fit_brk<- bricks |> model(snaive = SNAIVE(Bricks ~ lag("year")))
fc_brk<- fit_brk |> forecast(h = 20) # Quarters towards next 5 years
autoplot(bricks, Bricks) + autolayer(fc_brk)
c. NSW Lambs (aus_livestock)
lambs_nsw <- aus_livestock |>
filter(State == "New South Wales", Animal == "Lambs") |>
select(Count)
fit_lmb <- lambs_nsw |> model(snaive = SNAIVE(Count ~ lag("year")))
fc_lmb <- fit_lmb |> forecast(h = 12)
autoplot(lambs_nsw, Count) + autolayer(fc_lmb)
d. Household wealth (hh_budget)
wealth <- hh_budget |> select(Wealth)
fit_wth <- wealth |> model(rw_drift = RW(Wealth ~ drift()))
fc_wth <- fit_wth |> forecast(h = 12)
autoplot(wealth, Wealth) + autolayer(fc_wth)
e. Australian takeaway food turnover (aus_retail)
takeaway_aus <- aus_retail |>
filter(Industry == "Takeaway food services") |>
summarise(Turnover = sum(Turnover))
fit_tkw <- takeaway_aus |> model(snaive = SNAIVE(Turnover ~ lag("year")))
fc_tkw <- fit_tkw |> forecast(h = 12)
autoplot(takeaway_aus, Turnover) +
autolayer(fc_tkw, level = NULL) +
labs(title = "Takeaway food turnover",
x = NULL, y = "$AUD")
Use the Facebook stock price (data set gafa_stock) to do the following:
a. Produce a time plot of the series.
fb <- gafa_stock |>
filter(Symbol == "FB") |>
select(Date, Adj_Close)
autoplot(fb, Adj_Close) +
labs(title = "FB Stock Price",
x = NULL, y = "$US")
b. Produce forecasts using the drift method and plot them.
fb_m <- fb |>
index_by(Month = yearmonth(Date)) |>
summarise(Adj_Close = last(Adj_Close))
fit_drift_m <- fb_m |> model(RW(Adj_Close ~ drift()))
fc_drift_m <- fit_drift_m |> forecast(h = 12)
autoplot(fb_m, Adj_Close) +
autolayer(fc_drift_m, level = NULL) +
labs(title = "FB drift method (monthly)", x = NULL, y = "$")
c. Show that the forecasts are identical to extending the line drawn between the first and last observations.
fb <- gafa_stock |> filter(Symbol == "FB") |> select(Date, Adj_Close)
fb_w <- fb |> index_by(Week = yearweek(Date)) |> summarise(Adj_Close = last(Adj_Close))
fit_drift <- fb_w |> model(RW(Adj_Close ~ drift()))
fc_drift <- fit_drift |> forecast(h = 12)
n <- nrow(fb_w)
y1 <- dplyr::first(fb_w$Adj_Close)
yT <- dplyr::last(fb_w$Adj_Close)
slope <- (yT - y1)/(n - 1)
future <- new_data(fb_w, h = 12) |>
mutate(h = row_number(),
line_ext = yT + slope*h)
autoplot(fb_w, Adj_Close) +
autolayer(fc_drift, level = NULL) +
geom_line(data = future, aes(Week, line_ext), linetype = "dashed") +
labs(title = "FB drift method extended line (weekly)", x = NULL, y = "$")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
fb_w <- gafa_stock |>
filter(Symbol == "FB") |>
index_by(Week = yearweek(Date)) |>
summarise(Adj_Close = last(Adj_Close))
fit_naive <- fb_w |> model(naive = NAIVE(Adj_Close))
fc_naive <- fit_naive |> forecast(h = 12)
autoplot(fb_w, Adj_Close) +
autolayer(fc_naive, level = NULL) +
labs(title = "FB NAIVE benchmark (weekly)", x = NULL, y = "$")
Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
What do you conclude? I conclude that the residuals
have a baseline around 0 while the ACF shows no trend or seasonality. As
for the seasonal Naive forecasts show a repetition of their last year
patterns with seasonality benchmark being that the shaded areas around
the forecast line show how unsure the statistic is. Basically if the
past numbers bounced around a lot, the bands get wider and vice versa if
the past was stable, they’re narrower. They’re the range where we expect
most future points to land (often 80% or 95% of the time).
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
exp_aus <- global_economy |> filter(Country == "Australia") |> select(Year, Exports)
fit_exp <- exp_aus |> model(naive = NAIVE(Exports))
fit_exp |> gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
fc_exp <- fit_exp |> forecast(h = 5)
autoplot(exp_aus, Exports) + autolayer(fc_exp)
bricks <- aus_production |> select(Quarter, Bricks)
fit_bricks <- bricks |> model(snaive = SNAIVE(Bricks ~ lag("year")))
fit_bricks |> gg_tsresiduals()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_bin()`).
fc_bricks <- fit_bricks |> forecast(h = 20)
autoplot(bricks, Bricks) + autolayer(fc_bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
For your retail time series (from Exercise 7 in Section 2.10):
a. Create a training dataset consisting of observations before 2011 using
myseries <- aus_retail |>
filter(`Series ID` == sample(unique(`Series ID`), 1)) |>
select(State, Industry, Month, Turnover) |>
arrange(Month)
myseries_train <- myseries |>
filter(year(Month) < 2011)
b. Check that your data have been split appropriately by producing the following plot.
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
c. Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).
fit <- myseries_train |>
model(snaive = SNAIVE(Turnover ~ lag("year")))
d. Check the residuals.
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
Do the residuals appear to be uncorrelated and normally
distributed?
The residuals do appear to be uncorrelated and normally distributed because visibly there’s only really one large spike on lag 4 showing practically little to none autocorrelation as well as the histogram is mostly uniform.
e. Produce forecasts for the test data
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, Month, Turnover)`
fc |> autoplot(myseries)
f. Compare the accuracy of your forecasts against the actual values.
fit |> accuracy()
fc |> accuracy(myseries)
g. How sensitive are the accuracy measures to the amount of training data used?
Not really sensitive because With sNaive, forecasts copy the last 12 months. This means that accuracy won’t change much without the training cutoff shifts to the final year. But when it does, accuracy can be inconsistent especially if specific unusual events happened the last year.