exercises 5.1, 5.2, 5.3, 5.4 and 5.7

5.1

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(fable)
library(tsibble)
library(dplyr)


global_economy |> 
  filter(Country == "Australia") |> 
  model(RW(Population ~ drift())) |>
  forecast(h = 5) |>
  autoplot(global_economy)

aus_production |> 
  model(SNAIVE(Bricks)) |>
  forecast(h = 10) |>
  autoplot(aus_production)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_livestock |> 
  filter(State == "New South Wales", Animal == "Lambs") |> 
  model(NAIVE(Count)) |>
  forecast(h = 20) |>
  autoplot(aus_livestock)

hh_budget |> 
  model(RW(Wealth ~ drift())) |>
  forecast(h = 5) |>
  autoplot(hh_budget)

aus_retail |> 
  filter(Industry == "Takeaway food services") |> 
  model(RW(Turnover ~ drift())) |>
  forecast(h = 30) |>
  autoplot(aus_retail) +
  facet_wrap(~State, scales = "free")

5.2

Use the Facebook stock price (data set gafa_stock) to do the following:

a. Produce a time plot of the series.

b. Produce forecasts using the drift method and plot them.

c. Show that the forecasts are identical to extending the line drawn between the first and last observations.

d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

gafa_stock |>  
  filter(Symbol == "FB") |>
  autoplot(Open)

FB_data <- gafa_stock |> 
  filter(Symbol == "FB") |>
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE)

FB_data |> model(RW(Open ~ drift())) |>
  forecast(h = 30) |>
  autoplot(FB_data)

first_value <- gafa_stock |>
  filter(Date == min(Date), Symbol == "FB") |>
  select(Open)  
print(first_value)
## # A tsibble: 1 x 2 [!]
##    Open Date      
##   <dbl> <date>    
## 1  54.8 2014-01-02
last_value <- gafa_stock |>
  filter(Date == max(Date), Symbol == "FB") |>
  select(Open)  
print(last_value) 
## # A tsibble: 1 x 2 [!]
##    Open Date      
##   <dbl> <date>    
## 1  134. 2018-12-31
index_value <- gafa_stock |>
  filter(Symbol == "FB") |>
  mutate(day = row_number()) |>
  select(day)
print(range(index_value$day))
## [1]    1 1258
FB_data |> 
  model(RW(Open ~ drift())) |>
  forecast(h = 30) |>
  autoplot(FB_data) +
  geom_segment(aes(x = 1, y = 54.83, xend = 1258, yend = 134.45),
               linetype = "dashed")
## Warning in geom_segment(aes(x = 1, y = 54.83, xend = 1258, yend = 134.45), : All aesthetics have length 1, but the data has 1258 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

FB_data |>
  model(Mean = MEAN(Open),
        `Naïve` = NAIVE(Open),
        Drift = RW(Open ~ drift())) |>
  forecast(h = 50) |>
  autoplot(FB_data, level = NULL)

5.3

Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

# Extract data of interest
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)

5.4

Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

# Extract data of interest
global_data <- global_economy |>
  filter(Country == "Australia")
# Define and estimate a model
fit <- global_data |> model(NAIVE(Exports))
# Look at the residuals
fit |> gg_tsresiduals() 
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

# Look at some forecasts
fit |> forecast() |> autoplot(global_data) 

# Extract data of interest
aus_data <- aus_production |>
  filter(!is.na(Bricks))  
# Define and estimate a model
fit <- aus_data |> model(SNAIVE(Bricks))
# Look at the residuals
fit |> gg_tsresiduals() 
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Look at some forecasts
fit |> forecast() |> autoplot(aus_production)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

5.7

For your retail time series (from Exercise 7 in Section 2.10):

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- myseries |>
  filter(year(Month) < 2011)

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

fit <- myseries_train |>
  model(SNAIVE(Turnover))

fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)

fit |> accuracy()
## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768
fc |> accuracy(myseries)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601

d. The residual seems it is correlate due to the residual being right skewed.

f. The data seem to be around 80% confidence interval.

g. There is a change in accuracy measure with the number of trained data, so it should be pretty sensitive, although too much data can lead to lower accuracy.