DATA 624 Assignment 3: Forecasting Principles & Practice Chapter 5 exercise

#5.1 Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

#Australian Population (global_economy)

aus_pop <- global_economy |>
  filter(Country == "Australia") |>
  select(Year, Population)

fit_pop <- aus_pop |>
  model(RW(Population ~ drift()))

fit_pop |>
  forecast(h = 10) |>
  autoplot(aus_pop)

#Bricks (aus_production)

fit_bricks <- aus_production |>
  model(SNAIVE(Bricks))

fit_bricks |>
  forecast(h = 8) |>
  autoplot(aus_production)

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

#NSW Lambs (aus_livestock)

nsw_lambs <- aus_livestock |>
  filter(State == "New South Wales", Animal == "Lambs") |>
  select(Month, Count)

fit_lambs <- nsw_lambs |>
  model(SNAIVE(Count))

fit_lambs |>
  forecast(h = "2 years") |>
  autoplot(nsw_lambs)

#Household wealth (hh_budget)

fit_wealth <- hh_budget |>
  model(RW(Wealth ~ drift()))

fit_wealth |>
  forecast(h = 8) |>
  autoplot(hh_budget)

#Australian takeaway food turnover (aus_retail).

takeaway <- aus_retail |>
  filter(Industry == "Takeaway food services") |>
  summarise(Turnover = sum(Turnover))

fit_takeaway <- takeaway |>
  model(SNAIVE(Turnover))

fit_takeaway |>
  forecast(h = "2 years") |>
  autoplot(takeaway)

#5.2 Use the Facebook stock price (data set gafa_stock) to do the following:

#(a) Produce a time plot of the series.

fb <- gafa_stock |>
  filter(Symbol == "FB") |>
  select(Date, Close) |>
  as_tsibble(index = Date) |>
  fill_gaps() |>
  fill(Close, .direction = "down") |>
  tsibble::update_tsibble(regular = TRUE)

fb |>
  autoplot(Close) +
  labs(title = "Facebook (FB) closing price",
       y = "Closing price")

#(b) Produce forecasts using the drift method and plot them.

fit_fb_drift <- fb |>
  model(RW(Close ~ drift()))

## Warning: 1 error encountered for RW(Close ~ drift())
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

fc_fb_drift <- fit_fb_drift |>
  forecast(h = 30)

fc_fb_drift |>
  autoplot(fb) +
  labs(title = "Facebook closing price with drift forecast",
       y = "Close")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).

#(c) Show that the forecasts are identical to extending the line drawn between the first and last observations.

first_close <- first(fb$Close)
last_close  <- last(fb$Close)
n <- nrow(fb)

slope <- (last_close - first_close) / (n - 1)

fb |>
  ggplot(aes(Date, Close)) +
  geom_line() +
  geom_abline(intercept = first_close, slope = slope) +
  labs(title = "Line between first and last observations",
       y = "Close")

autoplot(fb, Close) +
  autolayer(fc_fb_drift, Close) +
  geom_abline(intercept = first_close, slope = slope) +
  labs(title = "Drift forecast",
       y = "Close")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).

#(d) Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

fit_fb_bench <- fb |>
  model(
    NAIVE = NAIVE(Close),
    DRIFT = RW(Close ~ drift())
  )

## Warning: 1 error encountered for NAIVE
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

## Warning: 1 error encountered for DRIFT
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

fit_fb_bench |>
  forecast(h = 30) |>
  autoplot(fb) +
  labs(title = "Benchmark forecasts for Facebook stock",
       y = "Close")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 60 rows containing missing values or values outside the scale range
## (`geom_line()`).

#5.3 Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)

fit_beer <- recent_production |>
  model(SNAIVE(Beer))

fit_beer |>
  gg_tsresiduals()

## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).

fit_beer |>
  forecast(h = 8) |>
  autoplot(recent_production) +
  labs(title = "Beer production forecasts (SNAIVE)", y = "Beer")

#The seasonal naïve model captures seasonality but not long-term changes. The residuals are not completely white noise. The forecasts repeat the most recent seasonal pattern.

#5.4 Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

aus_exports <- global_economy |>
  filter(Country == "Australia")

fit_exports <- aus_exports |>
  model(NAIVE(Exports))

fit_exports |> gg_tsresiduals()

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).

fit_exports |>
  forecast(h = 10) |>
  autoplot(aus_exports)

#5.7 For your retail time series (from Exercise 7 in Section 2.10):

set.seed(12345678)

myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

#(a) Create a training dataset consisting of observations before 2011 using

myseries_train <- myseries |>
  filter(year(Month) < 2011)

#(b) Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

#(c) Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

fit <- myseries_train |>
  model(SNAIVE(Turnover))

#(d) Check the residuals.

fit |> gg_tsresiduals()

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).

#Do the residuals appear to be uncorrelated and normally distributed?

#The residuals do not appear to be uncorrelated, as the autocorrelation plot shows several significant spikes. The histogram suggests the residuals are approximately normally distributed, although there are some deviations and extreme values. Overall, the residuals are not white noise.

#(e) Produce forecasts for the test data

fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fc |> autoplot(myseries)

#(f) Compare the accuracy of your forecasts against the actual values.

fit |> accuracy()

fc |> accuracy(myseries)

#(g) How sensitive are the accuracy measures to the amount of training data used?

#The accuracy measures are somewhat sensitive to the amount of training data used because the seasonal naïve method relies on the most recent seasonal pattern. Using older data may reduce accuracy, while using more recent data may improve forecasts. However, using too little training data may also reduce reliability.

DATA 624 Assignment 3: Forecasting Principles & Practice Chapter 5 exercise

Chanice Mckenzie