Exercises 5.1, 5.2, 5.3, 5.4 and 5.7 from Hyndman & Athanasopoulos, Forecasting: Principles and Practice (3rd ed.).

5.1 Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

  • Australian Population (global_economy)
aus_pop <- global_economy |>
  filter(Code == "AUS")

fit_pop <- aus_pop |>
  model(RW(Population ~ drift()))

fc_pop <- fit_pop |>
  forecast(h = 10)

autoplot(fc_pop, aus_pop) +
  labs(title = "Australian population – RW with drift",
       x = "Year", y = "Population")

  • Bricks (aus_production)

bricks <- aus_production |>
  select(Quarter, Bricks) |>
  filter(!is.na(Bricks))  # Bricks is missing after 2005 Q2; drop the trailing NAs

fit_bricks <- bricks |>
  model(SNAIVE(Bricks))

fc_bricks <- fit_bricks |>
  forecast(h = "3 years")

autoplot(fc_bricks, bricks) +
  labs(title = "Bricks – SNAIVE",
       x = "Quarter", y = "Millions of bricks")

  • NSW Lambs (aus_livestock)

nsw_lambs <- aus_livestock |>
  filter(State == "New South Wales",
         Animal == "Lambs")

fit_lambs <- nsw_lambs |>
  model(SNAIVE(Count))

fc_lambs <- fit_lambs |>
  forecast(h = "2 years")

autoplot(fc_lambs, nsw_lambs) +
  labs(title = "NSW Lambs – SNAIVE",
       x = "Month", y = "Lambs")

  • Household wealth (hh_budget).
wealth <- hh_budget |>
  filter(Country == "Australia")  # choose the country
  # Year is already the index; Wealth is the series

fit_wealth <- wealth |>
  model(RW(Wealth ~ drift()))

fc_wealth <- fit_wealth |>
  forecast(h = "5 years")

autoplot(fc_wealth, wealth) +
  labs(title = "Household wealth (Australia) – RW with drift",
       x = "Year", y = "Wealth (% of net disposable income)")

  • Australian takeaway food turnover (aus_retail).

aus_retail |>
  distinct(State) |>
  arrange(State)
## # A tibble: 8 × 1
##   State                       
##   <chr>                       
## 1 Australian Capital Territory
## 2 New South Wales             
## 3 Northern Territory          
## 4 Queensland                  
## 5 South Australia             
## 6 Tasmania                    
## 7 Victoria                    
## 8 Western Australia
takeaway <- aus_retail |>
  filter(State == "New South Wales",
         Industry == "Takeaway food services")
takeaway
## # A tsibble: 441 x 5 [1M]
## # Key:       State, Industry [1]
##    State           Industry               `Series ID`    Month Turnover
##    <chr>           <chr>                  <chr>          <mth>    <dbl>
##  1 New South Wales Takeaway food services A3349792X   1982 Apr     85.4
##  2 New South Wales Takeaway food services A3349792X   1982 May     84.8
##  3 New South Wales Takeaway food services A3349792X   1982 Jun     80.7
##  4 New South Wales Takeaway food services A3349792X   1982 Jul     82.4
##  5 New South Wales Takeaway food services A3349792X   1982 Aug     80.7
##  6 New South Wales Takeaway food services A3349792X   1982 Sep     82.1
##  7 New South Wales Takeaway food services A3349792X   1982 Oct     87.3
##  8 New South Wales Takeaway food services A3349792X   1982 Nov     87.4
##  9 New South Wales Takeaway food services A3349792X   1982 Dec     97.2
## 10 New South Wales Takeaway food services A3349792X   1983 Jan     93  
## # ℹ 431 more rows
fit_takeaway <- takeaway |>
  model(SNAIVE(Turnover))

fc_takeaway <- fit_takeaway |>
  forecast(h = "2 years")

fc_takeaway |>
  autoplot(takeaway) +
  labs(title = "NSW takeaway food – SNAIVE",
       x = "Month", y = "Turnover")

5.2 Use the Facebook stock price (data set gafa_stock) to do the following:

  • A. Produce a time plot of the series.
fb <- gafa_stock |>
  filter(Symbol == "FB")

fb |>
  ggplot(aes(x = Date, y = Close)) +
  geom_line() +
  labs(title = "Facebook daily closing price",
       x = "Date", y = "Close")

  • B. Produce forecasts using the drift method and plot them.

# Daily stock prices skip weekends and holidays, so the Date index is
# irregular. Re-index by trading day to obtain a regularly spaced series.
fb_ts <- gafa_stock |>
  filter(Symbol == "FB") |>
  mutate(trading_day = row_number()) |>   # 1, 2, 3, ... regular spacing
  update_tsibble(index = trading_day, regular = TRUE)

fit_fb_drift <- fb_ts |>
  model(RW(Close ~ drift()))

fc_fb_drift <- fit_fb_drift |>
  forecast(h = 60)

fc_fb_drift |>
  autoplot(fb_ts) +
  labs(
    title = "FB stock – drift forecasts (60 trading days ahead)",
    x = "Trading day index",
    y = "Closing Price (USD)"
  )

  • C. Show that the forecasts are identical to extending the line drawn between the first and last observations.

# The drift forecast line has slope (last - first) / (n - 1), i.e. it
# extends the straight line joining the first and last observations.
fb_summary <- fb_ts |>
  as_tibble() |>
  summarise(first = first(Close), last = last(Close), n = n())

slope <- (fb_summary$last - fb_summary$first) / (fb_summary$n - 1)

# h-step drift forecast = last observation + h * slope;
# the two columns below should agree (up to rounding)
fc_fb_drift |>
  as_tibble() |>
  mutate(line_extension = fb_summary$last +
           (trading_day - fb_summary$n) * slope) |>
  select(trading_day, .mean, line_extension)

The drift forecasts lie on the straight line connecting the first and last observations, so extending that line gives the same forecast values.
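The same equivalence can be shown graphically. The sketch below (assuming the fb_ts and fc_fb_drift objects created above) overlays the segment joining the first and last observations on the forecast plot; the drift forecasts continue that line.

fb_ts |>
  autoplot(Close) +
  autolayer(fc_fb_drift, level = NULL) +
  annotate("segment",
           x = 1, xend = max(fb_ts$trading_day),
           y = first(fb_ts$Close), yend = last(fb_ts$Close),
           colour = "blue", linetype = "dashed") +
  labs(title = "Drift forecasts extend the first-to-last line",
       x = "Trading day index", y = "Closing Price (USD)")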

  • D. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
# Re-create the trading-day-indexed series (as in part B)
fb_ts <- gafa_stock |>
  filter(Symbol == "FB") |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE)

fits <- fb_ts |>
  model(
    mean  = MEAN(Close),
    naive = NAIVE(Close),
    drift = RW(Close ~ drift())
  )

fc <- fits |>
  forecast(h = 60)

fc |>
  autoplot(fb_ts) +
  labs(
    title = "FB stock – benchmark forecasts",
    x = "Trading day index",
    y = "Closing Price (USD)"
  )

Among the benchmark methods, the random walk–type models are the most appropriate. The mean method ignores the fact that recent prices sit far above the long-run average, and a seasonal naïve method is not applicable because daily stock prices have no seasonal period. The drift method captures the overall upward trend in the FB series over this sample and produces the most plausible point forecasts, so it is the best of these benchmarks for this data set, although the naïve method is a close competitor because stock prices behave much like a random walk.
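To back this up numerically, one option is to hold out roughly the last 60 trading days as a test set and compare the benchmarks on out-of-sample accuracy. The sketch below assumes the fb_ts object created above; fb_train and fb_test are names introduced here for illustration.

# Hold out the final 60 trading days as a test set
fb_train <- fb_ts |> filter(trading_day <= max(trading_day) - 60)
fb_test  <- fb_ts |> filter(trading_day >  max(trading_day) - 60)

# Fit the benchmarks on the training data and evaluate on the test data
fb_train |>
  model(
    mean  = MEAN(Close),
    naive = NAIVE(Close),
    drift = RW(Close ~ drift())
  ) |>
  forecast(new_data = fb_test) |>
  accuracy(fb_ts) |>
  select(.model, RMSE, MAE, MAPE)

Whichever method gives the lowest test-set errors provides an objective basis for the choice, although the result depends on the particular holdout window.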

5.3 Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

beer <- aus_production |>
  filter(year(Quarter) >= 1992) |>
  select(Quarter, Beer)

fit_beer <- beer |>
  model(SNAIVE(Beer))

# Check residuals
fit_beer |>
  gg_tsresiduals()

The residuals are roughly centered around zero with no obvious remaining seasonal pattern; most autocorrelations fall within the bounds, so they are approximately white noise (though there may be small deviations).
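A more formal check is a Ljung–Box test on the innovation residuals; a small sketch (lag = 8, i.e. two seasonal cycles for quarterly data):

augment(fit_beer) |>
  features(.innov, ljung_box, lag = 8)

A p-value well above 0.05 would be consistent with white noise, while a small p-value would indicate remaining autocorrelation.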

For reference, the helper code provided in the exercise fits the same model and also plots the forecasts:

# Extract data of interest
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()

# Look at some forecasts
fit |> forecast() |> autoplot(recent_production)

  • What do you conclude?

The seasonal naïve forecasts simply repeat the last observed year of quarterly values, which is appropriate here because the series is dominated by a stable seasonal pattern. Since the residuals are close to white noise, there is little remaining structure for the benchmark to miss, so SNAIVE is a reasonable choice for this series.

5.4 Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

aus_exports <- global_economy |>
  filter(Code == "AUS") |>
  select(Year, Exports)

fit_exports <- aus_exports |>
  model(NAIVE(Exports))

fc_exports <- fit_exports |>
  forecast(h = 10)

autoplot(fc_exports, aus_exports) +
  labs(title = "Australian Exports – NAIVE forecasts",
       x = "Year", y = "Exports")

bricks <- aus_production |>
  select(Quarter, Bricks) |>
  filter(!is.na(Bricks))  # Bricks is missing after 2005 Q2; drop the trailing NAs

fit_bricks <- bricks |>
  model(SNAIVE(Bricks))

fc_bricks <- fit_bricks |>
  forecast(h = "3 years")

autoplot(fc_bricks, bricks) +
  labs(title = "Bricks production – SNAIVE forecasts",
       x = "Quarter", y = "Millions of bricks")

5.7 For your retail time series (from Exercise 7 in Section 2.10):

  • A. Create a training dataset consisting of observations before 2011 using
myseries <- aus_retail |>
  filter(State == "New South Wales",
         Industry == "Department stores")

myseries_train <- myseries |>
  filter(year(Month) < 2011)

  • B. Check that your data have been split appropriately by producing the following plot.
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red") +
  labs(title = "Retail series with training portion highlighted",
       x = "Month", y = "Turnover")

  • C. Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).
fit <- myseries_train |>
  model(SNAIVE(Turnover))

  • D. Check the residuals.
fit |> gg_tsresiduals()

Do the residuals appear to be uncorrelated and normally distributed?

  • E. Produce forecasts for the test data
myseries_test <- myseries |> filter(year(Month) >= 2011)

fc <- fit |>
  forecast(h = nrow(myseries_test))

autoplot(fc, myseries) +
  labs(title = "SNAIVE forecasts vs actuals",
       x = "Month", y = "Turnover")

  • F. Compare the accuracy of your forecasts against the actual values.
fit |> accuracy()
## # A tibble: 1 × 12
##   State  Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 New S… Departm… SNAIV… Trai…  11.2  24.7  19.1  3.24  5.58     1     1 -0.0527
fc |> accuracy(myseries)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… New … Departm… Test  -2.81  25.5  19.9 -1.02  4.05  1.04  1.03 0.201

  • G. How sensitive are the accuracy measures to the amount of training data used?

The accuracy measures are not highly sensitive to the exact amount of training data, provided the training set contains at least a few complete seasonal cycles and the seasonal pattern is stable. Very short training sets give noticeably worse and more variable accuracy, while adding much older data usually changes the results only slightly, unless the series has structural changes. For a seasonal naïve model in particular, the point forecasts depend only on the final year of the training data, so the test-set errors barely change as earlier observations are added or removed.
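This can be checked directly by refitting the model on training sets of different lengths and comparing the test-set accuracy. The sketch below assumes the myseries and myseries_test objects defined above; the cut-off years in cutoffs are arbitrary choices for illustration.

library(purrr)

cutoffs <- c(1995, 2000, 2005, 2008)  # illustrative training-set start years

map_dfr(cutoffs, function(start_year) {
  train <- myseries |>
    filter(year(Month) >= start_year, year(Month) < 2011)
  train |>
    model(SNAIVE(Turnover)) |>
    forecast(new_data = myseries_test) |>
    accuracy(myseries) |>
    mutate(train_start = start_year, .before = 1)
}) |>
  select(train_start, RMSE, MAE, MAPE, MASE)

Because SNAIVE's point forecasts use only the final year of the training data, the rows should be essentially identical, illustrating the insensitivity described above.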