Forecasting

Required Libraries

library(fpp3)

Problem 5.1

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

We don’t observe any seasonality, only a strong, positive linear trend. Using the RW() (random walk) drift function would be most appropriate.

aus_pop <-
  global_economy |>
  filter(Country == "Australia") |>
  select(c(Country, Year, Population))

aus_pop_model <-
  aus_pop |>
  model(Drift = RW(Population ~ drift())) 

aus_pop_forecast <-
  aus_pop_model|>
  forecast(h=5)

aus_pop_forecast |>
  autoplot(aus_pop, level = 0.95)

Bricks (aus_production)

We see there is a seasonality trend and we can use SNAIVE() to forecast the next 8 quarters.

bricks <-
  aus_production |>
  filter(!is.na(Bricks)) |>
  select(c(Quarter, Bricks))

bricks_model <-
  bricks |>
  model(`Seasonal naïve` = SNAIVE(Bricks)) 

bricks_forecast <-
  bricks_model|>
  forecast(h=8)

bricks_forecast |>
  autoplot(bricks, level = 0.95)

NSW Lambs (aus_livestock)

We see some seasonality again and use the SNAIVE() function again

nsw_lambs <-
  aus_livestock |>
  filter(Animal == "Lambs",
         State == "New South Wales")

nsw_lambs_model <-
  nsw_lambs |>
  model(`Seasonal naïve` = SNAIVE(Count)) 

nsw_lambs_forecast <-
  nsw_lambs_model|>
  forecast(h=8)

nsw_lambs_forecast |>
  autoplot(nsw_lambs, level = 0.95)

Household wealth (hh_budget).

We have yearly data and no evidence of seasonlaity. The RW() function will help forecast the overall trend.

wealth <-
  hh_budget |>
  select(Country, Year, Wealth)

wealth_model <-
  wealth |>
  model(Drift = RW(Wealth ~ drift()))

wealth_forecast <-
  wealth_model |>
  forecast(h=4)

wealth_forecast |>
  autoplot(wealth, level = 0.95)

Australian takeaway food turnover (aus_retail).

Initially an SNAIVE() looked to make some sense given possible seasonality effects, but we do lose the overall trend. The RW() function made more sense overall.

food <-
  aus_retail |>
  filter(Industry == "Takeaway food services",
         State == "Australian Capital Territory")

food_model <-
  food |>
  model(`Seasonal naïve` = SNAIVE(Turnover),
        Drift = RW(Turnover ~ drift()))

food_forecast <-
  food_model |>
  forecast(h=12)

food_forecast |>
  autoplot(food, level = NULL) +
  guides(colour = guide_legend(title = "Forecast"))

Problem 5.2

Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series.

fb_stock <-
  gafa_stock |>
  filter(Symbol == "FB") |>
  select(Date, Symbol, Close) |>
  mutate(day = row_number()) |>
  update_tsibble(index = day, regular = TRUE)

fb_stock |>
  autoplot(Close)

Produce forecasts using the drift method and plot them.

Our green line shows the drift method forecast.

fb_stock_model <-
  fb_stock |>
  model(Drift = RW(Close ~ drift()))

fb_stock_forecast <-
  fb_stock_model |>
  forecast(h = 30)

fb_stock_forecast |>
  autoplot(fb_stock, level = 0.95, colour='green')

Show that the forecasts are identical to extending the line drawn between the first and last observations.

Here we can see the drift method aligns with the first and last observations slope

fb_stock_forecast |>
  autoplot(fb_stock, level = 0.95, colour='green') +
  geom_segment(aes(x=fb_stock$day[[1]], 
                   y=fb_stock$Close[[1]], 
                   xend=fb_stock$day[[1258]], 
                   yend=fb_stock$Close[[1258]], 
                   colour="segment")) +
  theme(legend.position = "none")

Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

Using the Naïve forecast would be best as it uses the most recent value to determine it’s future value, while the mean is undervaluing it’s current pricing.

fb_stock_model <-
  fb_stock |>
  model(`Naïve` = NAIVE(Close),
        Mean = MEAN(Close))

fb_stock_forecast <-
  fb_stock_model |>
  forecast(h = 30)

fb_stock_forecast |>
  autoplot(fb_stock, level = NULL) +
  guides(colour = guide_legend(title = "Forecast"))

Problem 5.3

Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

What do you conclude?

We don’t see many residuals that are outliers within the ACF plot. However, our histogram shows us that the distribution is more bimodal rather than normally distributed and the residuals may not just be white noise. We can check it with the Ljung-Box test to determine if the p-value is low enough to be statistically different than just white noise.

# Extract data of interest
recent_production <- 
  aus_production |>
  filter(year(Quarter) >= 1992)

# Define and estimate a model
fit <- 
  recent_production |>
  model(SNAIVE(Beer))

# Look at the residuals
fit |> 
  gg_tsresiduals()

# Look a some forecasts
fit |> 
  forecast() |> 
  autoplot(recent_production, level=0.95)

Indeed, it is statistically different and this model may not be well used for forecasting as the residuals are not just accounting for white noise.

fit |> 
  augment() |>
  features(.innov, ljung_box, lag = 10)

## # A tibble: 1 × 3
##   .model       lb_stat lb_pvalue
##   <chr>          <dbl>     <dbl>
## 1 SNAIVE(Beer)    37.8 0.0000412

Problem 5.4

Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

For Australian exports we see we have a normally distributed residual histogram, and our ACF() plot has only one outlier.

aus_exports <-
  global_economy |>
  select(Country, Year, Exports) |>
  filter(Country=="Australia")

aus_exports_model <-
  aus_exports |> 
  model(Naive = NAIVE(Exports)) 

aus_exports_model|>
  gg_tsresiduals()

aus_exports_model |> 
  augment() |>
  features(.innov, ljung_box, lag = 10)

## # A tibble: 1 × 4
##   Country   .model lb_stat lb_pvalue
##   <fct>     <chr>    <dbl>     <dbl>
## 1 Australia Naive     16.4    0.0896

Here we have a left-skewed distribution and many outliers in the ACF plot. Both NAIVE and SNAIVE are not great model fits for the bricks production dataset.

aus_bricks <-
  aus_production |>
  select(Quarter, Bricks) |>
  filter(!is.na(Bricks))

aus_bricks_model <-
  aus_bricks |> 
  model(Naive = SNAIVE(Bricks)) 

aus_bricks_model|>
  gg_tsresiduals()

aus_bricks_model |> 
  augment() |>
  features(.innov, ljung_box, lag = 10)

## # A tibble: 1 × 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 Naive     301.         0

Problem 5.7

For your retail time series (from Exercise 7 in Section (2.10):

Create a training dataset consisting of observations before 2011 using

set.seed(42)

myseries <- 
  aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- 
  myseries |>
  filter(year(Month) < 2011)

Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

fit <- 
  myseries_train |>
  model(SNAIVE(Turnover))

Check the residuals. Do the residuals appear to be uncorrelated and normally distributed?

The residuals are heavily correlated and not just white noise. They are also not normally distributed as there are outliers.

fit |> gg_tsresiduals()

Produce forecasts for the test data

fc <- 
  fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

fc |> 
  autoplot(myseries, level=0.95)

Compare the accuracy of your forecasts against the actual values. How sensitive are the accuracy measures to the amount of training data used?

The training data was more accurate compared to our test data. This can be because we know our test data has a lot more data to work with as opposed to the test set.

fit |> 
  accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Western… Newspap… SNAIV… Trai… 0.838  5.67  4.09  2.49  16.0     1     1 0.849

fc |> 
  accuracy(myseries)

## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… West… Newspap… Test   3.04  9.11  7.34  6.06  19.9  1.79  1.61 0.513