Chapter 5 - Forecasting: Principles and Practice

1

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy) Bricks (aus_production) NSW Lambs (aus_livestock) Household wealth (hh_budget). Australian takeaway food turnover (aus_retail).

Australian Population

aus_pop <- global_economy %>%
  filter(Country == 'Australia') %>%
  select(Country, Year, Population)

train <- aus_pop %>%
  filter_index(. ~ '2014')

aus_pop_fit <- train %>%
  model(Naive = NAIVE(Population),
        Drift = NAIVE(Population ~ drift())
        )

aus_pop_fc <- aus_pop_fit %>%
  forecast(h = '3 years')

aus_pop_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(aus_pop, '2015' ~ .),
    color = 'black'
  ) +
  labs(title = 'Australian Population Forecast 2015 - 2017' ) +
  guides(color = guide_legend(title = 'Forecast'))

## Plot variable not specified, automatically selected `.vars = Population`

Bricks

bricks <- aus_production %>%
  select(Quarter, Bricks)

train <- bricks %>%
  filter_index('1992 Q1' ~ '2001 Q4')

bricks_fit <- train %>%
  model(
    Mean = MEAN(Bricks),
    Drift = NAIVE(Bricks ~ drift()),
    `Season Naive` = SNAIVE(Bricks)
  )

bricks_fc <- bricks_fit %>% forecast(h = 14)

bricks_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(bricks, '2002 Q1' ~ .),
    color = 'black'
  ) +
  labs(title = "Forecasts for quarterly bricks production"
  ) +
  guides(color = guide_legend(title = "Forecast"))

## Plot variable not specified, automatically selected `.vars = Bricks`

## Warning: Removed 20 row(s) containing missing values (geom_path).

NSW Lambs

lambs <- aus_livestock %>%
  filter(Animal == 'Lambs') %>%
  select(Month, Count)

lambs <- lambs[,1:2] %>%
  group_by(Month) %>%
  summarise(Count = sum(Count)) %>%
  as_tsibble(index = Month)

## `summarise()` ungrouping output (override with `.groups` argument)

lambs_train <- lambs %>%
  filter_index(. ~ '2017 Dec')

lambs_fit <- lambs_train %>%
  model(
    Mean = MEAN(Count),
    Drift = NAIVE(Count ~ drift()),
    `Season Naive` = SNAIVE(Count)
  )

lambs_fc <- lambs_fit %>% forecast(h = 12)

lambs_fc %>%
  autoplot(lambs_train, level = NULL) +
  autolayer(
    filter_index(lambs, '2018 Jan' ~ .),
    color = 'black'
  ) +
  labs(title = "Forecasts for monthly lambs live stock"
  ) +
  guides(color = guide_legend(title = "Forecast"))

## Plot variable not specified, automatically selected `.vars = Count`

Household wealth

wealth <- hh_budget %>%
  filter(Country == 'Australia') %>%
  select(Year, Wealth)

train <- wealth %>%
  filter_index(. ~ '2013')

wealth_fit <- train %>%
  model(Naive = NAIVE(Wealth),
        Drift = NAIVE(Wealth ~ drift())
        )

wealth_fc <- wealth_fit %>%
  forecast(h = '3 years')


wealth_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(wealth, '2014' ~ .),
    color = 'black'
  ) +
  labs(title = 'Australian Wealth Forecast 2014 - 2016' ) +
  guides(color = guide_legend(title = 'Forecast'))

## Plot variable not specified, automatically selected `.vars = Wealth`

Australian takeway food turnover

takeway_turnover <- aus_retail %>%
  filter(Industry == 'Takeaway food services') %>%
  select(Month, Turnover) 

takeway_turnover <- takeway_turnover[,1:2] %>%
  group_by(Month) %>%
  summarise(Turnover = sum(Turnover)) %>%
  as_tsibble(index = Month)

## `summarise()` ungrouping output (override with `.groups` argument)

train <- takeway_turnover %>%
  filter_index(. ~ '2018 Jun')

takeaway_turnover_fit <- train %>%
  model(Naive = NAIVE(Turnover),
        `Seasonal Naive` = SNAIVE(Turnover),
        Drift = NAIVE(Turnover ~ drift())
        )

takeaway_turnover_fc <- takeaway_turnover_fit %>%
  forecast(h = '6 months')


takeaway_turnover_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(takeway_turnover, '2018 Jul' ~ .),
    color = 'black'
  ) +
  labs(title = 'Takeaway Turnover Forecast July - December 2018' ) +
  guides(color = guide_legend(title = 'Forecast'))

## Plot variable not specified, automatically selected `.vars = Turnover`

2

Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series.

fb_stock <- gafa_stock %>%
  filter(Symbol == 'FB', year(Date) >= 2015) %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE) %>%
  select(Date, Close)

fb_stock %>%
  autoplot(Close) +
  labs(y = '$US', title = 'Facebook Stock Price')

Produce forecasts using the drift method and plot them.

fb_stock_2015 <- fb_stock %>%
  filter(year(Date) == 2015) %>%
  select(day, Close)

fb_stock_fit <- fb_stock_2015 %>%
  model(Drift = RW(Close ~ drift()))

fb_stock_2016 <- fb_stock %>%
  filter(yearmonth(Date) == yearmonth('2016 Jan')) %>%
  select(day, Close)

fb_fc <- fb_stock_fit %>%
  forecast(new_data = fb_stock_2016)

fb_fc %>%
  autoplot(fb_stock_2015, level = NULL) +
  autolayer(fb_stock_2016, Close, color = 'black') +
  labs(y = '$US',
       title = 'Facebook daily closing stock prices',
       subtitle = 'Jan 2015 - Jan 2016'
       ) +
  guides(color = guide_legend((title = 'Forecasts')))

Show that the forecasts are identical to extending the line drawn between the first and last observations.

fb_stock2 <- fb_stock %>%
  filter(year(Date) == 2015)

fb_fc %>% 
  autoplot(fb_stock2, level = NULL) +
  geom_line(data = slice(fb_stock2, range(cumsum(!is.na(Close)))),
                         aes(y=Close), linetype = 'dashed')

Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

The best forecast method is the Drift method as it follows the trend of the stock prices. The trend of the stock prices looks to be a strong indicator for future stock prices and the Mean and Naive methods do not take this into account.

fb_stock_fit2 <- fb_stock_2015 %>%
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close)
    )
          
fb_stock_2016 <- fb_stock %>%
  filter(yearmonth(Date) == yearmonth('2016 Jan'))

fb_fc2 <- fb_stock_fit2 %>%
  forecast(new_data = fb_stock_2016)

fb_fc2 %>%
  autoplot(fb_stock_2015, level = NULL) +
  autolayer(fb_stock_2016, Close, color = 'black') +
  labs(y = '$US',
       title = 'Facebook daily closing stock prices',
       subtitle = 'Jan 2015 - Jan 2016'
       ) +
  guides(color = guide_legend((title = 'Forecasts')))

## 3

Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

There is one outlier on the ACF graph that shows one lag observation to have a significant autocorrelation.

# Extract data of interest
recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)

4

Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

aus_exp <- global_economy %>%
  filter(Country == 'Australia') %>%
  select(Country, Year, Exports)

# Define and estimate a model
fit <- aus_exp %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_exp) +
  labs(title = 'Australian Exports Forecast Using Naive Method')

bricks <- aus_production %>%
  select(Quarter, Bricks)

# Define and estimate a model
fit <- bricks %>% model(SNAIVE(Bricks))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 24 row(s) containing missing values (geom_path).

## Warning: Removed 24 rows containing missing values (geom_point).

## Warning: Removed 24 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(bricks) +
  labs(title = 'Bricks Forecast Using Seasonal Naive Method')

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning -
## Inf

## Warning: Removed 8 row(s) containing missing values (geom_path).

## Warning: Removed 20 row(s) containing missing values (geom_path).

7

For your retail time series (from Exercise 8 in Section 2.10):

Create a training dataset consisting of observations before 2011 using

set.seed(164)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

fit <- myseries_train %>%
  model(SNAIVE(Turnover))

Check the residuals.

fit %>% gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

Do the residuals appear to be uncorrelated and normally distributed?

The residuals are normally distributed with a mean of 0. However, the residuals seem to be correlated. The ACF graph shows that there is significant correlation in most lag periods. Moreover, the autocorrelation changes from positive to negative at lag 10.

Produce forecasts for the test data

fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc %>% autoplot(myseries)

Compare the accuracy of your forecasts against the actual values.

fit %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>      <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Austr~ Electrica~ SNAIV~ Trai~ 0.987  3.05  2.18  5.27  13.0     1     1 0.755

fc %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model  State Industry  .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr> <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE~ Aust~ Electric~ Test  -0.508  3.65  2.99 -2.83  9.61  1.37  1.20 0.713

How sensitive are the accuracy measures to the amount of training data used?

The seasonal naive method to forecast Electrical and electronic goods retailing turnover proves to be very senstitve with low error scores.

DATA624 - DC HW3