Homework #3

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ readr   2.1.2     ✔ stringr 1.4.1
## ✔ purrr   0.3.4     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()

library(dplyr)
library(seasonal)

## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view

Question 1

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

The unforecasted graph shows a trend of Population is almost linear, this means that the drift forecast would be best.

global_economy %>% 
  filter(Country == "Australia") %>%
  autoplot(Population) +
  labs(title = "Unforecasted Australia Population",
       subtitle = "1960 - 2017")

global_economy %>% 
  filter(Country == "Australia") %>%
  model(RW(Population ~ drift())) %>%
  forecast(h = 4) %>%
  autoplot(global_economy) +
  labs(title = "Australia Population",
       subtitle = "1960 - 2017, Forecasted until 2021")

Bricks (aus_production)

The data, shown in the “Astralian Bricks Production” graph is seasonal. Because of this, the SNaive method is best. The data for Q3 of 2005 on is missing so we are forecasting 2 years ahead with the seasonal periods being quarters.

aus_production %>% 
  autoplot(Bricks) +
  labs(title = "Australian Bricks Production")

## Warning: Removed 20 row(s) containing missing values (geom_path).

aus_production %>% 
  filter(!is.na(Bricks)) %>%
  model(SNAIVE(Bricks)) %>%
  forecast(h = 8) %>%
  autoplot(aus_production) +
  labs(title = "Australian Bricks Production",
       subtitle = "1956 - 2005 Q2, Forecasted until 2007 Q2")

## Warning: Removed 20 row(s) containing missing values (geom_path).

NSW Lambs (aus_livestock) There isn’t a significant trend in the data like the two previous datasets.There is a seasonal component and because of this, the SNaive method is best.

aus_livestock %>%
  filter(State == "New South Wales", 
         Animal == "Lambs") %>%
  autoplot() +
  labs(title = "Lambs in New South Wales",
       subtitle = "July 1976 - Dec 2018")

## Plot variable not specified, automatically selected `.vars = Count`

aus_livestock %>%
  filter(State == "New South Wales", 
         Animal == "Lambs") %>%
  model(SNAIVE(Count)) %>%
  forecast(h = 24) %>%
  autoplot(aus_livestock) +
  labs(title = "Lambs in New South Wales",
       subtitle = "July 1976 - Dec 2018, Forecasted until Dec 2020")

Household wealth (hh_budget) There is a general trend with each country in the hh_budget data set. Australia and Canada have an obvious positive trend. Japan has a slight negative trend and the US has a positive then negative trend. Since there isn’t a seasonal component the the data, the drift method is best.

autoplot(hh_budget)

## Plot variable not specified, automatically selected `.vars = Debt`

hh_budget %>%
  model(RW(Wealth ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(hh_budget) +
  labs(title = "Household Wealth",
       subtitle = "1996 - Dec 2016, Forecasted until 2021")

Australian takeaway food turnover (aus_retail). After plotting the data, there are some states that have a seasonal component to them, particularly in New South Wales and Victoria. Even though there is a seasonal component, I think that the drift method is still the best for forecasting all states at once. This shows a general upward trend is all states with North Territory being the state with the lowest growth.

aus_retail %>%
  filter(Industry == "Cafes, restaurants and takeaway food services") %>%
  autoplot(Turnover) +
  labs(title = "Australian Takeaway Food Turnover")

aus_retail %>%
  filter(Industry == "Cafes, restaurants and takeaway food services") %>%
  model(RW(Turnover ~ drift())) %>%
  forecast(h = 36) %>%
  autoplot(aus_retail) +
  labs(title = "Australian Takeaway Food Turnover",
       subtitle = "Apr 1982 - Dec 2018, Forecasted until Dec 2021") +
  facet_wrap(~State, scales = "free")

Question 2

Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series. The graph shows an overall positive trend until around the 1100th trading day where it becomes negative.

fb_stock <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE) %>%
  select(Date, Close)

fb_stock %>%
  autoplot(Close) +
  labs(title = "Facebook Closing Price", y = "Closing Price", x = "Trading day")

b. Produce forecasts using the drift method and plot them. There are on average of 21 trading days in a month so setting h equal to 105 is the equivalent of forecasting 5 months ahead.

fb_stock %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 105) %>%
  autoplot(fb_stock) +
  labs(title = "Daily Close Price of Facebook", y = "USD")

Show that the forecasts are identical to extending the line drawn between the first and last observations.

The dashed line is the potion of the drift forecast that is not seen. It shows the overall trend of the data though a line of best fit.

fb_stock %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 105) %>%
  autoplot(fb_stock) +
  labs(title = "Daily Close Price of Facebook", y = "USD") +
  geom_segment(aes(x = 1, y = 54.83, xend = 1258, yend = 134.45),
               colour = "blue", linetype = "dashed")

Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

I think the Drift method is the best because the naive method doesn’t account for the trend of the data and the average method is almost meaningless in this example.

fb_stock %>%
  model(Mean = MEAN(Close),
        Naive = NAIVE(Close), 
        Drift = NAIVE(Close ~ drift())) %>%
  forecast(h = 42) %>%
  autoplot(fb_stock, level = NULL) +
  labs(title = "Daily Close Price of Facebook", y = "USD")

Question 3

Produce forecasts for the 7 Victorian series in aus_livestock using SNAIVE(). Plot the resulting forecasts including the historical data. Is this a reasonable benchmark for these series?

Bulls, bullocks and steers Out of these 7 forecast, the only ones where a seasonal naive forecast is the best fit are Calves, Cattle (excl. calves),Cows and heifers, and Sheep. A seasonal naive approach works well for these four because the data follows a seasonal pattern without much variation in trend. The drift method would be more reasonable for the Bulls, bullocks and steers and Lambs data. This is because both of these have prominent trends in the data which will be modeled by the drift line. The Pigs graph would be better suited with the mean method. This is because there is a lack of a trend in the data and the seasonal component varies from period to period.

aus_livestock %>%
  filter(State == "Victoria") %>%
  model(SNAIVE(Count ~ lag("3 years"))) %>%
  forecast(h = "3 years") %>%
  autoplot(aus_livestock) +
  labs(title = "Calves in Victoria") +
  facet_wrap(~Animal, scales = "free")

Question 4

Are the following statements true or false? Explain your answer. a. Good forecast methods should have normally distributed residuals.

True, a normally distribution in the residuals make the forcast accurate. The should also be uncorrolated and follow a white-noise.

A model with small residuals will give good forecasts.

False, just because the residuals are small, doesn’t mean the forecast is accurate.

c.The best measure of forecast accuracy is MAPE.

TRUE, MAPE calculates the prediction accuracy of your forecasting method.

d.If your model does not forecast well, you should make it more complicated.

False, making a model more complicated will cause more issues than it solves. It can make interpreting the results difficult.

e.Always choose the model with the best forecast accuracy as measured on the test set.

True, the model with the best forecast accuracy will give you the most accurate results possible.

Question 5

Select one of the time series as follows (but choose your own seed value): a.Create a training dataset consisting of observations before 2011.

set.seed(4560747)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

b.Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

c.Calculate seasonal naive forecasts using SNAIVE() applied to your training data (myseries_train).

fit1 <- myseries_train %>%
  model(SNAIVE(Turnover))

d.Check the residuals. Do the residuals appear to be uncorrelated and normally distributed?

fit1%>%
  gg_tsresiduals() + ggtitle("Residual")

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

e.Produce forecasts for the test data.

fc <- fit1 %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc %>% autoplot(myseries)

f.Compare the accuracy of your forecasts against the actual values.

fit1 %>% accuracy()

## # A tibble: 1 × 12
##   State     Indus…¹ .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South Au… Cafes,… SNAIV… Trai…  2.40  6.07  4.29  5.04  10.7     1     1 0.725
## # … with abbreviated variable name ¹Industry

fc %>% accuracy(myseries)

## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu… Sout… Cafes,… Test   12.6  21.5  15.4  10.7  14.5  3.58  3.53 0.927
## # … with abbreviated variable name ¹Industry

g.How sensitive are the accuracy measures to the amount of training data used? (Challenge)

Question 6

Consider the number of pigs slaughtered in New South Wales (data set aus_livestock).

a.Produce some plots of the data in order to become familiar with it.

nsw_pigs <- aus_livestock %>%
  filter(Animal == "Pigs", State == "New South Wales")

nsw_pigs %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = Count`

nsw_pigs_stl <- nsw_pigs %>%
  model(STL(Count ~ season(window = 12), robust=TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL Decomposition: NSW Pigs Count")
nsw_pigs_stl

b.Create a training set of 486 observations

train_pigs <- nsw_pigs %>%
  filter(year(Month) < 2013)

c.Try using various benchmark methods to forecast the training set and compare the results on the test set. Which method did best? Seasonal Niave fit the data best

train_pigs_fit <- train_pigs %>%
  model(
    mean = MEAN(Count),
    Naive = NAIVE(Count),
    Seasonal_Naive = SNAIVE(Count),
    Drift = RW(Count ~ drift())
  ) 
piggie_fc <- train_pigs_fit %>%
  forecast(h = 12) %>%
  autoplot(train_pigs, level = NULL) +
  labs(title = "Forecasts for quarterly pig count") +
  guides(color = guide_legend(title = "Forecast"))
piggie_fc

accuracy(train_pigs_fit)

## # A tibble: 4 × 12
##   Animal State     .model .type        ME   RMSE    MAE    MPE  MAPE  MASE RMSSE
##   <fct>  <fct>     <chr>  <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Pigs   New Sout… mean   Trai…  2.42e-12 25389. 21496. -5.59   20.8  2.03  1.75
## 2 Pigs   New Sout… Naive  Trai… -3.98e+ 1 15324. 12171. -1.02   11.2  1.15  1.05
## 3 Pigs   New Sout… Seaso… Trai… -8.23e+ 2 14530. 10600  -1.83   10.1  1     1   
## 4 Pigs   New Sout… Drift  Trai…  3.01e-12 15324. 12173. -0.985  11.2  1.15  1.05
## # … with 1 more variable: ACF1 <dbl>

Seasonal Naive best fits the data.

d.Check the residuals of your preferred method. Do they resemble white noise?

fit <- train_pigs %>%
  model(SNAIVE(Count))

fit%>%
  gg_tsresiduals() + ggtitle("Residual")

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

The residuals do not resemble white noise and have correlation between them.

Question 7

We will use the bricks data from aus_production (Australian quarterly clay brick production 1956–2005) for this exercise.

Use an STL decomposition to calculate the trend-cycle and seasonal indices for additive and multiplicative. (Experiment with having fixed (periodic) or changing seasonality.)

brick1 <- aus_production %>%
  filter(year(Quarter) < 2005) %>%
  select(-c(Beer, Tobacco, Cement, Electricity, Gas))

brick_add <- brick1 %>%
  model(classical_decomposition(Bricks, type = "additive")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical additive decomposition of Australian clay production")
brick_add

## Warning: Removed 2 row(s) containing missing values (geom_path).

brick_multi <- brick1 %>%
  model(classical_decomposition(Bricks, type = "multiplicative")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of Australian clay production")
brick_multi

## Warning: Removed 2 row(s) containing missing values (geom_path).

brick_stl_4 <- brick1 %>%
  model(STL(Bricks ~ season(window = 4), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Australian Bricks")
brick_stl_4

brick_period <- brick1 %>%
  model(STL(Bricks ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Australian Bricks")
brick_period

b.Compute and plot the seasonally adjusted data.

brickseasonally <- brick1 %>%
  model(stl = STL(Bricks))
bk_comp <- components(brickseasonally)

bk_seasonadj <- brick1 %>%
  autoplot(Bricks, color = 'gray') +
  autolayer(components(brickseasonally), season_adjust, color='red') +
  labs(y = "Clay Brick Production", title = "Clay Brick Production overtime")
bk_seasonadj

c.Use a naive method to produce forecasts of the seasonally adjusted data.

bk_trend <- bk_comp %>%
  select(-c(.model, Bricks, trend, season_year, remainder)) 

bk_trend %>%
  model(NAIVE(season_adjust)) %>%
  forecast(h = "10 years") %>%
  autoplot(bk_trend) +
  labs(title = "Seasonally Adjusted Forecast (Naive Method)", y = "Bricks")

d.Use decomposition_model() to reseasonalise the results, giving forecasts for the original data.

brickdcmp <- brick1 %>%
  model(stlf = decomposition_model(
    STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)
  ))
brickdcmp %>%
  forecast() %>%
  autoplot(brick1) +
  labs(title = "Forecast with decomposition_model()")

e.Do the residuals look uncorrelated

gg_tsresiduals(brickdcmp)

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

f.Compare forecasts from decomposition_model() with those from SNAIVE(), using a test set comprising the last 2 years of data. Which is better?

brick2 <- aus_production %>%
  filter(year(Quarter) > 2003) %>%
  select(-c(Beer, Tobacco, Cement, Electricity, Gas))

brick3 <- na.omit(brick2)

brick_niave <- brick3 %>%
  model(SNAIVE(Bricks ~ lag("1 year"))) %>%
  forecast(h = "2 years") %>%
  autoplot(brick3) +
  labs(title = "Forecast with SNAIVE since 2004")
brick_niave

brickdcmp2 <- brick3 %>%
  model(stlf = decomposition_model(
    STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)
  ))
brickdcmp2 %>%
  forecast() %>%
  autoplot(brick3) +
  labs(title = "Forecast with decomposition_model() since 2004")

Question 8

tourism contains quarterly visitor nights (in thousands) from 1998 to 2017 for 76 regions of Australia.

a.Extract data from the Gold Coast region using filter() and aggregate total overnight trips (sum over Purpose) using summarise(). Call this new dataset gc_tourism.

gc_tourism <- tourism%>%
  filter(Region == "Gold Coast")%>%
  summarise(Total_Trips = sum(Trips))

b.Using slice() or filter(), create three training sets for this data excluding the last 1, 2 and 3 years. For example, gc_train_1 <- gc_tourism %>% slice(1:(n()-4)).

gc_train_1 <- gc_tourism %>%
  filter(year(Quarter)<= 2016)
gc_train_2 <- gc_tourism %>%
  filter(year(Quarter)<= 2015)
gc_train_3 <- gc_tourism %>%
  filter(year(Quarter)<= 2014)

c.Compute one year of forecasts for each training set using the seasonal naÃ¯ve (SNAIVE()) method. Call these gc_fc_1, gc_fc_2 and gc_fc_3, respectively.

gc_fc_1 <- gc_train_1%>%
  model(Snaive = SNAIVE(Total_Trips))%>%
  forecast(h = 4)
gc_fc_2 <- gc_train_2%>%
  model(Snaive = SNAIVE(Total_Trips))%>%
  forecast(h = 4)
gc_fc_3 <- gc_train_3%>%
  model(Snaive = SNAIVE(Total_Trips))%>%
  forecast(h = 4)

d.Use accuracy() to compare the test set forecast accuracy using MAPE. Comment on these.

gc_fc_1 %>% accuracy(gc_tourism)

## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Snaive Test   75.1  167.  154.  6.36  15.1  2.66  2.36 -0.410

gc_fc_2 %>% accuracy(gc_tourism)

## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Snaive Test   12.0  43.1  39.5  1.14  4.32 0.670 0.599 -0.792

gc_fc_3 %>% accuracy(gc_tourism)

## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Snaive Test   35.8  91.4  83.9  3.56  9.07  1.46  1.30 0.239

Question 9

From the gafa_stock select apple stock close pricings and plot it

aapl_stock <- gafa_stock %>%
  filter(Symbol == "AAPL") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

aapl_stock%>%
  autoplot(Close) +
  labs(title= "Daily Close Price of Apple", y = "USD")

apply the cross-validation with a minimum lenght of 10, growing by 1 each step. (creates test subsets)

aapl_stretch <- aapl_stock %>%
  stretch_tsibble(.init = 10, .step = 1)

Estimate a Random walk with drift model

fit_cv <- aapl_stretch %>%
  model(RW(Close ~ drift()))
fc_cv <- fit_cv %>%
  forecast(h=1)

repeat step c by using a mean model

fit_mean <- aapl_stretch%>%
  model(mean = MEAN(Close))
fc_mean <- fit_mean %>%
  forecast(h=1)

Compare both models’ accuracy. Which model did perfomed the best?

# Cross-validated
fc_cv %>% accuracy(aapl_stock)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259

## # A tibble: 1 × 11
##   .model       Symbol .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr>        <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 RW(Close ~ … AAPL   Test  -0.0158  2.10  1.41 -0.0139  1.06  1.00  1.00 0.0330

# Mean
fc_mean %>% accuracy(aapl_stock)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259

## # A tibble: 1 × 11
##   .model Symbol .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mean   AAPL   Test   26.9  37.6  28.5  16.9  18.6  20.2  17.9 0.996

# Training set
aapl_stock %>% model(RW(Close ~ drift())) %>% accuracy()

## # A tibble: 1 × 11
##   Symbol .model    .type        ME  RMSE   MAE      MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>     <chr>     <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 AAPL   RW(Close… Trai… -1.61e-15  2.10  1.41 -0.00789  1.06 0.999  1.00 0.0332

Question 10

a.From the data set prices plot the wheat (do not forget to remove NA)

wheat <- prices %>%
  select(year, wheat)
wheat_ <- na.omit(wheat)

autoplot(wheat_) +
  labs(title = "Wheat Prices from 1800 to 1997", y = "Wheat Prices", x = "Year")

## Plot variable not specified, automatically selected `.vars = wheat`

b. Fit a RW with drift, and forecast for the next 50 periods (plot)

wheat_ %>%
  model(RW(wheat ~ drift())) %>%
  forecast(h = 50) %>%
  autoplot(wheat_) +
  labs(title = "Wheat Prices forecasted using RW", y = "Wheat Price", x = "Year")

Let’s see that you do not trust on the predict interval estimated. In doing so, please bootstrap 500 scenarios

wheat_boot <- wheat_ %>%
  model(NAIVE(wheat))
wheat_sim <- wheat_boot %>%
  generate(h = 50, times = 500, bootstrap = TRUE)
wheat_sim

## # A tsibble: 25,000 x 4 [1Y]
## # Key:       .model, .rep [500]
##    .model        year .rep   .sim
##    <chr>        <dbl> <chr> <dbl>
##  1 NAIVE(wheat)  1997 1     129. 
##  2 NAIVE(wheat)  1998 1      78.8
##  3 NAIVE(wheat)  1999 1      97.3
##  4 NAIVE(wheat)  2000 1      90.5
##  5 NAIVE(wheat)  2001 1     112. 
##  6 NAIVE(wheat)  2002 1     136. 
##  7 NAIVE(wheat)  2003 1      85.8
##  8 NAIVE(wheat)  2004 1     107. 
##  9 NAIVE(wheat)  2005 1     116. 
## 10 NAIVE(wheat)  2006 1      97.4
## # … with 24,990 more rows

plot all scenarios

wheat_ %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = wheat)) +
  geom_line(aes(y = .sim, color = as.factor(.rep)), data = wheat_sim) +
  guides(color = "none")

e. What are the conditions in which predict intervals from bootstrapped residuals might be reasonable?

You should use bootstraping if the residuals do not follow a normal distribution.