Homework 3

library(readxl)
library(tidyquant)

## Loading required package: lubridate

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

## Loading required package: PerformanceAnalytics

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'PerformanceAnalytics'

## The following object is masked from 'package:graphics':
## 
##     legend

## Loading required package: quantmod

## Loading required package: TTR

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8      ✔ tsibble     1.1.2 
## ✔ dplyr       1.0.10     ✔ tsibbledata 0.4.1 
## ✔ tidyr       1.2.0      ✔ feasts      0.3.0 
## ✔ ggplot2     3.3.6      ✔ fable       0.3.2

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::first()       masks xts::first()
## ✖ tsibble::index()     masks zoo::index()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ dplyr::last()        masks xts::last()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
## ✖ fable::VAR()         masks tidyquant::VAR()

## 
## Attaching package: 'fpp3'

## The following object is masked from 'package:PerformanceAnalytics':
## 
##     prices

library(moments)

## 
## Attaching package: 'moments'

## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness

library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)

## 
## Attaching package: 'seasonal'

## The following object is masked from 'package:tibble':
## 
##     view

library(seasonalview)

## 
## Attaching package: 'seasonalview'

## The following object is masked from 'package:seasonal':
## 
##     view

## The following object is masked from 'package:tibble':
## 
##     view

library(fabletools)
library(fable)
library(ggfortify)

#Question 1

#Question 1 Part a
auspop_plot=global_economy %>%
  filter(Country == "Australia")

autoplot(auspop_plot,Population) +
  labs(title = "Population of Australia")

auspop_plot%>%
  model(RW(Population ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(global_economy) +
  labs(y = "Population", title = "Population of Australia, Forecasted")

#I used the drift method, as it allows me to catch the strong upward trend in my forecast.

#Question 1 Part b
brick<-aus_production %>% 
  filter(!is.na(Bricks))

brick %>%
  model(SNAIVE(Bricks ~ lag("year")))%>%
  forecast(h = 8)%>%
  autoplot(brick) +
  labs(title = "Australian Brick Production, Forecasted")

#I used the SNAIVE method due to the extremely high seasonality present in the data. 

#Question 1 Part c

NSW_lambs=aus_livestock %>%
  filter(State== "New South Wales" & Animal== "Lambs") 


NSW_lambsforecast = NSW_lambs%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))

NSW_lambsforecast %>% 
  forecast(h = 10)%>%
  autoplot(NSW_lambs)

#Because of the downward trend and high seasonality, I used the SNAIVE method to capture both.

#Question 1 Part d
hh_plot1 <- hh_budget %>% 
  model(naive = NAIVE(Wealth))

hh_plot1 %>% 
  forecast(h = 5)%>%
  autoplot(hh_budget) + 
  labs(title="HH Wealth Forecasted")

#I used the NAIVE method due to the low variance and the general stability of the four countries' economies.

#Question 1 Part e
takeway_turnover <- aus_retail %>%
  filter(Industry == 'Takeaway food services') %>%
  select(Month, Turnover) 

takeway_turnover <- takeway_turnover[,1:2] %>%
  group_by(Month) %>%
  summarise(Turnover = sum(Turnover)) %>%
  as_tsibble(index = Month)

train <- takeway_turnover %>%
  filter_index(. ~ '2018 Jun')

takeaway_turnover_fit <- train %>%
  model(`Seasonal Naive` = SNAIVE(Turnover)
        )

takeaway_turnover_fc <- takeaway_turnover_fit %>%
  forecast(h = '6 months')


takeaway_turnover_fc %>%
  autoplot(train, level = NULL) +
  autolayer(
    filter_index(takeway_turnover, '2018 Jul' ~ .),
    color = 'black'
  ) +
  labs(title = 'Takeaway Turnover Forecast July - December 2018' ) +
  guides(color = guide_legend(title = 'Forecast'))

## Plot variable not specified, automatically selected `.vars = Turnover`

#I reduced the scope of the time to make modeling easier. I used the SNAIVE method to capture the upward trend and seasonality.

#Question 2

fb_stock <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

fb_stock%>%
  autoplot(Open) +
  labs(title= "Facebook Stock Opening Prices")

#Question 2 Part b
fb_stock %>%
  model(RW(Open ~ drift())) %>%
  forecast(h = 90) %>%
  autoplot(fb_stock) +
  labs(title = "Facebook Stock Opening Price")

#Question 2 Part c
fb_stock %>%
  model(RW(Open ~ drift())) %>%
  forecast(h = 90) %>%
  autoplot(fb_stock) +
  labs(title = "Facebook Stock Opening Price") +
  geom_segment(aes(x = 1, y = 54.83, xend = 1258, yend = 134.45),
               colour = "red", linetype = "dashed")

#Question 2 Part d
fb_stock %>%
  model(Mean = MEAN(Open),
        `Naïve` = NAIVE(Open),
        Drift = NAIVE(Open ~ drift())) %>%
  forecast(h = 90) %>%
  autoplot(fb_stock, level = NULL) +
  labs(title = "Facebook Stock Opening Price")

#The drift function works best to capture the upward trend observed at the end of the plot.

#Question 3

aus_livestock %>%
  distinct(Animal)

## # A tibble: 7 × 1
##   Animal                    
##   <fct>                     
## 1 Bulls, bullocks and steers
## 2 Calves                    
## 3 Cattle (excl. calves)     
## 4 Cows and heifers          
## 5 Lambs                     
## 6 Pigs                      
## 7 Sheep

series1 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Bulls, bullocks and steers")
series1forecast = series1%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))
series1forecast %>% 
  forecast(h = 24)%>%
  autoplot(series1)

series2 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Calves")
series2forecast = series2%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))
series2forecast %>% 
  forecast(h = 24)%>%
  autoplot(series2)

series3 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Cattle (excl. calves)")
series3forecast = series3%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))
series3forecast %>% 
  forecast(h = 24)%>%
  autoplot(series3)

series4 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Cows and heifers")
series4forecast = series4%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))
series4forecast %>% 
  forecast(h = 24)%>%
  autoplot(series4)

series5 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Lambs")
series5forecast = series5%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))
series5forecast %>% 
  forecast(h = 24)%>%
  autoplot(series5)

series6 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Pigs")
series6forecast = series6%>%
  model(`Seasonal Naïve` = SNAIVE(Count))
series6forecast %>% 
  forecast(h = 24)%>%
  autoplot(series6)

series7 = aus_livestock %>%
  filter(State == "Victoria" & Animal == "Sheep")
series7forecast = series7%>% 
  model(`Seasonal Naïve` = SNAIVE(Count))
series7forecast %>% 
  forecast(h = 24)%>%
  autoplot(series7)

#Since all series display a high degree of seasonality, SNAIVE is a reasonaable benchmark for them.

#Question 4

#Question 4 Part a
#True, residuals should always be normally distributed in an effective model.

#Question 4 Part b
#False. This does not necessarily indicate a mean of 0, normal distribution, and constant variance, which are necessary for effective forecasting.

#Question 4 Part c
#False. MAPE is highly sensitive to outliers. 

#Question 4 Part d
#False. Making a more complicated model will not fix issues. One should look at transforming the data or consider exogenous factors.

#Question 4 Part e
#True. It is best to choose a model based on performance with the test set as the objective of forecasting is to correctly forecast new data.

#Question 5

#Question 5 Part a
aus_retail

## # A tsibble: 64,532 x 5 [1M]
## # Key:       State, Industry [152]
##    State                        Industry                Serie…¹    Month Turno…²
##    <chr>                        <chr>                   <chr>      <mth>   <dbl>
##  1 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Apr     4.4
##  2 Australian Capital Territory Cafes, restaurants and… A33498… 1982 May     3.4
##  3 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jun     3.6
##  4 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jul     4  
##  5 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Aug     3.6
##  6 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Sep     4.2
##  7 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Oct     4.8
##  8 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Nov     5.4
##  9 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Dec     6.9
## 10 Australian Capital Territory Cafes, restaurants and… A33498… 1983 Jan     3.8
## # … with 64,522 more rows, and abbreviated variable names ¹`Series ID`,
## #   ²Turnover

set.seed(87654321)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

fit <- myseries_train %>%
  model(SNAIVE(Turnover))

fit %>% gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

#The residuals are not uncorrelated and the histogram shows that they are not centered around 0. 

fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc %>% autoplot(myseries)

fit %>% accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Footwea… SNAIV… Trai…  5.12  10.2  7.33  5.99  8.82     1     1 0.681

fc %>% accuracy(myseries)

## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu… Vict… Footwe… Test  -1.47  20.4  16.0 -2.37  9.54  2.19  1.99 0.646
## # … with abbreviated variable name ¹Industry

#The accuracy measures are highly sensitive to the split since the variability grows more and more severe. The mean based error measurements are far more affected by the severe variability than the percentage based ones.

#Question 6

#Question 6 Part a
real_data <- aus_livestock %>%
  filter(State == "New South Wales" & Animal == "Pigs")
autoplot(real_data)

## Plot variable not specified, automatically selected `.vars = Count`

#Question 6 Part b
aus_train <- real_data %>%
  slice(1:486)

aus_test <- real_data %>%
  slice(487:558)

#Question 6 Part c
austrain_fit <- aus_train %>%
  model(
    mean = MEAN(Count),
    Naive = NAIVE(Count),
    Seasonal_Naive = SNAIVE(Count),
    Drift = RW(Count ~ drift())
  ) 
austrain_fc <- austrain_fit %>%
  forecast(h = 12) %>%
  autoplot(aus_train, level = NULL) +
  labs(title = "Forecasts for quarterly pig count") +
  guides(color = guide_legend(title = "Forecast"))
austrain_fc

accuracy(austrain_fit)

## # A tibble: 4 × 12
##   Animal State     .model .type        ME   RMSE    MAE    MPE  MAPE  MASE RMSSE
##   <fct>  <fct>     <chr>  <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Pigs   New Sout… mean   Trai…  2.42e-12 25389. 21496. -5.59   20.8  2.03  1.75
## 2 Pigs   New Sout… Naive  Trai… -3.98e+ 1 15324. 12171. -1.02   11.2  1.15  1.05
## 3 Pigs   New Sout… Seaso… Trai… -8.23e+ 2 14530. 10600  -1.83   10.1  1     1   
## 4 Pigs   New Sout… Drift  Trai…  3.01e-12 15324. 12173. -0.985  11.2  1.15  1.05
## # … with 1 more variable: ACF1 <dbl>

#The SNAIVE method had the highest degree of accuracy. 

ausnaive <- aus_train %>%
  model(Seasonal_Naive = SNAIVE(Count))

ausnaive %>%
  gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

#These residuals do not resemble white noise. There is autocorrelation present from lags 1 to 11, the mean is not centered around 0, and the distribution of residuals is skewed to the right.

#Question 7

#Question 7 Part a 
aus_clayyears <- aus_production %>%
  filter(year(Quarter) < 2005) %>%
  select(-c(Beer, Tobacco, Cement, Electricity, Gas))

aus_clayadd <- aus_clayyears %>%
  model(classical_decomposition(Bricks, type = "additive")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical additive decomposition of Australian clay production")
aus_clayadd

## Warning: Removed 2 row(s) containing missing values (geom_path).

aus_claymulti <- aus_clayyears %>%
  model(classical_decomposition(Bricks, type = "multiplicative")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical multiplicative decomposition of Australian clay production")
aus_claymulti

## Warning: Removed 2 row(s) containing missing values (geom_path).

aus_claystlfix <- aus_clayyears %>%
  model(STL(Bricks ~ season(window = 4), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Australian Bricks")
aus_claystlfix

aus_claystlperiod <- aus_clayyears %>%
  model(STL(Bricks ~ season(window = "periodic"), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Australian Bricks")
aus_claystlperiod

#Question 7 Part b
aus_claydcmp <- aus_clayyears %>%
  model(stl = STL(Bricks))
aus_cmp <- components(aus_claydcmp)

aus_claysa <- aus_clayyears %>%
  autoplot(Bricks, color = 'red') +
  autolayer(components(aus_claydcmp), trend, color='red') +
  labs(y = "Clay Brick Production", title = "Clay Brick Production overtime")
aus_claysa

#Question 7 Part c
aus_trend <- aus_cmp %>%
  select(-c(.model, Bricks, trend, season_year, remainder)) 

aus_trend %>%
  model(NAIVE(season_adjust)) %>%
  forecast(h = "5 years") %>%
  autoplot(aus_trend) +
  labs(title = "Seasonally adjusted naive forecast", y = "Bricks")

#Question 7 Part d

aus_claydecomp <- aus_clayyears %>%
  model(stlf = decomposition_model(
    STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)
  ))
aus_claydecomp %>%
  forecast() %>%
  autoplot(aus_clayyears) +
  labs(title = "Forecast with decomposition_model()")

#Question 8

#Question 8 Part a

gc_tourism <- tourism %>%
  filter(Region == "Gold Coast") %>%
  summarise(Total_Trips = sum(Trips))
gc_tourism

## # A tsibble: 80 x 2 [1Q]
##    Quarter Total_Trips
##      <qtr>       <dbl>
##  1 1998 Q1        827.
##  2 1998 Q2        681.
##  3 1998 Q3        839.
##  4 1998 Q4        820.
##  5 1999 Q1        987.
##  6 1999 Q2        751.
##  7 1999 Q3        822.
##  8 1999 Q4        914.
##  9 2000 Q1        871.
## 10 2000 Q2        780.
## # … with 70 more rows

#Question 8 Part b
gc_tourism_ts <- gc_tourism %>%
  as_tsibble(index = Quarter)

gc_train_1 <- gc_tourism_ts %>% slice(1:(n()-4))
gc_train_2 <- gc_tourism_ts %>% slice(1:(n()-8))
gc_train_3 <- gc_tourism_ts %>% slice(1:(n()-12))

#Question 8 Part c
gc_fc_1 <- gc_train_1 %>%
  model(SNAIVE(Total_Trips ~ lag("year"))) %>%
  forecast(h = "1 year") 
gc_fc_1 %>%
  autoplot(gc_train_1) +
  labs(title = "Train Forecast -1 Year", y = "Total Trips", x = "Time")

gc_fc_2 <- gc_train_2 %>%
  model(SNAIVE(Total_Trips ~ lag("year"))) %>%
  forecast(h = "1 year")
gc_fc_2 %>%
  autoplot(gc_train_2) +
  labs(title = "Train Forecast -2 Years", y = "Total Trips", x = "Time")

gc_fc_3 <- gc_train_3 %>%
  model(SNAIVE(Total_Trips ~ lag("year"))) %>%
  forecast(h = "1 year")
gc_fc_3 %>%
  autoplot(gc_train_3) +
  labs(title = "Train Forecast -3 Years", y = "Total Trips", x = "Time")

#Question 8 Part d
gc_fc_1 %>%
  accuracy(gc_tourism_ts)

## # A tibble: 1 × 10
##   .model                  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 "SNAIVE(Total_Trips ~ … Test   75.1  167.  154.  6.36  15.1  2.66  2.36 -0.410

gc_fc_2 %>%
  accuracy(gc_tourism_ts)

## # A tibble: 1 × 10
##   .model                  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 "SNAIVE(Total_Trips ~ … Test   12.0  43.1  39.5  1.14  4.32 0.670 0.599 -0.792

gc_fc_3 %>%
  accuracy(gc_tourism_ts)

## # A tibble: 1 × 10
##   .model                   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Total_Trips ~ l… Test   35.8  91.4  83.9  3.56  9.07  1.46  1.30 0.239

#gc_fc_2 has the lowest MAPE and is therefore the most accurate of the three forecasts.

#Question 9

#Question 9 Part a
apple_stock <- gafa_stock %>%
  filter(Symbol == "AAPL") %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE) %>%
  select(-c(Open, High, Low, Adj_Close, Volume))
autoplot(apple_stock) +
  labs(title = "Apple Closing Stock Prices", x = "Trading Days", y = "Closing Price(USD)")

## Plot variable not specified, automatically selected `.vars = Close`

#Question 9 Part b
apple_crossval <- apple_stock %>%
  stretch_tsibble(.init = 10, .step = 1) %>%
  relocate(Date, Symbol, .id)

#Question 9 Part c
apple_walk <- apple_crossval %>%
  model(RW(Close ~ drift())) %>%
  forecast(h=1) %>%
  accuracy(apple_stock)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259

#Question 9 Part d 
apple_mean <- apple_crossval %>%
  model(MEAN(Close)) %>%
  forecast(h=1) %>%
  accuracy(apple_stock)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259

#Question 9 Part e 
apple_walk

## # A tibble: 1 × 11
##   .model       Symbol .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr>        <chr>  <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 RW(Close ~ … AAPL   Test  -0.0158  2.10  1.41 -0.0139  1.06  1.00  1.00 0.0330

apple_mean

## # A tibble: 1 × 11
##   .model      Symbol .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Close) AAPL   Test   26.9  37.6  28.5  16.9  18.6  20.2  17.9 0.996

#The random walk forecast was more accurate than the mean forecast as it had the lowest RMSE.

#Question 10

#Question 10 Part a 
wheat_remove <- prices %>%
  select(-c(eggs, chicken, copper, nails, oil))

wheat_clean <- na.omit(wheat_remove)

autoplot(wheat_clean) +
  labs(title = "Wheat Production", y = "Volume", x = "Year")

## Plot variable not specified, automatically selected `.vars = wheat`

#Question 10 Part b
wheat_clean %>%
  model(RW(wheat ~ drift())) %>%
  forecast(h = 50) %>%
  autoplot(wheat_clean) +
  labs(title = "Wheat Forecast RW", y = "Volume", x = "Year")

#Question 10 Part c
wheat_naive <- wheat_clean %>%
  model(NAIVE(wheat))
wheat_boot <- wheat_naive %>%
  generate(h = 50, times = 500, bootstrap = TRUE)
wheat_boot

## # A tsibble: 25,000 x 4 [1Y]
## # Key:       .model, .rep [500]
##    .model        year .rep   .sim
##    <chr>        <dbl> <chr> <dbl>
##  1 NAIVE(wheat)  1997 1     124. 
##  2 NAIVE(wheat)  1998 1      98.5
##  3 NAIVE(wheat)  1999 1     107. 
##  4 NAIVE(wheat)  2000 1      95.0
##  5 NAIVE(wheat)  2001 1     161. 
##  6 NAIVE(wheat)  2002 1      56.5
##  7 NAIVE(wheat)  2003 1      68.5
##  8 NAIVE(wheat)  2004 1     152. 
##  9 NAIVE(wheat)  2005 1     150. 
## 10 NAIVE(wheat)  2006 1     327. 
## # … with 24,990 more rows

#Question 10 Part d
wheat_clean %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = wheat)) +
  geom_line(aes(y = .sim, color = as.factor(.rep)), data = wheat_boot) +
  guides(colour = "none")

wheat_bootclean <- wheat_naive %>%
  forecast(h = 50, bootstrap = TRUE, times = 500)

autoplot(wheat_bootclean, wheat_clean)

#Question 10 Part e
#They are reasonable when the residuals are not expected to follow a normal distribution. The goal of bootstrapping is to create a set of future values and derive a prediction interval.

Homework 3

Max Harley

2022-10-02