Load packages and data

library(fpp3)
library(tidyverse)
library(seasonal)

Questions

Question 1

Part 1 - I think the drift method will be best for this, because the series is close to linear.

aus <- global_economy %>% 
  filter(Country=="Australia") %>% 
  select(Population)
aus %>% 
  autoplot(Population)

aus %>% 
  model(RW(Population ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(aus, level = NULL) +
  geom_line(data = slice(aus, range(cumsum(!is.na(Population)))),
            aes(y=Population), linetype = "dashed", colour = "#0072B2") +
  labs(title = "Australia Population")

Part 2 - The seasonal naive method seems best here: unlike the other benchmark methods, it captures at least some of the seasonality.

bricks <- aus_production %>%
  filter(!is.na(Bricks))%>%
  select(Bricks)
bricks %>% 
  autoplot(Bricks)

bricks %>%
  model(SNAIVE(Bricks ~ lag("5 years"))) %>%
  forecast(h = "5 years") %>%
  autoplot(bricks, level = NULL) +
  labs(title = "Clay brick production in Australia")

Part 3 - The seasonal naive method seems best here as well, for the same reason: it captures the strong seasonality that the other methods ignore.

lambs <- aus_livestock %>% 
  filter(State=="New South Wales",Animal=="Lambs")
lambs %>% 
  autoplot(Count)

lambs %>% 
  model(SNAIVE(Count ~ lag("4 years"))) %>%
  forecast(h = "5 years") %>%
  autoplot(lambs, level = NULL) +
  labs(title = "Lambs slaughtered in NSW")

Part 4 - The drift method should work well here too: the data are annual, so there is no seasonality to capture, and the series mostly trend upwards.

wealth <- hh_budget %>% 
  select(Wealth)
wealth %>% 
  autoplot(Wealth)

aus_wealth <- wealth %>% 
  filter(Country == "Australia")
aus_wealth %>% 
  model(RW(Wealth ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(aus_wealth, level = NULL) +
  geom_line(data = slice(aus_wealth, range(cumsum(!is.na(Wealth)))),
            aes(y = Wealth), linetype = "dashed", colour = "#0072B2") +
  labs(title = "Household wealth of Australia")

usa_wealth <- wealth %>% 
  filter(Country == "USA")
usa_wealth %>% 
  model(RW(Wealth ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(usa_wealth, level = NULL) +
  geom_line(data = slice(usa_wealth, range(cumsum(!is.na(Wealth)))),
            aes(y = Wealth), linetype = "dashed", colour = "#0072B2") +
  labs(title = "Household wealth of USA")

can_wealth <- wealth %>% 
  filter(Country == "Canada")
can_wealth %>% 
  model(RW(Wealth ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(can_wealth, level = NULL) +
  geom_line(data = slice(can_wealth, range(cumsum(!is.na(Wealth)))),
            aes(y = Wealth), linetype = "dashed", colour = "#0072B2") +
  labs(title = "Household wealth of Canada")

jpn_wealth <- wealth %>% 
  filter(Country == "Japan")
jpn_wealth %>% 
  model(RW(Wealth ~ drift())) %>%
  forecast(h = 5) %>%
  autoplot(jpn_wealth, level = NULL) +
  geom_line(data = slice(jpn_wealth, range(cumsum(!is.na(Wealth)))),
            aes(y = Wealth), linetype = "dashed", colour = "#0072B2") +
  labs(title = "Household wealth of Japan")

Part 5 - Either SNAIVE or drift would work here, but SNAIVE is clearly the better choice because the series are strongly seasonal (a quick numerical check is sketched after the plots).

myseries3 <- aus_retail %>% 
  filter(Industry=="Takeaway food services")
myseries3 %>% 
  autoplot(Turnover)

myseries3 %>% 
  model(RW(Turnover ~ drift())) %>%
  forecast(h = 30) %>%
  autoplot(myseries3, level = NULL) +
  labs(title = "Takeaway food turnover: drift forecasts") +
  facet_wrap(~State, scales = "free")

myseries3 %>% 
  model(SNAIVE(Turnover)) %>%
  forecast(h = 30) %>%
  autoplot(myseries3, level = NULL) +
  labs(title = "Takeaway food turnover: seasonal naive forecasts") +
  facet_wrap(~State, scales = "free")
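
To back up the claim that the seasonal naive method is better, here is a sketch of a rough out-of-sample comparison (the 2016 cutoff is an arbitrary choice of mine):

takeaway_train <- myseries3 %>% 
  filter(year(Month) <= 2016)
takeaway_train %>%
  model(
    Drift = RW(Turnover ~ drift()),
    `Seasonal naive` = SNAIVE(Turnover)
  ) %>%
  forecast(new_data = anti_join(myseries3, takeaway_train)) %>%
  accuracy(myseries3) %>%
  select(State, .model, RMSE, MAE, MAPE, MASE)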

Question 2

fb <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)
fb %>% 
  autoplot(Close)

fbfit <- fb %>% 
  model(RW(Close ~ drift()))
fb_fc <- fbfit %>% 
  forecast(h = 40)
fb_fc %>%
  autoplot(fb, level = NULL) +
  labs(y = "$US",
       title = "Facebook Closing Prices") +
  guides(colour = guide_legend(title = "Forecast")) +
  geom_line(data = slice(fb, range(cumsum(!is.na(Close)))),
            aes(y = Close), linetype = "dashed", colour = "#0072B2")

fbfit2 <- fb %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = NAIVE(Close ~ drift())
  )
# Produce forecasts for the next 40 trading days
fb_fc2 <- fbfit2 %>% forecast(h = 40)

# Plot the forecasts
fb_fc2 %>%
  autoplot(fb, level = NULL) +
  labs(y = "$US",
       title = "Facebook Closing Prices") +
  guides(colour = guide_legend(title = "Forecast")) +
  geom_line(data = slice(fb, range(cumsum(!is.na(Close)))),
            aes(y = Close), linetype = "dashed", colour = "#0072B2")

I think the drift method is the best. The stock market is one of the most commonly cited examples of a random walk, and the drift method sets aside the period-to-period randomness and extrapolates only the general trend. The mean and naive methods don't really work here, since the stock price has tended to rise on average.
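
As a rough check of this, here is a sketch that holds out the last 40 trading days and compares the three benchmarks numerically:

fb_train <- fb %>% 
  filter(day <= max(day) - 40)
fb_train %>%
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = RW(Close ~ drift())
  ) %>%
  forecast(h = 40) %>%
  accuracy(fb) %>%
  select(.model, RMSE, MAE, MAPE)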

Question 3

vic <- aus_livestock %>% 
  filter(State=="Victoria")
vic %>% 
  model(SNAIVE(Count ~ lag("5 years"))) %>%
  forecast(h = "5 years") %>%
  autoplot(vic, level = NULL) +
  labs(title = "Livestock in Victoria") +
  facet_wrap(~Animal, scales = "free")

Personally, I think this is a reasonable benchmark. These series all exhibit strong and fairly regular seasonality, so simply repeating the previous seasonal period is sensible, except for the series with clear trends.
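
As a quick sanity check on the benchmark, here is a sketch of the residual diagnostics for one of the Victorian series (Pigs is an arbitrary choice):

vic %>%
  filter(Animal == "Pigs") %>%
  model(SNAIVE(Count)) %>%
  gg_tsresiduals()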

Question 4

1 - True. A good forecast method will generally produce roughly normally distributed residuals, but this is not an essential property. Normality mainly helps with calculating prediction intervals, and a model whose residuals are not normal may still be impossible to improve.

2 - True (somewhat). RMSE and similar measures indicate that a model fits better when its residuals are smaller. That said, residuals are computed on data the model has already seen, while forecasts are for unknown data. In the absence of extreme exogenous shocks, a model with smaller residuals will usually forecast more accurately, but this is not categorically true.

3 - False. MAPE behaves very badly when the actual values are close to or equal to zero: the percentage errors blow up towards infinity, which overstates the problem (see the small numeric sketch after this list). It is also a poor choice for quantities like temperature, where zero is not a meaningful reference point.

4 - False. A common problem in forecasting is over-fitting, which can often be alleviated by making the model simpler.

5 - True. You should check several accuracy measures and assess which of them delivers the most meaningful information, and the forecast itself may also still be improvable, so one shouldn't settle. There is no single perfect measure of forecast accuracy.
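
A small numeric sketch of point 3, using made-up values: every forecast below misses by exactly one unit, yet the single near-zero observation dominates the MAPE.

actual <- c(0.01, 5, 10)                  # hypothetical actuals, one of them near zero
fc     <- c(1.01, 6, 11)                  # each forecast is off by exactly 1 unit
mean(abs((actual - fc) / actual)) * 100   # MAPE is roughly 3343%, driven by the 0.01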

Question 5

set.seed(35266302)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries_train <- myseries %>%
  filter(year(Month) < 2011)
myseries_train
## # A tsibble: 345 x 5 [1M]
## # Key:       State, Industry [1]
##    State           Industry          `Series ID`    Month Turnover
##    <chr>           <chr>             <chr>          <mth>    <dbl>
##  1 South Australia Department stores A3349658K   1982 Apr     48.9
##  2 South Australia Department stores A3349658K   1982 May     52.2
##  3 South Australia Department stores A3349658K   1982 Jun     48.9
##  4 South Australia Department stores A3349658K   1982 Jul     48.3
##  5 South Australia Department stores A3349658K   1982 Aug     49.4
##  6 South Australia Department stores A3349658K   1982 Sep     48.5
##  7 South Australia Department stores A3349658K   1982 Oct     46.1
##  8 South Australia Department stores A3349658K   1982 Nov     58.5
##  9 South Australia Department stores A3349658K   1982 Dec     88.9
## 10 South Australia Department stores A3349658K   1983 Jan     43.5
## # … with 335 more rows
## # ℹ Use `print(n = ...)` to see more rows
autoplot(myseries, Turnover) +
  geom_line(data = myseries_train, aes(x=Month, y=Turnover), color="red")

my_model <- myseries_train %>% 
  model(SNAIVE(Turnover ~ lag("5 years")))

modfit <- my_model %>% 
  forecast(h = "8 years") 
modfit%>% 
  autoplot(myseries_train, level = NULL) +
  labs(title = "Clay brick production in Australia")

gg_tsresiduals(my_model)
## Warning: Removed 60 row(s) containing missing values (geom_path).
## Warning: Removed 60 rows containing missing values (geom_point).
## Warning: Removed 60 rows containing non-finite values (stat_bin).

These residuals do not appear to be normally distributed, and they are clearly autocorrelated.
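
A quick formal check of the autocorrelation, as a sketch using the Ljung-Box test from the fpp3 workflow (lag = 24 is a common choice for monthly data):

augment(my_model) %>%
  features(.innov, ljung_box, lag = 24)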

fc <- my_model %>%
  forecast(new_data = anti_join(myseries, myseries_train))
fc %>% autoplot(myseries)

modfit %>% accuracy(myseries)
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test  -6.62  11.4  9.49 -5.97  8.47  1.89  1.81 0.485
## # … with abbreviated variable name ¹​Industry
my_model %>% 
  accuracy()
## # A tibble: 1 × 12
##   State     Indus…¹ .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South Au… Depart… "SNAI… Trai…  12.7  16.6  13.6  12.7  13.7  2.70  2.64 0.577
## # … with abbreviated variable name ¹​Industry
fc %>% accuracy(myseries)
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test  -6.62  11.4  9.49 -5.97  8.47  1.89  1.81 0.485
## # … with abbreviated variable name ¹​Industry

Challenge

training_accu <- function(dataset, cutoff_year) {
  # Train on all data before the cutoff year, test on everything from that year on
  dataset_train <- dataset %>%
    filter(year(Month) < cutoff_year)
  data_model <- dataset_train %>% 
    model(SNAIVE(Turnover ~ lag("5 years")))
  fc1 <- data_model %>%
    forecast(new_data = anti_join(dataset, dataset_train))
  accu <- fc1 %>% 
    accuracy(dataset)
  return(accu)
}
yearlist <- 1995:2008
for (i in yearlist){
  print(training_accu(myseries,i))
}
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   18.7  23.3  19.3  16.0  16.6  4.27  4.09 0.632
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   18.2  22.8  19.0  15.5  16.4  4.32  4.10 0.644
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   18.0  22.4  18.9  15.3  16.3  4.42  4.12 0.662
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   18.4  22.5  19.2  15.5  16.5  4.50  4.16 0.630
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   18.8  22.8  19.6  15.8  16.7  4.52  4.14 0.553
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   20.2  23.6  20.7  17.0  17.6  4.71  4.23 0.525
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   21.5  24.3  21.6  18.1  18.3  4.85  4.24 0.418
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   22.1  24.5  22.1  18.8  18.8  4.89  4.20 0.341
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   21.0  23.2  21.0  18.0  18.0  4.57  3.93 0.495
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   18.3  21.2  18.6  15.8  16.0  3.98  3.56 0.600
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   15.0  18.3  15.6  12.8  13.4  3.30  3.06 0.578
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   11.3  14.8  12.4  9.68  10.8  2.61  2.46 0.579
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   7.86  11.9  9.66  6.56  8.28  2.01  1.97 0.547
## # … with abbreviated variable name ¹​Industry
## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(T… Sout… Depart… Test   3.84  10.1  8.01  2.98  6.81  1.63  1.65 0.535
## # … with abbreviated variable name ¹​Industry

As can be seen, accuracy tends to get worse as the amount of training data is reduced, although the comparison is not entirely clean: an earlier cutoff also means a longer test period to forecast.
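
Rather than printing fourteen separate tables, the same results could be gathered into a single tibble. Here is a sketch using purrr (loaded with the tidyverse) and the training_accu() function defined above:

accu_by_year <- purrr::map_dfr(
  1995:2008,
  ~ training_accu(myseries, .x) %>% mutate(cutoff_year = .x)
)
accu_by_year %>% 
  select(cutoff_year, RMSE, MAE, MAPE, MASE)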

Question 6

piggy <- aus_livestock %>% 
  filter(Animal=="Pigs") %>% 
  filter(State=="New South Wales")
piggy %>% 
  autoplot(Count)

piggy %>% 
  gg_season(Count)

piggy %>% 
  gg_subseries(Count)

piggy_train <- piggy%>% 
  slice(1:486)
pig_test <- piggy %>% 
  slice(487:558)

pig_mean <- piggy_train %>%
  model(Mean = MEAN(Count))

pig_naive <- piggy_train %>% 
  model(Naive = NAIVE(Count))

pig_snaive <- piggy_train %>% 
  model(`Seasonal naive` = SNAIVE(Count ~ lag("1 year")))

pig_mfc <- pig_mean %>% 
  forecast(h = 72)

pig_nfc <- pig_naive %>% 
  forecast(h=72)

pig_snfc <- pig_snaive %>% 
  forecast(h=72)
pig_mfc %>%
  autoplot(piggy_train, level = NULL) +
  autolayer(pig_test, Count, colour = "black")

pig_nfc %>%
  autoplot(piggy_train, level = NULL) +
  autolayer(pig_test, Count, colour = "black")

pig_snfc %>%
  autoplot(piggy_train, level = NULL) +
  autolayer(pig_test, Count, colour = "black")

gg_tsresiduals(pig_mean)

gg_tsresiduals(pig_naive)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

gg_tsresiduals(pig_snaive)
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).

My preferred method is SNAIVE. Its residuals come closest to being normally distributed with a mean of zero, but all of the methods leave residuals that are strongly correlated with recent values. That is to be expected, since the naive and seasonal naive methods are built directly from previous observations.
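
To put numbers behind this preference, here is a sketch comparing the three forecasts on the held-out test period:

bind_rows(
  accuracy(pig_mfc, piggy),
  accuracy(pig_nfc, piggy),
  accuracy(pig_snfc, piggy)
) %>% 
  select(.model, RMSE, MAE, MAPE, MASE)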

Question 7

Here is an additive STL decomposition with a periodic (fixed) seasonal window. The forecast applies the naive method to the seasonally adjusted series.

br <- aus_production %>% 
  select(Bricks) %>% 
  slice(1:198)
fixedbricks <- br %>% 
  model(STL(Bricks~trend(window=7)+season(window="periodic"),robust=TRUE)) %>% 
  components() 
fixedbricks %>% 
  autoplot()

fixedbricks %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=season_adjust,color='Adjusted'))

fixadj <- fixedbricks %>% 
  select(season_adjust) 
fixfit <- fixadj%>% 
  model(`Naive`=NAIVE(season_adjust))
fixfc <- fixfit %>% 
  forecast(h=4)
fixfc %>% 
  autoplot(fixadj)

gg_tsresiduals(fixfit)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

br %>% 
  autoplot(Bricks)

br %>% 
  autoplot(box_cox(Bricks,0))+
  labs(title="Log Bricks")

Here is a multiplicative decomposition with a fixed seasonal window. Again, the forecast applies the naive method to the seasonally adjusted series.

Note: I believe this is effectively multiplicative. I applied a Box-Cox transformation with lambda = 0, which is a log transformation, and the textbook notes that an additive decomposition of the logged series corresponds to a multiplicative decomposition of the original.

bc_fixedbricks <- br %>% 
  model(STL(box_cox(Bricks,0)~trend(window=7)+season(window="periodic"),robust=TRUE)) %>% 
  components()
bc_fixedbricks %>% 
  autoplot()

bc_fixedbricks %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=season_adjust,color='Adjusted'))

bc_fixadj <- bc_fixedbricks %>% 
  select(season_adjust) 
bc_fixfit <- bc_fixadj%>% 
  model(`Naive`=NAIVE(season_adjust))
bc_fixfc <- bc_fixfit %>% 
  forecast(h=4)
bc_fixfc %>% 
  autoplot(bc_fixadj)

gg_tsresiduals(bc_fixfit)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

This is an additive decomposition with a seasonal window of 13 rather than a periodic window.

flexbricks <- br %>% 
  model(STL(Bricks~trend(window=7)+season(window=13),robust=TRUE)) %>% 
  components()
flexbricks %>% 
  autoplot()

flexbricks %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=season_adjust,color='Adjusted'))

flxadj <- flexbricks %>% 
  select(season_adjust) 
flxfit <- flxadj%>% 
  model(`Naive`=NAIVE(season_adjust))
flxfc <- flxfit %>% 
  forecast(h=4)
flxfc %>% 
  autoplot(flxadj)

gg_tsresiduals(flxfit)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

This is a multiplicative decomposition with a seasonal window of 13 rather than a periodic window.

bc_flexbricks <- br %>% 
  model(STL(box_cox(Bricks,0)~trend(window=7)+season(window=13),robust=TRUE)) %>% 
  components()
bc_flexbricks %>% 
  autoplot()

bc_flexbricks %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=season_adjust,color='Adjusted'))

bc_flxadj <- bc_flexbricks %>% 
  select(season_adjust) 
bc_flxfit <- bc_flxadj%>% 
  model(`Naive`=NAIVE(season_adjust))
bc_flxfc <- bc_flxfit %>% 
  forecast(h=4)
bc_flxfc %>% 
  autoplot(bc_flxadj)

gg_tsresiduals(bc_flxfit)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

This is an additive decomposition with a seasonal window of 2 rather than a periodic window.

flexbricks2 <- br %>% 
  model(STL(Bricks~trend(window=7)+season(window=2),robust=TRUE)) %>% 
  components()
flexbricks2 %>% 
  autoplot()

flexbricks2 %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=season_adjust,color='Adjusted'))

flxadj2 <- flexbricks2 %>% 
  select(season_adjust) 
flxfit2 <- flxadj2%>% 
  model(`Naive`=NAIVE(season_adjust))
flxfc2 <- flxfit2 %>% 
  forecast(h=4)
flxfc2 %>% 
  autoplot(flxadj2)

gg_tsresiduals(flxfit2)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

This is a multiplicative decomposition with a seasonal window of 2 rather than a periodic window.

bc_flexbricks2 <- br %>% 
  model(STL(box_cox(Bricks,0)~trend(window=7)+season(window=2),robust=TRUE)) %>% 
  components()
bc_flexbricks2 %>% 
  autoplot()

bc_flexbricks2 %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=season_adjust,color='Adjusted'))

bc_flxadj2 <- bc_flexbricks2 %>% 
  select(season_adjust) 
bc_flxfit2 <- bc_flxadj2 %>% 
  model(`Naive`=NAIVE(season_adjust))
bc_flxfc2 <- bc_flxfit2 %>% 
  forecast(h=4)
bc_flxfc2 %>% 
  autoplot(bc_flxadj2)

gg_tsresiduals(bc_flxfit2)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

This is the decomposition model: an STL decomposition in which the seasonally adjusted series is forecast (here with SNAIVE) and the forecasts are reseasonalised automatically.

br %>%
  model(stlf = decomposition_model(
    STL(Bricks ~ trend(window = 7), robust = TRUE),
    SNAIVE(season_adjust)
  )) %>%
  forecast() %>%
  autoplot(br)+
  labs(y = "Number",
       title = "Brick Production")

gg_tsresiduals(br %>%
                 model(stlf = decomposition_model(
                   STL(Bricks ~ trend(window = 7), robust = TRUE),
                   SNAIVE(season_adjust)
                 )))
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

The residuals do not appear uncorrelated.

br_train <- aus_production %>% 
  select(Bricks) %>% 
  slice(1:190)

br_model <- br_train %>% 
  model(SNAIVE(Bricks))
fcbrs <- br_model %>%
  forecast(new_data = anti_join(br, br_train))
fcbrs %>% accuracy(br)
## # A tibble: 1 × 10
##   .model         .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>          <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 SNAIVE(Bricks) Test   2.75    20  18.2 0.395  4.52 0.504 0.407 -0.0503
decomp_br <- br_train %>%
  model(stlf = decomposition_model(
    STL(Bricks ~ trend(window = 7), robust = TRUE),
    NAIVE(season_adjust)
  ))
fcbrd <- decomp_br %>%
  forecast(new_data = anti_join(br, br_train))
fcbrd %>% accuracy(br)
## # A tibble: 1 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 stlf   Test   4.78  18.1  14.4  1.01  3.55 0.399 0.369 0.0390

The decomposition model is somewhat better than SNAIVE on RMSE, MAE, MAPE, and MASE.
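
The same comparison can also be run from a single model() call, which guarantees both methods are evaluated on exactly the same test set. A sketch:

both_models <- br_train %>%
  model(
    snaive = SNAIVE(Bricks),
    stlf = decomposition_model(
      STL(Bricks ~ trend(window = 7), robust = TRUE),
      NAIVE(season_adjust)
    )
  )
both_models %>%
  forecast(new_data = anti_join(br, br_train)) %>%
  accuracy(br) %>%
  select(.model, RMSE, MAE, MAPE, MASE)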

Question 8

gc_tourism <- tourism %>%
  filter(Region=='Gold Coast') %>% 
  summarise(sum(Trips))
gc_tourism
## # A tsibble: 80 x 2 [1Q]
##    Quarter `sum(Trips)`
##      <qtr>        <dbl>
##  1 1998 Q1         827.
##  2 1998 Q2         681.
##  3 1998 Q3         839.
##  4 1998 Q4         820.
##  5 1999 Q1         987.
##  6 1999 Q2         751.
##  7 1999 Q3         822.
##  8 1999 Q4         914.
##  9 2000 Q1         871.
## 10 2000 Q2         780.
## # … with 70 more rows
## # ℹ Use `print(n = ...)` to see more rows
gc_train_1 <- gc_tourism %>% 
  slice(1:76)
gc_train_2 <- gc_train_1 %>% 
  slice(1:72)
gc_train_3 <- gc_train_2 %>% 
  slice(1:68)
model1 <- gc_train_1 %>% 
  model(SNAIVE(`sum(Trips)`))
fit1 <- model1 %>% 
  forecast(h=4)
fit1 %>% 
  autoplot(gc_train_1)

model2 <- gc_train_2 %>% 
  model(SNAIVE(`sum(Trips)`))
fit2 <- model2 %>% 
  forecast(h=4)
fit2 %>% 
  autoplot(gc_train_2)

model3 <- gc_train_3 %>% 
  model(SNAIVE(`sum(Trips)`))
fit3 <- model3 %>% 
  forecast(h=4)
fit3 %>% 
  autoplot(gc_train_3)

accuracy(fit1, gc_tourism)
## # A tibble: 1 × 10
##   .model               .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 SNAIVE(`sum(Trips)`) Test   75.1  167.  154.  6.36  15.1  2.66  2.36 -0.410
accuracy(fit2, gc_train_1)
## # A tibble: 1 × 10
##   .model               .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 SNAIVE(`sum(Trips)`) Test   12.0  43.1  39.5  1.14  4.32 0.670 0.599 -0.792
accuracy(fit3, gc_train_2)
## # A tibble: 1 × 10
##   .model               .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(`sum(Trips)`) Test   35.8  91.4  83.9  3.56  9.07  1.46  1.30 0.239

According to MAPE, the forecast trained with the last two years held out is the most accurate, followed by the three-year version, with the one-year version last. Accuracy does not simply improve with more training data here, partly because each forecast is evaluated against a different year.

Question 9

apple <- gafa_stock %>%
  filter(Symbol == "AAPL") %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE) %>% 
  select(Close)
apple_tr <- apple %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(trading_day, .id)
apple_tr %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 1) %>%
  accuracy(apple)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259
## # A tibble: 1 × 10
##   .model              .type      ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr>               <chr>   <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 RW(Close ~ drift()) Test  -0.0158  2.10  1.41 -0.0139  1.06  1.00  1.00 0.0330
apple_tr %>%
  model(MEAN(Close)) %>%
  forecast(h = 1) %>%
  accuracy(apple)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 1259
## # A tibble: 1 × 10
##   .model      .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>       <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Close) Test   26.9  37.6  28.5  16.9  18.6  20.2  17.9 0.996

The drift model did substantially better. All of its error measures are much lower.
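
For completeness, the naive benchmark could be added to the same one-step cross-validation comparison. A sketch:

apple_tr %>%
  model(
    Drift = RW(Close ~ drift()),
    Naive = NAIVE(Close),
    Mean = MEAN(Close)
  ) %>%
  forecast(h = 1) %>%
  accuracy(apple)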

Question 10

wheaties <- prices %>% 
  select(wheat) %>% 
  slice(1:197)
wheaties %>% 
  autoplot(wheat)

wm <- wheaties %>% 
  model(RW(wheat ~ drift()))
wmf <- wm%>%
  forecast(h = 50)
wmf %>% 
  autoplot(wheaties)

sim <- wm %>%
  generate(h = 50, times = 500, bootstrap = TRUE)
sim
## # A tsibble: 25,000 x 4 [1Y]
## # Key:       .model, .rep [500]
##    .model               year .rep   .sim
##    <chr>               <dbl> <chr> <dbl>
##  1 RW(wheat ~ drift())  1997 1     -41.5
##  2 RW(wheat ~ drift())  1998 1     -36.0
##  3 RW(wheat ~ drift())  1999 1      38.6
##  4 RW(wheat ~ drift())  2000 1      44.2
##  5 RW(wheat ~ drift())  2001 1     217. 
##  6 RW(wheat ~ drift())  2002 1     249. 
##  7 RW(wheat ~ drift())  2003 1     261. 
##  8 RW(wheat ~ drift())  2004 1     225. 
##  9 RW(wheat ~ drift())  2005 1     300. 
## 10 RW(wheat ~ drift())  2006 1     279. 
## # … with 24,990 more rows
## # ℹ Use `print(n = ...)` to see more rows
wheaties %>%
  ggplot(aes(x = year)) +
  geom_line(aes(y = wheat)) +
  geom_line(aes(y = .sim, colour = as.factor(.rep)),
            data = sim) +
  labs(title="Wheat Prices", y="Amount" ) +
  guides(colour = "none")

Bootstrapping the residuals is a good idea when a normal distribution is an unreasonable assumption for them; it still requires the residuals to be uncorrelated and to have constant variance.
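
fable can also turn bootstrapped sample paths like these into prediction intervals directly, via the bootstrap argument of forecast(). A sketch:

wm %>%
  forecast(h = 50, bootstrap = TRUE, times = 500) %>%
  autoplot(wheaties) +
  labs(title = "Wheat prices with bootstrapped prediction intervals", y = "Amount")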