homework 5

library(readxl)
library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)

## 
## Attaching package: 'seasonal'

## The following object is masked from 'package:tibble':
## 
##     view

library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following object is masked from 'package:tsibble':
## 
##     index

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## Loading required package: TTR

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(feasts)
pig <- aus_livestock %>%
  filter(State == "Victoria", Animal == "Pigs")

pig_fit <- pig %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N"))) 

pig_fit %>%
  report()

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

pig_fore <- pig_fit %>%
  forecast(h = 4) 

piggy <- pig %>%
  filter(year(Month) > 2009)

pig_fore %>%
  autoplot(piggy)

pig_fore_mean <- pig_fore %>%
  select(Animal, State, Month, Count, .mean)

pig_mean_pi <- pig_fore_mean %>%
  mutate(pi_lower = .mean - 1.96 * 9353.11) %>%
  mutate(pi_upper = .mean + 1.96 * 9353.11) 
  
pig_mean_pi

## # A fable: 4 x 7 [1M]
## # Key:     Animal, State [1]
##   Animal State       Month             Count  .mean pi_lower pi_upper
##   <fct>  <fct>       <mth>            <dist>  <dbl>    <dbl>    <dbl>
## 1 Pigs   Victoria 2019 Jan N(95187, 8.7e+07) 95187.   76854.  113519.
## 2 Pigs   Victoria 2019 Feb N(95187, 9.7e+07) 95187.   76854.  113519.
## 3 Pigs   Victoria 2019 Mar N(95187, 1.1e+08) 95187.   76854.  113519.
## 4 Pigs   Victoria 2019 Apr N(95187, 1.1e+08) 95187.   76854.  113519.

pig_fore %>%
  hilo(x, level = 95) %>%
  select(Animal, State, Month, Count, .mean, '95%')

## # A tsibble: 4 x 6 [1M]
## # Key:       Animal, State [1]
##   Animal State       Month             Count  .mean                  `95%`
##   <fct>  <fct>       <mth>            <dist>  <dbl>                 <hilo>
## 1 Pigs   Victoria 2019 Jan N(95187, 8.7e+07) 95187. [76854.79, 113518.3]95
## 2 Pigs   Victoria 2019 Feb N(95187, 9.7e+07) 95187. [75927.17, 114445.9]95
## 3 Pigs   Victoria 2019 Mar N(95187, 1.1e+08) 95187. [75042.22, 115330.9]95
## 4 Pigs   Victoria 2019 Apr N(95187, 1.1e+08) 95187. [74194.54, 116178.6]95

The manual parameters turned out fine but I cannot get the hilo() to work. Question 2

aus_econ <- global_economy %>%
  filter(Country == "Australia")

aus_train <- aus_econ %>%
  filter(Year <= year(as.Date("2015-01-01")))


aus_econ %>%
  autoplot(Exports) +
  labs(title = "Australian Exports")

aus_expo <- aus_train %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))


aus_expo %>%
  forecast(h = 10)%>%
  autoplot(aus_train) +
  labs(title = "Aus Exports Forecast")

accuracy(aus_expo)

## # A tibble: 1 × 11
##   Country   .model       .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <fct>     <chr>        <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Australia "ETS(Export… Trai… 0.225  1.14 0.898  1.06  5.36 0.926 0.931 0.00134

aus_linear <- aus_train %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

accuracy(aus_linear)

## # A tibble: 1 × 11
##   Country   .model     .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <fct>     <chr>      <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Australia "ETS(Expo… Trai… -0.0260  1.11 0.887 -0.574  5.43 0.915 0.910 0.0836

aus_linear %>%
  forecast(h = 10)%>%
  autoplot(aus_train) +
  labs(title = "Aus Exports Forecast Linear")

A.From the 80’s to teh 00’s there seemed to be a large amount of growth in exports. There also seems to be a big spike every 15 years or so

E. and D. Both are not bad forecasting methods, but the linear trend model works out better for this data in my opinion. This is because there is a clear upward trend that I notice. The ANN does not account for this trend. My conclusison is supported by the RMSE values.

Question 3

china <- global_economy %>%
  filter(Country == "China") %>%
  select(Year, GDP)

china %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = GDP`

china_guerro <- china %>%
  features(GDP, features = guerrero)

china_box <- china %>%
  autoplot(box_cox(GDP, china_guerro)) +
  labs(y = "China's Transformed GDP")
china_box

china_method <- china %>%
  model(
    'Auto' = ETS(GDP),
    'Simple' = ETS(GDP ~ error("A") + trend("N") + season("N")),
    'H-W' = ETS(GDP ~ error("A") + trend("A") + season("N")),
    'Damped H-W' = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
  )

china_method %>%
  forecast(h = 20) %>%
  autoplot(china)

I think the damped method is the best because China’s growth rate will most likely start to slow as they reach the late stages of first worldness. The damped method will be able to account for this.

Question 4

gas <- aus_production %>%
  select(Quarter, Gas)

gas %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = Gas`

gas_res <- gas %>%
  model(ets = ETS(Gas),
        'Multi Holt' = ETS(Gas ~ error("A") + trend("A") + season("M")),
        'Damped Multi Holt'= ETS(Gas ~ error("A") + trend("Ad") + season("M")))

gas_res %>%
  glance()

## # A tibble: 3 × 9
##   .model              sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>                <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ets                0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413
## 2 Multi Holt        18.2       -899. 1816. 1817. 1847.  17.6  25.1 2.84  
## 3 Damped Multi Holt 18.5       -901. 1821. 1822. 1855.  17.8  25.9 2.81

gas_auto <- gas %>%
  model(ets = ETS(Gas))
        
gas_auto %>%
  report()

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

The data further in the past has much lower variance than the data in the future. This makes multiplicative seasonality necessary. I dont think damped trend helps because it doesnt appear the trend will slow its rate.

Question 5

set.seed(2345678)
myseed <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseed %>% autoplot(Turnover)

myseed_holt <- myseed %>% 
  model(
    hw = ETS(Turnover ~ error("M") + trend("A") + season("M")))%>%
  forecast(h = 36) %>%
  autoplot(myseed) +
  labs(title = "Three Year Forecast") 
myseed_holt

myseed_damped <- myseed %>% 
  model(
    damped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) %>%
    forecast(h = 36) %>%
  autoplot(myseed) +
  labs(title = "Three Year Forecast Damped") 
myseed_damped

multiplicative seasonality is necessary because the variance gets larger over time. This means there is heteroskedacity.

c - e. for some reason I cant get anything I tried past here to run, maybe my computer cant process it for whatever reason. Visiaully though, they looks similar, but I think I prefer the damped because the trend doesnt look very constant. Im sure that they would have white noise if it would load. My best prediction given my data is that nothing will beat SNAIVE.

Question 6

trips <- tourism %>%
  summarize(Trips = sum(Trips))

autoplot(trips)

## Plot variable not specified, automatically selected `.vars = Trips`

trips_stl <- trips %>%
  model(STL(Trips ~ season(window = 4), robust = TRUE))

components(trips_stl) %>%
  autoplot() +
  labs(title = "STL Decomposition")

season_trips <- trips %>%
  model(decomposition_model(STL(Trips),
  ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))) 

season_trips %>%
  forecast(h = 7) %>%
  autoplot(trips)

trips_ets <- trips %>%
  model(ETS(Trips ~ error("A") + trend("A") + season("N"))) 

trips_ets %>%
  forecast(h = 2)

## # A fable: 2 x 4 [1Q]
## # Key:     .model [1]
##   .model                                        Quarter             Trips  .mean
##   <chr>                                           <qtr>            <dist>  <dbl>
## 1 "ETS(Trips ~ error(\"A\") + trend(\"A\") + s… 2018 Q1 N(27594, 1559325) 27594.
## 2 "ETS(Trips ~ error(\"A\") + trend(\"A\") + s… 2018 Q2 N(27920, 1652392) 27920.

trips_auto <- trips %>%
  model(ETS(Trips))

trips_auto

## # A mable: 1 x 1
##   `ETS(Trips)`
##        <model>
## 1 <ETS(A,A,A)>

accuracy(trips_auto)

## # A tibble: 1 × 10
##   .model     .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>      <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 ETS(Trips) Training  105.  794.  604. 0.379  2.86 0.636 0.653 -0.00151

accuracy(trips_ets)

## # A tibble: 1 × 10
##   .model                  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 "ETS(Trips ~ error(\"A… Trai…  123. 1217. 1002. 0.317  4.67  1.06  1.00 0.0895

accuracy(season_trips)

## # A tibble: 1 × 10
##   .model                 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "decomposition_model(… Trai…  103.  763.  576. 0.367  2.72 0.607 0.629 -0.0174

trips_test <- trips %>%
  model(
    'holt' = ETS(Trips ~ error("A") + trend("A") + season("N")),
    'Auto' = ETS(Trips))

trips_test %>%
  forecast(h = "4 years") %>%
  autoplot(trips)

season_trips %>%
  gg_tsresiduals()

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

It is relatively flat until 2010 where there is an increasing trend
The seasonally adjusted game the best values
I think the best forecast is the seasonally adjusted using STL. It follows the trend and moves seasonally just like the data does. The other forecasts can follow the trend but they dont account for the variation.

Question 7

nz <- aus_arrivals %>% 
  filter(Origin == "NZ")
nz %>% 
  autoplot(Arrivals / 1e3)

nz_train <- nz %>%
  slice(1:(n() - 8))
nz_train %>%
  model(ETS(Arrivals ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = "2 years") %>%
  autoplot() +
  autolayer(nz, Arrivals)

nz_fore <- nz_train %>%
  model(
    ets = ETS(Arrivals),
    log = ETS(log(Arrivals)),
    snaive = SNAIVE(Arrivals),
    stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
  ) %>%
  forecast(h = "2 years")
nz_fore %>%
  autoplot(level = NULL) +
  autolayer(filter(nz, year(Quarter) > 2000), Arrivals)

nz_fore %>%
  autoplot(level = NULL) +
  autolayer(nz, Arrivals)

nz_fore %>%
  accuracy(nz)

## # A tibble: 4 × 11
##   .model Origin .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>  <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ets    NZ     Test   -3495. 14913. 11421. -0.964  3.78 0.768 0.771 -0.0260
## 2 log    NZ     Test    2467. 13342. 11904.  1.03   4.03 0.800 0.689 -0.0786
## 3 snaive NZ     Test    9709. 18051. 17156.  3.44   5.80 1.15  0.933 -0.239 
## 4 stl    NZ     Test  -12535. 22723. 16172. -4.02   5.23 1.09  1.17   0.109

log_ets <- nz_train %>%
  model(ETS(log(Arrivals)))
log_ets %>% gg_tsresiduals()

nz_comp <- nz %>%
  slice(1:(n() - 3)) %>%
  stretch_tsibble(.init = 36, .step = 3)
nz_comp %>%
  model(
    ets = ETS(Arrivals),
    log_ets = ETS(log(Arrivals)),
    snaive = SNAIVE(Arrivals),
    stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
  ) %>%
  forecast(h = 3) %>%
  accuracy(nz)

## # A tibble: 4 × 11
##   .model  Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets     NZ     Test  4627. 15327. 11799.  2.23  6.45 0.793 0.797 0.283
## 2 log_ets NZ     Test  4388. 15047. 11566.  1.99  6.36 0.778 0.782 0.268
## 3 snaive  NZ     Test  8244. 18768. 14422.  3.83  7.76 0.970 0.976 0.566
## 4 stl     NZ     Test  4252. 15618. 11873.  2.04  6.25 0.798 0.812 0.244

The variance is much lower in the earlier data and continues to increase in variance over time.
the log method appears to give the best forecasts and it passes the residual tests.
yes

Question 8

cement <- aus_production %>%
  slice(1:(n() - 4)) %>%
  stretch_tsibble(.init = 5 * 4)
cement_fore <- cement %>%
  model(ETS(Cement), SNAIVE(Cement)) %>%
  forecast(h = "1 year")

????

homework 5

Ryan

2022-11-10