library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(readxl)
ffr <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\FF.xls") %>%
  mutate(Week=as_date(Date)) %>%
  as_tsibble(index=Week) %>% 
  select(-Date)
ffr %>% 
  autoplot(FF)

This is weekly data.

First, I will compare the four benchmark forecasts and a TSLM model.

ff_train <- ffr %>% 
  filter(year(Week)<2017)
ff_test <- ffr%>% 
  filter(year(Week)>2016)
fffit <- ff_train %>% 
  model(
    Mean = MEAN(FF),
    Naive = NAIVE(FF),
    Drift = RW(FF ~ drift()),
    tslm1=TSLM(FF ~ trend())
  )
accuracy(fffit)
## # A tibble: 4 × 10
##   .model .type           ME  RMSE    MAE      MPE   MAPE  MASE  RMSSE   ACF1
##   <chr>  <chr>        <dbl> <dbl>  <dbl>    <dbl>  <dbl> <dbl>  <dbl>  <dbl>
## 1 Mean   Training  3.68e-17 2.36  2.14   -673.    712.   28.5  17.4    0.996
## 2 Naive  Training -5.52e- 3 0.136 0.0753   -0.702   4.66  1     1     -0.283
## 3 Drift  Training -5.36e-17 0.136 0.0763    0.749   5.19  1.01  0.999 -0.283
## 4 tslm1  Training -1.74e-16 1.49  1.24   -164.    198.   16.5  11.0    0.995

As far as training set accuracy goes, the drift is the best one.

fffc <- fffit %>% 
  forecast(h=308)
accuracy(fffc,ff_test)
## # A tibble: 4 × 10
##   .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift  Test   1.33   1.63 1.33    342.  342.   NaN   NaN 0.967
## 2 Mean   Test  -1.80   2.04 1.83  -1343. 1344.   NaN   NaN 0.979
## 3 Naive  Test   0.478  1.06 0.885  -224.  294.   NaN   NaN 0.979
## 4 tslm1  Test   2.07   2.27 2.07    681.  681.   NaN   NaN 0.968

The NAIVE

ff_cv <- ffr%>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Week, .id)
ff_cv %>%
  model(Mean = MEAN(FF),
    Naive = NAIVE(FF),
    Drift = RW(FF ~ drift()),
    tslm1=TSLM(FF ~ trend())) %>%
  forecast(h = 1) %>%
  accuracy(ffr)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022-11-30
## # A tibble: 4 × 10
##   .model .type       ME  RMSE    MAE      MPE   MAPE   MASE RMSSE   ACF1
##   <chr>  <chr>    <dbl> <dbl>  <dbl>    <dbl>  <dbl>  <dbl> <dbl>  <dbl>
## 1 Drift  Test   0.00822 0.132 0.0690    1.36    5.60  1.03   1.00 -0.233
## 2 Mean   Test  -1.58    2.29  1.95   -922.    929.   29.1   17.4   0.996
## 3 Naive  Test  -0.00255 0.132 0.0669   -0.749   4.73  0.996  1.00 -0.228
## 4 tslm1  Test   0.203   1.54  1.24   -134.    246.   18.5   11.6   0.994

The naive does the best here.

ffnaive <- ffr %>% 
  model(Naive = NAIVE(FF))
gg_tsresiduals(ffnaive)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).

Obviously not good.

My prediction for the ETS is no trend trend, possibly multiplicative errors, and no seasonality. I’ll still compare it to a few others.

ffetsfit <- ff_train %>% 
  model(
  dampedmult=ETS(FF~error("M")+trend("Ad")+season("N")),
  regmult=ETS(FF~error("M")+trend("A")+season("N")),
  multno=ETS(FF~error("M")+trend("N")+season("N")),
  regadd=ETS(FF~error("A")+trend("N")+season("N")),
  ets=ETS(FF)
  )
report(ffetsfit)
## Warning in report.mdl_df(ffetsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 5 × 9
##   .model      sigma2 log_lik   AIC  AICc   BIC    MSE   AMSE    MAE
##   <chr>        <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>
## 1 dampedmult 0.00936  -2190. 4392. 4392. 4424. 0.0174 0.0266 0.0444
## 2 regmult    0.0514   -3358. 6726. 6726. 6753. 0.0225 0.0340 0.0707
## 3 multno     0.00934  -2190. 4386. 4386. 4402. 0.0174 0.0266 0.0444
## 4 regadd     0.0171   -2184. 4375. 4375. 4391. 0.0171 0.0264 0.0725
## 5 ets        0.0160   -2137. 4286. 4286. 4318. 0.0159 0.0228 0.0721

The automatic ets is the best.

accuracy(ffetsfit)
## # A tibble: 5 × 10
##   .model     .type            ME  RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>      <chr>         <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 dampedmult Training -0.00654   0.132 0.0730 -0.795  4.65 0.969 0.970 -0.142 
## 2 regmult    Training  0.0000898 0.150 0.0888 -0.153  6.36 1.18  1.10   0.402 
## 3 multno     Training -0.00638   0.132 0.0730 -0.794  4.65 0.969 0.970 -0.142 
## 4 regadd     Training -0.00749   0.131 0.0725 -0.898  4.73 0.963 0.962 -0.0215
## 5 ets        Training -0.00158   0.126 0.0721  0.165  4.97 0.958 0.929  0.0240

The automatic function is still the best here

ffetsfc <- ffetsfit %>% 
  forecast(h=308)
accuracy(ffetsfc,ff_test)
## # A tibble: 5 × 10
##   .model     .type      ME   RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr>   <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test   0.483   1.06  0.887  -221.  292.   NaN   NaN 0.979
## 2 ets        Test  -0.0434  0.960 0.830  -515.  552.   NaN   NaN 0.981
## 3 multno     Test   0.484   1.06  0.887  -221.  292.   NaN   NaN 0.979
## 4 regadd     Test   0.496   1.06  0.890  -215.  287.   NaN   NaN 0.979
## 5 regmult    Test  -9.69   11.5   9.69  -6989. 6989.   NaN   NaN 0.993

The ets was best here.

ffets_cv <- ffr %>%
  stretch_tsibble(.init = 10, .step = 1) %>% 
  relocate(Week, .id)
ffets_cv %>%
  model(dampedmult=ETS(FF~error("M")+trend("Ad")+season("N")),
  regmult=ETS(FF~error("M")+trend("A")+season("N")),
  multno=ETS(FF~error("M")+trend("N")+season("N")),
  regadd=ETS(FF~error("A")+trend("N")+season("N")),
  ets=ETS(FF)
  ) %>%
  forecast(h = 1) %>%
  accuracy(ffr)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022-11-30
## # A tibble: 5 × 10
##   .model     .type       ME  RMSE    MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>      <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 dampedmult Test  -0.00144 0.127 0.0667 -0.197  5.55 0.992 0.965 0.100 
## 2 ets        Test   0.00235 0.130 0.0693  1.20   6.73 1.03  0.982 0.129 
## 3 multno     Test  -0.00438 0.129 0.0654 -0.935  4.84 0.973 0.976 0.0593
## 4 regadd     Test  -0.00422 0.130 0.0666 -1.12   5.16 0.991 0.984 0.103 
## 5 regmult    Test   0.00214 0.144 0.0813  4.55  14.0  1.21  1.09  0.258

My damped multiplicative was the best here.

ffr %>% 
  gg_tsdisplay(difference(FF),plot_type="partial")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

The first difference appears to not be stationary

ffr %>% 
  gg_tsdisplay(difference(difference(FF)),plot_type="partial")
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).

ffr %>% 
  features(FF,unitroot_ndiffs)
## # A tibble: 1 × 1
##   ndiffs
##    <int>
## 1      2

This confirms that two differences are necessary.

ffarfit <- ff_train %>% 
  model(
    arima100=ARIMA(FF~pdq(1,2,0)),
    arima001=ARIMA(FF~pdq(0,2,1)),
    arima101=ARIMA(FF~pdq(1,2,1)),
    arima100cons=ARIMA(FF~1+pdq(1,2,0)),
    arima001cons=ARIMA(FF~1+pdq(0,2,1)),
    arima101cons=ARIMA(FF~1+pdq(1,2,1)),
    arima=ARIMA(FF),
    arima1=ARIMA(FF,stepwise = FALSE)
  )
## Warning: Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
report(ffarfit)
## Warning in report.mdl_df(ffarfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 8 × 8
##   .model       sigma2 log_lik    AIC   AICc    BIC ar_roots  ma_roots 
##   <chr>         <dbl>   <dbl>  <dbl>  <dbl>  <dbl> <list>    <list>   
## 1 arima100     0.0278    514. -1024. -1024. -1014. <cpl [1]> <cpl [0]>
## 2 arima001     0.0184    798. -1591. -1591. -1581. <cpl [0]> <cpl [1]>
## 3 arima101     0.0165    874. -1743. -1743. -1727. <cpl [1]> <cpl [1]>
## 4 arima100cons 0.0278    514. -1022. -1022. -1006. <cpl [1]> <cpl [0]>
## 5 arima001cons 0.0184    798. -1590. -1590. -1574. <cpl [0]> <cpl [1]>
## 6 arima101cons 0.0165    874. -1741. -1741. -1720. <cpl [1]> <cpl [1]>
## 7 arima        0.0155    920. -1825. -1825. -1789. <cpl [2]> <cpl [4]>
## 8 arima1       0.0156    917. -1822. -1822. -1791. <cpl [4]> <cpl [1]>

The automatic ARIMA returned the lowest AICc

accuracy(ffarfit)
## # A tibble: 8 × 10
##   .model       .type          ME  RMSE    MAE     MPE  MAPE  MASE RMSSE     ACF1
##   <chr>        <chr>       <dbl> <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 arima100     Training  2.69e-4 0.167 0.0949 -0.168   6.51 1.26  1.23  -0.212  
## 2 arima001     Training  1.78e-3 0.135 0.0775  1.02    5.40 1.03  0.997 -0.315  
## 3 arima101     Training  1.20e-3 0.128 0.0742  0.856   5.36 0.985 0.943 -0.0242 
## 4 arima100cons Training  1.54e-5 0.167 0.0949 -0.235   6.51 1.26  1.23  -0.212  
## 5 arima001cons Training  5.80e-5 0.135 0.0774  0.550   5.37 1.03  0.997 -0.314  
## 6 arima101cons Training -1.00e-4 0.128 0.0741  0.508   5.33 0.984 0.943 -0.0241 
## 7 arima        Training -2.92e-3 0.124 0.0702 -0.0748  4.96 0.933 0.914 -0.00545
## 8 arima1       Training -2.13e-3 0.124 0.0708  0.108   4.91 0.941 0.916 -0.00126

same for arima

ffarfc <- ffarfit %>% 
  forecast(h=308)
accuracy(ffarfc,ff_test)
## # A tibble: 8 × 10
##   .model       .type       ME   RMSE    MAE     MPE   MAPE  MASE RMSSE  ACF1
##   <chr>        <chr>    <dbl>  <dbl>  <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 arima        Test    0.141   0.948  0.821   -397.   443.   NaN   NaN 0.980
## 2 arima001     Test   -0.687   1.46   1.14    -996.  1020.   NaN   NaN 0.994
## 3 arima001cons Test   -1.57    2.41   1.77   -1612.  1623.   NaN   NaN 0.997
## 4 arima1       Test   -0.0117  0.952  0.826   -494.   533.   NaN   NaN 0.981
## 5 arima100     Test  -14.7    17.2   14.7   -10259. 10259.   NaN   NaN 0.993
## 6 arima100cons Test  -17.1    20.4   17.1   -12032. 12032.   NaN   NaN 0.992
## 7 arima101     Test   -1.37    2.09   1.52   -1449.  1457.   NaN   NaN 0.996
## 8 arima101cons Test   -2.17    3.06   2.20   -2017.  2019.   NaN   NaN 0.997

the automatic arima was the best.

fffin_cv <- ffr %>%
  stretch_tsibble(.init = 10, .step = 2) %>% 
  relocate(Week, .id)
fffin_cv %>%
  model(
    arima=ARIMA(FF),
    multno=ETS(FF~error("M")+trend("Ad")+season("N")),
    Naive = NAIVE(FF)
    ) %>%
  forecast(h = 1) %>%
  accuracy(ffr)
## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced

## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2022-11-30
## # A tibble: 3 × 10
##   .model .type       ME  RMSE    MAE    MPE  MAPE  MASE RMSSE     ACF1
##   <chr>  <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 arima  Test  -0.00261 0.133 0.0730  0.782  7.37  1.09 1.01  -0.0179 
## 2 multno Test  -0.00789 0.131 0.0694 -0.684  5.69  1.03 0.995 -0.0578 
## 3 Naive  Test  -0.0113  0.138 0.0682 -1.31   4.91  1.02 1.05   0.00780

The damped multiplicative is best here.

fffinfit <-ffr %>% 
  model(multno=ETS(FF~error("M")+trend("Ad")+season("N")))
fffinfc <- fffinfit %>% 
  forecast(h=104)
fffinfc %>% 
  autoplot(ffr,level=NULL)

gg_tsresiduals(fffinfit)

These look amazing.

hilo(fffinfc) %>% 
  select(-`80%`)
## # A tsibble: 104 x 5 [7D]
## # Key:       .model [1]
##    .model Week                FF .mean                     `95%`
##    <chr>  <date>          <dist> <dbl>                    <hilo>
##  1 multno 2022-11-30   N(4, 2.4)  3.96 [ 0.9136285,  7.007091]95
##  2 multno 2022-12-07 N(4.1, 3.8)  4.06 [ 0.2285738,  7.897207]95
##  3 multno 2022-12-14 N(4.2, 5.8)  4.16 [-0.5409256,  8.861043]95
##  4 multno 2022-12-21 N(4.3, 8.3)  4.25 [-1.3926510,  9.896941]95
##  5 multno 2022-12-28  N(4.3, 12)  4.34 [-2.3269060, 11.005737]95
##  6 multno 2023-01-04  N(4.4, 16)  4.42 [-3.3458446, 12.190088]95
##  7 multno 2023-01-11  N(4.5, 21)  4.50 [-4.4531122, 13.454116]95
##  8 multno 2023-01-18  N(4.6, 27)  4.57 [-5.6536482, 14.803215]95
##  9 multno 2023-01-25  N(4.6, 35)  4.65 [-6.9535780, 16.243937]95
## 10 multno 2023-02-01  N(4.7, 44)  4.71 [-8.3601599, 17.783948]95
## # … with 94 more rows
## # ℹ Use `print(n = ...)` to see more rows

These are the forecasted values.