library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
ffr <- readxl::read_excel("C:\\Users\\PythonAcct\\Downloads\\FF.xls") %>%
mutate(Week=as_date(Date)) %>%
as_tsibble(index=Week) %>%
select(-Date)
ffr %>%
autoplot(FF)
This is weekly data.
First, I will compare the four benchmark forecasts and a TSLM model.
ff_train <- ffr %>%
filter(year(Week)<2017)
ff_test <- ffr%>%
filter(year(Week)>2016)
fffit <- ff_train %>%
model(
Mean = MEAN(FF),
Naive = NAIVE(FF),
Drift = RW(FF ~ drift()),
tslm1=TSLM(FF ~ trend())
)
accuracy(fffit)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Mean Training 3.68e-17 2.36 2.14 -673. 712. 28.5 17.4 0.996
## 2 Naive Training -5.52e- 3 0.136 0.0753 -0.702 4.66 1 1 -0.283
## 3 Drift Training -5.36e-17 0.136 0.0763 0.749 5.19 1.01 0.999 -0.283
## 4 tslm1 Training -1.74e-16 1.49 1.24 -164. 198. 16.5 11.0 0.995
As far as training set accuracy goes, the drift is the best one.
fffc <- fffit %>%
forecast(h=308)
accuracy(fffc,ff_test)
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 1.33 1.63 1.33 342. 342. NaN NaN 0.967
## 2 Mean Test -1.80 2.04 1.83 -1343. 1344. NaN NaN 0.979
## 3 Naive Test 0.478 1.06 0.885 -224. 294. NaN NaN 0.979
## 4 tslm1 Test 2.07 2.27 2.07 681. 681. NaN NaN 0.968
The NAIVE
ff_cv <- ffr%>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Week, .id)
ff_cv %>%
model(Mean = MEAN(FF),
Naive = NAIVE(FF),
Drift = RW(FF ~ drift()),
tslm1=TSLM(FF ~ trend())) %>%
forecast(h = 1) %>%
accuracy(ffr)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022-11-30
## # A tibble: 4 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Test 0.00822 0.132 0.0690 1.36 5.60 1.03 1.00 -0.233
## 2 Mean Test -1.58 2.29 1.95 -922. 929. 29.1 17.4 0.996
## 3 Naive Test -0.00255 0.132 0.0669 -0.749 4.73 0.996 1.00 -0.228
## 4 tslm1 Test 0.203 1.54 1.24 -134. 246. 18.5 11.6 0.994
The naive does the best here.
ffnaive <- ffr %>%
model(Naive = NAIVE(FF))
gg_tsresiduals(ffnaive)
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
Obviously not good.
My prediction for the ETS is no trend trend, possibly multiplicative errors, and no seasonality. I’ll still compare it to a few others.
ffetsfit <- ff_train %>%
model(
dampedmult=ETS(FF~error("M")+trend("Ad")+season("N")),
regmult=ETS(FF~error("M")+trend("A")+season("N")),
multno=ETS(FF~error("M")+trend("N")+season("N")),
regadd=ETS(FF~error("A")+trend("N")+season("N")),
ets=ETS(FF)
)
report(ffetsfit)
## Warning in report.mdl_df(ffetsfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 5 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult 0.00936 -2190. 4392. 4392. 4424. 0.0174 0.0266 0.0444
## 2 regmult 0.0514 -3358. 6726. 6726. 6753. 0.0225 0.0340 0.0707
## 3 multno 0.00934 -2190. 4386. 4386. 4402. 0.0174 0.0266 0.0444
## 4 regadd 0.0171 -2184. 4375. 4375. 4391. 0.0171 0.0264 0.0725
## 5 ets 0.0160 -2137. 4286. 4286. 4318. 0.0159 0.0228 0.0721
The automatic ets is the best.
accuracy(ffetsfit)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Training -0.00654 0.132 0.0730 -0.795 4.65 0.969 0.970 -0.142
## 2 regmult Training 0.0000898 0.150 0.0888 -0.153 6.36 1.18 1.10 0.402
## 3 multno Training -0.00638 0.132 0.0730 -0.794 4.65 0.969 0.970 -0.142
## 4 regadd Training -0.00749 0.131 0.0725 -0.898 4.73 0.963 0.962 -0.0215
## 5 ets Training -0.00158 0.126 0.0721 0.165 4.97 0.958 0.929 0.0240
The automatic function is still the best here
ffetsfc <- ffetsfit %>%
forecast(h=308)
accuracy(ffetsfc,ff_test)
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test 0.483 1.06 0.887 -221. 292. NaN NaN 0.979
## 2 ets Test -0.0434 0.960 0.830 -515. 552. NaN NaN 0.981
## 3 multno Test 0.484 1.06 0.887 -221. 292. NaN NaN 0.979
## 4 regadd Test 0.496 1.06 0.890 -215. 287. NaN NaN 0.979
## 5 regmult Test -9.69 11.5 9.69 -6989. 6989. NaN NaN 0.993
The ets was best here.
ffets_cv <- ffr %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Week, .id)
ffets_cv %>%
model(dampedmult=ETS(FF~error("M")+trend("Ad")+season("N")),
regmult=ETS(FF~error("M")+trend("A")+season("N")),
multno=ETS(FF~error("M")+trend("N")+season("N")),
regadd=ETS(FF~error("A")+trend("N")+season("N")),
ets=ETS(FF)
) %>%
forecast(h = 1) %>%
accuracy(ffr)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022-11-30
## # A tibble: 5 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dampedmult Test -0.00144 0.127 0.0667 -0.197 5.55 0.992 0.965 0.100
## 2 ets Test 0.00235 0.130 0.0693 1.20 6.73 1.03 0.982 0.129
## 3 multno Test -0.00438 0.129 0.0654 -0.935 4.84 0.973 0.976 0.0593
## 4 regadd Test -0.00422 0.130 0.0666 -1.12 5.16 0.991 0.984 0.103
## 5 regmult Test 0.00214 0.144 0.0813 4.55 14.0 1.21 1.09 0.258
My damped multiplicative was the best here.
ffr %>%
gg_tsdisplay(difference(FF),plot_type="partial")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
The first difference appears to not be stationary
ffr %>%
gg_tsdisplay(difference(difference(FF)),plot_type="partial")
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
ffr %>%
features(FF,unitroot_ndiffs)
## # A tibble: 1 × 1
## ndiffs
## <int>
## 1 2
This confirms that two differences are necessary.
ffarfit <- ff_train %>%
model(
arima100=ARIMA(FF~pdq(1,2,0)),
arima001=ARIMA(FF~pdq(0,2,1)),
arima101=ARIMA(FF~pdq(1,2,1)),
arima100cons=ARIMA(FF~1+pdq(1,2,0)),
arima001cons=ARIMA(FF~1+pdq(0,2,1)),
arima101cons=ARIMA(FF~1+pdq(1,2,1)),
arima=ARIMA(FF),
arima1=ARIMA(FF,stepwise = FALSE)
)
## Warning: Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
## Model specification induces a quadratic or higher order polynomial trend.
## This is generally discouraged, consider removing the constant or reducing the number of differences.
report(ffarfit)
## Warning in report.mdl_df(ffarfit): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 8 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 arima100 0.0278 514. -1024. -1024. -1014. <cpl [1]> <cpl [0]>
## 2 arima001 0.0184 798. -1591. -1591. -1581. <cpl [0]> <cpl [1]>
## 3 arima101 0.0165 874. -1743. -1743. -1727. <cpl [1]> <cpl [1]>
## 4 arima100cons 0.0278 514. -1022. -1022. -1006. <cpl [1]> <cpl [0]>
## 5 arima001cons 0.0184 798. -1590. -1590. -1574. <cpl [0]> <cpl [1]>
## 6 arima101cons 0.0165 874. -1741. -1741. -1720. <cpl [1]> <cpl [1]>
## 7 arima 0.0155 920. -1825. -1825. -1789. <cpl [2]> <cpl [4]>
## 8 arima1 0.0156 917. -1822. -1822. -1791. <cpl [4]> <cpl [1]>
The automatic ARIMA returned the lowest AICc
accuracy(ffarfit)
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima100 Training 2.69e-4 0.167 0.0949 -0.168 6.51 1.26 1.23 -0.212
## 2 arima001 Training 1.78e-3 0.135 0.0775 1.02 5.40 1.03 0.997 -0.315
## 3 arima101 Training 1.20e-3 0.128 0.0742 0.856 5.36 0.985 0.943 -0.0242
## 4 arima100cons Training 1.54e-5 0.167 0.0949 -0.235 6.51 1.26 1.23 -0.212
## 5 arima001cons Training 5.80e-5 0.135 0.0774 0.550 5.37 1.03 0.997 -0.314
## 6 arima101cons Training -1.00e-4 0.128 0.0741 0.508 5.33 0.984 0.943 -0.0241
## 7 arima Training -2.92e-3 0.124 0.0702 -0.0748 4.96 0.933 0.914 -0.00545
## 8 arima1 Training -2.13e-3 0.124 0.0708 0.108 4.91 0.941 0.916 -0.00126
same for arima
ffarfc <- ffarfit %>%
forecast(h=308)
accuracy(ffarfc,ff_test)
## # A tibble: 8 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test 0.141 0.948 0.821 -397. 443. NaN NaN 0.980
## 2 arima001 Test -0.687 1.46 1.14 -996. 1020. NaN NaN 0.994
## 3 arima001cons Test -1.57 2.41 1.77 -1612. 1623. NaN NaN 0.997
## 4 arima1 Test -0.0117 0.952 0.826 -494. 533. NaN NaN 0.981
## 5 arima100 Test -14.7 17.2 14.7 -10259. 10259. NaN NaN 0.993
## 6 arima100cons Test -17.1 20.4 17.1 -12032. 12032. NaN NaN 0.992
## 7 arima101 Test -1.37 2.09 1.52 -1449. 1457. NaN NaN 0.996
## 8 arima101cons Test -2.17 3.06 2.20 -2017. 2019. NaN NaN 0.997
the automatic arima was the best.
fffin_cv <- ffr %>%
stretch_tsibble(.init = 10, .step = 2) %>%
relocate(Week, .id)
fffin_cv %>%
model(
arima=ARIMA(FF),
multno=ETS(FF~error("M")+trend("Ad")+season("N")),
Naive = NAIVE(FF)
) %>%
forecast(h = 1) %>%
accuracy(ffr)
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning in sqrt(diag(best$var.coef)): NaNs produced
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2022-11-30
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 arima Test -0.00261 0.133 0.0730 0.782 7.37 1.09 1.01 -0.0179
## 2 multno Test -0.00789 0.131 0.0694 -0.684 5.69 1.03 0.995 -0.0578
## 3 Naive Test -0.0113 0.138 0.0682 -1.31 4.91 1.02 1.05 0.00780
The damped multiplicative is best here.
fffinfit <-ffr %>%
model(multno=ETS(FF~error("M")+trend("Ad")+season("N")))
fffinfc <- fffinfit %>%
forecast(h=104)
fffinfc %>%
autoplot(ffr,level=NULL)
gg_tsresiduals(fffinfit)
These look amazing.
hilo(fffinfc) %>%
select(-`80%`)
## # A tsibble: 104 x 5 [7D]
## # Key: .model [1]
## .model Week FF .mean `95%`
## <chr> <date> <dist> <dbl> <hilo>
## 1 multno 2022-11-30 N(4, 2.4) 3.96 [ 0.9136285, 7.007091]95
## 2 multno 2022-12-07 N(4.1, 3.8) 4.06 [ 0.2285738, 7.897207]95
## 3 multno 2022-12-14 N(4.2, 5.8) 4.16 [-0.5409256, 8.861043]95
## 4 multno 2022-12-21 N(4.3, 8.3) 4.25 [-1.3926510, 9.896941]95
## 5 multno 2022-12-28 N(4.3, 12) 4.34 [-2.3269060, 11.005737]95
## 6 multno 2023-01-04 N(4.4, 16) 4.42 [-3.3458446, 12.190088]95
## 7 multno 2023-01-11 N(4.5, 21) 4.50 [-4.4531122, 13.454116]95
## 8 multno 2023-01-18 N(4.6, 27) 4.57 [-5.6536482, 14.803215]95
## 9 multno 2023-01-25 N(4.6, 35) 4.65 [-6.9535780, 16.243937]95
## 10 multno 2023-02-01 N(4.7, 44) 4.71 [-8.3601599, 17.783948]95
## # … with 94 more rows
## # ℹ Use `print(n = ...)` to see more rows
These are the forecasted values.