library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate 1.8.0 ✔ feasts 0.2.2
## ✔ tsibble 1.1.2 ✔ fable 0.3.1
## ✔ tsibbledata 0.4.0
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
Loading packages
piggy <- aus_livestock %>%
filter(State=="Victoria") %>%
filter(Animal=="Pigs")
pigfit <- piggy %>% model(ses=ETS(Count ~ error("A") + trend("N") + season("N")))
piggy %>%
autoplot(Count)
pigfit %>%
components() %>%
autoplot()
## Warning: Removed 1 row(s) containing missing values (geom_path).
pigfit %>%
report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
Optimal value of alpha is .3221247 Optimal value of l0 is 100646.6
pigfc <- pigfit %>%
forecast(h=4)
pigfc %>%
autoplot(piggy %>% filter(year(Month)>2010))
Upper
95187+1.96*(87480760)^(1/2)
## [1] 113519.1
Lower
95187-1.96*(87480760)^(1/2)
## [1] 76854.89
hilo(pigfc)
## # A tsibble: 4 x 8 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean `80%`
## <fct> <fct> <chr> <mth> <dist> <dbl> <hilo>
## 1 Pigs Victor… ses 2019 Jan N(95187, 8.7e+07) 95187. [83200.06, 107173.1]80
## 2 Pigs Victor… ses 2019 Feb N(95187, 9.7e+07) 95187. [82593.52, 107779.6]80
## 3 Pigs Victor… ses 2019 Mar N(95187, 1.1e+08) 95187. [82014.88, 108358.2]80
## 4 Pigs Victor… ses 2019 Apr N(95187, 1.1e+08) 95187. [81460.61, 108912.5]80
## # … with 1 more variable: `95%` <hilo>
## # ℹ Use `colnames()` to see all variable names
They are almost the exact same
ger <- global_economy %>%
filter(Country=="Germany")
gerexp <- ger %>%
select(Exports) %>%
na.omit()
gerexp %>%
autoplot(Exports)
There is a clear upward trend with something of a yearly seasonality.
There is a downward dip likely due to the great recession
gerfit <- gerexp %>% model(ses=ETS(Exports ~ error("A") + trend("N") + season("N")))
gerfc <- gerfit %>%
forecast(h=10)
gerfc %>%
autoplot(gerexp)
This forecast completely ignores the upward trend
accuracy(gerfit)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ses Training 0.672 1.76 1.31 2.21 4.73 0.982 0.990 -0.00124
gerfit2 <- gerexp %>% model(ses=ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(gerfit2)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ses Training -0.0121 1.62 1.11 -0.571 4.14 0.832 0.914 0.0220
AAN includes the obvious upwards trend.
gerfc2 <- gerfit2 %>%
forecast(h=10)
gerfc2 %>%
autoplot(gerexp)
I believe the AAN model more because it is likely exports will continue to increase.
china <- global_economy %>%
filter(Country=="China") %>%
select(GDP)
china %>%
autoplot(GDP)
lambdaval <- china %>%
features(GDP,features=guerrero) %>%
pull(lambda_guerrero)
chinafit <- china %>%
model(
reg=ETS(GDP),
damp=ETS(GDP~error("A")+trend("Ad")+season("N")),
boxreg=ETS(box_cox(GDP,lambdaval)~error("A")+trend("N")+season("N"))
)
chinafit %>%
report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 3 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 reg 1.08e- 2 -1546. 3102. 3103. 3112. 4.00e+22 1.61e+23 7.93e- 2
## 2 damp 3.96e+22 -1624. 3260. 3262. 3273. 3.62e+22 1.30e+23 9.49e+10
## 3 boxreg 2.72e- 3 54.5 -103. -103. -96.9 2.63e- 3 9.76e- 3 4.51e- 2
chinafc <- chinafit %>%
forecast(h=20)
chinafc %>%
autoplot(china)
gas <- aus_production %>%
select(Gas)
gas %>%
autoplot(Gas)
gasfit <- gas %>%
model(
mod=ETS(Gas~error("M")+trend("A")+season("M"))
)
gasfc <- gasfit %>%
forecast(h=36)
gasfc %>%
autoplot(gas)
Multiplicative is necessary here because the magnitude of the seasonality changes greatly.
gg_tsresiduals(gasfit)
Significant autocorrelation at many levels, along with heteroskedasticity.
gasfit2 <- gas %>%
model(
mod2=ETS(Gas~error("M")+trend("Ad")+season("M"))
)
gasfc2 <- gasfit2 %>%
forecast(h=36)
gg_tsresiduals(gasfit2)
Not much of an improvement.
gasfc2 %>%
autoplot(gas)
gasfit %>%
report()
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
gasfit2 %>%
report()
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6489044
## beta = 0.1551275
## gamma = 0.09369372
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
##
## sigma^2: 0.0033
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
The first one has a lower AICc value.
set.seed(223465)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)
There is heteroskedasticity which necesitates multiplicative seasonality
myfit <- myseries %>%
model(
modm=ETS(Turnover~trend("A")+season("M"))
)
myfit2 <- myseries %>%
model(
modm=ETS(Turnover~trend("Ad")+season("M"))
)
fc1 <- myfit %>%
forecast(h=36)
fc2 <- myfit2 %>%
forecast(h=36)
fc1 %>%
autoplot(myseries)
fc2 %>%
autoplot(myseries)
myfit3 <- myseries %>%
model(
`Regular`=ETS(Turnover~trend("A")+season("M")),
`Damped`=ETS(Turnover~trend("Ad")+season("M"))
)
fc3 <- myfit3 %>%
forecast(h=36)
fc3 %>%
autoplot(myseries,level=NULL)
gg_tsresiduals(myfit)
gg_tsresiduals(myfit2)
The second one is slightly better
accuracy(myfit3) %>%
select(.model,RMSE)
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Regular 0.671
## 2 Damped 0.667
The RMSE values confirm this.
mytrain <- myseries %>%
filter(year(Month)<2011)
mytest <- myseries %>%
filter(year(Month)>2010)
mytrainfit <- mytrain %>%
model(
snaive=SNAIVE(Turnover),
damped=ETS(Turnover~error("M")+trend("Ad")+season("M")),
reg=ETS(Turnover~trend("A")+season("M"))
)
mytrainfc <- mytrainfit %>%
forecast(h=96)
accuracy(mytrainfc, mytest ) %>%
select(.model, RMSE)
## # A tibble: 3 × 2
## .model RMSE
## <chr> <dbl>
## 1 damped 2.41
## 2 reg 4.19
## 3 snaive 2.29
My Damped Seasonality has a slightly worse RMSE than Seasonal NAIVE
comparison <- anti_join(myseries, mytrain, by = c('State', 'Industry', 'Series ID', 'Month', 'Turnover'))
yup <- mytrainfit %>% forecast(comparison)
autoplot(comparison, Turnover) +
autolayer(yup, level = NULL) +
labs(title = 'Forecast Method Comparison')
SNAIVE is the closest.
accuracy(mytrainfit) %>%
select(.model,RMSE)
## # A tibble: 3 × 2
## .model RMSE
## <chr> <dbl>
## 1 snaive 1.34
## 2 damped 0.583
## 3 reg 0.619
For the data, however, the damped does the best.
tot <- tourism %>%
summarise(Trips=sum(Trips))
tot %>%
autoplot(Trips)
Seasonal data with a large dip followed by a massive upward trend.
ugh <- tot %>%
model(STL(Trips)) %>% components()
ugh %>%
as_tsibble() %>%
autoplot(season_adjust)
thing <- tot %>%
model(
dampdec=decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))))
thingfc <- thing%>%
forecast(h = "2 years")
thingfc%>%
autoplot(tot)
This forecast appears to be strong at first glance.
thing2 <- tot %>%
model(
regdec=decomposition_model(STL(Trips),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))))
thing2fc <- thing2%>%
forecast(h = "2 years")
thing2fc%>%
autoplot(tot)
thing3 <- tot %>%
model(
m=ETS(Trips))
thingfc3 <- thing3 %>%
forecast(h=8)
thingfc3 %>%
autoplot(tot)
accuracy(thing) %>%
select(.model,RMSE)
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 dampdec 763.
accuracy(thing2) %>%
select(.model,RMSE)
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 regdec 763.
accuracy(thing3) %>%
select(.model,RMSE)
## # A tibble: 1 × 2
## .model RMSE
## <chr> <dbl>
## 1 m 794.
thingfc
## # A fable: 8 x 4 [1Q]
## # Key: .model [1]
## .model Quarter Trips .mean
## <chr> <qtr> <dist> <dbl>
## 1 dampdec 2018 Q1 N(28813, 613295) 28813.
## 2 dampdec 2018 Q2 N(27572, 779622) 27572.
## 3 dampdec 2018 Q3 N(27249, 976340) 27249.
## 4 dampdec 2018 Q4 N(28553, 1205316) 28553.
## 5 dampdec 2019 Q1 N(29839, 1482121) 29839.
## 6 dampdec 2019 Q2 N(28578, 1781723) 28578.
## 7 dampdec 2019 Q3 N(28234, 2118429) 28234.
## 8 dampdec 2019 Q4 N(29519, 2493604) 29519.
thing2fc
## # A fable: 8 x 4 [1Q]
## # Key: .model [1]
## .model Quarter Trips .mean
## <chr> <qtr> <dist> <dbl>
## 1 regdec 2018 Q1 N(28883, 6e+05) 28883.
## 2 regdec 2018 Q2 N(27683, 768628) 27683.
## 3 regdec 2018 Q3 N(27405, 961774) 27405.
## 4 regdec 2018 Q4 N(28760, 1186493) 28760.
## 5 regdec 2019 Q1 N(30102, 1458939) 30102.
## 6 regdec 2019 Q2 N(28901, 1755034) 28901.
## 7 regdec 2019 Q3 N(28624, 2089927) 28624.
## 8 regdec 2019 Q4 N(29978, 2466013) 29978.
thingfc3
## # A fable: 8 x 4 [1Q]
## # Key: .model [1]
## .model Quarter Trips .mean
## <chr> <qtr> <dist> <dbl>
## 1 m 2018 Q1 N(29068, 7e+05) 29068.
## 2 m 2018 Q2 N(27794, 870750) 27794.
## 3 m 2018 Q3 N(27619, 1073763) 27619.
## 4 m 2018 Q4 N(28627, 1311711) 28627.
## 5 m 2019 Q1 N(30336, 1587455) 30336.
## 6 m 2019 Q2 N(29062, 1903591) 29062.
## 7 m 2019 Q3 N(28887, 2262980) 28887.
## 8 m 2019 Q4 N(29895, 2668392) 29895.
I buy the undamped decomposition the most. It has the lowest RMSE.
gg_tsresiduals(thing2)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
There apepars to be autocorrelation at a lag of 14, but otherwise the residuals definitely appear to be a white noise.
nz <- aus_arrivals %>%
filter(Origin=="NZ")
nz %>%
autoplot(Arrivals)
High seasonality, upward trend
trainz <- nz %>%
filter(year(Quarter)<2011)
testz <- nz %>%
filter(year(Quarter)>2010)
trainzfit <- trainz %>%
model(
mets=ETS(Arrivals~error("M")+trend("A")+season("M"))
)
trainzfc <- trainzfit %>%
forecast(h=7)
trainzfc %>%
autoplot(nz)
accuracy(trainzfc,testz)
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 mets NZ Test -10467. 18302. 12746. -3.27 4.16 NaN NaN -0.0857
Multiplicative seasonality is necessary here because the magnitude of the seasonality is inconstant over time.
bigfit <- trainz %>%
model(
etschosen=ETS(Arrivals),
etschosenlog=ETS(box_cox(Arrivals,0)),
snaive=SNAIVE(Arrivals),
logdecomp=decomposition_model(STL(box_cox(Arrivals,0)),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))
)
bigfc <- bigfit %>%
forecast(h=7)
bigfc %>%
autoplot(nz,level=NULL)
accuracy(bigfc,testz) %>%
select(.model,RMSE)
## # A tibble: 4 × 2
## .model RMSE
## <chr> <dbl>
## 1 etschosen 18302.
## 2 etschosenlog 14763.
## 3 logdecomp 19979.
## 4 snaive 14962.
The one generated by the ETS function on logged data gave the lowest RMSE.
letsfckingo <- bigfit %>% select('etschosenlog')
letsfckingo %>%
gg_tsresiduals()
nzcv <- nz %>%
slice(1:(n()-3)) %>%
stretch_tsibble(.init = 36, .step = 3) %>%
relocate(Quarter, .id)
nczvfc <- nzcv %>%
model(
etschosen2=ETS(Arrivals),
etschosenlog2=ETS(box_cox(Arrivals,0)),
snaive2=SNAIVE(Arrivals),
mod2=decomposition_model(STL(box_cox(Arrivals,0)),
ETS(season_adjust ~ error("M") + trend("Ad") + season("N")))) %>%
forecast(h = 3)
nczvfc%>%
accuracy(nz)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 etschosen2 NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 2 etschosenlog2 NZ Test 4388. 15047. 11566. 1.99 6.36 0.778 0.782 0.268
## 3 mod2 NZ Test 3020. 15114. 11864. 1.45 6.34 0.798 0.786 0.250
## 4 snaive2 NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
The best one here is now the decomposition model. The previous winner is second best now.
cement <- aus_production %>%
select(Cement)
cement %>% autoplot(Cement)
Very strong upward trend, clear heteroskedasticity, high seasonality.
ccv <- cement %>%
stretch_tsibble(.init=20,.step=1) %>%
relocate(Quarter,.id)
ccvmod <- ccv %>%
model(
ETS(Cement),
SNAIVE(Cement)
)
tail(ccvmod %>% forecast(h=4))
## # A fable: 6 x 5 [1Q]
## # Key: .id, .model [2]
## .id .model Quarter Cement .mean
## <int> <chr> <qtr> <dist> <dbl>
## 1 199 ETS(Cement) 2011 Q1 N(2091, 21888) 2091.
## 2 199 ETS(Cement) 2011 Q2 N(2347, 35012) 2347.
## 3 199 SNAIVE(Cement) 2010 Q3 N(2325, 17963) 2325
## 4 199 SNAIVE(Cement) 2010 Q4 N(2273, 17963) 2273
## 5 199 SNAIVE(Cement) 2011 Q1 N(1904, 17963) 1904
## 6 199 SNAIVE(Cement) 2011 Q2 N(2401, 17963) 2401
zoinks <- cement %>%
stretch_tsibble(.init = 5, .step = 4) %>%
relocate(Quarter,.id)
cemented <- zoinks %>%
model(ETS(Cement),
SNAIVE(Cement)) %>%
forecast(h= 4)
cementtesting <- accuracy(cemented, cement) %>%
mutate(MSE = (RMSE)^2) %>%
select(RMSE,MSE,.model)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 3 observations are missing between 2010 Q3 and 2011 Q1
cementtesting
## # A tibble: 2 × 3
## RMSE MSE .model
## <dbl> <dbl> <chr>
## 1 99.8 9962. ETS(Cement)
## 2 134. 18028. SNAIVE(Cement)
The ETS model is much better. It has a lower RMSE.