library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.2 ✔ fabletools 0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
pigs <- aus_livestock %>%
filter(Animal == "Pigs",State == "Victoria")
pigs
## # A tsibble: 558 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1972 Jul Pigs Victoria 94200
## 2 1972 Aug Pigs Victoria 105700
## 3 1972 Sep Pigs Victoria 96500
## 4 1972 Oct Pigs Victoria 117100
## 5 1972 Nov Pigs Victoria 104600
## 6 1972 Dec Pigs Victoria 100500
## 7 1973 Jan Pigs Victoria 94700
## 8 1973 Feb Pigs Victoria 93900
## 9 1973 Mar Pigs Victoria 93200
## 10 1973 Apr Pigs Victoria 78000
## # ℹ 548 more rows
fit <- pigs %>%
model(ANN = ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The alpha parameter is 0.32 and the initial state l[0] is 100646.6.
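To make the smoothing explicit, here is a minimal sketch of one step of the SES level recursion, l[t] = alpha * y[t] + (1 - alpha) * l[t-1], using the values reported above (the names alpha, l0 and l1 are just illustrative):
alpha <- 0.3221247 ## estimated smoothing parameter from the report
l0 <- 100646.6 ## estimated initial level l[0]
l1 <- alpha * pigs$Count[1] + (1 - alpha) * l0 ## updated level after 1972 Jul
l1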
## Look at the components of the fit..
components(fit) %>%
autoplot()
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Now we forecast the next 4 months
fc <-
fit %>%
forecast(h = "4 months")
fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria ANN 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria ANN 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria ANN 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria ANN 2019 Apr N(95187, 1.1e+08) 95187.
## Taken from the textbook...
fc |>
autoplot(pigs) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="Count", title="Pigs Slaughter in Victoria") +
guides(colour = "none")
## Use the hilo function to extract a specified prediction interval
hilo(fc$Count,95)
## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95
## The formula for an approximate 95% prediction interval is yhat +/- 1.96*s,
## where s is the standard deviation of the residuals, extracted with residuals()
s <- sd(residuals(fit)$.resid)
upper <- 95186.56 + 1.96*s
lower <- 95186.56 - 1.96*s
lower
## [1] 76871.02
upper
## [1] 113502.1
The manually calculated prediction interval is very close to the one returned by hilo() for the first forecast; the small difference comes from using the residual standard deviation rather than the model's estimated sigma.
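The interval can also be built from the model's own variance estimate; a minimal sketch using glance() (this essentially reproduces the first hilo() interval above, since the fable's normal distribution uses sigma^2 = 87480760):
sigma <- sqrt(glance(fit)$sigma2) ## model's estimated residual standard deviation
c(lower = fc$.mean[1] - 1.96 * sigma,
upper = fc$.mean[1] + 1.96 * sigma)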
Bd <- global_economy %>%
filter(Country == "Bangladesh")
Bd %>%
autoplot(Exports) +
labs(title = "Bangladesh Exports")
The data seem to show a decreasing trend up to about 1985, followed by an increasing trend. There is no apparent seasonality, though there is some cyclic behaviour.
## Create a simple exponential smoothing model, ETS(A,N,N)
Bdfit <- Bd %>%
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")))
report(Bdfit)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 9.997555
##
## sigma^2: 1.6265
##
## AIC AICc BIC
## 267.6831 268.1275 273.8644
The alpha parameter is very close to 1, which tells me the level adjusts almost entirely to the most recent observation, so the forecasts behave much like a naive forecast lagged by one period.
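To illustrate that last point, a quick sketch (not part of the original output): with alpha this close to 1, the SES point forecasts are essentially the last observed value, so they almost coincide with a naive forecast.
Bd %>%
model(SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
Naive = NAIVE(Exports)) %>%
forecast(h = "5 years") %>%
autoplot(Bd, level = NULL) ## the two sets of point forecasts overlap almost exactly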
Bdfc <- Bdfit %>%
forecast(h = "15 years")
Bdfc %>%
autoplot(Bd) + labs(title = "Bangladesh Exports: SES Forecasts, 15 Years Ahead")
## Use the accuracy() function to get the training-set error measures
accuracy(Bdfit)$RMSE
## [1] 1.253158
The training RMSE for the ETS(A,N,N) model is 1.25.
Bdfit2 <- Bd %>%
model(ANN = ETS(Exports ~ error("A") + trend("A") + season("N")))
report(Bdfit2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998998
## beta = 0.0001001086
##
## Initial states:
## l[0] b[0]
## 10.1597 0.07875098
##
## sigma^2: 1.6798
##
## AIC AICc BIC
## 271.4452 272.5990 281.7474
The alpha value is very high, which tells us the level changes rapidly to track the strongly trended series. The beta parameter is about 0.0001, which is very small and suggests the slope of the trend barely changes.
## Make the same forecast but with the additive-trend model
Bdfc2 <- Bdfit2 %>%
forecast(h = "15 years")
## Plot the forecasts
Bdfc2 %>%
autoplot(Bd) + labs(title = "Bangladesh Exports: Holt (AAN) Forecasts, 15 Years Ahead")
Simple exponential smoothing is useful because it weights recent observations most heavily when producing the forecast, while double exponential smoothing (Holt's method) also allows the forecasts to follow a trend.
Compute the training RMSE for both models, compare them, and prefer the one with the lower value.
## For the first fit, with simple exponential smoothing
accuracy(Bdfit)$RMSE
## [1] 1.253158
accuracy(Bdfit2)$RMSE
## [1] 1.250591
It appears that for Bangladesh exports the RMSE values of the two models are very similar, so it is difficult to say which method is better on that basis alone.
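One way to break the tie (a sketch, not part of the original analysis): fit both models in a single mable and compare information criteria, which penalise the extra trend parameters. The reports above already show an AICc of 268.1 for ETS(A,N,N) versus 272.6 for ETS(A,A,N), favouring the simpler model.
Bd %>%
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
glance() %>%
select(.model, AICc, MSE) ## side-by-side comparison of the two fits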
head(Bdfc)
## # A fable: 6 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Bangladesh ANN 2018 N(15, 1.6) 15.0
## 2 Bangladesh ANN 2019 N(15, 3.3) 15.0
## 3 Bangladesh ANN 2020 N(15, 4.9) 15.0
## 4 Bangladesh ANN 2021 N(15, 6.5) 15.0
## 5 Bangladesh ANN 2022 N(15, 8.1) 15.0
## 6 Bangladesh ANN 2023 N(15, 9.8) 15.0
hilo(Bdfc$Exports,95)
## <hilo[15]>
## [1] [12.536649, 17.53589]95 [11.501449, 18.57109]95 [10.707088, 19.36545]95
## [4] [10.037404, 20.03513]95 [ 9.447396, 20.62514]95 [ 8.913985, 21.15855]95
## [7] [ 8.423463, 21.64908]95 [ 7.966894, 22.10564]95 [ 7.538075, 22.53446]95
## [10] [ 7.132487, 22.94005]95 [ 6.746720, 23.32582]95 [ 6.378124, 23.69441]95
## [13] [ 6.024592, 24.04795]95 [ 5.684415, 24.38812]95 [ 5.356185, 24.71635]95
## Repeat the manual 95% interval calculation for the first forecast
ss <- sd(residuals(Bdfit)$.resid)
upper <- 15.03627 + 1.96 * ss
lower <- 15.03627 - 1.96 * ss
print(lower)
## [1] 12.56459
print(upper)
## [1] 17.50795
head(Bdfc2)
## # A fable: 6 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Bangladesh ANN 2018 N(15, 1.7) 15.1
## 2 Bangladesh ANN 2019 N(15, 3.4) 15.2
## 3 Bangladesh ANN 2020 N(15, 5) 15.3
## 4 Bangladesh ANN 2021 N(15, 6.7) 15.4
## 5 Bangladesh ANN 2022 N(15, 8.4) 15.4
## 6 Bangladesh ANN 2023 N(16, 10) 15.5
## Use the hilo() function again (as above) for the trend model's forecast
hilo(Bdfc2$Exports,95)
## <hilo[15]>
## [1] [12.574787, 17.65533]95 [11.601355, 18.78633]95 [10.872598, 19.67266]95
## [4] [10.270483, 20.43234]95 [ 9.749288, 21.11110]95 [ 9.285566, 21.73239]95
## [7] [ 8.865388, 22.31013]95 [ 8.479678, 22.85341]95 [ 8.122134, 23.36852]95
## [10] [ 7.788166, 23.86005]95 [ 7.474310, 24.33148]95 [ 7.177875, 24.78548]95
## [13] [ 6.896720, 25.22420]95 [ 6.629110, 25.64938]95 [ 6.373616, 26.06244]95
## Calculate it again, this time using the residuals of the trend model
sss <- sd(residuals(Bdfit2)$.resid)
upper <- 15.11506 + 1.96 * sss
lower <- 15.11506 - 1.96 * sss
print(lower)
## [1] 12.64338
print(upper)
## [1] 17.58674
Once again, the manually calculated prediction interval is very close to the one returned by hilo(); the bounds differ only slightly.
Let us first visualize the data to understand how the series behaves; as it turns out, a log transformation of GDP seems to work best.
Chinese <- global_economy %>%
filter(Country == "China")
## Let's take a look at the plot
Chinese %>%
autoplot(GDP) + labs(title = "China GDP")
Okay, the time series for China shows steady exponential growth after the 1990s. We could transform the data, for example with a square-root or log transformation (the guerrero feature can help choose a Box-Cox lambda; a quick check is sketched just below). Also, since the data trend upward, we can apply double exponential smoothing. This is the thought process I have so far.
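A minimal sketch of that lambda check (output not shown; a value near 0 would support the log transformation tried below):
Chinese %>%
features(GDP, features = guerrero) %>% ## Guerrero's method for choosing a Box-Cox lambda
pull(lambda_guerrero)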
## Let's see if a square-root transformation works; it seems to have helped
Chinese %>%
autoplot(sqrt(GDP)) + labs(title = "China GDP (SqRt Transformed)")
## Wow the log does an even better job!!
Chinese %>%
autoplot(log(GDP)) + labs(title = "China GDP (Log Transformed)")
## Let's create forecasts from an ETS model of logged GDP; since the data aren't seasonal we can set the seasonal component to "N"
Chinese
## # A tsibble: 58 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China CHN 1960 59716467625. NA NA 4.43 4.31 667070000
## 2 China CHN 1961 50056868958. -27.3 NA 3.49 3.87 660330000
## 3 China CHN 1962 47209359006. -5.58 NA 2.91 4.05 665770000
## 4 China CHN 1963 50706799903. 10.3 NA 2.86 4.01 682335000
## 5 China CHN 1964 59708343489. 18.2 NA 2.86 3.77 698355000
## 6 China CHN 1965 70436266147. 17.0 NA 3.19 3.64 715185000
## 7 China CHN 1966 76720285970. 10.7 NA 3.24 3.49 735400000
## 8 China CHN 1967 72881631327. -5.77 NA 2.98 3.28 754550000
## 9 China CHN 1968 70846535056. -4.10 NA 2.92 3.30 774510000
## 10 China CHN 1969 79705906247. 16.9 NA 2.41 3.05 796025000
## # ℹ 48 more rows
## Create an ETS model with additive errors and an additive trend
Chinfit <- Chinese %>%
model(ANN = ETS(GDP ~ error("A") + trend("A") + season("N")))
report(Chinfit)
## Series: GDP
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998964
## beta = 0.5518569
##
## Initial states:
## l[0] b[0]
## 50284778074 3288256684
##
## sigma^2: 3.87701e+22
##
## AIC AICc BIC
## 3258.053 3259.207 3268.356
The alpha is close to 1, which tells us recent observations are weighted most heavily, and the beta of about 0.55 tells us the slope of the trend changes fairly quickly.
## Create the forecast and visualize it for the next 15 years
Chinfc <- Chinfit %>%
forecast(h = "15 years")
Chinfc %>%
autoplot(Chinese) + labs(title = "Chinese GDP with Additive Trend")
## "Ad" is the damped variant of the additive trend
Chinfit2 <- Chinese %>%
model(ANN = ETS(GDP ~ error("A") + trend("Ad",phi = 0.83) + season("N")))
report(Chinfit2)
## Series: GDP
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.8406579
## beta = 0.8406564
## phi = 0.83
##
## Initial states:
## l[0] b[0]
## 45713434614 7859600144
##
## sigma^2: 4.524326e+22
##
## AIC AICc BIC
## 3265.925 3267.079 3276.227
With the damped trend, the alpha and beta parameters are both estimated at about 0.84; beta is constrained to be no larger than alpha and has essentially hit that bound here.
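As a side note (a sketch; phi = 0.83 above was fixed by hand), ETS() will estimate the damping parameter itself if phi is left unspecified:
Chinese %>%
model(AAdN = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>% ## phi estimated, not fixed
report()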
## Make sure to forecast before plotting; the prediction intervals fan out like a funnel
Chinfit2 %>%
forecast(h = "15 years") %>%
autoplot(Chinese) + labs(title = "Chinese GDP with Additive Dampened Trend")
Chinlogfit <- Chinese %>%
model(ANN = ETS(log(GDP) ~ error("A") + trend("A") + season("N")))
report(Chinlogfit)
## Series: GDP
## Model: ETS(A,A,N)
## Transformation: log(GDP)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.1079782
##
## Initial states:
## l[0] b[0]
## 24.77007 0.04320226
##
## sigma^2: 0.0088
##
## AIC AICc BIC
## -33.07985 -31.92600 -22.77763
The alpha parameter is very close to 1, telling us that recent observations dominate the level estimate, while the low beta (0.11) suggests the trend does not fluctuate much.
Chinlogfit %>%
forecast(h = "15 years") %>%
autoplot(Chinese) + labs(title = "Transformed GDP with Additive Trend")
Chinlogfit2 <- Chinese %>%
model(ANN = ETS(log(GDP) ~ error("A") + trend("Ad",phi = 0.83) + season("N")))
report(Chinlogfit2)
## Series: GDP
## Model: ETS(A,Ad,N)
## Transformation: log(GDP)
## Smoothing parameters:
## alpha = 0.965097
## beta = 0.5928893
## phi = 0.83
##
## Initial states:
## l[0] b[0]
## 24.96615 -0.1897312
##
## sigma^2: 0.0093
##
## AIC AICc BIC
## -31.32767 -30.17383 -21.02546
With damping, the beta parameter is estimated at 0.59, considerably higher than in the undamped log model (0.11).
Chinlogfit2 %>%
forecast(h = "15 years") %>%
autoplot(Chinese) + labs(title = "Transformed GDP with Damped Additive Trend")
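To wrap up this exercise, here is a sketch (not part of the original output) that refits the four variants in one mable so their training errors can be compared in a single table; accuracy() reports errors on the response (original GDP) scale, so the log models are comparable with the raw-GDP fits.
China_all <- Chinese %>%
model(AAN = ETS(GDP ~ error("A") + trend("A") + season("N")),
AAdN = ETS(GDP ~ error("A") + trend("Ad", phi = 0.83) + season("N")),
log_AAN = ETS(log(GDP) ~ error("A") + trend("A") + season("N")),
log_AAdN = ETS(log(GDP) ~ error("A") + trend("Ad", phi = 0.83) + season("N")))
accuracy(China_all) ## training-set error measures for all four models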
ausGas <- aus_production
ausGas
## # A tsibble: 218 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
## 7 1957 Q3 236 5317 227 603 5259 7
## 8 1957 Q4 320 6152 222 582 4735 6
## 9 1958 Q1 272 5758 199 554 4608 5
## 10 1958 Q2 233 5641 229 620 5196 7
## # ℹ 208 more rows
ausGas %>%
autoplot(Gas) + labs(title = "Gas Production in Australia")
The data show an increasing trend and strong seasonality, with seasonal fluctuations that grow with the level of the series.
ausmod <- ausGas %>%
model(additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
multplicative = ETS(Gas ~ error("M") + trend("A") + season("M")))
report(ausmod)
## Warning in report.mdl_df(ausmod): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive 23.6 -927. 1872. 1873. 1903. 22.7 29.7 3.35
## 2 multplicative 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
accuracy(ausmod)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive Training 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
## 2 multplicative Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
## forecast 20 quarters
ausmod %>%
forecast(h = 20) %>%
autoplot(aus_production, level = NULL) +
labs(title = "Gas Production in Australia",
subtitle = "Additive vs Multiplicative Seasonality") +
guides(color = guide_legend(title = "Forecast"))
ausmod2 <- ausGas %>%
model(additive_dampened = ETS(Gas ~ error("A") + trend("Ad",phi = 0.83) + season("A")),
multplicative_dampened = ETS(Gas ~ error("M") + trend("Ad",phi = 0.83) + season("M")))
report(ausmod2)
## Warning in report.mdl_df(ausmod2): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive_dampened 21.3 -916. 1850. 1851. 1880. 20.5 29.8 3.03
## 2 multplicative_dampened 0.00350 -838. 1693. 1694. 1724. 20.1 31.5 0.0422
accuracy(ausmod2)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive_dampened Trai… 0.915 4.52 3.03 1.40 4.30 0.543 0.596 0.0336
## 2 multplicative_dampen… Trai… 0.429 4.48 2.97 0.830 4.11 0.532 0.591 -0.00216
## Plot the model..
ausmod2 %>%
forecast(h = 20) %>%
autoplot(aus_production, level = NULL) +
labs(title = "Gas Production in Australia (Damped)",
subtitle = "Additive Damped vs Multiplicative Damped") +
guides(color = guide_legend(title = "Forecast"))
The multiplicative method is preferred because the seasonal variations change in proportion to the level of the series. Looking at the model fits, multiplicative seasonality performed somewhat better, with a lower AIC and a slightly lower training RMSE, though not by much. Adding damping did not noticeably improve the forecasts.
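As a further check (a sketch, not in the original write-up), the residual diagnostics of the preferred multiplicative model can be inspected with the same tools used later in this document:
ausmod %>%
select(multplicative) %>% ## pick the single model by the name used in the mable above
gg_tsresiduals()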
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot() + labs(title = "Australian Retail Data")
## Plot variable not specified, automatically selected `.vars = Turnover`
The plot shows an increasing trend and clear seasonality.
Multiplicative seasonality is necessary because the seasonal variation changes in proportion to the level of the series.
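A quick visual check of that claim (a sketch; output not shown): if the seasonal swings really grow in proportion to the level, plotting the log of Turnover should make them roughly constant in size.
myseries %>%
autoplot(log(Turnover)) + labs(title = "Australian Retail Data (Log Turnover)")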
mymod <- myseries %>%
model(multplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
multplicative_damp = ETS(Turnover ~ error("M") + trend("Ad",phi = 0.83) + season("M")))
report(mymod)
## Warning in report.mdl_df(mymod): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 2 × 11
## State Industry .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern… Clothin… multp… 0.00447 -870. 1775. 1776. 1841. 0.376 0.473 0.0511
## 2 Northern… Clothin… multp… 0.00479 -880. 1794. 1796. 1861. 0.382 0.497 0.0518
accuracy(mymod)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… multp… Trai… -0.0128 0.613 0.450 -0.469 5.15 0.513 0.529
## 2 Northern T… Clothin… multp… Trai… 0.0465 0.618 0.451 0.194 5.16 0.515 0.533
## # ℹ 1 more variable: ACF1 <dbl>
mymod %>%
forecast(h = 20) %>%
autoplot(myseries, level = NULL) +
labs(title = "Australian Retail Data",
subtitle = "Multiplicative vs Multiplicative Damped") +
guides(color = guide_legend(title = "Forecast"))
Both models have similar RMSE values, but since the multiplicative model without damping performed slightly better, I would prefer that method.
We can use gg_tsresiduals() and a portmanteau test to reach a conclusion about the residuals.
## Recreate the multiplicative model..
Mymod <- myseries %>%
model(multplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))
Mymod %>%
gg_tsresiduals()
The residuals display fairly high variance, with some significant correlations in the ACF plot; the histogram is roughly normal but slightly left-skewed. The model may not capture all of the structure in the data.
We can perform a Ljung-Box test to check whether the residuals resemble white noise.
## Recall the arguments: the model's innovation residuals, with lag = 10 for non-seasonal data and lag = 2*m for seasonal data
## Since the seasonal period m is 12, we use lag = 2*12 = 24
augment(Mymod) %>%
features(.innov,ljung_box,lag = 12*2)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Northern Territory Clothing, footwear and personal a… multp… 34.5 0.0765
From the Ljung-Box test, since the p-value (0.077) is larger than 0.05 we fail to reject the null hypothesis; the residuals are consistent with white noise.
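For completeness, the Box-Pierce variant of the portmanteau test can be run the same way (a sketch; output not shown):
augment(Mymod) %>%
features(.innov, box_pierce, lag = 24) ## same lag choice as the Ljung-Box test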
## Set the training data to the end of 2010
Trainseries <- myseries %>%
filter_index("1988 Aug"~ "2010 Dec")
## Fit the models..
Series_fit <- Trainseries %>%
model(Seasonal_Naive = SNAIVE(Turnover),
multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))
## Make forecasts for 96 months (8 years) and plot them
Series_fit %>%
forecast(h = 96) %>%
autoplot(Trainseries,level = NULL) +
autolayer(
filter_index(myseries,"2011 Jan" ~ .),
color = "black") +
guides(color = guide_legend(title = "Forecast")
) + labs(title = "Forecasts with Retail Data")
## Plot variable not specified, automatically selected `.vars = Turnover`
## Find the RMSE for training set
accuracy(Series_fit)
## # A tibble: 2 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… Seaso… Trai… 0.440 1.22 0.923 5.15 12.4 1 1
## 2 Northern T… Clothin… multi… Trai… -0.0271 0.520 0.386 -0.710 5.15 0.418 0.426
## # ℹ 1 more variable: ACF1 <dbl>
On the training set, the multiplicative ETS model has a lower RMSE (0.52) than the seasonal naive approach (1.22).
## Make a test_set start from 2011 Jan
Testseries <- myseries %>%
filter_index("2011 Jan"~.)
## We can forecast the testing data..
forcastt <- Series_fit %>%
forecast(new_data = Testseries)
## Review it for the test_set
forcastt %>%
accuracy(myseries)
## # A tibble: 2 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Season… Nort… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
## 2 multip… Nort… Clothin… Test -1.69 1.95 1.75 -13.4 13.7 1.91 1.61 0.568
However, the seasonal naive method has a lower RMSE on the test set than the multiplicative ETS model; the large negative ME (-1.69) shows the ETS forecasts sit well above the actual turnover over the test period.
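A single train/test split can be sensitive to where the split falls. A more robust comparison (a sketch, and somewhat slower to run) is time-series cross-validation with stretch_tsibble(), which averages the test error over many forecast origins:
myseries %>%
stretch_tsibble(.init = 120, .step = 12) %>% ## overlapping training sets, each 12 months longer
model(Seasonal_Naive = SNAIVE(Turnover),
multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = 12) %>%
accuracy(myseries) ## cross-validated accuracy, averaged over all origins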
## Define the series again just in case..
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
## Select a lambda value.
lambda <- myseries %>%
features(Turnover,features = guerrero) %>%
pull(lambda_guerrero)
## Try an STL decomposition of the Box-Cox-transformed series, leaving the STL settings at their defaults
myseries %>%
model(STL(box_cox(Turnover,lambda),robust = TRUE)) %>%
components() %>%
autoplot() + labs(title = "STL Decomp With Box-Cox Transformation")
## We have to save the components..
Dcmp <- myseries %>%
model(STL(box_cox(Turnover,lambda),robust = TRUE)) %>%
components()
Dcmp
## # A dable: 369 x 9 [1M]
## # Key: State, Industry, .model [1]
## # : box_cox(Turnover, lambda) = trend + season_year + remainder
## State Industry .model Month box_cox(Turnover, la…¹ trend season_year
## <chr> <chr> <chr> <mth> <dbl> <dbl> <dbl>
## 1 Northern T… Clothin… STL(b… 1988 Apr 0.862 0.991 -0.111
## 2 Northern T… Clothin… STL(b… 1988 May 1.11 1.01 0.0515
## 3 Northern T… Clothin… STL(b… 1988 Jun 0.994 1.03 0.0438
## 4 Northern T… Clothin… STL(b… 1988 Jul 1.07 1.05 0.248
## 5 Northern T… Clothin… STL(b… 1988 Aug 1.11 1.07 0.111
## 6 Northern T… Clothin… STL(b… 1988 Sep 1.15 1.09 0.0424
## 7 Northern T… Clothin… STL(b… 1988 Oct 1.19 1.11 0.0265
## 8 Northern T… Clothin… STL(b… 1988 Nov 1.15 1.13 0.000123
## 9 Northern T… Clothin… STL(b… 1988 Dec 1.52 1.15 0.357
## 10 Northern T… Clothin… STL(b… 1989 Jan 1.04 1.17 -0.212
## # ℹ 359 more rows
## # ℹ abbreviated name: ¹`box_cox(Turnover, lambda)`
## # ℹ 2 more variables: remainder <dbl>, season_adjust <dbl>
## Save the season_adjust in myseries
myseries$Season_adj <- Dcmp$season_adjust
head(myseries)
## # A tsibble: 6 x 6 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover Season_adj
## <chr> <chr> <chr> <mth> <dbl> <dbl>
## 1 Northern Territory Clothing, footwea… A3349767W 1988 Apr 2.3 0.974
## 2 Northern Territory Clothing, footwea… A3349767W 1988 May 2.9 1.06
## 3 Northern Territory Clothing, footwea… A3349767W 1988 Jun 2.6 0.951
## 4 Northern Territory Clothing, footwea… A3349767W 1988 Jul 2.8 0.827
## 5 Northern Territory Clothing, footwea… A3349767W 1988 Aug 2.9 1.00
## 6 Northern Territory Clothing, footwea… A3349767W 1988 Sep 3 1.11
## Let's create training and testing sets again..
## Set the training data to the end of 2010
Trainseries1 <- myseries %>%
filter_index("1988 Aug"~ "2010 Dec")
## Fit the model again, this time on the seasonally adjusted data
Series_fit1 <- Trainseries1 %>%
model(multiplicative = ETS(Season_adj ~ error("M") + trend("A") + season("M")))
Series_fit1 %>%
gg_tsresiduals()
The residuals look roughly normal; there are some correlations in the ACF plot, and the residual variance is high early in the series but settles down after about 2000.
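The same Ljung-Box check used earlier can be applied to this model's innovation residuals (a sketch; output not shown):
augment(Series_fit1) %>%
features(.innov, ljung_box, lag = 24)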
## Make a test_set start from 2011 Jan
Testseries1 <- myseries %>%
filter_index("2011 Jan"~.)
## We can forecast the testing data..
forcast <- Series_fit1 %>%
forecast(new_data = Testseries1)
## Review the accuracy for the training set
Series_fit1 %>%
accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… multi… Trai… -0.00408 0.0732 0.0560 -0.314 2.84 0.368 0.359
## # ℹ 1 more variable: ACF1 <dbl>
## Review it for the test_set
forcast %>%
accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multiplica… Nort… Clothin… Test -0.0559 0.0827 0.0681 -2.00 2.41 0.448 0.406
## # ℹ 1 more variable: ACF1 <dbl>
The RMSE is 0.0827 for the test set and 0.0732 for the training set. Note that these errors are on the Box-Cox-transformed, seasonally adjusted scale, so they are not directly comparable with the RMSE values obtained earlier on the raw Turnover series.