library(fpp2)
## Warning: package 'fpp2' was built under R version 3.6.3
## Loading required package: ggplot2
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.6.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.6.3
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.6.3
library(kableExtra)
SES_pigs <- ses(pigs, h= 4)
SES_pigs$model
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
# ses plot
autoplot(SES_pigs) +
autolayer(SES_pigs$fitted, series="Fitted") +
ylab("Count") + xlab("Year")
optimal values of alpha and l0 are 0.29 and 77260.05 respectively.
# MANUAL CALCULATION
s <- sd(residuals(SES_pigs))
I_95 <- c(Lower = SES_pigs$mean[1] - 1.96*s, Upper = SES_pigs$mean[1] + 1.96*s)
I_95
## Lower Upper
## 78679.97 118952.84
# WITH 'R'
I_95_R <- c(SES_pigs$lower[1,2], SES_pigs$upper[1,2])
names(I_95_R) <- c("Lower", "Upper")
I_95_R
## Lower Upper
## 78611.97 119020.84
‘R’ interval is little wider when compared to manual calculation.
# plot
autoplot(books) + ggtitle("Daily Sales of Books")
# features
head(books)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## Paperback Hardcover
## 1 199 139
## 2 172 128
## 3 111 172
## 4 209 139
## 5 161 191
## 6 119 168
Both series are correlated and positive trending pattern.sales of books increasing with time and it doesn’t have any particular pattern.
SES_pigs_pb <- ses(books[,'Paperback'], h = 4)
SES_pigs_hc <- ses(books[, 'Hardcover'], h = 4)
autoplot(books) + autolayer(SES_pigs_pb, series="Paperback", PI=FALSE) + autolayer(SES_pigs_hc, series="Hardcover", PI=FALSE)
.
(ses_rmse_pb <- sqrt(mean(residuals(SES_pigs_pb)^2)))
## [1] 33.63769
(ses_rmse_hc <- sqrt(mean(residuals(SES_pigs_hc)^2)))
## [1] 31.93101
accuracy(SES_pigs_pb)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303 -0.2117522
accuracy(SES_pigs_hc)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887 -0.1417763
# holt method
holt_pb <- holt(books[, "Paperback"], h = 4)
holt_hc <- holt(books[, "Hardcover"], h = 4)
#plot
autoplot(books[, "Paperback"]) + autolayer(holt_pb)
autoplot(books[, "Hardcover"]) + autolayer(holt_hc)
(pb_rmse <- round(accuracy(holt_pb), 2))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -3.72 31.14 26.18 -5.51 15.58 0.66 -0.18
(hc_rmse <- round(accuracy(holt_hc), 2))
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.14 27.19 23.16 -2.11 12.16 0.69 -0.03
RMSE improved compared to SES model.
ans) Hardcover sales are the best when compared to Paperback. RMSE scores are lower with the holt’s method so this method is better.
s <- accuracy(holt_pb)[1,"RMSE"]
m_i_95_pb <- round(c(Lower = holt_pb$mean[1] - 1.96*s, Upper = holt_pb$mean[1] + 1.96*s), 2)
s <- accuracy(holt_hc)[1,"RMSE"]
m_i_95_hc <- round(c(Lower = holt_hc$mean[1] - 1.96*s, Upper = holt_hc$mean[1] + 1.96*s), 2)
s.i_95_pb <- round(c(SES_pigs_pb$lower[1,2], SES_pigs_pb$upper[1,2]), 2)
names(s.i_95_pb) <- c("Lower", "Upper")
s.i_95_hc <- round(c(SES_pigs_hc$lower[1,2], SES_pigs_hc$upper[1,2]), 2)
names(s.i_95_hc) <- c("Lower", "Upper")
h.i_95_pb <- round(c(holt_pb$lower[1,2], holt_pb$upper[1,2]), 2)
names(h.i_95_pb) <- c("Lower", "Upper")
h.i_95_hc <- round(c(holt_hc$lower[1,2], holt_hc$upper[1,2]), 2)
names(h.i_95_hc) <- c("Lower", "Upper")
df.i_95_R <- data.frame(Paperback=c(paste("(",m_i_95_pb["Lower"],",",m_i_95_pb["Upper"],")"),
paste("(",s.i_95_pb["Lower"],",",s.i_95_pb["Upper"],")"),
paste("(",h.i_95_pb["Lower"],",",h.i_95_pb["Upper"],")")),
Hardcover=c(paste("(",m_i_95_hc["Lower"],",",m_i_95_hc["Upper"],")"),
paste("(",s.i_95_hc["Lower"],",",s.i_95_hc["Upper"],")"),
paste("(",h.i_95_hc["Lower"],",",h.i_95_hc["Upper"],")")),
row.names = c("RMSE", "SES", "HOLT"))
df.i_95_R
## Paperback Hardcover
## RMSE ( 148.44 , 270.5 ) ( 196.87 , 303.47 )
## SES ( 138.87 , 275.35 ) ( 174.78 , 304.34 )
## HOLT ( 143.91 , 275.02 ) ( 192.92 , 307.43 )
Confidence intervals that were calculated by the ses and holt functions are just a little wider than the ones calculated manually.
fc1eggs <- holt(eggs, h=100)
autoplot(fc1eggs)
fc2eggs <- holt(eggs, damped=TRUE, phi=0.9, h=100)
autoplot(fc2eggs)
fc3eggs <- holt(eggs, lambda="auto", h=100)
autoplot(fc3eggs) + ggtitle("Forecasts from Holt's Method with Box-Cox transformation")
fc4eggs <- holt(eggs, damped=TRUE, phi=0.9, lambda="auto", h=100)
autoplot(fc4eggs) + ggtitle("Forecasts from Damped Holt's Method with Box-Cox transformation")
# RMSE
fc1_RMSE <- accuracy(fc1eggs)[,"RMSE"]
fc2_RMSE <- accuracy(fc2eggs)[,"RMSE"]
fc3_RMSE <- accuracy(fc3eggs)[,"RMSE"]
fc4_RMSE <- accuracy(fc4eggs)[,"RMSE"]
kable(data.frame(fc1_RMSE, fc2_RMSE, fc3_RMSE, fc4_RMSE)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
fc1_RMSE | fc2_RMSE | fc3_RMSE | fc4_RMSE |
---|---|---|---|
26.58219 | 26.62317 | 26.39376 | 26.54716 |
Finally Holt’s Method with Box-Cox transformation is the best method when compared to other methods since RMSE is lowest.
temp = tempfile(fileext = ".xlsx")
dataURL <- "https://otexts.com/fpp2/extrafiles/retail.xlsx"
download.file(dataURL, destfile=temp, mode='wb')
retaildata <- readxl::read_excel(temp, skip=1)
retail <- ts(retaildata[,"A3349335T"],
frequency=12, start=c(1982,4))
autoplot(retail)
>The forecasts generated by the method with the multiplicative seasonality display larger and increasing seasonal variation as the level of the forecasts increases compared to the forecasts generated by the method with additive seasonality.
f_retail_m <- hw(retail, seasonal="multiplicative", h=100)
autoplot(f_retail_m)
f_retail_d <- hw(retail, damped=TRUE, phi=0.98,
seasonal="multiplicative", h=100)
autoplot(f_retail_d)
m_RMSE <- accuracy(f_retail_m)[,"RMSE"]
d_RMSE <- accuracy(f_retail_d)[,"RMSE"]
kable(data.frame(m_RMSE, d_RMSE)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
m_RMSE | d_RMSE |
---|---|
25.20381 | 25.28197 |
Since RMSE is sligth lower for multiplication(non-damping) method, it is the best method.
checkresiduals(f_retail_m)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 250.64, df = 8, p-value < 2.2e-16
##
## Model df: 16. Total lags used: 24
The histogram looks to be nearly normal which would indicate that the residuals are not entirely appearing as white noise.
train <- window(retail, end=c(2010,12))
test <- window(retail, start=2011)
fc_m <- hw(train, seasonal="multiplicative", h=100)
kable(data.frame(accuracy(fc_m,test))) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | Theil.s.U | |
---|---|---|---|---|---|---|---|---|
Training set | 2.658058 | 25.00649 | 18.25149 | 0.2228273 | 1.965971 | 0.2958851 | -0.0067375 | NA |
Test set | -63.670299 | 77.04807 | 65.91346 | -2.9161237 | 3.023249 | 1.0685599 | 0.4473749 | 0.5550438 |
# plot
autoplot(train) +
autolayer(fc_m)
>Based on RMSE value, Holt Winter’s multiplicative seasonality model is much better.
stl.fc <- stlf(train, lambda = "auto")
stlf.fc.ac <- accuracy(stl.fc, test)
stlf.fc.ac
## ME RMSE MAE MPE MAPE MASE
## Training set -0.06389502 21.55525 16.24639 0.02475693 1.760560 0.2633793
## Test set -85.57853696 100.28507 86.26902 -3.97375614 4.005763 1.3985552
## ACF1 Theil's U
## Training set -0.01680539 NA
## Test set 0.61013941 0.8263593
The RMSE value of 21.55 from the Holt-Winters’ method is still a lot better than 100.28 from this STL approach.