library(ggplot2)
library(forecast)
library(fpp)
#explore the books dataset
str(books)
## Time-Series [1:30, 1:2] from 1 to 30: 199 172 111 209 161 119 195 195 131 183 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "Paperback" "Hardcover"
summary(books)
## Paperback Hardcover
## Min. :111.0 Min. :128.0
## 1st Qu.:167.2 1st Qu.:170.5
## Median :189.0 Median :200.5
## Mean :186.4 Mean :198.8
## 3rd Qu.:207.2 3rd Qu.:222.0
## Max. :247.0 Max. :283.0
head(books)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## Paperback Hardcover
## 1 199 139
## 2 172 128
## 3 111 172
## 4 209 139
## 5 161 191
## 6 119 168
frequency(books)
## [1] 1
# Break the time series into two
paper<-ts(books[,1])
hard<-ts(books[,2])
#Plotting the time series for Paperbacks
autoplot(books) + xlab("Month")
## We cannot decompose this time series as it has no or less than 2 periods
#decompose (books)
Both series are trending upwards. It is not clear if seasonality is present from the graph, but given that this is daily sales data, it would seem logical that there be some seasonality from a weekday/weekend day sales perspective. We are not able to use decompose() to discover seasonality with the data as is, since the series has no or less than 2 periods.
We can try to change the frequency of the time series. We know the data is daily according to its documentation, so a ‘frequency=7’ may work to show seasonality at the weekly level. We’ll create a new time series with a weekly frequency and see what decompose() gives us.
books2 <- ts(books, frequency=7)
plot(decompose(books2[, 1]))
plot(decompose(books2[, 2]))
paper2<-ts(books2[,1])
hard2<-ts(books2[,2])
By using decomposition() on the data set, we see the apparent trend of both paperback and hardback books and the seasonality of the data is quite obvious also. As this is four full cycles, we will move forward with the data broken into 7 day periods.
#stepping with .05 for alpha
#Create two empty vectors
alphas <-numeric()
SSEs<-numeric()
for (i in seq(0,1,.05)) {
alphas <-c(alphas, i)
s<-ses(paper2, initial="simple",alpha=i,h=4)
SSEs<-c(SSEs, s$model$SSE)
}
#combine then into a dataframe
paper2DF <- data.frame(alphas, SSEs)
paper2DF
## alphas SSEs
## 1 0.00 41270.00
## 2 0.05 39244.90
## 3 0.10 37785.20
## 4 0.15 36738.44
## 5 0.20 36329.34
## 6 0.25 36438.42
## 7 0.30 36930.75
## 8 0.35 37715.63
## 9 0.40 38738.40
## 10 0.45 39967.19
## 11 0.50 41383.70
## 12 0.55 42977.49
## 13 0.60 44742.62
## 14 0.65 46675.48
## 15 0.70 48773.56
## 16 0.75 51034.58
## 17 0.80 53456.14
## 18 0.85 56035.45
## 19 0.90 58769.45
## 20 0.95 61655.09
## 21 1.00 64690.00
#now plot them
plot(paper2DF)
The SSE drops until around \(\alpha=0.2\) and then starts to go back up gradually.
optAlphaSimplepaper2<-ses(paper2, initial="simple", h=4)
summary(optAlphaSimplepaper2)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = paper2, h = 4, initial = "simple")
##
## Smoothing parameters:
## alpha = 0.2125
##
## Initial states:
## l = 199
##
## sigma: 34.7918
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.749509 34.79175 28.64424 -2.770157 16.56938 0.7223331
## ACF1
## Training set -0.1268119
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 210.1537 165.5663 254.7411 141.9631 278.3443
## 32 210.1537 164.5706 255.7368 140.4404 279.8671
## 33 210.1537 163.5962 256.7112 138.9501 281.3573
## 34 210.1537 162.6418 257.6657 137.4905 282.8170
optAlphaOptpaper2<-ses(paper2, initial="optimal", h=4)
summary(optAlphaOptpaper2)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = paper2, h = 4, initial = "optimal")
##
## Smoothing parameters:
## alpha = 0.1685
##
## Initial states:
## l = 170.8257
##
## sigma: 33.6377
##
## AIC AICc BIC
## 318.9747 319.8978 323.1783
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 7.176212 33.63769 27.8431 0.4737524 15.57782 0.7021303
## ACF1
## Training set -0.2117579
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1098 164.0013 250.2182 141.1811 273.0384
## 32 207.1098 163.3934 250.8261 140.2513 273.9682
## 33 207.1098 162.7937 251.4258 139.3342 274.8853
## 34 207.1098 162.2021 252.0174 138.4294 275.7901
#stepping with .05 for alpha
#Create two empty vectors
alphas <-numeric()
SSEs<-numeric()
for (i in seq(0,1,.05)) {
alphas <-c(alphas, i)
s<-ses(hard2, initial="simple",alpha=i,h=4)
SSEs<-c(SSEs, s$model$SSE)
}
#combine then into a dataframe
hard2DF <- data.frame(alphas, SSEs)
hard2DF
## alphas SSEs
## 1 0.00 154503.00
## 2 0.05 70483.46
## 3 0.10 45714.82
## 4 0.15 36814.18
## 5 0.20 33148.16
## 6 0.25 31553.85
## 7 0.30 30909.69
## 8 0.35 30758.47
## 9 0.40 30895.27
## 10 0.45 31224.35
## 11 0.50 31702.60
## 12 0.55 32314.49
## 13 0.60 33059.93
## 14 0.65 33948.03
## 15 0.70 34993.95
## 16 0.75 36217.34
## 17 0.80 37641.79
## 18 0.85 39295.05
## 19 0.90 41209.53
## 20 0.95 43423.39
## 21 1.00 45982.00
#now plot them
plot(hard2DF)
The SSE drops dramatically until around \(\alpha=0.25\), stays flat until \(\alpha=0.40\) and then slowly starts to go back up. From observation of the data.frame values, the lowest value is at \(\alpha=0.35\)
optAlphaSimplehard2<-ses(hard2, initial="simple", h=4)
summary(optAlphaSimplehard2)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = hard2, h = 4, initial = "simple")
##
## Smoothing parameters:
## alpha = 0.3473
##
## Initial states:
## l = 139
##
## sigma: 32.0198
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 9.72952 32.01982 26.34467 3.104211 13.05063 0.7860035
## ACF1
## Training set -0.1629042
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 240.3808 199.3457 281.4158 177.6231 303.1385
## 32 240.3808 196.9410 283.8206 173.9453 306.8162
## 33 240.3808 194.6625 286.0990 170.4608 310.3008
## 34 240.3808 192.4924 288.2691 167.1418 313.6197
optAlphaOpthard2<-ses(hard2, initial="optimal", h=4)
summary(optAlphaOpthard2)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = hard2, h = 4, initial = "optimal")
##
## Smoothing parameters:
## alpha = 0.3283
##
## Initial states:
## l = 149.2836
##
## sigma: 31.931
##
## AIC AICc BIC
## 315.8506 316.7737 320.0542
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 9.166918 31.93101 26.7731 2.636328 13.39479 0.7987858
## ACF1
## Training set -0.1417817
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 239.5602 198.6390 280.4815 176.9766 302.1439
## 32 239.5602 196.4905 282.6299 173.6908 305.4297
## 33 239.5602 194.4443 284.6762 170.5613 308.5591
## 34 239.5602 192.4869 286.6336 167.5677 311.5527
The value for alpha with initial=‘optimal’ is alpha=.3283. We also see that the MAPE went slightly down and the actual point forecast went down slightly also.
First, the parameter ‘initial=“simple”’ is set and observed.
#paper2
paper2H<-holt(paper2, initial="simple",h=4)
summary(paper2H)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = paper2, h = 4, initial = "simple")
##
## Smoothing parameters:
## alpha = 0.2984
## beta = 0.4984
##
## Initial states:
## l = 199
## b = -27
##
## sigma: 39.5463
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 7.769844 39.54634 33.5377 1.633306 18.19621 0.8457332
## ACF1
## Training set -0.1088681
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 222.0201 171.3394 272.7007 144.51068 299.5295
## 32 229.6904 164.8872 294.4935 130.58245 328.7983
## 33 237.3606 145.1175 329.6038 96.28696 378.4343
## 34 245.0309 115.5211 374.5407 46.96280 443.0991
We see the point forecasts are going up consistently for the 4 forecasted days. We will now delete the ‘initial’ parameter to observe the outcome.
#Paper2
paper2H<-holt(paper2,h=4)
summary(paper2H)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = paper2, h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 174.6003
## b = 1.0606
##
## sigma: 31.6618
##
## AIC AICc BIC
## 319.3427 321.8427 326.3486
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -4.458116 31.66184 26.4503 -6.029264 15.84609 0.6670076
## ACF1
## Training set -0.1444935
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.0380 166.4617 247.6143 144.9820 269.0941
## 32 208.0853 167.5090 248.6615 146.0292 270.1413
## 33 209.1325 168.5562 249.7088 147.0764 271.1886
## 34 210.1797 169.6034 250.7560 148.1236 272.2358
Here, the alpha goes to .0001. This looks better from the perspective of a lower MAPE. All four point forecasts are still trending upwards but are shifted lower than the previous model with initial=“simple”.
#hardback
hard2H<-holt(hard2,h=4)
summary(hard2H)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = hard2, h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 154.6543
## b = 2.9961
##
## sigma: 27.4359
##
## AIC AICc BIC
## 310.747 313.247 317.753
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.169026 27.43588 23.34589 -3.402396 12.47686 0.6965336
## ACF1
## Training set -0.02685637
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 247.3504 212.1899 282.5109 193.5770 301.1237
## 32 250.3400 215.1795 285.5005 196.5667 304.1133
## 33 253.3296 218.1691 288.4901 199.5563 307.1030
## 34 256.3192 221.1587 291.4797 202.5459 310.0926
# paper2 Holt
sum(residuals(paper2H)^2)
## [1] 30074.17
#paper2 ses simple
optAlphaSimplepaper2$model$SSE
## [1] 36313.98
#Check it for fun
sum(residuals(optAlphaSimplepaper2)^2)
## [1] 36313.98
#paper2 ses optimal
sum(residuals(optAlphaOptpaper2)^2)
## [1] 33944.82
Judging from the observed SSE measures, Holt’s method outperformed the simple exponential smoothing methods.
# hard2 Holt
sum(residuals(hard2H)^2)
## [1] 22581.83
#hard2 ses simple
optAlphaSimplehard2$model$SSE
## [1] 30758.07
#Check it for fun
sum(residuals(optAlphaSimplehard2)^2)
## [1] 30758.07
#Ads ses optimal
sum(residuals(optAlphaOpthard2)^2)
## [1] 30587.69
In the case of the hardback book time series, we see that the Holt’s linear model also here has the lowest SSE.
First, we plot the different forecasts for the paper2 time series.
autoplot(paper2) + xlab("Month") + autolayer(optAlphaSimplepaper2,PI=FALSE,series="SES simple") + autolayer(optAlphaOptpaper2, PI=FALSE,series="SES optimal") + autolayer(paper2H, series="Holt", PI=FALSE)
Let’s now check the accuracy for each forecast model for the paperback series.
#SES simple
accuracy(optAlphaSimplepaper2$fitted, paper2)
## ME RMSE MAE MPE MAPE ACF1
## Test set 1.749509 34.79175 28.64424 -2.770157 16.56938 -0.1268119
## Theil's U
## Test set 0.6807692
#SES Optimal
accuracy(optAlphaOptpaper2$fitted,paper2)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 7.176212 33.63769 27.8431 0.4737524 15.57782 -0.2117579 0.6685721
#Holt Linear Trend
accuracy(paper2H$fitted,paper2)
## ME RMSE MAE MPE MAPE ACF1
## Test set -4.458116 31.66184 26.4503 -6.029264 15.84609 -0.1444935
## Theil's U
## Test set 0.6081267
The RMSE and MAE were lowest with the Holt’s model and the MAPE was lowest with the simple exponential smoothing model that used the parameter ‘initial=“optimal”’.
Now we look at hardback, by first plotting them.
autoplot(hard2) + xlab("Month") + autolayer(optAlphaSimplehard2,PI=FALSE,series="SES simple") + autolayer(optAlphaOpthard2, PI=FALSE,series="SES optimal") + autolayer(hard2H, series="Holt", PI=FALSE)
#SES simple
accuracy(optAlphaSimplehard2$fitted,sales)
## ME RMSE MAE MPE MAPE ACF1
## Test set -110.1732 110.5679 110.1732 -298.1836 298.1836 -0.3799344
## Theil's U
## Test set 5.28753
#SES Optimal
accuracy(optAlphaOpthard2$fitted,sales)
## ME RMSE MAE MPE MAPE ACF1
## Test set -112.8813 113.2893 112.8813 -307.6344 307.6344 -0.3605628
## Theil's U
## Test set 5.432897
#Holt Linear Trend
accuracy(hard2H$fitted,sales)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -129.1056 129.424 129.1056 -350.5 350.5 -0.6031518 6.342066
We see that with the hardback data, the RMSE, MAE and MAPE were all lowest with the simple exponential smoothing model and in all three cases, Holt’s linear trend model had the highest error value or was the least accurate.
It would be interesting to investigate further why Holt’s method switched almost completely with the Simple Exponential Smoothing method for most and least accurate predictive models between two similar variables (paperback and hardback) from the same dataset.