library(ggplot2)
library(forecast)
library(fpp)

#explore the books dataset
str(books)
##  Time-Series [1:30, 1:2] from 1 to 30: 199 172 111 209 161 119 195 195 131 183 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:2] "Paperback" "Hardcover"
summary(books)
##    Paperback       Hardcover    
##  Min.   :111.0   Min.   :128.0  
##  1st Qu.:167.2   1st Qu.:170.5  
##  Median :189.0   Median :200.5  
##  Mean   :186.4   Mean   :198.8  
##  3rd Qu.:207.2   3rd Qu.:222.0  
##  Max.   :247.0   Max.   :283.0
head(books)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
##   Paperback Hardcover
## 1       199       139
## 2       172       128
## 3       111       172
## 4       209       139
## 5       161       191
## 6       119       168
frequency(books)
## [1] 1
# Break the time series into two
paper<-ts(books[,1])
hard<-ts(books[,2])

Plot the series and discuss the main features of the data.

#Plotting the time series for Paperbacks
autoplot(books) + xlab("Month")

## We cannot decompose this time series as it has no or less than 2 periods
#decompose (books)

Both series are trending upwards. It is not clear if seasonality is present from the graph, but given that this is daily sales data, it would seem logical that there be some seasonality from a weekday/weekend day sales perspective. We are not able to use decompose() to discover seasonality with the data as is, since the series has no or less than 2 periods.

An alternative approach to organizing the data for the purposes of using decompose()

We can try to change the frequency of the time series. We know the data is daily according to its documentation, so a ‘frequency=7’ may work to show seasonality at the weekly level. We’ll create a new time series with a weekly frequency and see what decompose() gives us.

books2 <- ts(books, frequency=7)
plot(decompose(books2[, 1]))

plot(decompose(books2[, 2]))

paper2<-ts(books2[,1])
hard2<-ts(books2[,2])

By using decomposition() on the data set, we see the apparent trend of both paperback and hardback books and the seasonality of the data is quite obvious also. As this is four full cycles, we will move forward with the data broken into 7 day periods.

#stepping with .05 for alpha
#Create two empty vectors
alphas <-numeric()
SSEs<-numeric()


for (i in seq(0,1,.05)) {
    alphas <-c(alphas, i)
    s<-ses(paper2, initial="simple",alpha=i,h=4)
    SSEs<-c(SSEs, s$model$SSE)
}

#combine then into a dataframe
paper2DF <- data.frame(alphas, SSEs)
paper2DF
##    alphas     SSEs
## 1    0.00 41270.00
## 2    0.05 39244.90
## 3    0.10 37785.20
## 4    0.15 36738.44
## 5    0.20 36329.34
## 6    0.25 36438.42
## 7    0.30 36930.75
## 8    0.35 37715.63
## 9    0.40 38738.40
## 10   0.45 39967.19
## 11   0.50 41383.70
## 12   0.55 42977.49
## 13   0.60 44742.62
## 14   0.65 46675.48
## 15   0.70 48773.56
## 16   0.75 51034.58
## 17   0.80 53456.14
## 18   0.85 56035.45
## 19   0.90 58769.45
## 20   0.95 61655.09
## 21   1.00 64690.00
#now plot them
plot(paper2DF)

The SSE drops until around \(\alpha=0.2\) and then starts to go back up gradually.

Now we will let ‘ses’ select the optimal value of \(\alpha\) and use this value to generate forecast for the next four time periods. We will then compare the optimally found value of \(\alpha\) to our visual method above.

optAlphaSimplepaper2<-ses(paper2, initial="simple", h=4)
summary(optAlphaSimplepaper2)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = paper2, h = 4, initial = "simple") 
## 
##   Smoothing parameters:
##     alpha = 0.2125 
## 
##   Initial states:
##     l = 199 
## 
##   sigma:  34.7918
## Error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 1.749509 34.79175 28.64424 -2.770157 16.56938 0.7223331
##                    ACF1
## Training set -0.1268119
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       210.1537 165.5663 254.7411 141.9631 278.3443
## 32       210.1537 164.5706 255.7368 140.4404 279.8671
## 33       210.1537 163.5962 256.7112 138.9501 281.3573
## 34       210.1537 162.6418 257.6657 137.4905 282.8170

So with the initial=‘simple’ value, we have an alpha=0.2125.

We will repeat but with ’initial=“optimal”.

optAlphaOptpaper2<-ses(paper2, initial="optimal", h=4)
summary(optAlphaOptpaper2)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = paper2, h = 4, initial = "optimal") 
## 
##   Smoothing parameters:
##     alpha = 0.1685 
## 
##   Initial states:
##     l = 170.8257 
## 
##   sigma:  33.6377
## 
##      AIC     AICc      BIC 
## 318.9747 319.8978 323.1783 
## 
## Error measures:
##                    ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 7.176212 33.63769 27.8431 0.4737524 15.57782 0.7021303
##                    ACF1
## Training set -0.2117579
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.1098 164.0013 250.2182 141.1811 273.0384
## 32       207.1098 163.3934 250.8261 140.2513 273.9682
## 33       207.1098 162.7937 251.4258 139.3342 274.8853
## 34       207.1098 162.2021 252.0174 138.4294 275.7901

The value for alpha with initial=‘optimal’ is alpha=.1685

We’ll repeat the same steps with the ‘hardback’ series

We use simple exponential smoothing with the ‘ses’ function (setting ‘initial=“simple”’) and explore different values of \(\alpha\) for the hardback series. We then record the within-sample SSE for the one-step forecasts, plot SSE against \(\alpha\) and then find the value of \(\alpha\) that works best.

#stepping with .05 for alpha
#Create two empty vectors
alphas <-numeric()
SSEs<-numeric()


for (i in seq(0,1,.05)) {
    alphas <-c(alphas, i)
    s<-ses(hard2, initial="simple",alpha=i,h=4)
    SSEs<-c(SSEs, s$model$SSE)
}

#combine then into a dataframe
hard2DF <- data.frame(alphas, SSEs)
hard2DF
##    alphas      SSEs
## 1    0.00 154503.00
## 2    0.05  70483.46
## 3    0.10  45714.82
## 4    0.15  36814.18
## 5    0.20  33148.16
## 6    0.25  31553.85
## 7    0.30  30909.69
## 8    0.35  30758.47
## 9    0.40  30895.27
## 10   0.45  31224.35
## 11   0.50  31702.60
## 12   0.55  32314.49
## 13   0.60  33059.93
## 14   0.65  33948.03
## 15   0.70  34993.95
## 16   0.75  36217.34
## 17   0.80  37641.79
## 18   0.85  39295.05
## 19   0.90  41209.53
## 20   0.95  43423.39
## 21   1.00  45982.00
#now plot them
plot(hard2DF)

The SSE drops dramatically until around \(\alpha=0.25\), stays flat until \(\alpha=0.40\) and then slowly starts to go back up. From observation of the data.frame values, the lowest value is at \(\alpha=0.35\)

Now we will let ‘ses’ select the optimal value of \(\alpha\) and use this value to generate forecast for the next four time periods. We will then compare the optimally found value of \(\alpha\) to our visual method above.

optAlphaSimplehard2<-ses(hard2, initial="simple", h=4)
summary(optAlphaSimplehard2)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = hard2, h = 4, initial = "simple") 
## 
##   Smoothing parameters:
##     alpha = 0.3473 
## 
##   Initial states:
##     l = 139 
## 
##   sigma:  32.0198
## Error measures:
##                   ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 9.72952 32.01982 26.34467 3.104211 13.05063 0.7860035
##                    ACF1
## Training set -0.1629042
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       240.3808 199.3457 281.4158 177.6231 303.1385
## 32       240.3808 196.9410 283.8206 173.9453 306.8162
## 33       240.3808 194.6625 286.0990 170.4608 310.3008
## 34       240.3808 192.4924 288.2691 167.1418 313.6197

So with the initial=‘simple’ value, we have an alpha=0.3473.

We will repeat but with ’initial=“optimal”.

optAlphaOpthard2<-ses(hard2, initial="optimal", h=4)
summary(optAlphaOpthard2)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = hard2, h = 4, initial = "optimal") 
## 
##   Smoothing parameters:
##     alpha = 0.3283 
## 
##   Initial states:
##     l = 149.2836 
## 
##   sigma:  31.931
## 
##      AIC     AICc      BIC 
## 315.8506 316.7737 320.0542 
## 
## Error measures:
##                    ME     RMSE     MAE      MPE     MAPE      MASE
## Training set 9.166918 31.93101 26.7731 2.636328 13.39479 0.7987858
##                    ACF1
## Training set -0.1417817
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       239.5602 198.6390 280.4815 176.9766 302.1439
## 32       239.5602 196.4905 282.6299 173.6908 305.4297
## 33       239.5602 194.4443 284.6762 170.5613 308.5591
## 34       239.5602 192.4869 286.6336 167.5677 311.5527

The value for alpha with initial=‘optimal’ is alpha=.3283. We also see that the MAPE went slightly down and the actual point forecast went down slightly also.

Now, we will apply Holt’s linear method to the ‘paperback’ and ‘hardback’ series and compute four day forecasts in each case.

First, the parameter ‘initial=“simple”’ is set and observed.

#paper2
paper2H<-holt(paper2, initial="simple",h=4)
summary(paper2H)
## 
## Forecast method: Holt's method
## 
## Model Information:
## Holt's method 
## 
## Call:
##  holt(y = paper2, h = 4, initial = "simple") 
## 
##   Smoothing parameters:
##     alpha = 0.2984 
##     beta  = 0.4984 
## 
##   Initial states:
##     l = 199 
##     b = -27 
## 
##   sigma:  39.5463
## Error measures:
##                    ME     RMSE     MAE      MPE     MAPE      MASE
## Training set 7.769844 39.54634 33.5377 1.633306 18.19621 0.8457332
##                    ACF1
## Training set -0.1088681
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
## 31       222.0201 171.3394 272.7007 144.51068 299.5295
## 32       229.6904 164.8872 294.4935 130.58245 328.7983
## 33       237.3606 145.1175 329.6038  96.28696 378.4343
## 34       245.0309 115.5211 374.5407  46.96280 443.0991

We see the point forecasts are going up consistently for the 4 forecasted days. We will now delete the ‘initial’ parameter to observe the outcome.

#Paper2
paper2H<-holt(paper2,h=4)
summary(paper2H)
## 
## Forecast method: Holt's method
## 
## Model Information:
## Holt's method 
## 
## Call:
##  holt(y = paper2, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 174.6003 
##     b = 1.0606 
## 
##   sigma:  31.6618
## 
##      AIC     AICc      BIC 
## 319.3427 321.8427 326.3486 
## 
## Error measures:
##                     ME     RMSE     MAE       MPE     MAPE      MASE
## Training set -4.458116 31.66184 26.4503 -6.029264 15.84609 0.6670076
##                    ACF1
## Training set -0.1444935
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.0380 166.4617 247.6143 144.9820 269.0941
## 32       208.0853 167.5090 248.6615 146.0292 270.1413
## 33       209.1325 168.5562 249.7088 147.0764 271.1886
## 34       210.1797 169.6034 250.7560 148.1236 272.2358

Here, the alpha goes to .0001. This looks better from the perspective of a lower MAPE. All four point forecasts are still trending upwards but are shifted lower than the previous model with initial=“simple”.

#hardback
hard2H<-holt(hard2,h=4)
summary(hard2H)
## 
## Forecast method: Holt's method
## 
## Model Information:
## Holt's method 
## 
## Call:
##  holt(y = hard2, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 154.6543 
##     b = 2.9961 
## 
##   sigma:  27.4359
## 
##     AIC    AICc     BIC 
## 310.747 313.247 317.753 
## 
## Error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -2.169026 27.43588 23.34589 -3.402396 12.47686 0.6965336
##                     ACF1
## Training set -0.02685637
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       247.3504 212.1899 282.5109 193.5770 301.1237
## 32       250.3400 215.1795 285.5005 196.5667 304.1133
## 33       253.3296 218.1691 288.4901 199.5563 307.1030
## 34       256.3192 221.1587 291.4797 202.5459 310.0926

We now compare the SSE measures of Holt’s method for the two series to those of simple exponential smoothing models performed above.

# paper2 Holt
sum(residuals(paper2H)^2)
## [1] 30074.17
#paper2 ses simple
optAlphaSimplepaper2$model$SSE
## [1] 36313.98
#Check it for fun
sum(residuals(optAlphaSimplepaper2)^2)
## [1] 36313.98
#paper2 ses optimal
sum(residuals(optAlphaOptpaper2)^2)
## [1] 33944.82

Judging from the observed SSE measures, Holt’s method outperformed the simple exponential smoothing methods.

# hard2 Holt
sum(residuals(hard2H)^2)
## [1] 22581.83
#hard2 ses simple
optAlphaSimplehard2$model$SSE
## [1] 30758.07
#Check it for fun
sum(residuals(optAlphaSimplehard2)^2)
## [1] 30758.07
#Ads ses optimal
sum(residuals(optAlphaOpthard2)^2)
## [1] 30587.69

In the case of the hardback book time series, we see that the Holt’s linear model also here has the lowest SSE.

We compare the forecasts for the two series using both methods.

First, we plot the different forecasts for the paper2 time series.

autoplot(paper2) + xlab("Month") + autolayer(optAlphaSimplepaper2,PI=FALSE,series="SES simple") + autolayer(optAlphaOptpaper2, PI=FALSE,series="SES optimal") + autolayer(paper2H, series="Holt", PI=FALSE)

Let’s now check the accuracy for each forecast model for the paperback series.

#SES simple
accuracy(optAlphaSimplepaper2$fitted, paper2)
##                ME     RMSE      MAE       MPE     MAPE       ACF1
## Test set 1.749509 34.79175 28.64424 -2.770157 16.56938 -0.1268119
##          Theil's U
## Test set 0.6807692
#SES Optimal
accuracy(optAlphaOptpaper2$fitted,paper2)
##                ME     RMSE     MAE       MPE     MAPE       ACF1 Theil's U
## Test set 7.176212 33.63769 27.8431 0.4737524 15.57782 -0.2117579 0.6685721
#Holt Linear Trend
accuracy(paper2H$fitted,paper2)
##                 ME     RMSE     MAE       MPE     MAPE       ACF1
## Test set -4.458116 31.66184 26.4503 -6.029264 15.84609 -0.1444935
##          Theil's U
## Test set 0.6081267

The RMSE and MAE were lowest with the Holt’s model and the MAPE was lowest with the simple exponential smoothing model that used the parameter ‘initial=“optimal”’.

Now we look at hardback, by first plotting them.

autoplot(hard2) + xlab("Month") + autolayer(optAlphaSimplehard2,PI=FALSE,series="SES simple") + autolayer(optAlphaOpthard2, PI=FALSE,series="SES optimal") + autolayer(hard2H, series="Holt", PI=FALSE)

#SES simple
accuracy(optAlphaSimplehard2$fitted,sales)
##                 ME     RMSE      MAE       MPE     MAPE       ACF1
## Test set -110.1732 110.5679 110.1732 -298.1836 298.1836 -0.3799344
##          Theil's U
## Test set   5.28753
#SES Optimal
accuracy(optAlphaOpthard2$fitted,sales)
##                 ME     RMSE      MAE       MPE     MAPE       ACF1
## Test set -112.8813 113.2893 112.8813 -307.6344 307.6344 -0.3605628
##          Theil's U
## Test set  5.432897
#Holt Linear Trend
accuracy(hard2H$fitted,sales)
##                 ME    RMSE      MAE    MPE  MAPE       ACF1 Theil's U
## Test set -129.1056 129.424 129.1056 -350.5 350.5 -0.6031518  6.342066

We see that with the hardback data, the RMSE, MAE and MAPE were all lowest with the simple exponential smoothing model and in all three cases, Holt’s linear trend model had the highest error value or was the least accurate.

It would be interesting to investigate further why Holt’s method switched almost completely with the Simple Exponential Smoothing method for most and least accurate predictive models between two similar variables (paperback and hardback) from the same dataset.