This is a Retail sales dataset downloaded from Datamarket.com. I processed the data and performed the forecasting analysis.
library(rdatamarket)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# fetch the data from data market
dminit("404ca002d8964fd682c3eb39a09dace1")
data <- dmlist("http://datamarket.com/data/set/3oei/retailers-sales#!ds=3oei")
# lets explore the data(first 5 rows)
head(data)
## Month Value
## 1 1994-08 182445
## 2 1994-09 175128
## 3 1994-10 178642
## 4 1994-11 184272
## 5 1994-12 221549
## 6 1995-01 158004
names(data)
## [1] "Month" "Value"
tsdata<-data$Value
# convert to time series format
tseriesdata<-ts(tsdata,start=c(2006,10),end=c(2013,10),frequency=12)
# plot time series
plot.ts(tseriesdata)
Though it does not look like having a multiplicative trend , yet to check that we will go for log transformation . this will convert it to additive trend if multiplicative trend exists.
r newts<-log(tseriesdata) plot(newts)
As expected, it does not have any multiplicative trend. So we will perform seasonal decomposition to handle additive trend, seasonlity , and any irregular component if present.
# Seasonal decompostion
fit <- stl(tseriesdata, s.window="period")
plot(fit)
# Lets check the seasonality ,if present
monthplot(tseriesdata)
library(forecast)
## Loading required package: timeDate
## This is forecast 5.5
seasonplot(tseriesdata)
fit<-ets(tseriesdata)
plot(fit)
acf(tseriesdata)
pacf(tseriesdata)
# simple exponential - models level
fits <- HoltWinters(tseriesdata, beta=FALSE, gamma=FALSE)
accuracy(fits)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5252 24870 17948 2.163 7.195 0.9418 -0.02458
# double exponential - models level and trend
fitd <- HoltWinters(tseriesdata, gamma=FALSE)
accuracy(fitd)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 84837 149074 94682 1.831 7.397 4.969 0.9491
# triple exponential - models level, trend, and seasonal components
fite <- HoltWinters(tseriesdata)
accuracy(fite)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 130882 186904 136227 1.512 5.502 7.149 0.9632
forecast1<-forecast(fite,5)
plot(forecast1)
forecast2<-forecast(fitd,5)
plot(forecast2)
acf(forecast1$residuals,lag.max=20)
acf(forecast2$residuals,lag.max=20)
Box.test(forecast1$residuals, lag=20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: forecast1$residuals
## X-squared = 15.48, df = 20, p-value = 0.7482
Box.test(forecast2$residuals, lag=20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: forecast2$residuals
## X-squared = 23.24, df = 20, p-value = 0.2771
# needs smoothing
tsforecast<-HoltWinters(log(tseriesdata))
plot(tsforecast)
tsforecast
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = log(tseriesdata))
##
## Smoothing parameters:
## alpha: 0.3236
## beta : 0
## gamma: 0.7722
##
## Coefficients:
## [,1]
## a 12.706868
## b 0.004957
## s1 0.153936
## s2 -0.003740
## s3 -0.016749
## s4 0.089852
## s5 0.046179
## s6 0.137895
## s7 -0.001986
## s8 -0.041266
## s9 0.026064
## s10 -0.035710
## s11 -0.001709
## s12 0.043178
tsforecast$SSE
## [1] 0.5707
ftAsmooth<-forecast.HoltWinters(tsforecast,h=48)
plot.forecast(ftAsmooth)
ftAsmooth
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Nov 2013 12.87 12.75 12.98 12.69 13.04
## Dec 2013 12.71 12.59 12.83 12.53 12.90
## Jan 2014 12.70 12.58 12.83 12.51 12.90
## Feb 2014 12.82 12.69 12.95 12.62 13.02
## Mar 2014 12.78 12.64 12.91 12.57 12.98
## Apr 2014 12.87 12.73 13.01 12.66 13.09
## May 2014 12.74 12.60 12.88 12.52 12.96
## Jun 2014 12.71 12.56 12.85 12.48 12.93
## Jul 2014 12.78 12.62 12.93 12.54 13.01
## Aug 2014 12.72 12.56 12.88 12.48 12.96
## Sep 2014 12.76 12.60 12.92 12.51 13.01
## Oct 2014 12.81 12.64 12.98 12.56 13.06
## Nov 2014 12.93 12.73 13.12 12.63 13.22
## Dec 2014 12.77 12.58 12.97 12.47 13.07
## Jan 2015 12.76 12.57 12.96 12.46 13.07
## Feb 2015 12.88 12.67 13.08 12.57 13.19
## Mar 2015 12.84 12.63 13.04 12.52 13.15
## Apr 2015 12.93 12.73 13.14 12.61 13.25
## May 2015 12.80 12.59 13.01 12.48 13.12
## Jun 2015 12.76 12.55 12.98 12.44 13.09
## Jul 2015 12.84 12.62 13.06 12.50 13.17
## Aug 2015 12.78 12.56 13.00 12.44 13.12
## Sep 2015 12.82 12.60 13.04 12.48 13.16
## Oct 2015 12.87 12.64 13.10 12.52 13.22
## Nov 2015 12.98 12.74 13.23 12.61 13.36
## Dec 2015 12.83 12.58 13.08 12.45 13.21
## Jan 2016 12.82 12.57 13.08 12.44 13.21
## Feb 2016 12.94 12.68 13.19 12.55 13.32
## Mar 2016 12.90 12.64 13.15 12.50 13.29
## Apr 2016 12.99 12.73 13.25 12.60 13.39
## May 2016 12.86 12.60 13.12 12.46 13.26
## Jun 2016 12.82 12.56 13.09 12.42 13.23
## Jul 2016 12.90 12.63 13.16 12.49 13.31
## Aug 2016 12.84 12.57 13.11 12.43 13.25
## Sep 2016 12.88 12.61 13.15 12.46 13.30
## Oct 2016 12.93 12.65 13.20 12.51 13.35
## Nov 2016 13.04 12.75 13.34 12.60 13.49
## Dec 2016 12.89 12.60 13.18 12.44 13.34
## Jan 2017 12.88 12.59 13.18 12.43 13.34
## Feb 2017 13.00 12.70 13.29 12.54 13.45
## Mar 2017 12.96 12.66 13.26 12.50 13.42
## Apr 2017 13.05 12.75 13.36 12.59 13.52
## May 2017 12.92 12.61 13.22 12.45 13.38
## Jun 2017 12.88 12.58 13.19 12.41 13.35
## Jul 2017 12.96 12.65 13.27 12.48 13.43
## Aug 2017 12.90 12.59 13.21 12.42 13.38
## Sep 2017 12.94 12.62 13.25 12.46 13.42
## Oct 2017 12.99 12.67 13.30 12.51 13.47
Box.test(ftAsmooth$residuals, lag=20, type="Ljung-Box")
##
## Box-Ljung test
##
## data: ftAsmooth$residuals
## X-squared = 18.67, df = 20, p-value = 0.5434
plot.ts(ftAsmooth$residuals)
plotForecastErrors <- function(forecasterrors)
{
# make a histogram of the forecast errors:
mybinsize <- IQR(forecasterrors)/4
mysd <- sd(forecasterrors)
mymin <- min(forecasterrors) - mysd*5
mymax <- max(forecasterrors) + mysd*3
# generate normally distributed data with mean 0 and standard deviation mysd
mynorm <- rnorm(10000, mean=0, sd=mysd)
mymin2 <- min(mynorm)
mymax2 <- max(mynorm)
if (mymin2 < mymin) { mymin <- mymin2 }
if (mymax2 > mymax) { mymax <- mymax2 }
# make a red histogram of the forecast errors, with the normally distributed data overlaid:
mybins <- seq(mymin, mymax, mybinsize)
hist(forecasterrors, col="red", freq=FALSE, breaks=mybins)
# freq=FALSE ensures the area under the histogram = 1
# generate normally distributed data with mean 0 and standard deviation mysd
myhist <- hist(mynorm, plot=FALSE, breaks=mybins)
# plot the normal curve as a blue line on top of the histogram of forecast errors:
points(myhist$mids, myhist$density, type="l", col="blue", lwd=2)
}
plotForecastErrors(ftAsmooth$residuals)
From the time plot, it appears plausible that the forecast errors have constant variance over time. From the histogram of forecast errors, it seems plausible that the forecast errors are normally distributed with mean zero.
Thus,there is little evidence of autocorrelation at lags 1-20 for the forecast errors, and the forecast errors appear to be normally distributed with mean zero and constant variance over time. This suggests that Holt-Winters exponential smoothing provides an adequate predictive model of the log of sales, which probably cannot be improved upon. Furthermore, the assumptions upon which the prediction intervals were based are probably valid.