In moving averages, shortening windows makes recent results influential in forming the fitted line. In order to increase the weight of recent term(s) using exponential smoothing, the constant approaches 1.
On the other hand, in order to weigh simple exponential smoothing terms equivalent to moving average terms, the constant would be zero.
Moving average of raw series - is not suitable because the series needs to be de-trended and de-seasonalized
Moving average of de-seasonalized series - is not suitable because the series needs to be de-trended
Simple exponential smoothing of the raw series - is not suitable because the series needs to be de-trended and de-seasonalized
Double exponential smoothing of the raw series - is not suitable because the series needs to be de-trended and de-seasonalized
#install.packages("forecast")
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
#setwd("~/wd678/Unit 4")
data <- read.csv("DepartmentStoreSales.csv", stringsAsFactors=FALSE, header=TRUE)
storeSales.ts <- ts(data$Sales, start=c(2001,1), end = c(2006,4), freq=4)
plot(storeSales.ts, bty="l")
nValid <- 4
nTrain <- length(storeSales.ts) - nValid
train.ts <- window(storeSales.ts, start = c(2001,1), end = c(2001, nTrain))
valid.ts <- window(storeSales.ts, start = c(2001, nTrain + 1), end = c(2001, nTrain + nValid))
hwin <- ets(train.ts, model = "ZZZ", restrict = FALSE, alpha=0.2, beta=0.15, gamma=0.05)
hwin.pred <- forecast(hwin, h = nValid, level = 0)
plot(c(48000, 105000), ylab = "Sales", xlab = "Time", bty = "l", xaxt = "n", xlim = c(2001, 2007), main = "", lty = 2)
axis(1, at = seq(2001,2007,1), labels = format(seq(2001,2007,1)))
lines(hwin.pred$fitted, lwd = 1, col = "red")
lines(hwin.pred$mean, col="green", lwd=1, lty=2)
hwin.pred
## Point Forecast Lo 0 Hi 0
## 2006 Q1 62115.16 62115.16 62115.16
## 2006 Q2 65371.60 65371.60 65371.60
## 2006 Q3 77076.87 77076.87 77076.87
## 2006 Q4 102937.73 102937.73 102937.73
MAPE is 2.56
accuracy(hwin.pred, test=1:2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -30.23443 1274.065 1273.706 -0.08195804 2.561611 NaN -0.5
According to the metrics, the model is suitable for forecasting quarters 21 and 22. Note that in the fourth year, the seasonality changes from additive to multiplicative. Seasonality is no longer eliminated (extracted) from the residuals.
Set up the plot to have 3 rows and 2 columns
par(mfrow = c(3,2))
#Plot the originial time series
plot(train.ts, bty="l", main="Original Series")
#Plot the Lag-4 difference
plot(diff(train.ts, lag=4), ylab="Lag-4", xlab="Year", bty="l", main="Lag-4 Difference")
#Plot the Lag-1 difference
plot(diff(train.ts, lag=1), ylab="Lag-1", xlab="Year", bty="l", main="Lag-1 Difference")
#Plot the double-difference
lag4ThenLag1 <- diff(diff(train.ts, lag=4), lag=1)
plot(lag4ThenLag1, ylab="Lag-4, then Lag-1", xlab="Year", bty="l", main="Double-Diff (Lag-4, Lag-1)")
lag1ThenLag4 <- diff(diff(train.ts, lag=1), lag=4)
plot(lag1ThenLag4, ylab="Lag-1, then Lag-4", xlab="Year", bty="l", main="Double-Diff (Lag-1, Lag-4)")
When double-differencing, removing the seasonality first sometimes results in a stationary series making the second (trend) differencing unneccessary. The results will be the same no matter whether seasonal or trend differencing is done first.
pointForecasts <- meanf(diff(diff(train.ts, lag=4), lag=1), h=4)
pointForecasts
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2006 Q1 569.2 -2116.935 3255.335 -3714.114 4852.514
## 2006 Q2 569.2 -2116.935 3255.335 -3714.114 4852.514
## 2006 Q3 569.2 -2116.935 3255.335 -3714.114 4852.514
## 2006 Q4 569.2 -2116.935 3255.335 -3714.114 4852.514
realForecasts <- vector()
for (i in 1:nValid) {
if(i == 1) {
realForecasts[i] <- pointForecasts$mean[i] + train.ts[(nTrain+i)-nValid] + (train.ts[nTrain] - train.ts[nTrain+i-1-nValid])
} else {
realForecasts[i] <- pointForecasts$mean[i] + train.ts[(nTrain+i)-nValid] + (realForecasts[i-1] - train.ts[nTrain+i-1-nValid])
}
}
realForecasts
## [1] 63982.2 68177.4 80201.6 101467.8
par(mfrow = c(1,1))
plot(realForecasts, type="l", bty="l")
dataFrame <- read.csv("AustralianWines.csv", stringsAsFactors=FALSE, header=TRUE)
wineSales.ts <- ts(dataFrame$Fortified, start=c(1980,1), end = c(1994,12), freq=12)
plot(wineSales.ts, bty="l")
nnValid <- 12
nnTrain <- length(wineSales.ts) - nnValid
train2.ts <- window(wineSales.ts, start = c(1980,1), end = c(1980, nnTrain))
valid2.ts <- window(wineSales.ts, start = c(1980, nnTrain + 1), end = c(1980, nnTrain + nnValid))
library(forecast)
hwin2 <- ets(train2.ts, model = "ZZM", restrict = FALSE)
hwin.pred2 <- forecast(hwin2, h = 2, level = 0)
yrange = range(train2.ts)
plot(c(yrange[1], yrange[2]), ylab = "Fortified Wine Sales", xlab = "Time", bty = "l", xaxt = "n", xlim = c(1980, 1995), main = "Forecasts for training period(red) & the next two months(blue)", lty = 2)
axis(1, at = seq(1980,1995,1), labels = format(seq(1980,1995,1)))
lines(hwin.pred2$fitted, lwd = 1, col = "red")
lines(hwin.pred2$mean, col="blue", lwd=1, lty=2)
Plot of residuals
residues <- train2.ts - hwin.pred2$fitted
plot(residues, bty="l", main="Residuals for training period")