Do exercises 7.1, 7.5, 7.6, 7.7, 7.8, and 7.9 in Hyndman.
# 7.1 Consider the pigs series (the number of pigs slaughtered in Victoria each month). Use the ses() function in R to find the optimal values of α and ℓ0, and generate forecasts for the next four months.
head(pigs)
## Jan Feb Mar Apr May Jun
## 1980 76378 71947 33873 96428 105084 95741
pigdata <- pigs
autoplot(pigdata) +
ylab("# Pigs") + xlab("Month")
The simple exponential smoothing (SES) method is suitable for forecasting time series with no clear trend or seasonality. The pigs data does not show a clear pattern, so we will use SES.
fc <- ses(pigdata, h=4)
round(accuracy(fc),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 385.87 10253.6 7961.38 -0.92 9.27 0.8 0.01
fc$model
## Simple exponential smoothing
##
## Call:
## ses(y = pigdata, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
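As a sanity check, the SES point forecast can be reproduced by hand from the reported α and ℓ0 using the level recursion ℓ_t = α y_t + (1 - α) ℓ_{t-1} (a minimal sketch, assuming the standard parameter names on the fitted ets object):
alpha <- unname(fc$model$par["alpha"])  # optimal smoothing parameter
l0    <- unname(fc$model$par["l"])      # optimal initial level
level <- l0
for (y in pigdata) level <- alpha * y + (1 - alpha) * level
level  # matches the flat point forecast of 98816.41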
fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
autoplot(fc) +
autolayer(fitted(fc), series="Fitted") +
ylab("# Pigs") + xlab("Month")
(fc$upper[1, "95%"])
## 95%
## 119020.8
(fc$lower[1, "95%"])
## 95%
## 78611.97
s <- sd(fc$residuals)
(fc$mean[1] + 1.96*s)
## [1] 118952.8
(fc$mean[1] - 1.96*s)
## [1] 78679.97
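The manually computed interval (78,680 to 118,953) is slightly narrower than the one produced by ses() (78,612 to 119,021), because ses() uses the model's estimated standard error, which adjusts for the number of fitted parameters, rather than the raw residual standard deviation.
# 7.5 Data set books contains the daily sales of paperback and hardcover books at the same store. The task is to forecast the next four days' sales for paperback and hardcover books.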
head(books)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## Paperback Hardcover
## 1 199 139
## 2 172 128
## 3 111 172
## 4 209 139
## 5 161 191
## 6 119 168
bookdata <- books
autoplot(bookdata) +
ylab("Sales") +
xlab("Day")
The graph shows an increasing trend in overall book sales.
ses_paperback <- ses(bookdata[, "Paperback"], h = 7)
ses_hardcover <- ses(bookdata[, "Hardcover"], h = 7)
autoplot(bookdata[, "Paperback"], series = "Paperback") +
autolayer(ses_paperback, series = "Paperback") +
xlab("Day")+
ggtitle("Paperback Books Sales")
autoplot(bookdata[, "Hardcover"], series = "Hardcover") +
autolayer(ses_hardcover, series = "Hardcover") +
xlab("Day")+
ggtitle("Hardcover Books Sales")
round(accuracy(ses_hardcover),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9.17 31.93 26.77 2.64 13.39 0.8 -0.14
round(accuracy(ses_paperback),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.18 33.64 27.84 0.47 15.58 0.7 -0.21
Root mean square error (RMSE) is the standard deviation of the residuals (the one-step prediction errors on the training data). Residuals measure how far the fitted values fall from the observations, and RMSE measures how spread out those errors are: the lower the RMSE, the more closely the model tracks the data. Here the RMSE is 31.93 for hardcover and 33.64 for paperback, so SES fits the hardcover series slightly better.
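As a quick check, the RMSE reported by accuracy() can be reproduced directly from the training residuals:
sqrt(mean(residuals(ses_hardcover)^2))  # 31.93, as above
sqrt(mean(residuals(ses_paperback)^2))  # 33.64, as above
# 7.6 Now apply Holt's linear method to the paperback and hardcover series and compute four-day forecasts in each case.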
holtpaperback <- holt(bookdata[, "Paperback"], h = 4)
holthardcover <- holt(bookdata[, "Hardcover"], h = 4)
autoplot(bookdata[, "Paperback"], series = "Paperback") +
autolayer(holtpaperback) +
autolayer(bookdata[, "Hardcover"], series = "Hardcover")+
autolayer(holthardcover,PI=FALSE)+
ylab("Sales") +
xlab("Day")+
ggtitle("Paperback Books Sales")
Compare the forecasts for the two series using both methods. Which do you think is best?
round(accuracy(holthardcover),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.14 27.19 23.16 -2.11 12.16 0.69 -0.03
round(accuracy(holtpaperback),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -3.72 31.14 26.18 -5.51 15.58 0.66 -0.18
The RMSE values are lower for the Holt models, which suggests they fit better than the SES models. The data is trending upward, which is likely why Holt's method, with its explicit trend component, is more appropriate for forecasting these book sales.
Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors. Compare your intervals with those produced using ses and holt.
(holthardcover$upper[1, "95%"])
## 95%
## 307.4256
(holthardcover$lower[1, "95%"])
## 95%
## 192.9222
(holtpaperback$upper[1, "95%"])
## 95%
## 275.0205
(holtpaperback$lower[1, "95%"])
## 95%
## 143.913
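The exercise's manual intervals, ŷ ± 1.96 × RMSE with normal errors, can be sketched as follows:
rmse_paperback <- sqrt(mean(residuals(holtpaperback)^2))
holtpaperback$mean[1] + c(-1.96, 1.96) * rmse_paperback  # vs holt(): 143.9 to 275.0
rmse_hardcover <- sqrt(mean(residuals(holthardcover)^2))
holthardcover$mean[1] + c(-1.96, 1.96) * rmse_hardcover  # vs holt(): 192.9 to 307.4
The manual intervals come out slightly narrower than those produced by holt(), because the training RMSE is a little smaller than the model's estimated sigma, which adjusts for the number of fitted parameters.
# 7.7 For this exercise use data set eggs, the price of a dozen eggs in the United States from 1900–1993.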
head(eggs)
## Time Series:
## Start = 1900
## End = 1905
## Frequency = 1
## [1] 276.79 315.42 314.87 321.25 314.54 317.92
autoplot(eggs) +
ylab("Price of eggs in US Dollars") +
xlab("Years")+
ggtitle("Price of dozen eggs in US, 1900–1993,")
There does not appear to be seasonality in the eggs data, so we will explore Holt's linear and damped methods. Holt's linear method is appropriate for data with a trend and projects a constant linear trend into the future. According to the text, this tends to over-forecast at long horizons; Holt's damped method corrects for this by flattening the trend so that the forecasts approach a constant at some point in the future. We explore both methods below.
eggfit1 <- holt(eggs, h = 100)
eggfitDamped <- holt(eggs,damped = TRUE, h = 100)
autoplot(eggs) +
autolayer(eggfit1, series = "Holt Linear") +
autolayer(eggfitDamped, series = "Holt Damped", PI=FALSE)+
ylab("Price of eggs in US Dollars") +
xlab("Years")+
ggtitle("Price of dozen eggs in US, 1900–1993,")
guides(colour=guide_legend(title="Forecast"))
Which model gives the best RMSE?
round(accuracy(eggfit1),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.04 26.58 19.18 -1.14 9.65 0.95 0.01
round(accuracy(eggfitDamped),2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -2.89 26.54 19.28 -2.91 10.02 0.95 0
The Holt damped method is the better fit of the two, as it gives the lower RMSE (26.54 vs 26.58 for the linear method).
# 7.8 Recall your retail time series data (from Exercise 3 in Section 2.10).
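This assumes the retail data was read in as in Exercise 2.10; the file name below follows the textbook, so adjust the path as needed:
retaildata <- readxl::read_excel("retail.xlsx", skip = 1)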
myts <- ts(retaildata[,"A3349413L"],
frequency=12, start=c(1982,4))
autoplot(myts) +
ylab("Sales") +
xlab("Time")+
ggtitle("Retail Data")
ggseasonplot(myts)
myts%>% decompose(type="multiplicative") %>%
autoplot() + xlab("Month") +
ggtitle("Retail Sales")
The retail data shows a clear seasonal pattern. The additive method is preferred when the seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations change in proportion to the level of the series. As the chart above shows, the seasonal variations here grow along with the upward year-over-year trend, so multiplicative seasonality is necessary.
myts_mult <- hw(myts,seasonal="multiplicative")
autoplot(myts) +
autolayer(myts_mult, series="HW multiplicative forecasts") +
xlab("Month") +
ylab("Retail Sales") +
ggtitle("Retail Sales") +
guides(colour=guide_legend(title="Forecast"))
myts_mult_Damp <- hw(myts,seasonal="multiplicative", damped = TRUE)
autoplot(myts) +
autolayer(myts_mult_Damp, series="HW multiplicative forecasts Damped") +
xlab("Month") +
ylab("Retail Sales") +
ggtitle("Retail Sales") +
guides(colour=guide_legend(title="Forecast"))
accuracy(myts_mult)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01128536 9.453679 6.900005 -0.3215905 4.896188 0.4699252
## ACF1
## Training set 0.0462354
accuracy(myts_mult_Damp)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4573854 9.478814 6.894024 0.01293068 4.871026 0.4695179
## ACF1
## Training set 0.02727025
The Holt-Winters multiplicative method has a lower RMSE than its damped counterpart, so the undamped method is preferred from an RMSE standpoint. However, it is concerning for a model's trend to increase or decrease indefinitely, so I would prefer the damped method, which flattens toward a constant over time.
checkresiduals(myts_mult)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 38.47, df = 8, p-value = 6.162e-06
##
## Model df: 16. Total lags used: 24
checkresiduals(myts_mult_Damp)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 39.296, df = 7, p-value = 1.716e-06
##
## Model df: 17. Total lags used: 24
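In both cases the Ljung-Box p-value is far below 0.05, so the residuals do not look like white noise; some autocorrelation remains that neither model captures.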
# 7.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data.
# Apply a Box-Cox transformation
train <- ts(as.vector(myts), frequency = 12)
lambda <- BoxCox.lambda(train)
lambda
## [1] 0.1606171
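For reference, with λ ≠ 0 the Box-Cox transform computed by BoxCox() is w_t = (y_t^λ - 1)/λ; a quick check on the first observation:
(train[1]^lambda - 1) / lambda
BoxCox(train, lambda)[1]  # the two values should agree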
# Use an STL decomposition of the transformed series to extract the trend-cycle and seasonal components.
train_boxcox <- BoxCox(train, lambda)
fit.stl <- stl(train_boxcox, s.window = 'periodic', robust = TRUE)
autoplot(fit.stl)
train_boxcox_seas <- train_boxcox - fit.stl$time.series[,'seasonal']
autoplot(train_boxcox) +
autolayer(train_boxcox_seas, series='Seasonally Adjusted')
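Because the decomposition is additive on the Box-Cox scale, subtracting the seasonal component yields the seasonally adjusted series; seasadj(fit.stl) from the forecast package would return the same thing.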
fit.ets <- ets(train_boxcox_seas)
summary(fit.ets)
## ETS(A,N,N)
##
## Call:
## ets(y = train_boxcox_seas)
##
## Smoothing parameters:
## alpha = 0.5397
##
## Initial states:
## l = 5.8443
##
## sigma: 0.1393
##
## AIC AICc BIC
## 766.3807 766.4444 778.2091
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01353689 0.1389662 0.1065641 0.1620591 1.458521 0.4760334
## ACF1
## Training set 0.02882528
The ets() function selected an ETS(A,N,N) model: simple exponential smoothing with additive errors. Note that its RMSE is on the Box-Cox transformed scale, so it cannot be compared directly with the earlier values; on the scale-free MASE, which is roughly comparable, the Holt-Winters multiplicative method from 7.8 (0.470) still edges out this model (0.476). This is consistent with the earlier results suggesting the Holt-Winters multiplicative method is the better model.