if(!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, ggplot2, forecast, caret, fpp2)
?visitors
## starting httpd help server ... done
This gives information about the data set, indicating it is a monthly australian short term overseas visitors time series data over May 1985 - April 2005. It has been taken from “Forecasting with exponential smoothing: the state space approach” By Hyndman, R.J., Koehler, A.B., Ord, J.K., and Snyder, R.D.
It also indicated that the scale is in thousands.
data(visitors)
force(visitors)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1985 75.7 75.4 83.1 82.9 77.3 105.7 121.9
## 1986 98.0 118.0 129.5 110.6 91.7 94.8 109.5 105.1 95.0 130.3 156.7
## 1987 139.7 147.8 145.2 132.7 120.7 116.5 142.0 140.4 128.0 165.7 183.1
## 1988 161.3 180.4 185.2 160.5 157.1 163.8 203.3 196.9 179.6 207.3 208.0
## 1989 168.9 191.1 180.0 160.1 136.6 142.7 175.4 161.4 149.9 174.1 192.7
## 1990 176.2 192.8 189.1 181.1 149.9 157.3 185.3 178.2 162.7 190.6 198.6
## 1991 177.4 190.6 189.2 168.0 161.4 172.2 208.3 199.3 197.4 216.0 223.9
## 1992 196.1 238.2 217.8 203.8 175.2 176.9 219.3 199.1 190.0 229.3 255.0
## 1993 242.8 245.5 257.9 226.3 213.4 204.6 244.6 239.9 224.0 267.2 285.9
## 1994 250.5 304.3 307.4 255.1 214.9 230.9 282.5 265.4 254.0 301.6 311.0
## 1995 303.8 319.1 313.5 294.2 244.8 261.4 329.7 304.9 268.6 320.7 342.9
## 1996 317.2 392.7 365.6 333.2 261.5 306.9 358.2 329.2 309.2 350.4 375.6
## 1997 342.9 408.0 390.9 325.9 289.1 308.2 397.4 330.4 330.9 366.5 379.5
## 1998 346.2 353.6 338.6 341.1 283.4 304.2 372.3 323.7 323.9 354.8 367.9
## 1999 351.0 398.6 389.0 334.1 298.1 317.1 388.5 355.6 353.1 397.0 416.7
## 2000 360.8 434.6 411.9 405.6 319.3 347.9 429.0 372.9 403.0 426.5 459.9
## 2001 416.6 429.2 428.7 405.4 330.2 370.0 446.9 384.6 366.3 378.5 376.2
## 2002 379.3 437.2 446.5 360.3 329.9 339.4 418.2 371.9 358.6 428.9 437.0
## 2003 396.6 427.5 392.5 321.5 260.9 308.3 415.5 362.2 385.6 435.3 473.3
## 2004 420.2 454.8 432.3 402.8 341.3 367.3 472.0 405.8 395.6 449.9 479.9
## 2005 462.4 501.6 504.7 409.5
## Dec
## 1985 150.0
## 1986 190.1
## 1987 222.8
## 1988 245.8
## 1989 247.4
## 1990 253.1
## 1991 266.8
## 1992 302.4
## 1993 344.0
## 1994 384.0
## 1995 422.3
## 1996 465.2
## 1997 448.3
## 1998 457.6
## 1999 460.8
## 2000 559.9
## 2001 523.2
## 2002 534.0
## 2003 566.6
## 2004 593.1
## 2005
Checking if time series
is.ts(visitors)
## [1] TRUE
#ts.plot(visitors) (base R style of plotting)
autoplot(visitors)+
ggtitle("Overseas visitors to Australia")+
xlab("Months")+
ylab("Thousands of people")
ggseasonplot(visitors, polar = TRUE, )+
ggtitle("Polar Season Plot: Visitors")
ggsubseriesplot(visitors)+
ggtitle("Sub-series Plot: Overseas visitors to Australia")+
xlab("Months")+
ylab("Thousands of people")
ggAcf(visitors)+
ggtitle("Autocorrelation Plot: Overseas visitors to Australia")
There seems to be a great fall in 2003, sometime in May. There is a strong positive trend in the data this can be seen in the time series and seasonal plots. There is also a seasonality in the data over months which can be verified in subseries and autocorrelation plots.
v_train <- window(visitors, end = c(2003, 12))
v_test <- window(visitors, start = c(2004, 1))
Holt-Winters’ multiplicative method
hwfc <- hw(v_train, seasonal="multiplicative")
autoplot(v_train) +
autolayer(hwfc, series="HW multiplicative forecasts", PI=FALSE)+
autolayer(v_test)+
ggtitle("Holt-Winter's forecast: Overseas visitors to Australia")+
xlab("Months")+
ylab("Thousands of people")+
guides(colour=guide_legend(title="Forecast vs Actual"))
I believe of the two Holt Winters’ Forecasts, multipliactive forecast is necessary here as the seasonality is increasing with time (year).
Forecasting using an ETS model, an additive ETS model applied to a Box-Cox transformed series, and a seasonal naïve method.
v_ets <- v_train %>% ets() %>% forecast(h = length(v_test))
autoplot(v_ets)+
ggtitle("ETS: Overseas visitors to Australia")+
xlab("Months")+
ylab("Thousands of people")
v_ets.box <- ets(v_train, lambda = BoxCox.lambda(v_train), additive.only = T) %>% forecast(h = length(v_test))
autoplot(v_ets.box)+
ggtitle("Box-Cox transformed: Overseas visitors to Australia")+
xlab("Months")+
ylab("Thousands of people")
v_sn <- snaive(v_train, h = length(v_test))
autoplot(v_sn) + ggtitle(" Seasonal Naive Model: Overseas visitors to Australia")+
xlab("Months")+
ylab("Thousands of people")
accuracy(v_ets, v_test)[,"RMSE"]
## Training set Test set
## 14.95912 22.90797
accuracy(v_ets.box, v_test)[,"RMSE"]
## Training set Test set
## 15.51547 20.22801
accuracy(v_sn, v_test)[,"RMSE"]
## Training set Test set
## 31.24792 59.02680
Additive ETS model applied to BOX-COX transformation series is the best model for having lowest Test RMSE.
Residuals Test for Additive ETS model applied to BOX-COX transformation series
checkresiduals(v_ets.box)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 15.475, df = 7, p-value = 0.03037
##
## Model df: 17. Total lags used: 24
As per the test, it is visible that, the p-value is 3.037%. This means that we can reject the hypothesis that it is white noise and conclude that the residuals are clearly distinguishable from a white noise series.