Compute the total domestic overnight trips for holidays across Australia from the
tourismdataset.
fileName <- "C:/Users/Mouhamad M Bani/Downloads/tourism.xlsx"
tourism <- readxl::read_excel(fileName)
tourism <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(
index = Quarter,
key = c(Region, State, Purpose)
)
tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ... with 24,310 more rows
holiday_tourism <- tourism %>%
summarise(Trips = sum(Trips))
h_tourism <- holiday_tourism
- Plot the data and describe the main features of the series.
h_tourism %>% autoplot(Trips)
- Decompose the series using STL and obtain the seasonally adjusted data.
h_tourism %>%
model(
STL(Trips ~ trend(window = 4) + season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
- Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be specified using
decomposition_model().)
fit <- h_tourism %>%
model(
"additive damped trend" = ETS(Trips ~ error("A") + trend("Ad") + season("A")))
fc <- fit %>% forecast(h = "2 years")
fc %>%
autoplot(h_tourism, level = NULL)
- Forecast the next two years of the series using an appropriate model for Holt’s linear method applied to the seasonally adjusted data (as before but without damped trend).
fit <- h_tourism %>%
model(
"Holt Linear Method" = ETS(Trips ~ error("A") + trend("A") + season("N")))
fc <- fit %>% forecast ( h = "2 years")
fc %>%
autoplot(h_tourism, level = NULL)
- Now use
ETS()to choose a seasonal model for the data.
fit <- h_tourism %>%
model(ETS(Trips))
fc <- fit %>% forecast(h = 12)
fc %>% autoplot(h_tourism, level = NULL)
- Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
Holiday <- h_tourism %>%
model(
"Holt Linear Method" = ETS(Trips ~ error("A") + trend("Ad") + season("A")),
"Holt Multiplicative" = ETS(Trips ~ error("A") + trend("Ad") + season("N")),
"ETS" = ETS(Trips))
fc <- Holiday %>% forecast(h = "5 years")
fc %>%
autoplot(h_tourism, level = NULL)
- Compare the forecasts from the three approaches? Which seems most reasonable?
Holiday <- h_tourism %>%
model(
"Holt Linear Method" = ETS(Trips ~ error("A") + trend("Ad") + season("A")),
"Holt Multiplicative" = ETS(Trips ~ error("M") + trend("Ad") + season("N")),
"ETS" = ETS(Trips))
accuracy(Holiday)
## # A tibble: 3 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt Linear Method Training 103. 795. 606. 0.357 2.86 0.638 0.654 4.34e-4
## 2 Holt Multiplicative Training 148. 1216. 997. 0.399 4.64 1.05 1.00 9.00e-2
## 3 ETS Training 105. 794. 604. 0.379 2.86 0.636 0.653 -1.51e-3
- Check the residuals of your preferred model.
Holiday %>%
select("Holt Linear Method") %>%
gg_tsresiduals()
For this exercise use the quarterly number of arrivals to Australia from New Zealand, 1981 Q1 – 2012 Q3, from data set
aus_arrivals.
- Make a time plot of your data and describe the main features of the series.
Arrivals <- aus_arrivals %>% filter (Origin == "NZ" & (Quarter <= yearquarter("2012 Q3")))
Arrivals
## # A tsibble: 127 x 3 [1Q]
## # Key: Origin [1]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 NZ 49140
## 2 1981 Q2 NZ 87467
## 3 1981 Q3 NZ 85841
## 4 1981 Q4 NZ 61882
## 5 1982 Q1 NZ 42045
## 6 1982 Q2 NZ 63081
## 7 1982 Q3 NZ 73275
## 8 1982 Q4 NZ 54808
## 9 1983 Q1 NZ 41030
## 10 1983 Q2 NZ 56155
## # ... with 117 more rows
- Create a training set that withholds the last two years of available data. Forecast the test set using an appropriate model for Holt-Winters’ multiplicative method.
train <- Arrivals %>%
filter(Quarter <= max(Quarter) - 4 * 2)
train
## # A tsibble: 119 x 3 [1Q]
## # Key: Origin [1]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 NZ 49140
## 2 1981 Q2 NZ 87467
## 3 1981 Q3 NZ 85841
## 4 1981 Q4 NZ 61882
## 5 1982 Q1 NZ 42045
## 6 1982 Q2 NZ 63081
## 7 1982 Q3 NZ 73275
## 8 1982 Q4 NZ 54808
## 9 1983 Q1 NZ 41030
## 10 1983 Q2 NZ 56155
## # ... with 109 more rows
fit <- train %>%
model(
"Holt-Winter Multiplicative" = ETS(Arrivals ~ error("M") + trend("A") + season("M")) )
fc <- fit %>% forecast(h = "4 years")
fc %>%
autoplot(aus_arrivals, level = NULL)
- Why is multiplicative seasonality necessary here?
The multiplicative seasonality is necessary here because the seasonality variation is increasing. Since the variation of the seasonal pattern change a lot, a multiplicative seasonality is a better option.
- Forecast the two-year test set using each of the following methods:
i) an ETS model;
fit <- Arrivals %>%
model(ETS(Arrivals))
fc <- fit %>% forecast(h = 8)
fc %>% autoplot(aus_arrivals, level = NULL)
ii) an additive ETS model applied to a log transformed series;
Arrivals %>%
autoplot(log(Arrivals))
fit <- Arrivals %>%
model(ETS(log(Arrivals)))
fc <- fit %>% forecast(h = 8)
fc %>% autoplot(aus_arrivals, level = NULL)
iii) a seasonal naïve method;
Arrivals %>%
model(SNAIVE(Arrivals)) %>%
forecast(h = "5 years") %>%
autoplot(aus_arrivals, level = NULL)
iv) an STL decomposition applied to the log transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
Arrivals %>%
model(
STL(Arrivals ~ trend(window = 4) + season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
- Which method gives the best forecasts? Does it pass the residual tests?
The method that gives the best forecasts is the ETS log since it has the lowest RMSE.
fit <- train %>%
model(
"Seasonal Naive" = SNAIVE(Arrivals),
"ETS" = ETS(Arrivals),
"ETS log" = ETS(log(Arrivals)))
fc <- fit %>% forecast(h = "5 years")
accuracy(fc, Arrivals)
## # A tibble: 3 x 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS NZ Test -3495. 14913. 11421. -0.964 3.78 0.768 0.771 -0.0260
## 2 ETS log NZ Test 2467. 13342. 11904. 1.03 4.03 0.800 0.689 -0.0786
## 3 Seasonal N~ NZ Test 9709. 18051. 17156. 3.44 5.80 1.15 0.933 -0.239
- Compare the same four methods using time series cross-validation instead of using a training and test set. Do you come to the same conclusions?
Yes, we come to the same conclusion because when we do the accuracy, the ETS log (Arrivals) still have the same RMSE
cross_v <- Arrivals %>%
slice( 1: (n()-2) ) %>%
stretch_tsibble(.init = 20,.step = 2)
fit <- cross_v %>%
model(
SNAIVE(Arrivals),
ETS(Arrivals),
ETS(log(Arrivals)))
fc <- fit %>% forecast(h= "5 years")
fc
## # A fable: 3,180 x 6 [1Q]
## # Key: .id, Origin, .model [159]
## .id Origin .model Quarter Arrivals .mean
## <int> <chr> <chr> <qtr> <dist> <dbl>
## 1 1 NZ SNAIVE(Arrivals) 1986 Q1 N(41646, 1e+08) 41646
## 2 1 NZ SNAIVE(Arrivals) 1986 Q2 N(63668, 1e+08) 63668
## 3 1 NZ SNAIVE(Arrivals) 1986 Q3 N(67803, 1e+08) 67803
## 4 1 NZ SNAIVE(Arrivals) 1986 Q4 N(72177, 1e+08) 72177
## 5 1 NZ SNAIVE(Arrivals) 1987 Q1 N(41646, 2e+08) 41646
## 6 1 NZ SNAIVE(Arrivals) 1987 Q2 N(63668, 2e+08) 63668
## 7 1 NZ SNAIVE(Arrivals) 1987 Q3 N(67803, 2e+08) 67803
## 8 1 NZ SNAIVE(Arrivals) 1987 Q4 N(72177, 2e+08) 72177
## 9 1 NZ SNAIVE(Arrivals) 1988 Q1 N(41646, 3e+08) 41646
## 10 1 NZ SNAIVE(Arrivals) 1988 Q2 N(63668, 3e+08) 63668
## # ... with 3,170 more rows
Arrivals %>% autoplot(Arrivals)
fc %>% accuracy(Arrivals)
## # A tibble: 3 x 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arrivals) NZ Test 18267. 33939. 25886. 9.81 13.9 1.75 1.77 0.781
## 2 ETS(log(Arriv~ NZ Test 15206. 29836. 22390. 7.58 12.2 1.51 1.56 0.764
## 3 SNAIVE(Arriva~ NZ Test 24167. 36436. 27935. 12.3 14.9 1.88 1.90 0.702