Q1

Compute the total domestic overnight trips for holidays across Australia from the tourism dataset.

fileName <- "C:/Users/Mouhamad M Bani/Downloads/tourism.xlsx"
tourism <- readxl::read_excel(fileName)

tourism <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(
    index = Quarter,
    key = c(Region, State, Purpose)
  )
tourism

## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # ... with 24,310 more rows

holiday_tourism <- tourism %>%
  summarise(Trips = sum(Trips))

h_tourism <- holiday_tourism

Plot the data and describe the main features of the series.

h_tourism %>% autoplot(Trips)

Decompose the series using STL and obtain the seasonally adjusted data.

h_tourism %>%
  model(
    STL(Trips ~ trend(window = 4) + season(window = "periodic"),
    robust = TRUE)) %>%
  components() %>%
  autoplot()

Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be specified using decomposition_model().)

fit <- h_tourism %>%
  model(
    "additive damped trend" = ETS(Trips ~ error("A") + trend("Ad") + season("A")))

fc <- fit %>% forecast(h = "2 years")
fc %>%
  autoplot(h_tourism, level = NULL)

Forecast the next two years of the series using an appropriate model for Holt’s linear method applied to the seasonally adjusted data (as before but without damped trend).

fit <- h_tourism %>%
  model(
    "Holt Linear Method" = ETS(Trips ~ error("A") + trend("A") + season("N")))

fc <- fit %>% forecast ( h = "2 years")
fc %>%
  autoplot(h_tourism, level = NULL)

Now use ETS() to choose a seasonal model for the data.

fit <- h_tourism %>%
  model(ETS(Trips))

fc <- fit %>% forecast(h = 12)

fc %>% autoplot(h_tourism, level = NULL)

Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?

Holiday <- h_tourism %>%  
  model(
    "Holt Linear Method" = ETS(Trips ~ error("A") +  trend("Ad") + season("A")),
    "Holt Multiplicative" = ETS(Trips ~ error("A") +  trend("Ad") + season("N")),
    "ETS" = ETS(Trips))

fc <- Holiday %>% forecast(h = "5 years")
fc %>%
  autoplot(h_tourism, level = NULL)

Compare the forecasts from the three approaches? Which seems most reasonable?

Holiday <- h_tourism %>%  
  model(
    "Holt Linear Method" = ETS(Trips ~ error("A") +  trend("Ad") + season("A")),
    "Holt Multiplicative" = ETS(Trips ~ error("M") +  trend("Ad") + season("N")),
    "ETS" = ETS(Trips))
accuracy(Holiday)

## # A tibble: 3 x 10
##   .model              .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>               <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Holt Linear Method  Training  103.  795.  606. 0.357  2.86 0.638 0.654  4.34e-4
## 2 Holt Multiplicative Training  148. 1216.  997. 0.399  4.64 1.05  1.00   9.00e-2
## 3 ETS                 Training  105.  794.  604. 0.379  2.86 0.636 0.653 -1.51e-3

Check the residuals of your preferred model.

Holiday %>%
  select("Holt Linear Method") %>%
  gg_tsresiduals()

Q2

For this exercise use the quarterly number of arrivals to Australia from New Zealand, 1981 Q1 – 2012 Q3, from data set aus_arrivals.

Make a time plot of your data and describe the main features of the series.

Arrivals <- aus_arrivals %>% filter (Origin == "NZ" & (Quarter <= yearquarter("2012 Q3")))
Arrivals

## # A tsibble: 127 x 3 [1Q]
## # Key:       Origin [1]
##    Quarter Origin Arrivals
##      <qtr> <chr>     <int>
##  1 1981 Q1 NZ        49140
##  2 1981 Q2 NZ        87467
##  3 1981 Q3 NZ        85841
##  4 1981 Q4 NZ        61882
##  5 1982 Q1 NZ        42045
##  6 1982 Q2 NZ        63081
##  7 1982 Q3 NZ        73275
##  8 1982 Q4 NZ        54808
##  9 1983 Q1 NZ        41030
## 10 1983 Q2 NZ        56155
## # ... with 117 more rows

Create a training set that withholds the last two years of available data. Forecast the test set using an appropriate model for Holt-Winters’ multiplicative method.

train <- Arrivals %>%
  filter(Quarter <= max(Quarter) - 4 * 2)
train

## # A tsibble: 119 x 3 [1Q]
## # Key:       Origin [1]
##    Quarter Origin Arrivals
##      <qtr> <chr>     <int>
##  1 1981 Q1 NZ        49140
##  2 1981 Q2 NZ        87467
##  3 1981 Q3 NZ        85841
##  4 1981 Q4 NZ        61882
##  5 1982 Q1 NZ        42045
##  6 1982 Q2 NZ        63081
##  7 1982 Q3 NZ        73275
##  8 1982 Q4 NZ        54808
##  9 1983 Q1 NZ        41030
## 10 1983 Q2 NZ        56155
## # ... with 109 more rows

fit <- train %>%
  model(
    "Holt-Winter Multiplicative" = ETS(Arrivals ~ error("M") +  trend("A") + season("M")) )
fc <- fit %>% forecast(h = "4 years")

fc %>%
  autoplot(aus_arrivals, level = NULL)

Why is multiplicative seasonality necessary here?

The multiplicative seasonality is necessary here because the seasonality variation is increasing. Since the variation of the seasonal pattern change a lot, a multiplicative seasonality is a better option.

Forecast the two-year test set using each of the following methods:

  i) an ETS model;

fit <- Arrivals %>%
  model(ETS(Arrivals))

fc <- fit %>% forecast(h = 8)

fc %>% autoplot(aus_arrivals, level = NULL)

  ii) an additive ETS model applied to a log transformed series;

Arrivals %>%
  autoplot(log(Arrivals))

fit <- Arrivals %>%
  model(ETS(log(Arrivals)))

fc <- fit %>% forecast(h = 8)

fc %>% autoplot(aus_arrivals, level = NULL)

  iii) a seasonal naïve method;

Arrivals %>%
  model(SNAIVE(Arrivals)) %>%
  forecast(h = "5 years") %>%
  autoplot(aus_arrivals, level = NULL)

  iv) an STL decomposition applied to the log transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.

Arrivals %>%
  model(
    STL(Arrivals ~ trend(window = 4) + season(window = "periodic"),
    robust = TRUE)) %>%
  components() %>%
  autoplot()

Which method gives the best forecasts? Does it pass the residual tests?

The method that gives the best forecasts is the ETS log since it has the lowest RMSE.

fit <- train %>% 
  model(
  "Seasonal Naive" = SNAIVE(Arrivals),
  "ETS" = ETS(Arrivals),
  "ETS log" = ETS(log(Arrivals)))

fc <- fit %>%  forecast(h = "5 years")

accuracy(fc, Arrivals)

## # A tibble: 3 x 11
##   .model      Origin .type     ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>       <chr>  <chr>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS         NZ     Test  -3495. 14913. 11421. -0.964  3.78 0.768 0.771 -0.0260
## 2 ETS log     NZ     Test   2467. 13342. 11904.  1.03   4.03 0.800 0.689 -0.0786
## 3 Seasonal N~ NZ     Test   9709. 18051. 17156.  3.44   5.80 1.15  0.933 -0.239

Compare the same four methods using time series cross-validation instead of using a training and test set. Do you come to the same conclusions?

Yes, we come to the same conclusion because when we do the accuracy, the ETS log (Arrivals) still have the same RMSE

cross_v <- Arrivals %>% 
  slice( 1: (n()-2) ) %>% 
  stretch_tsibble(.init = 20,.step = 2)

fit <- cross_v %>% 
  model(
    SNAIVE(Arrivals),
    ETS(Arrivals),
    ETS(log(Arrivals)))

fc <- fit %>% forecast(h= "5 years")
fc

## # A fable: 3,180 x 6 [1Q]
## # Key:     .id, Origin, .model [159]
##      .id Origin .model           Quarter        Arrivals .mean
##    <int> <chr>  <chr>              <qtr>          <dist> <dbl>
##  1     1 NZ     SNAIVE(Arrivals) 1986 Q1 N(41646, 1e+08) 41646
##  2     1 NZ     SNAIVE(Arrivals) 1986 Q2 N(63668, 1e+08) 63668
##  3     1 NZ     SNAIVE(Arrivals) 1986 Q3 N(67803, 1e+08) 67803
##  4     1 NZ     SNAIVE(Arrivals) 1986 Q4 N(72177, 1e+08) 72177
##  5     1 NZ     SNAIVE(Arrivals) 1987 Q1 N(41646, 2e+08) 41646
##  6     1 NZ     SNAIVE(Arrivals) 1987 Q2 N(63668, 2e+08) 63668
##  7     1 NZ     SNAIVE(Arrivals) 1987 Q3 N(67803, 2e+08) 67803
##  8     1 NZ     SNAIVE(Arrivals) 1987 Q4 N(72177, 2e+08) 72177
##  9     1 NZ     SNAIVE(Arrivals) 1988 Q1 N(41646, 3e+08) 41646
## 10     1 NZ     SNAIVE(Arrivals) 1988 Q2 N(63668, 3e+08) 63668
## # ... with 3,170 more rows

Arrivals %>%  autoplot(Arrivals)

fc %>% accuracy(Arrivals)

## # A tibble: 3 x 11
##   .model         Origin .type     ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>  <chr>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arrivals)  NZ     Test  18267. 33939. 25886.  9.81  13.9  1.75  1.77 0.781
## 2 ETS(log(Arriv~ NZ     Test  15206. 29836. 22390.  7.58  12.2  1.51  1.56 0.764
## 3 SNAIVE(Arriva~ NZ     Test  24167. 36436. 27935. 12.3   14.9  1.88  1.90 0.702

STAT 427 Project 1

Done by : Mouhamad Bani

Q1

Q2