R Markdown

YUL Monthly Passengers 2019-2024
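The code below uses functions from the forecast, ggplot2, and urca packages; the following setup chunk is a sketch added for completeness (it is not shown in the original document):

library(forecast)   # meanf(), naive(), snaive(), ses(), holt(), hw(), ets(), auto.arima(), nnetar(), ggAcf(), ...
library(ggplot2)    # ggtitle(), xlab(), ylab(), guides() used to customise the autoplot() output
library(urca)       # ur.kpss() unit root test used later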

YUL = read.csv("YUL.csv", header = T)
YULTS = ts(YUL["Passagers"], start = c(2019,1), end = c(2024,3), frequency = 12)
head(YULTS)
##          Jan     Feb     Mar     Apr     May     Jun
## 2019 1532219 1432616 1712624 1567937 1603155 1839150
tail(YULTS)
##          Jan     Feb     Mar Apr May Jun Jul Aug Sep     Oct     Nov     Dec
## 2023                                                 1775914 1501408 1701441
## 2024 1673383 1568672 1786266

Data visualization

plot(YULTS)

autoplot(YULTS)+
  ggtitle("Nombre de Passagers à l'aéroport YUL 2019-2024")+
  xlab("Date")+ylab("Passagers")

ggseasonplot(YULTS, year.labels = TRUE, year.labels.left = TRUE)+ylab("Number Passengers")+
  ggtitle("Seasonal plot: YUL Passengers")

New dataset

YUL = read.csv("YUL.csv", header = T)
YULTS = ts(YUL["Passagersa"], start = c(2019,1), end = c(2024,3), frequency = 12)

Data visualization

plot(YULTS)

autoplot(YULTS)+
  ggtitle("Passagers à l'aéroport YUL")+
  xlab("Date")+ylab("Passagers")

ggseasonplot(YULTS, year.labels = TRUE, year.labels.left = TRUE)+ylab("Number Passengers")+
  ggtitle("Seasonal plot: YUL Passengers")

Lag plots

gglagplot(YULTS)

ggAcf(YULTS)

ggPacf(YULTS)

Autocorrelation

autoplot(YULTS)+xlab("Date")+ylab("Passengers")

ggAcf(YULTS, lag.max = 24)

Average Method

The forecasts of all future values are equal to the average of the historical data.
meanf(YULTS, 12)
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Apr 2024        1738174 1442300 2034048 1281586 2194762
## May 2024        1738174 1442300 2034048 1281586 2194762
## Jun 2024        1738174 1442300 2034048 1281586 2194762
## Jul 2024        1738174 1442300 2034048 1281586 2194762
## Aug 2024        1738174 1442300 2034048 1281586 2194762
## Sep 2024        1738174 1442300 2034048 1281586 2194762
## Oct 2024        1738174 1442300 2034048 1281586 2194762
## Nov 2024        1738174 1442300 2034048 1281586 2194762
## Dec 2024        1738174 1442300 2034048 1281586 2194762
## Jan 2025        1738174 1442300 2034048 1281586 2194762
## Feb 2025        1738174 1442300 2034048 1281586 2194762
## Mar 2025        1738174 1442300 2034048 1281586 2194762

Naive Method

The forecast values are equal to the last observation.
naive(YULTS, 12)
##          Point Forecast     Lo 80   Hi 80     Lo 95   Hi 95
## Apr 2024        1786266 1533828.0 2038704 1400195.5 2172337
## May 2024        1786266 1429264.8 2143267 1240279.8 2332252
## Jun 2024        1786266 1349030.6 2223501 1117572.2 2454960
## Jul 2024        1786266 1281390.1 2291142 1014124.9 2558407
## Aug 2024        1786266 1221797.6 2350734  922986.0 2649546
## Sep 2024        1786266 1167921.8 2404610  840590.2 2731942
## Oct 2024        1786266 1118378.0 2454154  764819.4 2807713
## Nov 2024        1786266 1072263.6 2500268  694293.6 2878238
## Dec 2024        1786266 1028952.1 2543580  628054.4 2944478
## Jan 2025        1786266  987987.1 2584545  565403.8 3007128
## Feb 2025        1786266  949024.0 2623508  505814.9 3066717
## Mar 2025        1786266  911795.3 2660737  448878.4 3123654

Seasonal Naive Method

We set each forecast to be equal to the last observed value from the same season of
the year. For example, with monthly data, the forecast for all future February
values is equal to the last observed February value.
snaive(YULTS, 12)
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Apr 2024        1631602 1585900 1677304 1561707 1701497
## May 2024        1741104 1695402 1786806 1671209 1810999
## Jun 2024        1913827 1868125 1959529 1843932 1983722
## Jul 2024        2183209 2137507 2228911 2113314 2253104
## Aug 2024        2233977 2188275 2279679 2164082 2303872
## Sep 2024        1924029 1878327 1969731 1854134 1993924
## Oct 2024        1775914 1730212 1821616 1706019 1845809
## Nov 2024        1501408 1455706 1547110 1431513 1571303
## Dec 2024        1701441 1655739 1747143 1631546 1771336
## Jan 2025        1673383 1627681 1719085 1603488 1743278
## Feb 2025        1568672 1522970 1614374 1498777 1638567
## Mar 2025        1786266 1740564 1831968 1716371 1856161

Drift Method

A variation on the naive method is to allow the forecasts to increase or decrease
over time, where the amount of change over time is set to be the average change seen
in the historical data.
rwf(YULTS, 12, drift = TRUE )
##          Point Forecast     Lo 80   Hi 80     Lo 95   Hi 95
## Apr 2024        1790364 1533876.1 2046851 1398099.9 2182627
## May 2024        1794461 1428865.7 2160056 1235331.1 2353591
## Jun 2024        1798559 1347312.9 2249804 1108437.9 2488679
## Jul 2024        1802656 1277609.7 2327703  999666.9 2605645
## Aug 2024        1806754 1215303.5 2398204  902208.7 2711299
## Sep 2024        1810851 1158132.8 2463570  812604.6 2809098
## Oct 2024        1814949 1104767.3 2525130  728819.9 2901078
## Nov 2024        1819046 1054348.6 2583744  649542.1 2988550
## Dec 2024        1823144 1006286.5 2640001  573868.4 3072419
## Jan 2025        1827241  960155.7 2694327  501148.3 3153334
## Feb 2025        1831339  915638.2 2747040  430895.6 3231782
## Mar 2025        1835436  872489.7 2798383  362736.6 3308136

Forecasts

head(YULTS)
##          Jan     Feb     Mar     Apr     May     Jun
## 2019 1532219 1432616 1712624 1567937 1603155 1839150
tail(YULTS)
##          Jan     Feb     Mar Apr May Jun Jul Aug Sep     Oct     Nov     Dec
## 2023                                                 1775914 1501408 1701441
## 2024 1673383 1568672 1786266
Training Set
YULTSTR = window(YULTS, start = c(2019,1), end = c(2023,3))
Testing Set
YULTSTE   = window(YULTS, start = c(2023,4), end = c(2024,3)) 
length(YULTSTR)
## [1] 51
ts.plot(YULTSTR)

Different Forecast Plots

autoplot(YULTSTR) +
  autolayer(meanf(YULTSTR,  h = 12),  series = "Mean", PI = FALSE) +
  autolayer(naive(YULTSTR,  h = 12),  series = "Naive", PI = FALSE) +
  autolayer(snaive(YULTSTR, h = 12),  series = "Seasonal Naive", PI = FALSE) +
  ggtitle("Forecasts next 12 months for YUL Passengers") + 
  xlab("Date") + ylab("Number Passengers") + 
  guides(colour = guide_legend(title="Forecast"))

Residuals

resmean <- residuals(meanf(YULTSTR))
autoplot(resmean) + xlab("Date") + ylab("") +
  ggtitle("Residuals from Meanf method")

resnaive <- residuals(naive(YULTSTR))
autoplot(resnaive) + xlab("Date") + ylab("") +
  ggtitle("Residuals from Naive method")

ressnaive <- residuals(snaive(YULTSTR))
autoplot(ressnaive) + xlab("Date") + ylab("") +
  ggtitle("Residuals from Seasonal Naive method")

Forecast Errors

Residuals are calculated on the training set while the forecast errors are calculated
on the test set. Residuals are based on one-step forecasts while forecast errors can
involve multi-step forecasts.
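As an illustration (a sketch using the training/testing split defined above, not part of the original output), the two quantities can be computed side by side for the seasonal naive method:

fit   <- snaive(YULTSTR, h = 12)         # model fitted on the training set
res   <- residuals(fit)                  # one-step residuals, computed on the training set
fcerr <- YULTSTE - fit[["mean"]]         # multi-step forecast errors, computed on the test set
mean(abs(res), na.rm = TRUE)             # training MAE (based on residuals)
mean(abs(fcerr))                         # test MAE (based on forecast errors)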

Scale-dependent errors

Mean Absolute Error: MAE
Root Mean Squared Error: RMSE
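With forecast errors e_t = y_t - ŷ_t, these are defined as (standard definitions, added here for reference):

$$\text{MAE} = \operatorname{mean}(|e_t|), \qquad \text{RMSE} = \sqrt{\operatorname{mean}(e_t^2)}$$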

Percentage Errors

Percentage errors have the advantage of being unit-free, and so are frequently used
to compare forecast performances between data sets.

Mean Absolute Percentage Error: MAPE

Measures based on percentage errors have the disadvantage of being infinite or
undefined if the observed value is 0 for any period of interest, and of taking
extreme values if any observed value is close to 0.
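For reference (standard definition), the MAPE is built from percentage errors:

$$p_t = 100\, e_t / y_t, \qquad \text{MAPE} = \operatorname{mean}(|p_t|)$$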

Scaled Errors

Scaled errors were proposed by Hyndman and Koehler as an alternative to using
percentage errors when comparing forecast accuracy across series with different
units. They proposed scaling the errors based on the training MAE from a simple
forecast method. A scaled error is less than one if it arises from a better forecast
than the average naive forecast computed on the training data. Conversely, it is
greater than one if the forecast is worse than the average naive forecast computed
on the training data.
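A sketch (not part of the original output) of how this scaling works here: for a seasonal series like this one, the accuracy() function in the forecast package scales by the in-sample MAE of the seasonal naive method, so the training MASE of the mean method can be reproduced by hand:

scale <- mean(abs(diff(YULTSTR, lag = 12)))    # in-sample MAE of the seasonal naive method
mean(abs(residuals(meanf(YULTSTR)))) / scale   # scaled MAE of the mean method (its MASE)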

Forecasts

YULTSTR          = window(YULTS, start = c(2019,1), end = c(2023,3))
YULTSTRfitmeanf  = meanf(YULTSTR,  h = 12)
YULTSTRfitnaive  = naive(YULTSTR,  h = 12)
YULTSTRfitsnaive = snaive(YULTSTR, h = 12)

ForecastTable <- cbind(YULTSTRfitmeanf[["mean"]] , YULTSTRfitnaive[["mean"]], 
                       YULTSTRfitsnaive[["mean"]], YULTSTE)

ForecastTable
##          YULTSTRfitmeanf[["mean"]] YULTSTRfitnaive[["mean"]]
## Apr 2023                   1722944                   1782163
## May 2023                   1722944                   1782163
## Jun 2023                   1722944                   1782163
## Jul 2023                   1722944                   1782163
## Aug 2023                   1722944                   1782163
## Sep 2023                   1722944                   1782163
## Oct 2023                   1722944                   1782163
## Nov 2023                   1722944                   1782163
## Dec 2023                   1722944                   1782163
## Jan 2024                   1722944                   1782163
## Feb 2024                   1722944                   1782163
## Mar 2024                   1722944                   1782163
##          YULTSTRfitsnaive[["mean"]] YULTSTE
## Apr 2023                    1615447 1631602
## May 2023                    1651732 1741104
## Jun 2023                    1894878 1913827
## Jul 2023                    2161593 2183209
## Aug 2023                    2211858 2233977
## Sep 2023                    1856534 1924029
## Oct 2023                    1673651 1775914
## Nov 2023                    1405533 1501408
## Dec 2023                    1631427 1701441
## Jan 2024                    1646613 1673383
## Feb 2024                    1578184 1568672
## Mar 2024                    1782163 1786266
autoplot(window(YULTS, start = 2019)) +
  autolayer(YULTSTRfitmeanf,  series = "Mean", PI = FALSE) +
  autolayer(YULTSTRfitnaive,  series = "Naive", PI = FALSE) +
  autolayer(YULTSTRfitsnaive, series = "Seasonal Naive", PI = FALSE) +
  ggtitle("Next 12 months forecasts for YUL Passengers") + 
  xlab("Date") + ylab("Number Passengers") + 
  guides(colour = guide_legend(title = "Forecast"))

Accuracy

accuracy(YULTSTRfitmeanf, YULTSTE)
##                         ME     RMSE      MAE       MPE     MAPE     MASE
## Training set -4.566031e-11 223938.2 179787.1 -1.590572 10.27689 8.731039
## Test set      7.995886e+04 231148.6 169661.0  3.128318  8.86391 8.239285
##                   ACF1 Theil's U
## Training set 0.6063824        NA
## Test set     0.6110622  1.132946
accuracy(YULTSTRfitnaive, YULTSTE)
##                    ME     RMSE      MAE        MPE     MAPE     MASE      ACF1
## Training set  4998.88 198671.7 170378.2 -0.3602056 9.950856 8.274115 0.1281459
## Test set     20739.67 217867.8 167675.8 -0.2012525 9.047404 8.142877 0.6110622
##              Theil's U
## Training set        NA
## Test set      1.078714
accuracy(YULTSTRfitsnaive, YULTSTE)
##                    ME     RMSE      MAE      MPE     MAPE     MASE      ACF1
## Training set 20591.72 25501.52 20591.72 1.211139 1.211139 1.000000 0.4313680
## Test set     43768.25 57369.51 45353.58 2.506966 2.608028 2.202516 0.3938252
##              Theil's U
## Training set        NA
## Test set     0.3158235

Time Series Decomposition

Time series can exhibit three types of patterns: trend, seasonality, and cycles.
When decomposing a time series, we usually combine the trend and cycle and think
of the series as comprising three components: a trend-cycle component, a seasonal
component, and a remainder component.

Time series components

The additive decomposition is the most appropriate if the magnitude of the seasonal
fluctuations, or the variation around the trend-cycle, does not vary with the level
of the time series. When the variation in the seasonal pattern, or the variation
around the trend-cycle, appears to be proportional to the level of the time series,
then a multiplicative decomposition is more appropriate. Multiplicative
decomposition is more common with economic time series.
An alternative to using a multiplicative decomposition is to first transform the
data until the variation in the series appears to be stable over time, then use
an additive decomposition. When a log transformation has been used, this is
equivalent to using a multiplicative decomposition.
YULTS %>% decompose(type = "multiplicative") %>%
  autoplot() + xlab("Date") +
  ggtitle("Classical multiplicative decomposition of passengers")

autoplot(YULTS, series = "Data") +
  autolayer(seasadj(decompose(YULTS, "multiplicative")), 
            series = "Seasonally Adjusted") + xlab("Date") + 
  ylab("Number of passengers")+ ggtitle("Passengers at YUL")

autoplot(YULTS, series = "Data") +
  autolayer(trendcycle(decompose(YULTS, "multiplicative")), 
            series = "Trend-cycle") + xlab("Date") + 
  ylab("Number of passengers")+ ggtitle("Passengers at YUL")
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

Simple Exponential Smoothing

The simplest of the exponentially smoothing methods is naturally called
simple exponential smoothing (SES). This method is suitable for forecasting
data with no clear trend or seasonal pattern.
autoplot(YULTS) + ylab("Number of Passengers") + xlab("Date")

Using the naive method, all forecasts for the future are equal to the last
observed value of the series. Hence, the naive method assumes that the most
recent observation is the only important one, and all previous observations
provide no information for the future.
Using the average method, all future forecasts are equal to a simple average
of the observed data. Hence, the average method assumes that all observations
are of equal importance, and gives them equal weights when generating
forecasts.

Weighted average form

The forecast is equal to a weighted average between the most recent
observation and the previous forecast.
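In symbols (a standard formulation added here for reference; alpha is the smoothing parameter with 0 <= alpha <= 1):

$$\hat{y}_{T+1\mid T} = \alpha\, y_T + (1-\alpha)\,\hat{y}_{T\mid T-1}$$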

Component form
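For SES, the component form (standard formulation, added here for reference) consists of a forecast equation and a smoothing equation for the level:

$$\hat{y}_{t+h\mid t} = \ell_t, \qquad \ell_t = \alpha\, y_t + (1-\alpha)\,\ell_{t-1}$$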

Flat Forecasts
Simple exponential smoothing has a flat forecast function. That is, all forecasts
take the same value, equal to the last estimated level component. Remember that these
forecasts will only be suitable if the time series has no trend or seasonal component.

Estimate parameters

YULTSses <- ses(YULTSTR, h = 12)
YULTSses
##          Point Forecast     Lo 80   Hi 80     Lo 95   Hi 95
## Apr 2023        1781103 1523732.3 2038473 1387488.5 2174717
## May 2023        1781103 1418072.2 2144134 1225895.3 2336311
## Jun 2023        1781103 1336869.8 2225336 1101707.0 2460499
## Jul 2023        1781103 1268370.3 2293836  996946.0 2565260
## Aug 2023        1781103 1208000.4 2354205  904618.3 2657587
## Sep 2023        1781103 1153410.2 2408796  821129.7 2741076
## Oct 2023        1781103 1103201.8 2459004  744342.7 2817863
## Nov 2023        1781103 1056464.0 2505742  672863.3 2889342
## Dec 2023        1781103 1012563.2 2549643  605722.9 2956483
## Jan 2024        1781103  971038.1 2591168  542215.7 3019990
## Feb 2024        1781103  931540.3 2630666  481809.0 3080397
## Mar 2024        1781103  893798.9 2668407  424088.6 3138117

Accuracy of one-step-ahead training errors: period 1-12

round(accuracy(YULTSses), 2)
##                   ME     RMSE      MAE   MPE MAPE MASE ACF1
## Training set 4778.01 196850.2 167370.1 -0.36 9.77 8.13 0.13
autoplot(YULTSses) +
  autolayer(fitted(YULTSses), series = "Fitted") +
  ylab("Number of Passengers") + xlab("Date")

Trend methods

Holt’s linear trend method

Holt extended simple exponential smoothing to allow the forecasting of data
with a trend. This method involves a forecast equation and two smoothing
equations (level and trend). The forecast function is no longer flat but
trending. The h-step-ahead forecast is equal to the last estimated level
plus h times the last estimated trend value. Hence the forecasts are a
linear function of h.
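In symbols (standard formulation, added for reference), with level ℓ_t and trend b_t:

$$\hat{y}_{t+h\mid t} = \ell_t + h\,b_t, \qquad
  \ell_t = \alpha\, y_t + (1-\alpha)(\ell_{t-1} + b_{t-1}), \qquad
  b_t = \beta^{*}(\ell_t - \ell_{t-1}) + (1-\beta^{*})\,b_{t-1}$$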
YULTSHOLT = holt(YULTSTR, h = 12)
round(accuracy(YULTSHOLT), 2)
##                     ME     RMSE      MAE   MPE MAPE MASE ACF1
## Training set -30546.27 200751.7 167746.3 -2.45 9.95 8.15 0.13

Damped trend methods

The forecasts generated by Holt’s linear method display a constant trend
(increasing or decreasing) indefinitely into the future. Empirical evidence
indicates that these methods tend to over-forecast, especially for longer
forecast horizons. Motivated by this observation, Gardner introduced a
parameter that dampens the trend to a flat line some time in the future.
Methods that include a damped trend have proven to be very successful, and are
among the most popular methods when forecasts are required automatically for
many series. Short-run forecasts are trended while
long-run forecasts are constant.
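With damping parameter phi (0 < phi < 1), the h-step forecast becomes (standard formulation, added for reference):

$$\hat{y}_{t+h\mid t} = \ell_t + (\phi + \phi^2 + \cdots + \phi^{h})\,b_t$$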
YULTSHOLTDT <- holt(YULTSTR, damped = TRUE, phi = 0.9, h = 12)
round(accuracy(YULTSHOLTDT), 2)
##                   ME     RMSE      MAE   MPE MAPE MASE ACF1
## Training set -2947.9 196528.6 166847.9 -0.83 9.78  8.1 0.13
autoplot(YULTSTR) + 
  autolayer(YULTSHOLT, series = "Holt's method", PI = FALSE) +
  autolayer(YULTSHOLTDT, series = "Damped Holt's method", PI = FALSE) + 
  ggtitle("Forecasts from Holt's method") + xlab("Date") +
  ylab("Passengers at YUL") +
  guides(colour = guide_legend(title = "Forecast"))

autoplot(YULTS) + 
  autolayer(YULTSHOLT, series = "Holt's method", PI = FALSE) +
  autolayer(YULTSHOLTDT, series = "Damped Holt's method", PI = FALSE) + 
  ggtitle("Forecasts from Holt's method") + xlab("Date") +
  ylab("Passengers at YUL") +
  guides(colour = guide_legend(title = "Forecast"))

Holt-Winters’ seasonal method

Holt-Winters extends Holt's method to capture seasonality.
The Holt-Winters seasonal method comprises the forecast equation and three
smoothing equations (level, trend, and seasonal).
There are two variations: Holt-Winters' additive and Holt-Winters' multiplicative.
YULTSHWADD = hw(YULTSTR, seasonal = "additive", h = 12)
YULTSHWMUL = hw(YULTSTR, seasonal = "multiplicative", h = 12)

autoplot(YULTSTR) + 
  autolayer(YULTSHWADD, series = "HW additive forecasts", PI = FALSE) +
  autolayer(YULTSHWMUL, series = "HW multiplicative forecasts", PI = FALSE) +
  xlab("Date") + ylab("Number of Passengers") +
  ggtitle("Passengers at YUL") +
  guides(colour = guide_legend(title="Forecast"))

autoplot(YULTS) + 
  autolayer(YULTSHWADD, series = "HW additive forecasts", PI = FALSE) +
  autolayer(YULTSHWMUL, series = "HW multiplicative forecasts", PI = FALSE) +
  xlab("Date") + ylab("Number of Passengers") +
  ggtitle("Passengers at YUL") +
  guides(colour = guide_legend(title="Forecast"))

round(accuracy(YULTSHWADD), 2)
##                   ME     RMSE     MAE   MPE MAPE MASE ACF1
## Training set -689.97 11969.49 6290.45 -0.06 0.39 0.31  0.2
round(accuracy(YULTSHWMUL), 2)
##                    ME    RMSE     MAE   MPE MAPE MASE ACF1
## Training set -3005.49 12730.2 8877.83 -0.19 0.53 0.43 0.43

Estimation and Model Selection

Estimating ETS models (Exponential Smoothing State Space Model)

An alternative to estimating the parameters by minimizing the sum of
squared errors is to maximize the likelihood. The likelihood is the
probability of the data arising from the specified model. Thus, a large
likelihood is associated with a good model. For an additive error model,
maximizing the likelihood gives the same results as minimizing the sum of
squared errors.

Model Selection

The information criteria AIC, AICc, and BIC can be used for model selection.

ets() function in R

The models can be estimated in R using the ets() function. Unlike the ses(),
holt(), and hw() functions, the ets() function does not produce forecasts.
Rather, it estimates the model parameters and returns information about
the fitted model.
YULTSETS = ets(YULTSTR)
summary(YULTSETS)
## ETS(A,Ad,A) 
## 
## Call:
##  ets(y = YULTSTR) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
##     gamma = 1e-04 
##     phi   = 0.9768 
## 
##   Initial states:
##     l = 1677438.2044 
##     b = 2918.1838 
##     s = -128996.3 -348998.4 -84819.83 96329.33 448651.9 401836
##            138803.3 -100702.9 -130142.4 14683.16 -184634.8 -122009.1
## 
##   sigma:  13486.2
## 
##      AIC     AICc      BIC 
## 1185.805 1207.180 1220.578 
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE      MAPE      MASE
## Training set -786.0797 11011.44 5467.156 -0.06293346 0.3431345 0.2655027
##                   ACF1
## Training set 0.2613893
autoplot(YULTSETS)

cbind('Residuals' = residuals(YULTSETS), 
      'Forecast errors' = residuals(YULTSETS, type ='response')) %>%
  autoplot(facets = TRUE) + xlab("Date") + ylab("")

Forecasting with ETS Models

ETS point forecasts are equal to the medians of the forecast distributions.
For models with only additive components, the forecast distributions are
normal, so the medians and means are equal. For ETS models with
multiplicative errors, or with multiplicative seasonality, the point
forecasts will not be equal to the means of the forecast distributions.
YULTSETS %>% forecast(h=12) %>%
  autoplot() + 
  ylab("Number of Passengers at YUL")

YULTSETS %>% forecast(h=12)
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Apr 2023        1633820 1616537 1651103 1607387 1660252
## May 2023        1664102 1646819 1681385 1637670 1690535
## Jun 2023        1904429 1887146 1921712 1877997 1930862
## Jul 2023        2168263 2150980 2185547 2141831 2194696
## Aug 2023        2215863 2198580 2233147 2189431 2242296
## Sep 2023        1864307 1847023 1881590 1837874 1890739
## Oct 2023        1683905 1666622 1701188 1657473 1710338
## Nov 2023        1420457 1403173 1437740 1394024 1446889
## Dec 2023        1641173 1623889 1658456 1614740 1667605
## Jan 2024        1648857 1631574 1666140 1622424 1675289
## Feb 2024        1586906 1569622 1604189 1560473 1613338
## Mar 2024        1786897 1769613 1804180 1760464 1813329

ARIMA Models

Exponential smoothing and ARIMA models are the two most widely-used
approaches to time series forecasting. While exponential smoothing models
are based on a description of the trend and seasonality in the data, ARIMA
models aim to describe the autocorrelations in the data.

Stationarity and differencing

A stationary time series is one whose properties do not depend on the time at
which the series is observed. Thus, time series with trends, or with
seasonality, are not stationary. The trend and seasonality will affect the
value of the time series at different times. On the other hand, a white
noise series is stationary: it does not matter when you observe it, it
should look much the same at any point in time.
Some cases can be confusing: a time series with cyclic behaviour (but with
no trend or seasonality) is stationary. This is because the cycles are not
of a fixed length, so before we observe the series we cannot be sure where
the peaks and troughs of the cycles will be.
In general, a stationary time series will have no predictable patterns in
the long-term. Time plots will show the series to be roughly horizontal,
with constant variance.
Box.test(diff(YULTS), lag = 10, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  diff(YULTS)
## X-squared = 27.466, df = 10, p-value = 0.002197
There are autocorrelations lying outside the 95%
limits, and the Ljung-Box statistic has a p-value less than the 0.05
level of significance. This suggests that the monthly change in passengers
is correlated with that of previous months.

Random Walk Model

When the differenced series is white noise, rearranging will lead to the
random walk model. Random walk models are very widely used for
non-stationary data, particularly financial and economic data. Random walks
typically have long periods of apparent trends up or down, and sudden and
unpredictable changes in direction.
The forecasts from a random walk model are equal to the last observation,
as future movements are unpredictable, and are equally likely to be up or
down. Thus, the random walk model underpins naive forecasts.
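In symbols (standard formulation, added for reference), with white noise ε_t:

$$y_t = y_{t-1} + \varepsilon_t, \qquad \hat{y}_{T+h\mid T} = y_T$$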

Second-order differencing

Occasionally the differenced data will not appear to be stationary and it
may be necessary to difference the data a second time to obtain a
stationary series.
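In R this is simply a difference of the already-differenced series; a sketch (not part of the original analysis):

head(diff(YULTS, differences = 2))   # second difference: y_t - 2*y_{t-1} + y_{t-2}
# equivalently: diff(diff(YULTS))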

Seasonal differencing

A seasonal difference is the difference between an observation and the
previous observation from the same season.
Forecasts from this model are equal to the last observation from the
relevant season. That is, this model gives seasonal naive forecasts.
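For monthly data the seasonal difference is the change from the same month one year earlier; a sketch (not part of the original analysis):

head(diff(YULTS, lag = 12))   # y_t - y_{t-12}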

Unit Root Test

One way to determine more objectively whether differencing is required is
to use a unit root test. These are statistical hypothesis tests of
stationarity that are designed for determining whether differencing is
required. The KPSS test is one of the unit root tests available in R.
The null hypothesis is that the data are stationary, and we look for
evidence that the null hypothesis is false. Consequently, a small p-value
suggests that differencing is required.
YULTS %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 0.0826 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
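Here the test statistic (0.0826) is below every critical value, so the null hypothesis of stationarity is not rejected. The ndiffs() and nsdiffs() helpers in the forecast package (not used in the original) give the suggested number of first and seasonal differences directly:

ndiffs(YULTS)    # suggested number of first differences
nsdiffs(YULTS)   # suggested number of seasonal differences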

Autoregressive Models

In an autoregressive model, we forecast the variable of interest using a
linear combination of past values of the variable. This is like a multiple
regression, but with lagged values of the dependent variable as predictors.
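An autoregressive model of order p can be written as (standard formulation, added for reference):

$$y_t = c + \phi_1 y_{t-1} + \phi_2 y_{t-2} + \cdots + \phi_p y_{t-p} + \varepsilon_t$$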

Moving Average Models

Rather than using past values of the forecast variable in a regression,
a moving average model uses past forecast errors in a regression.
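A moving average model of order q can be written as (standard formulation, added for reference):

$$y_t = c + \varepsilon_t + \theta_1 \varepsilon_{t-1} + \cdots + \theta_q \varepsilon_{t-q}$$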

Non-seasonal ARIMA models

If we combine differencing with autoregression and a moving average model,
we obtain a non-seasonal ARIMA model.
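Written out for the differenced series (standard formulation, added for reference), an ARIMA(p,d,q) model is:

$$y'_t = c + \phi_1 y'_{t-1} + \cdots + \phi_p y'_{t-p} + \theta_1 \varepsilon_{t-1} + \cdots + \theta_q \varepsilon_{t-q} + \varepsilon_t$$

where y'_t denotes the series after d differences.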
autoplot(YULTS) + 
  xlab("Date") + ylab("Monthly Passengers at YUL")

YULTSARIMA <- auto.arima(YULTSTR)
summary(YULTSARIMA)
## Series: YULTSTR 
## ARIMA(0,0,1)(0,1,0)[12] with drift 
## 
## Coefficients:
##          ma1      drift
##       0.5558  1715.6547
## s.e.  0.1516   268.3313
## 
## sigma^2 = 179745890:  log likelihood = -425.03
## AIC=856.07   AICc=856.75   BIC=861.06
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 341.5125 11419.45 5176.727 0.02443175 0.3194263 0.2513985
##                     ACF1
## Training set -0.01530382
YULTSARIMA %>% forecast(h=12) %>% autoplot(include=80)

YULTSARIMA %>% forecast(h=12)
##          Point Forecast   Lo 80   Hi 80   Lo 95   Hi 95
## Apr 2023        1635342 1618161 1652524 1609065 1661619
## May 2023        1672320 1652663 1691977 1642257 1702383
## Jun 2023        1915466 1895809 1935123 1885403 1945529
## Jul 2023        2182181 2162524 2201838 2152118 2212244
## Aug 2023        2232446 2212789 2252103 2202383 2262509
## Sep 2023        1877122 1857465 1896779 1847059 1907185
## Oct 2023        1694239 1674582 1713896 1664176 1724302
## Nov 2023        1426121 1406464 1445778 1396058 1456184
## Dec 2023        1652015 1632358 1671672 1621952 1682078
## Jan 2024        1667201 1647544 1686858 1637138 1697264
## Feb 2024        1598772 1579115 1618429 1568709 1628835
## Mar 2024        1802751 1783094 1822408 1772688 1832814
The selected model is ARIMA(0,0,1)(0,1,0)[12], an instance of the general seasonal
specification ARIMA(p,d,q)(P,D,Q)[m], where m is the number of observations per year (here m = 12).

ACF & PACF Plots

ACF and PACF plots are used to help determine the values of p and q in an ARIMA model.
The ACF plot shows the autocorrelations between the series and its lagged values; the
PACF plot shows the autocorrelations that remain after removing the effects of shorter lags.
ggAcf(YULTS) 

ggPacf(YULTS)

ARIMA vs ETS

It is a commonly held myth that ARIMA models are more general than
exponential smoothing. While linear exponential smoothing models are all
special cases of ARIMA models, the non-linear exponential smoothing models
have no equivalent ARIMA counterparts. On the other hand, there are also
many ARIMA models that have no exponential smoothing counterparts. In
particular, all ETS models are non-stationary, while some ARIMA models
are stationary.
The ETS models with seasonality or non-damped trend or both have two unit
roots. All other ETS models have one unit root.

Neural Network Models

Artificial neural networks are forecasting methods that are based on simple
mathematical models of the brain. They allow complex nonlinear relationships
between the response variable and its predictors.
A neural network can be thought of as a network of neurons which are
organised in layers. The predictors (inputs) form the bottom layer, and the
forecasts (outputs) form the top layer. There may also be intermediate
layers containing hidden neurons.
The very simplest networks contain no hidden layers and are equivalent to
linear regressions. The coefficients attached to these predictors are
called weights. The forecasts are obtained by a linear combination of the
inputs. The weights are selected in the neural network framework using
a learning algorithm that minimises a cost function such as the MSE.
Once we add an intermediate layer with hidden neurons, the neural network
becomes non-linear. This is known as a multilayer feed-forward network,
where each layer of nodes receives inputs from the previous layers. The
outputs of the nodes in one layer are inputs to the next layer. The inputs
to each node are combined using a weighted linear combination. The result
is then modified by a nonlinear function before being output.
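A minimal numerical sketch of that last step (illustrative only; the values and the sigmoid activation are assumptions, not the internals of nnetar()):

inputs  <- c(1.2, -0.7, 0.3)               # values arriving at one hidden node
weights <- c(0.5,  0.1, -0.4)              # weights learned by the training algorithm
bias    <- 0.2
z <- bias + sum(weights * inputs)          # weighted linear combination of the inputs
sigmoid <- function(x) 1 / (1 + exp(-x))   # nonlinear modification before output
sigmoid(z)                                 # value passed on to the next layer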

Neural Network autoregression

With time series data, lagged values of the time series can be used as
inputs to a neural network, just as we used lagged values in a linear
autoregression model. We call this a neural network autoregression or
NNAR model.
We only consider feed-forward networks with one hidden layer, and we use
the notation NNAR(p,k) to indicate there are p lagged inputs and k nodes
in the hidden layer. For example, a NNAR(9,5) model is a neural network
with the last nine observations used as inputs for forecasting the output
and with five neurons in the hidden layer.
YULTSNN <- nnetar(YULTSTR, lambda = 0)
autoplot(forecast(YULTSNN,h=12))

NNAR(3,1,2)[12]
The model has inputs Yt-1, Yt-2, Yt-3, a seasonal input Yt-12, and 2 neurons in the hidden layer.

Forecast Combinations

An easy way to improve forecast accuracy is to use several different
methods on the same time series, and to average the resulting forecasts.
Nearly 50 years ago, Bates and Granger showed that combining forecasts often
leads to better forecast accuracy. The results have been virtually unanimous:
combining multiple forecasts leads to increased forecast accuracy. In many
cases one can make dramatic performance improvements by simply averaging
the forecasts.
Using the monthly passengers from 2019/01 to 2024/03, we use forecasts from the
following models: ETS, ARIMA, and NNAR, and we compare the results using the
last 12 months of observations.
ETS   <- forecast(YULTSTR, h = 12)
ARIMA <- forecast(auto.arima(YULTSTR, lambda = 0, biasadj = TRUE), h = 12)
NNAR  <- forecast(nnetar(YULTSTR), h = 12)
Combination <- (ETS[["mean"]] + ARIMA[["mean"]] + NNAR[["mean"]])/3

ForecastTable <- cbind(ETS[["mean"]], ARIMA[["mean"]], 
                       NNAR[["mean"]], Combination, YULTSTE)

colnames(ForecastTable) <- c("ETS", "ARIMA", "NNAR", "Combination", "Testing Set")
ForecastTable
##              ETS   ARIMA    NNAR Combination Testing Set
## Apr 2023 1633820 1634174 1617725     1628573     1631602
## May 2023 1664102 1672219 1667807     1668043     1741104
## Jun 2023 1904429 1918381 1919845     1914218     1913827
## Jul 2023 2168263 2188404 2182428     2179698     2183209
## Aug 2023 2215863 2239292 2220800     2225319     2233977
## Sep 2023 1864307 1879561 1880640     1874836     1924029
## Oct 2023 1683905 1694410 1693639     1690651     1775914
## Nov 2023 1420457 1422966 1403779     1415734     1501408
## Dec 2023 1641173 1651662 1654391     1649075     1701441
## Jan 2024 1648857 1667036 1645262     1653718     1673383
## Feb 2024 1586906 1597759 1592571     1592412     1568672
## Mar 2024 1786897 1804268 1805136     1798767     1786266
autoplot(YULTS) + 
  autolayer(ETS, series = "ETS", PI = FALSE) +
  autolayer(ARIMA, series = "ARIMA", PI = FALSE) +
  autolayer(NNAR, series = "NNAR", PI = FALSE) +
  autolayer(Combination, series = "Combination") + 
  xlab("Date") + ylab("Number Passengers") +
  ggtitle("Passengers at YUL")

c(ETS   = accuracy(ETS, YULTSTE)["Test set", "RMSE"],
  ARIMA = accuracy(ARIMA, YULTSTE)["Test set", "RMSE"],
  NNAR  = accuracy(NNAR, YULTSTE)["Test set", "RMSE"],
  Combination = accuracy(Combination, YULTSTE)["Test set", "RMSE"])
##         ETS       ARIMA        NNAR Combination 
##    49771.27    44054.40    48210.00    46826.82
c(ETS   = accuracy(ETS, YULTSTE)["Test set", "MAPE"],
  ARIMA = accuracy(ARIMA, YULTSTE)["Test set", "MAPE"],
  NNAR  = accuracy(NNAR, YULTSTE)["Test set", "MAPE"],
  Combination = accuracy(Combination, YULTSTE)["Test set", "MAPE"])
##         ETS       ARIMA        NNAR Combination 
##    2.202261    1.926665    2.201377    2.040072

Different Neural Networks

Feed-Forward Neural Networks (FNN)
Recurrent Neural Networks (RNN)
Long Short-Term Memory Recurrent Neural Network (LSTM)
Time Lagged Neural Networks (TLNN)
Seasonal Artificial Neural Networks (SANN)
Elman Neural Networks
Jordan Neural Networks