Module 8 Discussion: ARIMA, Seasonal ARIMA, White Noise, Random Walk, and Drift

Author

Adrian Aziza

# Package setup ------------------------------------------------------------
pkgs <- c("forecast", "ggplot2", "dplyr", "tidyr", "tibble", "patchwork", "knitr")
to_install <- setdiff(pkgs, rownames(installed.packages()))
if (length(to_install) > 0) {
  install.packages(to_install, repos = "https://cloud.r-project.org")
}

package 'patchwork' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\azizaa\AppData\Local\Temp\RtmpsHcHb9\downloaded_packages

invisible(lapply(pkgs, library, character.only = TRUE))

theme_set(theme_minimal(base_size = 12))

fmt_arima <- function(fit) {
  ord <- forecast::arimaorder(fit)
  if (all(c("P", "D", "Q", "Frequency") %in% names(ord)) && ord[["Frequency"]] > 1) {
    paste0("ARIMA(", ord[["p"]], ",", ord[["d"]], ",", ord[["q"]], ")(",
           ord[["P"]], ",", ord[["D"]], ",", ord[["Q"]], ")[", ord[["Frequency"]], "]")
  } else {
    paste0("ARIMA(", ord[["p"]], ",", ord[["d"]], ",", ord[["q"]], ")")
  }
}

I. Manual ARIMA selection versus automatic ARIMA

For the non-seasonal example, I use the built-in annual Nile river-flow series. I hold out the last 10 years as the actual comparison period and fit models only on the historical training data.

nile <- ts(as.numeric(Nile), start = 1871, frequency = 1)
nile_train <- window(nile, end = 1960)
nile_test  <- window(nile, start = 1961)

nile_info <- tibble(
  split = c("Training", "Holdout / actual"),
  years = c("1871-1960", "1961-1970"),
  n = c(length(nile_train), length(nile_test))
)
knitr::kable(nile_info)

split	years	n
Training	1871-1960	90
Holdout / actual	1961-1970	10

The original series is not centered around a stable mean, so I difference it once. After first differencing, the ACF has its main meaningful spike at lag 1 and then weakens, while the PACF does not show a strong multi-lag cutoff. That pattern supports a low-order MA model on the differenced series, so my manual choice is ARIMA(0,1,1): no AR term, one regular difference, and one MA term.

p_acf <- ggAcf(diff(nile_train), lag.max = 24) +
  labs(title = "ACF of first-differenced Nile training series")

p_pacf <- ggPacf(diff(nile_train), lag.max = 24) +
  labs(title = "PACF of first-differenced Nile training series")

p_acf + p_pacf

manual_nile <- Arima(nile_train, order = c(0, 1, 1))
auto_nile <- auto.arima(
  nile_train,
  seasonal = FALSE,
  stepwise = FALSE,
  approximation = FALSE
)

model_compare <- tibble(
  model = c("Manual", "Automatic"),
  arima_form = c(fmt_arima(manual_nile), fmt_arima(auto_nile)),
  AICc = c(manual_nile$aicc, auto_nile$aicc),
  BIC = c(BIC(manual_nile), BIC(auto_nile))
)
knitr::kable(model_compare, digits = 2)

model	arima_form	AICc	BIC
Manual	ARIMA(0,1,1)	1141.81	1146.65
Automatic	ARIMA(1,1,1)	1140.04	1147.22

summary(manual_nile)

Series: nile_train 
ARIMA(0,1,1) 

Coefficients:
          ma1
      -0.7523
s.e.   0.1128

sigma^2 = 20887:  log likelihood = -568.84
AIC=1141.67   AICc=1141.81   BIC=1146.65

Training set error measures:
                    ME     RMSE      MAE       MPE     MAPE     MASE     ACF1
Training set -10.21943 142.9089 111.8557 -3.376843 12.87627 0.845736 0.139387

summary(auto_nile)

Series: nile_train 
ARIMA(1,1,1) 

Coefficients:
         ar1      ma1
      0.2589  -0.8727
s.e.  0.1237   0.0599

sigma^2 = 20181:  log likelihood = -566.88
AIC=1139.75   AICc=1140.04   BIC=1147.22

Training set error measures:
                    ME     RMSE      MAE       MPE     MAPE     MASE
Training set -15.35724 139.6734 109.1538 -3.901992 12.68861 0.825307
                    ACF1
Training set -0.02972679

The automatic model is close to the manual model because both use first differencing and a low-order short-memory error structure. If auto.arima() adds one AR term, the difference is not conceptual: it means the algorithm found a small information-criterion improvement by allowing mild persistence in the differenced errors. I would report the automatic model for forecasting because it is chosen by AICc, but the ACF/PACF justification still correctly identified the needed differencing and the low-order ARMA structure.

nile_fc <- forecast(auto_nile, h = length(nile_test), level = 95)

autoplot(nile_fc) +
  autolayer(nile_test, series = "Actual holdout", linewidth = 0.8) +
  labs(
    title = "Nile annual flow: historical values, actual holdout, forecasts, and 95% prediction intervals",
    x = "Year",
    y = "Annual flow",
    color = "Series"
  )

nile_forecast_table <- tibble(
  year = 1961:1970,
  actual = as.numeric(nile_test),
  prediction = as.numeric(nile_fc$mean),
  lo_95 = as.numeric(nile_fc$lower[, "95%"]),
  hi_95 = as.numeric(nile_fc$upper[, "95%"])
)
knitr::kable(nile_forecast_table, digits = 1)

year	actual	prediction	lo_95	hi_95
1961	1020	860.2	581.8	1138.6
1962	906	871.9	573.4	1170.4
1963	901	874.9	569.8	1180.1
1964	1170	875.7	566.2	1185.2
1965	912	875.9	562.6	1189.3
1966	746	876.0	559.0	1193.0
1967	919	876.0	555.4	1196.6
1968	718	876.0	551.8	1200.1
1969	714	876.0	548.3	1203.7
1970	740	876.0	544.8	1207.1

II. Seasonal ARIMA

For the seasonal example, I use the built-in monthly AirPassengers series. It has a clear upward trend and multiplicative annual seasonality, so I use a log transformation through lambda = 0. The seasonal ARIMA model is:

[ (1-B)(1-B^{12})(y_t)=(1+_1B)(1+_1B^{12})_t. ]

This is an ARIMA(0,1,1)(0,1,1)[12] model: one regular difference removes trend, one seasonal difference removes yearly seasonality, the non-seasonal MA(1) term captures short-run one-month error correction, and the seasonal MA(1) term captures same-month-last-year error correction.

ap <- AirPassengers

ap_sarima <- Arima(
  ap,
  order = c(0, 1, 1),
  seasonal = c(0, 1, 1),
  lambda = 0,
  biasadj = TRUE
)

summary(ap_sarima)

Series: ap 
ARIMA(0,1,1)(0,1,1)[12] 
Box Cox transformation: lambda= 0 

Coefficients:
          ma1     sma1
      -0.4018  -0.5569
s.e.   0.0896   0.0731

sigma^2 = 0.001371:  log likelihood = 244.7
AIC=-483.4   AICc=-483.21   BIC=-474.77

Training set error measures:
                     ME     RMSE      MAE         MPE     MAPE      MASE
Training set -0.1407417 10.15843 7.377057 -0.07264481 2.630692 0.2303149
                    ACF1
Training set -0.03701935

coef_table <- tibble(
  coefficient = names(coef(ap_sarima)),
  estimate = as.numeric(coef(ap_sarima)),
  interpretation = c(
    "Non-seasonal MA(1): correction for the previous month's forecast error on the log-differenced scale.",
    "Seasonal MA(1): correction for the same month in the previous year's seasonal forecast error."
  )
)
knitr::kable(coef_table, digits = 3)

coefficient	estimate	interpretation
ma1	-0.402	Non-seasonal MA(1): correction for the previous month’s forecast error on the log-differenced scale.
sma1	-0.557	Seasonal MA(1): correction for the same month in the previous year’s seasonal forecast error.

The negative MA coefficients mean that positive forecast errors tend to be partially corrected in the next relevant period. The regular MA(1) coefficient works month-to-month, while the seasonal MA(1) coefficient works across the 12-month seasonal lag. Because the model is estimated after one regular difference and one seasonal difference, the coefficients describe remaining dependence after trend and annual seasonality have been removed.

ap_fc <- forecast(ap_sarima, h = 24, level = 95)

autoplot(ap_fc) +
  labs(
    title = "Seasonal ARIMA forecast for AirPassengers",
    x = "Year",
    y = "Monthly passengers",
    color = "Series"
  )

checkresiduals(ap_sarima)


    Ljung-Box test

data:  Residuals from ARIMA(0,1,1)(0,1,1)[12]
Q* = 26.446, df = 22, p-value = 0.233

Model df: 2.   Total lags used: 24

III. White noise, random walk, and random walk with drift

Let (_t) be independent random shocks with mean zero and constant variance (^2):

[ _t WN(0,^2). ]

White noise: [ y_t=_t. ] Each value is just the current shock. There is no memory because the previous shock does not carry into the next value.

Random walk: [ y_t=y_{t-1}+t. ] The current value equals the previous value plus a new shock. Differencing gives: [ y_t=y_t-y{t-1}=_t, ] so the first difference of a random walk is white noise.

Random walk with drift: [ y_t=c+y_{t-1}+t. ] Here (c) is a constant drift. Differencing gives: [ y_t=y_t-y{t-1}=c+_t, ] so the first difference is white noise plus a constant.

set.seed(7406)
n <- 160
t <- 1:n
sigma <- 1
drift <- 0.15
eps <- rnorm(n, mean = 0, sd = sigma)

sim_df <- tibble(
  t = t,
  `White noise` = eps,
  `Random walk` = cumsum(eps),
  `Random walk with drift` = cumsum(drift + eps)
)

sim_long <- sim_df |>
  pivot_longer(-t, names_to = "process", values_to = "value")

ggplot(sim_long, aes(x = t, y = value)) +
  geom_line(linewidth = 0.6) +
  facet_wrap(~ process, scales = "free_y", ncol = 1) +
  labs(
    title = "Simulated white noise, random walk, and random walk with drift",
    x = "Time",
    y = "Value"
  )

comparison <- tibble(
  process = c("White noise", "Random walk", "Random walk with drift"),
  trend = c("None; fluctuates around a constant mean", "No deterministic trend, but it wanders", "Upward if c > 0; downward if c < 0"),
  shock = c("Only affects the current period", "Changes the level from that point forward", "Changes the level permanently, plus drift moves the series"),
  memory = c("None", "Permanent", "Permanent"),
  stationarity = c("Yes, if shocks are iid with constant variance", "No", "No"),
  forecast = c("Constant at the mean, usually 0", "Flat at the latest observed value, with widening intervals", "Trending line from the latest value, with widening intervals")
)
knitr::kable(comparison)

process	trend	shock	memory	stationarity	forecast
White noise	None; fluctuates around a constant mean	Only affects the current period	None	Yes, if shocks are iid with constant variance	Constant at the mean, usually 0
Random walk	No deterministic trend, but it wanders	Changes the level from that point forward	Permanent	No	Flat at the latest observed value, with widening intervals
Random walk with drift	Upward if c > 0; downward if c < 0	Changes the level permanently, plus drift moves the series	Permanent	No	Trending line from the latest value, with widening intervals

Visually, white noise looks like random jitter around a stable center. The random walk looks smoother but less stable because each shock accumulates into the level. The random walk with drift also accumulates shocks, but it has a systematic direction because the constant drift is added every period.