library(tseries)
library(tidyverse)
library(magrittr)
library(plotly)
library(timetk)
library(lubridate)
library(pracma)
library(fma)
library(forecast)
library(fpp)
library(car)
library(astsa)
library(dygraphs)



Series 5


Exercise 5.1 - Stationarity Rederal Reserve Board Production Index

d <- prodn
plot(d)

There is a clear trend recognisable.

There are several available options:

  • Linear regression
  • Backward difference
  • Filtration

First guess: SARIMA could fit.

d_decomposition <- stl(d, s.window = 12)
plot(d_decomposition)

d_dygraph <- cbind(origin = d,
                   seasonality = d_decomposition$time.series[,"seasonal"], 
                   trend = d_decomposition$time.series[,"trend"], 
                   remainder = d_decomposition$time.series[,"remainder"])
dygraph(d_dygraph)
# dygraph(d_dygraph[,"trend"])
tsdisplay(d)

(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(1, 1, 1)))
## 
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1     sar1     sma1
##       0.5767  -0.2783  -0.0352  -0.6700
## s.e.  0.1178   0.1376   0.0664   0.0424
## 
## sigma^2 estimated as 1.426:  log likelihood = -577.01,  aic = 1164.02
(d_sarima <- arima(d, order = c(1,1,0), seasonal = c(1, 1, 1)))
## 
## Call:
## arima(x = d, order = c(1, 1, 0), seasonal = c(1, 1, 1))
## 
## Coefficients:
##          ar1     sar1     sma1
##       0.3299  -0.0471  -0.6722
## s.e.  0.0500   0.0656   0.0420
## 
## sigma^2 estimated as 1.438:  log likelihood = -578.7,  aic = 1165.41
(d_sarima <- arima(d, order = c(1,0,1), seasonal = c(1, 1, 1)))
## 
## Call:
## arima(x = d, order = c(1, 0, 1), seasonal = c(1, 1, 1))
## 
## Coefficients:
##          ar1     ma1     sar1     sma1
##       0.9936  0.2623  -0.0503  -0.6809
## s.e.  0.0056  0.0436   0.0650   0.0414
## 
## sigma^2 estimated as 1.47:  log likelihood = -585.68,  aic = 1181.36
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 1, 1))) # improved aic
## 
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 1, 1))
## 
## Coefficients:
##          ar1      ma1     sma1
##       0.5827  -0.2857  -0.6831
## s.e.  0.1160   0.1357   0.0331
## 
## sigma^2 estimated as 1.428:  log likelihood = -577.15,  aic = 1162.3
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 0, 1))) 
## 
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 0, 1))
## 
## Coefficients:
##          ar1     ma1   sma1
##       0.0085  0.0578  0.639
## s.e.  0.4972  0.4922  0.036
## 
## sigma^2 estimated as 4.024:  log likelihood = -787.85,  aic = 1583.69
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 2, 0)))
## 
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 2, 0))
## 
## Coefficients:
##          ar1      ma1
##       0.7053  -0.4038
## s.e.  0.0757   0.0935
## 
## sigma^2 estimated as 6.203:  log likelihood = -812.99,  aic = 1631.97
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 1, 3))) # improved aic
## 
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 1, 3))
## 
## Coefficients:
##          ar1      ma1     sma1     sma2    sma3
##       0.5706  -0.2608  -0.7432  -0.1397  0.2782
## s.e.  0.1119   0.1295   0.0535   0.0647  0.0526
## 
## sigma^2 estimated as 1.314:  log likelihood = -564.24,  aic = 1140.48
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 1, 4)))
## 
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 1, 4))
## 
## Coefficients:
##          ar1      ma1     sma1     sma2    sma3    sma4
##       0.5707  -0.2617  -0.7401  -0.1411  0.2647  0.0167
## s.e.  0.1123   0.1301   0.0544   0.0661  0.0675  0.0520
## 
## sigma^2 estimated as 1.314:  log likelihood = -564.19,  aic = 1142.37

The best result performs the following SARIMA model:
arima(d, order = c(1,1,1), seasonal = c(0, 1, 3)))


Exercise 5.2 - sunspot data & AR model

Preparation

lsunspot100 <- window(log(sunspotarea), start = 1875, end = 1974)
fit_ar10 <- arima(lsunspot100, order = c(10, 0, 0))


lsunspot100 <- window(log(sunspotarea), start = 1875, end = 1974)
fit_ar10 <- arima(lsunspot100, order = c(10, 0, 0))
predictions <- predict(fit_ar10, n.ahead = 100)
plot(lsunspot100, xlim = c(1875, 2074), ylab = "yearly avg sunspot area")
abline(h = fit_ar10$coef["intercept"], lty = 2, col = "blue")
lines(predictions$pred, col = "green")

The predictions show a damped sinusoid towards the global mean. Every cycle reduces the scattering of the predicted points.

lsunspot100_2 <- window(log(sunspotarea), start = 1875, end = 2011)
fit_ar10_2 <- arima(ts(lsunspot100_2, start = 1875, end = 1974), order = c(10, 0, 0))
predictions_2 <- predict(fit_ar10_2, n.ahead = 37)
plot(lsunspot100_2, xlim = c(1875, 2074), ylab = "yearly avg sunspot area")
abline(h = fit_ar10_2$coef["intercept"], lty = 2, col = "blue")
lines(predictions_2$pred, col = "green")

cat("Mean squared forecasting error: ", (msfe <- (sum((as.numeric(tail(lsunspot100_2, 37)) - as.numeric(predictions_2$pred))^2))/length(lsunspot100_2)))
## Mean squared forecasting error:  0.2041178
plot((as.numeric(tail(lsunspot100_2, 37)) - as.numeric(predictions_2$pred))^2, type = "l",  xlab = "time", ylab = "msfe")

The predictions getting more inaccurate over time The mean squared forecasting error (msfe) is about 0.2. The msfe is very high at the end because there is a outlier in the time series. One could expect that the model will not fit very well for future predicted points.


Exercise 5.3 - SARIMA with airplane data

data preparation

d <- AirPassengers
d_1949_to_1956 <- window(d, end = c(1956, 12))
plot(d_1949_to_1956) # seasonality seems to be 12 months

hist(d_1949_to_1956)

plot(stl(d_1949_to_1956, s.window = 12))

tsdisplay(d_1949_to_1956)

tsdisplay(diff(d_1949_to_1956))

pacf(diff(d_1949_to_1956))

d_1949_to_1956_arima_coefficients <- auto.arima(d_1949_to_1956, ic = "aic")
d_1949_to_1956_arima_coefficients_2 <- auto.arima(d_1949_to_1956, ic = "bic")
d_1949_to_1956_arima_coefficients_3 <- auto.arima(d_1949_to_1956, ic = "aicc")
fit_1949_to_1956 <- arima(d_1949_to_1956, order = c(1, 1, 0), seasonal = c(1, 1, 0))
tsdisplay(fit_1949_to_1956$residuals)

predictions <- predict(fit_1949_to_1956, n.ahead = 48) # residuals looking good

plot(d_1949_to_1956, lty = 3, xlim = c(1949, 1961), ylim = c(0, 800), ylab = "air passengers (unit = 1000)")
lines(d_1949_to_1956, lwd = 1)
lines(predictions$pred, lwd = 2, col = "darkred")
lines(predictions$pred+predictions$se*1.96, col="tomato")
lines(predictions$pred-predictions$se*1.96, col="tomato")
lines(window(d, start = c(1957, 01), lwd = 2, end = c(1960, 12)), col = "springgreen")

# the easier way with forecast | dark grey = 80% confidence interval | grey = 90% ci)
plot(forecast(fit_1949_to_1956, h = 48))
lines(window(d, start = c(1957, 01), lwd = 2, end = c(1960, 12)), col = "springgreen")

fit <- stl(d, s.window = "periodic")
plot(fit)

season <- fit$time.series[, 1]
trend <- fit$time.series[, 2]
remainder <- head(fit$time.series[, 3], 96)

# trend forecast
xx <- time(tail(trend, 48))
yy <- tail(trend, 48)
fit_regression <- lm(yy ~ xx)
t_forecast <- 306 + (0:47)/12 * coef(fit_regression)[2] # is b0 (306) correctly choosen? > see week 5 slide 57
plot(t_forecast)

# seasonal forecast
l1y <- window(season, start = c(1956, 1), end = c(1956, 12))
s_forecast <- ts(l1y, start = c(1957, 1), end = c(1960, 12), frequency =  12)
plot(s_forecast)

# remainder fitting
tsdisplay(remainder)

remainder_coefficients <- auto.arima(remainder, ic = "aic")
remainder_coefficients2 <- auto.arima(remainder, ic = "bic")
remainder_coefficients3 <- auto.arima(remainder, ic = "aicc")
remainder_coefficients
## Series: remainder 
## ARIMA(3,0,1)(1,1,0)[12] 
## 
## Coefficients:
##          ar1      ar2      ar3      ma1     sar1
##       1.1110  -0.2515  -0.1999  -0.8640  -0.2636
## s.e.  0.1972   0.1837   0.1406   0.2002   0.1088
## 
## sigma^2 estimated as 55.75:  log likelihood=-286.45
## AIC=584.9   AICc=585.99   BIC=599.49
remainder_coefficients2
## Series: remainder 
## ARIMA(1,0,0)(0,1,0)[12] 
## 
## Coefficients:
##          ar1
##       0.3241
## s.e.  0.1034
## 
## sigma^2 estimated as 66.53:  log likelihood=-295.04
## AIC=594.09   AICc=594.24   BIC=598.95
remainder_coefficients3
## Series: remainder 
## ARIMA(1,0,0)(1,1,0)[12] 
## 
## Coefficients:
##          ar1     sar1
##       0.3633  -0.1955
## s.e.  0.1039   0.1102
## 
## sigma^2 estimated as 64.56:  log likelihood=-293.52
## AIC=593.05   AICc=593.35   BIC=600.34
fit_remainder <- arima(remainder, order = c(3, 0, 1), seasonal = c(1, 1, 0), include.mean = F)
tsdisplay(fit_remainder$residuals)

fit_remainder2 <- arima(remainder, order = c(3, 1, 1), seasonal = c(1, 1, 0), include.mean = F)
tsdisplay(fit_remainder2$residuals) # differencing does not improve the result

r_forecast <- predict(fit_remainder, n.ahead = 48)$pred
plot(r_forecast)

forecast_merged <- t_forecast + s_forecast + r_forecast
plot(forecast_merged)

plot(d_1949_to_1956, lty = 3, xlim = c(1949, 1961), ylim = c(0, 800), ylab = "air passengers (unit = 1000)")
lines(d_1949_to_1956, lwd = 1)
lines(forecast_merged, lwd = 2, col = "darkred")
lines(window(d, start = c(1957, 01), lwd = 2, end = c(1960, 12)), col = "springgreen")

The model delivers quite accurate results. The residuals are allright.
I don’t know if my intercept of 306 was choosen rightly? This value has a huge impact on the model results.

One does not need a prediction interval with this method. Reason: The method uses partly the acutal data to generate the predictions.

The SARIMA model is more accurate and is therefore the better solution for fitting the airplane data. Both models could improve by fitting the seasonal peaks even better.