library(tseries)
library(tidyverse)
library(magrittr)
library(plotly)
library(timetk)
library(lubridate)
library(pracma)
library(fma)
library(forecast)
library(fpp)
library(car)
library(astsa)
library(dygraphs)
d <- prodn
plot(d)
There is a clear trend recognisable.
There are several available options:
First guess: SARIMA could fit.
d_decomposition <- stl(d, s.window = 12)
plot(d_decomposition)
d_dygraph <- cbind(origin = d,
seasonality = d_decomposition$time.series[,"seasonal"],
trend = d_decomposition$time.series[,"trend"],
remainder = d_decomposition$time.series[,"remainder"])
dygraph(d_dygraph)
# dygraph(d_dygraph[,"trend"])
tsdisplay(d)
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(1, 1, 1)))
##
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1 sar1 sma1
## 0.5767 -0.2783 -0.0352 -0.6700
## s.e. 0.1178 0.1376 0.0664 0.0424
##
## sigma^2 estimated as 1.426: log likelihood = -577.01, aic = 1164.02
(d_sarima <- arima(d, order = c(1,1,0), seasonal = c(1, 1, 1)))
##
## Call:
## arima(x = d, order = c(1, 1, 0), seasonal = c(1, 1, 1))
##
## Coefficients:
## ar1 sar1 sma1
## 0.3299 -0.0471 -0.6722
## s.e. 0.0500 0.0656 0.0420
##
## sigma^2 estimated as 1.438: log likelihood = -578.7, aic = 1165.41
(d_sarima <- arima(d, order = c(1,0,1), seasonal = c(1, 1, 1)))
##
## Call:
## arima(x = d, order = c(1, 0, 1), seasonal = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1 sar1 sma1
## 0.9936 0.2623 -0.0503 -0.6809
## s.e. 0.0056 0.0436 0.0650 0.0414
##
## sigma^2 estimated as 1.47: log likelihood = -585.68, aic = 1181.36
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 1, 1))) # improved aic
##
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 1, 1))
##
## Coefficients:
## ar1 ma1 sma1
## 0.5827 -0.2857 -0.6831
## s.e. 0.1160 0.1357 0.0331
##
## sigma^2 estimated as 1.428: log likelihood = -577.15, aic = 1162.3
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 0, 1)))
##
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 0, 1))
##
## Coefficients:
## ar1 ma1 sma1
## 0.0085 0.0578 0.639
## s.e. 0.4972 0.4922 0.036
##
## sigma^2 estimated as 4.024: log likelihood = -787.85, aic = 1583.69
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 2, 0)))
##
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 2, 0))
##
## Coefficients:
## ar1 ma1
## 0.7053 -0.4038
## s.e. 0.0757 0.0935
##
## sigma^2 estimated as 6.203: log likelihood = -812.99, aic = 1631.97
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 1, 3))) # improved aic
##
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 1, 3))
##
## Coefficients:
## ar1 ma1 sma1 sma2 sma3
## 0.5706 -0.2608 -0.7432 -0.1397 0.2782
## s.e. 0.1119 0.1295 0.0535 0.0647 0.0526
##
## sigma^2 estimated as 1.314: log likelihood = -564.24, aic = 1140.48
(d_sarima <- arima(d, order = c(1,1,1), seasonal = c(0, 1, 4)))
##
## Call:
## arima(x = d, order = c(1, 1, 1), seasonal = c(0, 1, 4))
##
## Coefficients:
## ar1 ma1 sma1 sma2 sma3 sma4
## 0.5707 -0.2617 -0.7401 -0.1411 0.2647 0.0167
## s.e. 0.1123 0.1301 0.0544 0.0661 0.0675 0.0520
##
## sigma^2 estimated as 1.314: log likelihood = -564.19, aic = 1142.37
The best result performs the following SARIMA model:
arima(d, order = c(1,1,1), seasonal = c(0, 1, 3)))
Preparation
lsunspot100 <- window(log(sunspotarea), start = 1875, end = 1974)
fit_ar10 <- arima(lsunspot100, order = c(10, 0, 0))
lsunspot100 <- window(log(sunspotarea), start = 1875, end = 1974)
fit_ar10 <- arima(lsunspot100, order = c(10, 0, 0))
predictions <- predict(fit_ar10, n.ahead = 100)
plot(lsunspot100, xlim = c(1875, 2074), ylab = "yearly avg sunspot area")
abline(h = fit_ar10$coef["intercept"], lty = 2, col = "blue")
lines(predictions$pred, col = "green")
The predictions show a damped sinusoid towards the global mean. Every cycle reduces the scattering of the predicted points.
lsunspot100_2 <- window(log(sunspotarea), start = 1875, end = 2011)
fit_ar10_2 <- arima(ts(lsunspot100_2, start = 1875, end = 1974), order = c(10, 0, 0))
predictions_2 <- predict(fit_ar10_2, n.ahead = 37)
plot(lsunspot100_2, xlim = c(1875, 2074), ylab = "yearly avg sunspot area")
abline(h = fit_ar10_2$coef["intercept"], lty = 2, col = "blue")
lines(predictions_2$pred, col = "green")
cat("Mean squared forecasting error: ", (msfe <- (sum((as.numeric(tail(lsunspot100_2, 37)) - as.numeric(predictions_2$pred))^2))/length(lsunspot100_2)))
## Mean squared forecasting error: 0.2041178
plot((as.numeric(tail(lsunspot100_2, 37)) - as.numeric(predictions_2$pred))^2, type = "l", xlab = "time", ylab = "msfe")
The predictions getting more inaccurate over time The mean squared forecasting error (msfe) is about 0.2. The msfe is very high at the end because there is a outlier in the time series. One could expect that the model will not fit very well for future predicted points.
data preparation
d <- AirPassengers
d_1949_to_1956 <- window(d, end = c(1956, 12))
plot(d_1949_to_1956) # seasonality seems to be 12 months
hist(d_1949_to_1956)
plot(stl(d_1949_to_1956, s.window = 12))
tsdisplay(d_1949_to_1956)
tsdisplay(diff(d_1949_to_1956))
pacf(diff(d_1949_to_1956))
d_1949_to_1956_arima_coefficients <- auto.arima(d_1949_to_1956, ic = "aic")
d_1949_to_1956_arima_coefficients_2 <- auto.arima(d_1949_to_1956, ic = "bic")
d_1949_to_1956_arima_coefficients_3 <- auto.arima(d_1949_to_1956, ic = "aicc")
fit_1949_to_1956 <- arima(d_1949_to_1956, order = c(1, 1, 0), seasonal = c(1, 1, 0))
tsdisplay(fit_1949_to_1956$residuals)
predictions <- predict(fit_1949_to_1956, n.ahead = 48) # residuals looking good
plot(d_1949_to_1956, lty = 3, xlim = c(1949, 1961), ylim = c(0, 800), ylab = "air passengers (unit = 1000)")
lines(d_1949_to_1956, lwd = 1)
lines(predictions$pred, lwd = 2, col = "darkred")
lines(predictions$pred+predictions$se*1.96, col="tomato")
lines(predictions$pred-predictions$se*1.96, col="tomato")
lines(window(d, start = c(1957, 01), lwd = 2, end = c(1960, 12)), col = "springgreen")
# the easier way with forecast | dark grey = 80% confidence interval | grey = 90% ci)
plot(forecast(fit_1949_to_1956, h = 48))
lines(window(d, start = c(1957, 01), lwd = 2, end = c(1960, 12)), col = "springgreen")
fit <- stl(d, s.window = "periodic")
plot(fit)
season <- fit$time.series[, 1]
trend <- fit$time.series[, 2]
remainder <- head(fit$time.series[, 3], 96)
# trend forecast
xx <- time(tail(trend, 48))
yy <- tail(trend, 48)
fit_regression <- lm(yy ~ xx)
t_forecast <- 306 + (0:47)/12 * coef(fit_regression)[2] # is b0 (306) correctly choosen? > see week 5 slide 57
plot(t_forecast)
# seasonal forecast
l1y <- window(season, start = c(1956, 1), end = c(1956, 12))
s_forecast <- ts(l1y, start = c(1957, 1), end = c(1960, 12), frequency = 12)
plot(s_forecast)
# remainder fitting
tsdisplay(remainder)
remainder_coefficients <- auto.arima(remainder, ic = "aic")
remainder_coefficients2 <- auto.arima(remainder, ic = "bic")
remainder_coefficients3 <- auto.arima(remainder, ic = "aicc")
remainder_coefficients
## Series: remainder
## ARIMA(3,0,1)(1,1,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ma1 sar1
## 1.1110 -0.2515 -0.1999 -0.8640 -0.2636
## s.e. 0.1972 0.1837 0.1406 0.2002 0.1088
##
## sigma^2 estimated as 55.75: log likelihood=-286.45
## AIC=584.9 AICc=585.99 BIC=599.49
remainder_coefficients2
## Series: remainder
## ARIMA(1,0,0)(0,1,0)[12]
##
## Coefficients:
## ar1
## 0.3241
## s.e. 0.1034
##
## sigma^2 estimated as 66.53: log likelihood=-295.04
## AIC=594.09 AICc=594.24 BIC=598.95
remainder_coefficients3
## Series: remainder
## ARIMA(1,0,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## 0.3633 -0.1955
## s.e. 0.1039 0.1102
##
## sigma^2 estimated as 64.56: log likelihood=-293.52
## AIC=593.05 AICc=593.35 BIC=600.34
fit_remainder <- arima(remainder, order = c(3, 0, 1), seasonal = c(1, 1, 0), include.mean = F)
tsdisplay(fit_remainder$residuals)
fit_remainder2 <- arima(remainder, order = c(3, 1, 1), seasonal = c(1, 1, 0), include.mean = F)
tsdisplay(fit_remainder2$residuals) # differencing does not improve the result
r_forecast <- predict(fit_remainder, n.ahead = 48)$pred
plot(r_forecast)
forecast_merged <- t_forecast + s_forecast + r_forecast
plot(forecast_merged)
plot(d_1949_to_1956, lty = 3, xlim = c(1949, 1961), ylim = c(0, 800), ylab = "air passengers (unit = 1000)")
lines(d_1949_to_1956, lwd = 1)
lines(forecast_merged, lwd = 2, col = "darkred")
lines(window(d, start = c(1957, 01), lwd = 2, end = c(1960, 12)), col = "springgreen")
The model delivers quite accurate results. The residuals are allright.
I don’t know if my intercept of 306 was choosen rightly? This value has a huge impact on the model results.
One does not need a prediction interval with this method. Reason: The method uses partly the acutal data to generate the predictions.
The SARIMA model is more accurate and is therefore the better solution for fitting the airplane data. Both models could improve by fitting the seasonal peaks even better.