時系列分析(ARIMA)
y <- c(AirPassengers) # 月間航空旅客数
n <- length(y) # 月数
n.tr <- 120 # 訓練月数
n.te <- n - n.tr # テスト月数
t <- 1:n # 月
ii.tr <- 1:120 # 訓練期間インデックス
ii.te <- 121:n # テスト期間インデックス
matplot(y, type = 'o', pch = 1, col = 2)

fq <- 12
library(MASS)
bc <- boxcox(y ~ t, data = data.frame(y = y[ii.tr], t = t[ii.tr]))
lambda <- round(bc$x[which.max(bc$y)], 1)
lambda
## [1] 0
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo

y.bc <- BoxCox(y[ii.tr], lambda)
matplot(y.bc, type = 'o', pch = 1, col = 2, ylab = 'y')

d <- 1
dy <- diff(y.bc, diff = d)
matplot(dy, type = 'o', pch = 1, col = 2, ylab = 'y')

ys <- ts(10 * sin(2 * pi * t / 10) + rnorm(n))
dir.create('acf_lag', showWarnings = F)
for (k in 1:25)
{
png(paste0('acf_lag/acf_lag', k, '.png'))
ys.lag <- lag(ys, k = -k)
ts2 <- cbind(ys, ys.lag)
par(mfrow = c(3, 1), mar = c(2, 4, 1, 1))
matplot(ts2, type = 'o', pch = 1, col = c(1, 4),
xlab = '', ylab = '', xlim = c(0, 120))
abline(v = c(0, k), col = 2)
arrows( 0, 0, k, 0, length = 0.1, col = 2)
mtext(paste(k), side = 1, at = k, line = 1, col = 2)
legend('topright', col = c(1, 4), bg = 'white', lty = 1,
legend = c('原系列(周期10の正弦波+ノイズ)', paste0(k, '次ラグ系列')))
grid()
acf(ys, lag.max = k, xlim = c(0, 120), ylim = c(-1, 1), main = '')
pacf(ys, lag.max = k, xlim = c(0, 120), ylim = c(-1, 1), main = '')
dev.off()
}
a <- acf(dy, lag.max = 50) # MA(q)
grid()
abline(v = fq, col = 3, lty = 3)

q <- 12
pa <- pacf(dy, lag.max = 50) # AR(p)【注意】acfと違いグラフが1次ラグから始まる。
grid()
abline(v = fq, col = 3, lty = 3)

p <- 12
fit.arima <- Arima(ts(y[ii.tr], frequency = fq), lambda = 'auto',
order = c(p, d, q),
seasonal = c(1, 1, 0)) # SARIMA
# c.f.
#fit.arima <- auto.arima(ts(y[ii.tr], frequency = fq), lambda = 'auto')
fit.arima
## Series: ts(y[ii.tr], frequency = fq)
## ARIMA(12,1,12)(1,1,0)[12]
## Box Cox transformation: lambda= -0.3096643
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.2542 0.0940 -0.0545 0.0567 -0.0594 -0.0297 0.1077 -0.0028
## s.e. 0.1417 0.1441 0.1368 0.1352 0.1335 0.1306 0.1425 0.1334
## ar9 ar10 ar11 ar12 ma1 ma2 ma3 ma4
## 0.1405 0.0729 -0.2619 -0.2848 -0.0736 -0.1363 -0.0955 -0.2205
## s.e. 0.1335 0.1311 0.1333 0.1807 0.1472 0.1384 0.1264 0.1472
## ma5 ma6 ma7 ma8 ma9 ma10 ma11 ma12
## 0.1913 0.2118 -0.1829 -0.0367 -0.1446 0.0096 0.3593 -0.6336
## s.e. 0.1458 0.1479 0.1603 0.1385 0.1426 0.1516 0.1519 0.1503
## sar1
## 0.1202
## s.e. 0.2297
##
## sigma^2 = 5.446e-05: log likelihood = 381.33
## AIC=-710.67 AICc=-693.12 BIC=-641.17
checkresiduals(fit.arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(12,1,12)(1,1,0)[12]
## Q* = 6.3161, df = 3, p-value = 0.09721
##
## Model df: 25. Total lags used: 28
pred <- forecast(fit.arima, h = n.te, level = 95)
matplot(t, y, type = 'o', pch = 16, col = 2, main = pred$method)
grid()
abline(v = n.tr + 1, lty = 3, col = 3)
matlines(t[ii.tr], fit.arima$fitted, type = 'o', pch = 16, lty = 2, col = 4)
matlines(t[ii.te], pred$mean, type = 'o', pch = 1, lty = 2, col = 4)
matlines(t[ii.te], pred$lower, lty = 3, col = 'gray')
matlines(t[ii.te], pred$upper, lty = 3, col = 'gray')
legend('topleft', bg = 'white', lty = c(1, 2, 2, 3),
col = c(2, 4, 4, 'gray'), pch = c(16, 16, 1, NA),
legend = c('原系列', '1期先予測値', '予測値', '95%予測区間'))

get.accuracy <- function(yhat, y)
{
data.frame(MBE = mean(yhat - y),
MAE = mean(abs(yhat - y)),
MAPE = mean(abs((yhat - y) / y)) * 100,
RMSE = sqrt(mean((yhat - y)^2))
)
}
get.accuracy(yhat = pred$mean, y = y[ii.te])
## MBE MAE MAPE RMSE
## 1 -14.07091 14.92617 3.421171 17.18222
n <- 24*7*2 # 時間数
t <- 1:n # 時間
n.tr <- floor(n * 0.8) # 訓練期間数
n.te <- n - n.tr # テスト期間数
ii.tr <- 1:n.tr # 訓練期間インデックス
ii.te <- (n.tr+1):n # テスト期間インデックス
set.seed(5)
y <- 100 + 0.1 * t + 0.02 * t^2 + 100 * sin(2 * pi * t / 24) + rnorm(n, sd = 5)
matplot(y, type = 'o', pch = 1, col = 2)

d <- 2
dy <- diff(y[ii.tr], diff = d)
matplot(dy, type = 'o', pch = 1, col = 2, ylab = 'y')

a <- acf(dy, lag.max = 50) # MA(q)
grid()
abline(v = fq, col = 3, lty = 3)

q <- 2
pa <- pacf(dy, lag.max = 50) # AR(p)【注意】acfと違いグラフが1次ラグから始まる。
grid()
abline(v = fq, col = 3, lty = 3)

p <- 4
fit.arima <- Arima(ts(y[ii.tr], frequency = fq), lambda = 'auto',
order = c(p, d, q),
seasonal = c(1, 0, 0))
## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.
fit.arima
## Series: ts(y[ii.tr], frequency = fq)
## ARIMA(4,2,2)(1,0,0)[12]
## Box Cox transformation: lambda= 0.9190757
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): 計算結果が NaN になりました
## ar1 ar2 ar3 ar4 ma1 ma2 sar1
## 0.9297 0.4478 -0.0589 -0.4882 -1.9251 0.9989 0.1034
## s.e. 0.0543 0.0797 0.0799 0.0545 NaN NaN 0.0639
##
## sigma^2 = 24.85: log likelihood = -807.37
## AIC=1630.75 AICc=1631.31 BIC=1659.41
checkresiduals(fit.arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,2,2)(1,0,0)[12]
## Q* = 102.25, df = 17, p-value = 3.397e-14
##
## Model df: 7. Total lags used: 24
pred <- forecast(fit.arima, h = n.te, level = 95)
matplot(t, y, type = 'o', pch = 16, col = 2, main = pred$method)
grid()
abline(v = n.tr + 1, lty = 3, col = 3)
matlines(t[ii.tr], fit.arima$fitted, type = 'o', pch = 16, lty = 2, col = 4)
matlines(t[ii.te], pred$mean, type = 'o', pch = 1, lty = 2, col = 4)
matlines(t[ii.te], pred$lower, lty = 3, col = 'gray')
matlines(t[ii.te], pred$upper, lty = 3, col = 'gray')
legend('topleft', bg = 'white', lty = c(1, 2, 2, 3),
col = c(2, 4, 4, 'gray'), pch = c(16, 16, 1, NA),
legend = c('原系列', '1期先予測値', '予測値', '95%予測区間'))
