回帰付きARIMA(RegARIMA,ARIMAXなどという)
d0 <- read.csv('https://stats.dip.jp/01_ds/data/load_amedas_y2017-2022_tokyo.csv')
head(d0)
## date datetime dow name event md gr year month
## 1 2017-01-01 2017-01-01 00:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 2 2017-01-01 2017-01-01 01:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 3 2017-01-01 2017-01-01 02:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 4 2017-01-01 2017-01-01 03:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 5 2017-01-01 2017-01-01 04:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 6 2017-01-01 2017-01-01 05:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## day su mo tu we th fr sa wd ho bw af sp ab mw time hr mi hm rm ws
## 1 1 1 0 0 0 0 0 0 0 1 0 0 1 0 27830 00:00:00 0 0 77.43 0 2.32
## 2 1 1 0 0 0 0 0 0 0 1 0 0 1 0 26340 01:00:00 1 0 71.00 0 2.52
## 3 1 1 0 0 0 0 0 0 0 1 0 0 1 0 25200 02:00:00 2 0 68.86 0 1.52
## 4 1 1 0 0 0 0 0 0 0 1 0 0 1 0 24380 03:00:00 3 0 71.43 0 1.40
## 5 1 1 0 0 0 0 0 0 0 1 0 0 1 0 23890 04:00:00 4 0 74.86 0 1.97
## 6 1 1 0 0 0 0 0 0 0 1 0 0 1 0 23940 05:00:00 5 0 73.71 0 2.25
## tp tp3h tp5h tp3d tp7d
## 1 4.87 5.31 5.53 5.50 6.74
## 2 4.49 5.01 5.20 5.53 6.73
## 3 4.07 4.48 4.90 5.54 6.72
## 4 3.91 4.16 4.60 5.56 6.71
## 5 3.19 3.72 4.11 5.58 6.69
## 6 3.14 3.41 3.76 5.61 6.68
d <- d0['2022-06-01' <= d0$date & d0$date <= '2022-07-03', ]
n <- nrow(d)
d$px <- as.POSIXct(d$datetime)
d$gw <- d$mw / 1000 # 測定値 (単位変換:MW -> GW)
# 気温欠損値処理(1箇所)
i.tp.na <- which(d$tp < -100)
d$tp[i.tp.na] <- mean(d$tp[c(i.tp.na-1, i.tp.na+1)])
# Suffixとしてtr: training term, te: test term, al: all term を使用する。
# 訓練期間フラグ
d.tr <- d['2017-06-01' <= d$date & d$date <= '2022-06-30', ]
n.tr <- nrow(d.tr)
ii.tr <- 1:n.tr
# テスト期間フラグ
d.te <- d['2022-07-01' <= d$date & d$date <= '2022-07-03', ]
n.te <- nrow(d.te)
ii.te <- (n.tr + 1):n
# グラフ表示期間フラグ
is.plot <- '2022-06-28' <= d$date & d$date <= '2022-07-03'
d.plot <- d[is.plot, ] # グラフ表用データ
#cairo_pdf('electricity_regarima.pdf')
matplot(d.tr$px, d.tr$gw, type = 'l', col = 2, ylim = c(0, 60),
main = '電力需要', xlab = '年', ylab = '電力需要[GW], 気温[℃]')
matlines(d.tr$px, d.tr$tp, type = 'l', col = 4)
legend('topleft', lty = 1, col = c(2, 4),
legend = c('電力需要[GW]', '気温[℃]'))
#dev.off()
#View(d.tr[, c('datetime', 'tp')])
fq <- 24
グラフから周期(\(fq = 24\))の周期性があることが分かる。
fit <- lm(gw ~ tp + I(tp^2) + sa + su - 1, data = d.tr)
d.tr$gw.tp.adj <- d.tr$gw - fit$fitted
matplot(d.tr$gw.tp.adj, type = 'l', pch = 1, col = 2, ylab = 'y')
周期性がある場合は周期内で有意な最大ラグ次数を選ぶ。
a <- acf(d.tr$gw.tp.adj, lag.max = 50) # MA(q)
grid()
abline(v = fq, col = 3, lty = 3)
q <- 0
pa <- pacf(d.tr$gw.tp.adj, lag.max = 50) # AR(p)【注意】acfと違いグラフが1次ラグから始まる。
grid()
abline(v = fq, col = 3, lty = 3)
p <- 2
【注意】次数が大きいと計算時間が掛かる。また,訓練データは大きなサイズが必要となる。
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
fit.arima <- Arima(ts(d.tr$gw, frequency = fq),
xreg = model.matrix(fit, data = d.tr),
order = c(p, 0, q),
seasonal = c(1, 0, 0)) # SARIMA
fit.arima
## Series: ts(d.tr$gw, frequency = fq)
## Regression with ARIMA(2,0,0)(1,0,0)[24] errors
##
## Coefficients:
## ar1 ar2 sar1 intercept tp I(tp^2) sa su
## 1.5421 -0.5814 0.8821 38.1882 -0.5204 0.0122 -0.1601 -0.1879
## s.e. 0.0334 0.0335 0.0186 3.7493 0.1620 0.0033 0.1266 0.1271
##
## sigma^2 = 0.2449: log likelihood = -531.2
## AIC=1080.39 AICc=1080.65 BIC=1121.61
checkresiduals(fit.arima) # 残差分析グラフ
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,0)(1,0,0)[24] errors
## Q* = 141.08, df = 45, p-value = 7.651e-12
##
## Model df: 3. Total lags used: 48
pred <- forecast(fit.arima, xreg = model.matrix(fit, data = d.te), level = 95)
#cairo_pdf('regarima_forecast.pdf')
matplot(d.plot$px, d.plot$gw, type = 'o', pch = 16, col = 2, main = pred$method)
grid()
abline(v = n.tr + 1, lty = 3, col = 3)
matlines(d.tr$px, fit.arima$fitted, type = 'o', pch = 16, lty = 2, col = 4)
matlines(d.te$px, pred$mean, type = 'o', pch = 1, lty = 2, col = 4)
matlines(d.te$px, pred$lower, lty = 3, col = 'gray')
matlines(d.te$px, pred$upper, lty = 3, col = 'gray')
legend('topleft', bg = 'white', lty = c(1, 2, 2, 3),
col = c(2, 4, 4, 'gray'), pch = c(16, 16, 1, NA),
legend = c('原系列', '1期先予測値', '予測値', '95%予測区間'))
#dev.off()
get.accuracy <- function(yhat, y)
{
data.frame(MBE = mean(yhat - y),
MAE = mean(abs(yhat - y)),
MAPE = mean(abs((yhat - y) / y)) * 100,
RMSE = sqrt(mean((yhat - y)^2))
)
}
get.accuracy(yhat = pred$mean, y = d.te$gw)
## MBE MAE MAPE RMSE
## 1 1.915914 2.731864 7.272117 3.519398
RegARIMAの外生変数を使った回帰モデルを改良し精度を上げよ。