1126-2.knit

d0 <- read.csv("https://stats.dip.jp/01_ds/data/load/load_amedas_y2022_tokyo.csv")

is.term <- "2022-05-01" <= d0$date & d0$date <= "2022-06-30"
d1 <- d0[is.term, ]

n <- nrow(d1)                 
t <- 1:n                      
px <- as.POSIXct(d1$datetime) 
y <- d1$mw / 1000            

d1 <- data.frame(px, t, y) 
head(d1)

##                    px t     y
## 1 2022-05-01 00:00:00 1 21.62
## 2 2022-05-01 01:00:00 2 20.65
## 3 2022-05-01 02:00:00 3 20.61
## 4 2022-05-01 03:00:00 4 20.81
## 5 2022-05-01 04:00:00 5 20.86
## 6 2022-05-01 05:00:00 6 20.72

if (!require(forecast)) install.packages("forecast")

##  要求されたパッケージ forecast をロード中です

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

fit1 <- loess(y ~ t, # モデル式（データフレームd1のカラム名で表記する）
              data = d1, 
              span = 24 * 30 / n)

# フィット値（fitted values）がトレンドとなる。
# データフレームd1にtrendを追加
d1$trend <- fit1$fitted

# 折れ線プロット（原系列）
matplot(x = d1$px, y = d1$y, type = "l", pch = 1, col = 2,
        main = "電力需要",
        xlab = "月 日",
        ylab = "GW")

abline(h = 0, col = "gray") # x軸線

# 折れ線プロット（トレンド成分）
matlines(x = d1$px, y = d1$trend, pch = 2, col = 4, lwd = 2)

# 凡例（はんれい）
legend("topleft", lty = 1, col = c(2, 4), 
       legend = c("原系列", "トレンド"))

d1$detrended <- d1$y - d1$trend

# 折れ線プロット（残余成分）
matplot(d1$px, d1$detrended, type = "l", pch = 1, col = 2,
        main = "残余成分（原系列－トレンド）",
        xlab = "月 日",
        ylab = "GW")

abline(h = 0, col = "gray")

fit2 <- loess(detrended ~ t, 
              data = d1, # モデル式（データフレームd1のカラム名で表記する） 
              span = 24 * 4 / n)

# フィット値（fitted values）が周期成分となる。
# データフレームd1にseasonalを追加
d1$seasonal <- fit2$fitted

# 折れ線プロット（トレンド除去成分・周期成分）
matplot(x = d1$px, y = d1$detrended, type = "l", pch = 1, col = 2, ylim = c(-15, 15),
        main = "残余成分（トレンド除去後の成分）・周期成分",
        xlab = "月 日",
        ylab = "GW")

# 折れ線プロット（周期成分）
matlines(x = d1$px, y = d1$seasonal, pch = 2, col = 4, lwd = 2)

abline(h = 0, col = "gray") # x軸線

d1$remainder <- d1$detrended - d1$seasonal

# 折れ線プロット（残余成分）
matplot(d1$px, d1$remainder, type = "l", pch = 1, col = 2, ylim = c(-15, 15),
        main = "残余成分（トレンド，周期成分除去後の成分）",
        xlab = "月 日",
        ylab = "GW")

d1$remainder <- d1$detrended - d1$seasonal

# 折れ線プロット（残余成分）
matplot(d1$px, d1$remainder, type = "l", pch = 1, col = 2, ylim = c(-15, 15),
        main = "残余成分（トレンド，周期成分除去後の成分）",
        xlab = "月 日",
        ylab = "GW")

abline(h = 0, col = "gray")

s <- mstl(x = msts(y, seasonal.period = c(24, 24*7)),
          s.window = "periodic")

autoplot(s, main = "周期成分：時間変動なし")

s <- mstl(x = msts(y, seasonal.period = c(24, 24*7)))

autoplot(s, main = "周期成分：時間変動あり")

PERIOD <- c(24, 24*7) # 周期変動成分の周期（単位：hour）
n.p <- length(PERIOD) # 時間数

#
# データ取得
#
# 【注意】訓練データとテストデータの期間は連続していること。
d0 <- read.csv("https://stats.dip.jp/01_ds/data/load/load_amedas_y2022_tokyo.csv")


# 訓練データ
d.tr <- d0["2022-05-01" <= d0$date & d0$date <= "2022-06-30", ]

# テストデータ（予測区間の正解データとして精度評価に利用）
d.te <- d0["2022-07-01" <= d0$date & d0$date <= "2022-07-03", ]

# データサイズ
n.tr <- nrow(d.tr) # 訓練期間データサイズ
n.te <- nrow(d.te) # 試験期間データサイズ
n.al <- n.tr + n.te # 全（訓練＋試験）期間データサイズ

# 範囲インデックス
ii.tr <- 1:n.tr
ii.te <- (n.tr + 1):(n.tr + n.te)
ii.al <- 1:(n.tr + n.te)

# 測定値 (単位変換：MW -> GW)
y.tr <- d.tr$mw / 1000
y.te <- d.te$mw / 1000
y.al <- c(y.tr, y.te)

# タイムスタンプ
px.tr <- as.POSIXct(d.tr$datetime)
px.te <- as.POSIXct(d.te$datetime)
px.al <- c(px.tr, px.te)

fit.stl <- mstl(x = msts(y.tr, seasonal.period = PERIOD))

# 周期成分に時間変化を許さないときは次を使う。上との違いを確認すること。
#fit.stl <- mstl(x = msts(y.tr, seasonal.period = PERIOD), s.window = "periodic")

autoplot(fit.stl)

#
# トレンド成分，周期変動成分の格納
#
stl.al <- matrix(NA, nrow = n.al, ncol = n.p + 3,
                 dimnames = list(ii.al, colnames(fit.stl)))

# トレンド成分（適合値，予測値）の格納
fit.trend <- loess(y ~ x, span = 0.1,
                   data = data.frame(x = ii.tr, y = fit.stl[, "Trend"]),
                   control = loess.control(surface = "direct"))
# 予測（LOESS）
pred.trend <- predict(fit.trend, newdata = data.frame(x = ii.te))

stl.al[, "Trend"] <- c(fit.stl[, "Trend"], pred.trend)

# 周期変動成分（適合値，予測値）の格納
for (j in seq_along(PERIOD))
{
  stl.al[ii.tr, 2 + j] <- fit.stl[, 2 + j]

  for (i in 1:n.te)
  {
    # 予測値は前周期の繰り返し
    stl.al[n.tr + i, 2 + j] <- stl.al[n.tr + i - PERIOD[j], 2 + j]
  }
}

# 残余成分（予測期間は値なし）
stl.al[, "Remainder"] <- c(fit.stl[, "Remainder"], rep(NA, n.te))

# 残余成分以外（トレンド成分＋周期成分）で原系列の適合値，予測値を計算する。
yhat.al <- stl.al[, "Data"] <- rowSums(stl.al[, 2:(n.p+2)])
yhat.te <- yhat.al[ii.te]

head(stl.al)

##       Data    Trend Seasonal24 Seasonal168 Remainder
## 1 20.20017 24.11917  -3.190286  -0.7287139  1.419826
## 2 19.36617 24.12002  -4.019563  -0.7342915  1.283830
## 3 19.39058 24.12088  -3.960578  -0.7697210  1.219423
## 4 19.69813 24.12173  -3.580835  -0.8427612  1.111870
## 5 19.74336 24.12258  -3.420685  -0.9585347  1.116641
## 6 19.19200 24.12343  -3.639697  -1.2917366  1.528005

# 誤差分散の推定
#【注意】
# データへの適合誤差（fitting error）≒ 予測誤差という考え方で推定。
# 正確には１期先予測誤差系列から推定する必要がある。
v <- var(fit.stl[, "Remainder"]) # 不偏分散（誤差分散の推定値）
vs <- cumsum(rep(v, n.te))       # 累積和（時間経過ともに誤差拡大）
se <- sqrt(vs)                   # 不偏分散の平方根（標準誤差）
yhat.te.low <- yhat.te - 1.96 * se
yhat.te.upp <- yhat.te + 1.96 * se

#
# グラフ
#

# 画面表示範囲
range.px <- c(px.tr[n.tr - 24*7*3], px.te[n.te])

# トップパネル
par(mfrow = c(n.p + 2, 1), mar = c(2, 3, 1, 0))

matplot (x = px.al[ii.al],
         y = rep(mean(y.tr), n.al),
         type = "n", xlim = range.px, ylim = c(20, 80))

abline(v = px.te[1], col = 3, lty = 2) # 予測開始線

grid()

matlines(x = px.al[ii.tr], y = y.tr,              type = "o", pch = 16, col = 2, lty = 1) # 測定値
matlines(x = px.al[ii.al], y = stl.al[, "Trend"], type = "o", pch = NA, col = 4, lty = 1) # トレンド成分値
matlines(x = px.al[ii.te], y = y.te,              type = "o", pch = 16, col = 2, lty = 1) # 予測正解値
matlines(x = px.al[ii.te], y = yhat.te,           type = "o", pch =  1, col = 4, lty = 2) # 予測値
matlines(x = px.al[ii.te], y = yhat.te.low, col = "lightgray") # 95%予測区間下限
matlines(x = px.al[ii.te], y = yhat.te.upp, col = "lightgray") # 95%予測区間上限

# 凡例
legend("top", horiz = T, bty = "n", 
       lty = c(1, 2, 1), pch = c(16, 1, NA, NA), col = c(2, 4, "lightgray", 4),
       legend = c("原系列", "予測値", "95%予測区間", "トレンド成分"))
legend("topright", bty = "n", 
       legend = sprintf("MAPE: %2.1f%%", mean(abs((yhat.te - y.te)/y.te))*100))

# 周期成分パネル
for (j in 3:(n.p + 2))
{
  matplot(x = px.al, y = stl.al[, j],
          type = "l", col = 4, xlim = range.px, ylim = c(-30, 30),
          main = paste0(PERIOD[j-2], "周期成分"))
  abline(v = px.te[1], col = 3, lty = 2) # 予測開始線
  grid()
}

# 残余成分パネル
matplot (x = px.al, y = stl.al[, "Remainder"], 
         type = "l", col = 4, xlim = range.px, ylim = c(-30, 30),
         main = "残余成分")
abline(v = px.te[1], col = 3, lty = 2) # 予測開始線
grid()

# Prepare the data
px <- seq(as.POSIXct('1949-01-01'), as.POSIXct('1960-12-01'), by = 'month')
d0 <- data.frame(px = px, y = AirPassengers)

# Convert to time series object
ts_data <- ts(d0$y, start = c(1949, 1), frequency = 12)

# Perform STL decomposition
library(forecast)
fit_stl <- stl(ts_data, s.window = "periodic")

# Plot STL decomposition using autoplot
autoplot(fit_stl, main = "STL Decomposition of Monthly Airline Passenger Data")

# Extract components (Seasonal, Trend, Remainder)
d0$trend <- fit_stl$time.series[, "trend"]
d0$seasonal <- fit_stl$time.series[, "seasonal"]
d0$remainder <- fit_stl$time.series[, "remainder"]

# Plot the original series, trend, and seasonal components
par(mfrow = c(3, 1))  # Set layout for 3 plots
plot(d0$px, d0$y, type = "l", main = "Original Data", xlab = "Time", ylab = "Passengers", col = "blue")
lines(d0$px, d0$trend, col = "red", lwd = 2)
plot(d0$px, d0$seasonal, type = "l", main = "Seasonal Component", xlab = "Time", ylab = "Passengers", col = "green")
plot(d0$px, d0$remainder, type = "l", main = "Remainder Component", xlab = "Time", ylab = "Passengers", col = "purple")

# Reset plot layout
par(mfrow = c(1, 1))