機械学習

n <- 100                         # 全時間数
x <- 1:n                         # 時刻
t <- 2 * x                       # トレンド（係数：2）
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動
y <- t + e # 合成変動 

matplot(x, y, type = 'o', pch = 16, col = 2)

n.tr <- n * 0.8   # 訓練時間（80%）
n.te <- n - n.tr  # テスト時間（20%）

ii.tr <- 1:n.tr 
ii.te <- (n.tr + 1):n

fit <- lm(y ~ x, data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot <- function(x, y, pred)
{
  matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5)
  abline(v = n.tr+1, col = 3, lty = 2)
  matlines(x[ii.te], pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
  matlines(x[ii.te], pred[, 'lwr'], lty = 3, col = 'lightgray')
  matlines(x[ii.te], pred[, 'upr'], lty = 3, col = 'lightgray')
  legend('topleft', bg = 0,
        lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
        legend = c('原系列', '予測値', '95%予測区間'))
}

myplot(x, y, pred)

t <- 0.05 * x^2 - 2 * x          # 2次のトレンド
y <- t + e                       # 合成変動 

fit <- lm(y ~ poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot(x, y, pred)

T <- 10                          # 周期
c <- 30 * sin(2 * pi * x / T)    # 周期変動（振幅：30，周期：10）
t <- 2 * x                       # トレンド（係数：2）
y <- t + c + e                   # 合成変動 

fit <- lm(y ~  poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot(x, y, pred)

dir.create('./ts_reg_periodic', showWarnings = F)

yhat <- rep(NA, n)

for (j in 1:T)
{
  # j <- 1
  ii <- seq(j, n.tr, T)
  ii.pred <- ii[length(ii)] + seq(T, T*2, T)

  fit <- lm(y ~  x, data = data.frame(x = x[ii], y = y[ii]))
  pred <- predict(fit, newdata = data.frame(x = x[ii.pred]), interval = 'prediction')
  yhat[ii.pred] <- pred[, 'fit']

  png(paste0('./ts_reg_periodic/ts_reg_periodic', j, '.png'))
  matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5, ylim = c(0, 250))
  abline(v = n.tr+1, col = 3, lty = 2)
  matlines(x[ii], y[ii], type = 'o', pch = 1, col = 1, cex = 1.5)
  matlines(x, yhat, type = 'o', pch = 1, col = 4)
  matpoints(x[ii.pred], yhat[ii.pred], pch = 1, col = 1)
  dev.off()
  #Sys.sleep(2)
}

#system('bash -c 'convert -delay 100 -loop 0 ./ts_reg_periodic/ts_reg_periodic{1..10}.png ./ts_reg_periodic/ts_reg_periodic.gif'')

d0 <- read.csv('https://stats.dip.jp/01_ds/data/load/load_amedas_y2017-2022_tokyo.csv')

head(d0)

##         date            datetime dow       name    event    md gr year month
## 1 2017-01-01 2017-01-01 00:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 2 2017-01-01 2017-01-01 01:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 3 2017-01-01 2017-01-01 02:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 4 2017-01-01 2017-01-01 03:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 5 2017-01-01 2017-01-01 04:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 6 2017-01-01 2017-01-01 05:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
##   day su mo tu we th fr sa wd ho bw af sp ab    mw     time hr mi    hm rm   ws
## 1   1  1  0  0  0  0  0  0  0  1  0  0  1  0 27830 00:00:00  0  0 77.43  0 2.32
## 2   1  1  0  0  0  0  0  0  0  1  0  0  1  0 26340 01:00:00  1  0 71.00  0 2.52
## 3   1  1  0  0  0  0  0  0  0  1  0  0  1  0 25200 02:00:00  2  0 68.86  0 1.52
## 4   1  1  0  0  0  0  0  0  0  1  0  0  1  0 24380 03:00:00  3  0 71.43  0 1.40
## 5   1  1  0  0  0  0  0  0  0  1  0  0  1  0 23890 04:00:00  4  0 74.86  0 1.97
## 6   1  1  0  0  0  0  0  0  0  1  0  0  1  0 23940 05:00:00  5  0 73.71  0 2.25
##     tp tp3h tp5h tp3d tp7d
## 1 4.87 5.31 5.53 5.50 6.74
## 2 4.49 5.01 5.20 5.53 6.73
## 3 4.07 4.48 4.90 5.54 6.72
## 4 3.91 4.16 4.60 5.56 6.71
## 5 3.19 3.72 4.11 5.58 6.69
## 6 3.14 3.41 3.76 5.61 6.68

d0$px <- as.POSIXct(d0$datetime)
d0$gw <- d0$mw / 1000 # 測定値 (単位変換：MW -> GW)
d0$gwfit <- NA
d0$gwlwr <- NA
d0$gwupr <- NA


# 訓練データの取得期間フラグ（ここでは予測日の前後１ヶ月間をとった）
is.67 <- '06-01' <= d0$md & d0$md <= '07-31'

# 訓練期間フラグ
is.tr <- '2017-06-01' <= d0$date & d0$date <= '2022-06-30'

# テスト期間フラグ
is.te <- '2022-07-01' <= d0$date & d0$date <= '2022-07-03'

# グラフ表示期間フラグ
is.plot <- '2022-06-28' <= d0$date & d0$date <= '2022-07-03'
d.plot <- d0[is.plot, ] # グラフ表用データ

matplot(d0$px, d0$gw, 
        main = '電力需要',
        xlab = '年', ylab = '電力需要［GW］',
        type = 'l', col = 2, ylim = c(20, 70))

for (hr in 0:23)
{
# 予測時間
#hr <- 10
is.hr <- d0$hr == hr

# 訓練データ
d.tr <- d0[is.hr & is.67 & is.tr, ]

# テストデータ（予測区間の正解データとして精度評価に利用）
d.te <- d0[is.hr & is.te, ]

# フィッティング
# （ここでは，トレンドがなく気温の2次多項式と曜日ダミーのモデルとした）
fit <- lm(gw ~ tp + I(tp^2) + dow, data = d.tr)
summary(fit)

# 予測（予測区間付き）
pred <- predict(fit, newdata = d.te, interval = 'prediction')

# 毎時の予測値を保存
d0$gwfit[d0$px %in% d.te$px] <- pred[, 'fit']
d0$gwlwr[d0$px %in% d.te$px] <- pred[, 'lwr']
d0$gwupr[d0$px %in% d.te$px] <- pred[, 'upr']

# 時間別グラフ
dir.create('./load', showWarnings = F)
png(paste0('./load/load_at', hr, '.png'))

matplot(d0$px[is.hr & is.plot], d0$gw[is.hr & is.plot], 
        main = paste0(hr, '時の電力需要'),
        type = 'n', pch = 16, col = 2, xaxt = 'n', 
        xlim = range(d0$px[is.plot]), ylim = c(20, 70))

px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線

matlines(d0$px[is.plot], d0$gw[is.plot], type = 'o', pch = 1, col = 'lightgray') 
matlines(d.te$px, pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
matlines(d.te$px, pred[, 'lwr'], lty = 3, col = 'lightgray')
matlines(d.te$px, pred[, 'upr'], lty = 3, col = 'lightgray')
matlines(d0$px[is.hr & is.plot], d0$gw[is.hr & is.plot], type = 'o', pch = 16, col = 2) 

# 凡例
legend('topleft', bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
       legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
       legend = sprintf('MAPE: %2.1f%%', 
                        mean(abs((pred[, 'fit'] - d.te$gw)/d.te$gw))*100))
dev.off()
}

#system('bash -c 'convert -delay 100 -loop 0 ./load/load_at{0..23}.png ./load/load_at.gif'')

#cairo_pdf('load_0-23.pdf')

matplot(d0$px[is.plot], d0$gw[is.plot], 
        main = '電力需要',
        type = 'o', pch = 16, col = 2, xaxt = 'n',
        xlim = range(d0$px[is.plot]), ylim = c(20, 70))

px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線

matlines(d0$px[is.te], d0$gwlwr[is.te], lty = 3, col = 'lightgray')
matlines(d0$px[is.te], d0$gwupr[is.te], lty = 3, col = 'lightgray')
matlines(d0$px[is.te], d0$gwfit[is.te], type = 'o', pch = 1, lty = 2, col = 4)

# 凡例
legend('topleft', bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
       legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
       legend = sprintf('MAPE: %2.1f%%', 
                        mean(abs((d0$gwfit[is.te] - d0$gw[is.te])/d0$gw[is.te]))*100))

d0 <- read.csv('https://stats.dip.jp/01_ds/data/e-stat_expenditure.csv', skip = 11)

# タイムスタンプと支出金額のカラムを探しデータを抽出
j.time <- grep('時間軸.月次..コード', colnames(d0))
j.ice  <- grep('アイスクリーム',      colnames(d0))

d1 <- d0[, c(j.time, j.ice)]

# 時間コード（10桁）からPOSIX準拠時間を作成
# Extract year and month for date creation
yyyy <- substr(paste(d1[, 1]), 1, 4)
mm <- substr(paste(d1[, 1]), 7, 8)

# Create 'px' as a Date column
px <- as.Date(paste(yyyy, mm, "01"), format = '%Y %m %d')

# Combine the data into a new data frame
d0 <- data.frame(px, date = format(px, '%Y-%m-%d'), y = d1[, 2])
d.ca     <- read.csv('https://stats.dip.jp/01_ds/data/calendar/calendar.csv')
d.amedas <- read.csv('https://stats.dip.jp/01_ds/data/amedas/amedas_Tokyo_1h.csv')
d.amedas$date <- format(as.POSIXct(d.amedas$HS), "%Y-%m-%d") # カラムdateを作成

# 暦，気象とマージ
d0 |> merge(d.ca, by = "date") |> merge(d.amedas, by = "date") -> d
head(d)

##         date         px   y dow         name             event    md gr year
## 1 2014-01-01 2014-01-01 396  水 祝日（元日） 年末年始（01-01） 01-01  6 2014
## 2 2014-01-01 2014-01-01 396  水 祝日（元日） 年末年始（01-01） 01-01  6 2014
## 3 2014-01-01 2014-01-01 396  水 祝日（元日） 年末年始（01-01） 01-01  6 2014
## 4 2014-01-01 2014-01-01 396  水 祝日（元日） 年末年始（01-01） 01-01  6 2014
## 5 2014-01-01 2014-01-01 396  水 祝日（元日） 年末年始（01-01） 01-01  6 2014
## 6 2014-01-01 2014-01-01 396  水 祝日（元日） 年末年始（01-01） 01-01  6 2014
##   month day wk su mo tu we th fr sa wd.x ho bw af sp ab                  HS
## 1     1   1  1  0  0  0  1  0  0  0    0  1  0  0  1  0 2014-01-01 13:00:00
## 2     1   1  1  0  0  0  1  0  0  0    0  1  0  0  1  0 2014-01-01 23:00:00
## 3     1   1  1  0  0  0  1  0  0  0    0  1  0  0  1  0 2014-01-01 15:00:00
## 4     1   1  1  0  0  0  1  0  0  0    0  1  0  0  1  0 2014-01-01 21:00:00
## 5     1   1  1  0  0  0  1  0  0  0    0  1  0  0  1  0 2014-01-01 00:00:00
## 6     1   1  1  0  0  0  1  0  0  0    0  1  0  0  1  0 2014-01-01 05:00:00
##                    HE    hpa   hpa0 pr hm  hm1h  ws ws1h ws_max rm  rm1h   tp
## 1 2014-01-01 14:00:00  997.8 1002.1  0 36 36.50 3.9 3.97    8.1 10 60.00 15.4
## 2 2014-01-02 00:00:00 1002.5 1006.9  0 47 46.00 1.0 1.35    2.3  0  0.00  8.0
## 3 2014-01-01 16:00:00  998.8 1003.1  0 36 35.50 3.9 3.88    6.3 10 55.98 14.3
## 4 2014-01-01 22:00:00 1002.1 1006.6  0 58 54.83 0.6 0.73    1.0  0  0.00  6.4
## 5 2014-01-01 01:00:00 1003.5 1008.0  0 68 67.17 1.0 1.83    1.9  0  0.00  5.3
## 6 2014-01-01 06:00:00 1001.9 1006.3  0 48 50.33 3.4 3.00    4.8  0  0.00  7.2
##    tp1h  tp5h tp3d tp7d   wd.y wd_max              missing
## 1 15.02 13.20 6.68 6.40 南南西 南南西                     
## 2  8.43  8.09 7.29 6.54     北   北東                     
## 3 14.53 14.60 6.85 6.47     南     南                     
## 4  7.17  9.89 7.16 6.55   北西     西                     
## 5  4.82  6.27 5.81 6.13 北北西   北西                     
## 6  7.03  5.20 6.03 6.16     南 南南西

# Convert 'px' to numeric for regression
d$px_numeric <- as.numeric(d$px)

# Define training, testing, and plotting periods
is.tr <- d$px < as.Date("2022-01-01")  # Training: before 2022
is.te <- d$px >= as.Date("2022-01-01") & d$px <= as.Date("2022-11-30")  # Testing: 2022
is.plot <- d$px <= as.Date("2022-11-30")  # Plotting range: up to November 2022

# Fit the model using training data
fit <- lm(y ~ px_numeric + I(px_numeric^2), data = d[is.tr, ])

# Predict for 2022
pred <- predict(fit, newdata = d[is.te, ], interval = "prediction")
d$y_fit <- NA
d$lwr <- NA
d$upr <- NA
d$y_fit[is.te] <- pred[, "fit"]
d$lwr[is.te] <- pred[, "lwr"]
d$upr[is.te] <- pred[, "upr"]

# Plotting with custom x-axis labels
MAIN <- "アイスクリーム月間世帯支出"
matplot(d$px[is.plot], d$y[is.plot], 
        type = "o", pch = 16, col = 2, xaxt = "n",  # Suppress default x-axis
        main = MAIN, xlab = "年", ylab = "円", 
        ylim = c(500, 2000))

# Add custom x-axis labels (Years)
px.ax <- seq(min(d$px), max(d$px), by = "2 years")
axis(1, at = px.ax, labels = format(px.ax, "%Y"))

# Add vertical lines for reference
abline(v = px.ax, col = "lightgray", lty = 2)
abline(v = as.Date("2022-01-01"), col = 3, lty = 2)  # Prediction start line

# Add predicted values and confidence intervals
matlines(d$px[is.te], d$lwr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$upr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$y_fit[is.te], type = "o", pch = 1, lty = 2, col = 4)

# Add legends
legend("topleft", bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, "lightgray"),
       legend = c("原系列", "予測値", "95%予測区間"))
legend("topright", bg = 0,
       legend = sprintf("MAPE: %.1f%%", 
                        mean(abs((d$y_fit[is.te] - d$y[is.te]) / d$y[is.te])) * 100))

# Convert 'px' to numeric for regression
d$px_numeric <- as.numeric(d$px)

# Define training, testing, and plotting periods
is.tr <- d$px < as.Date("2022-01-01")  # Training: before 2022
is.te <- d$px >= as.Date("2022-01-01") & d$px <= as.Date("2022-11-30")  # Testing: 2022
is.plot <- d$px <= as.Date("2022-11-30")  # Plotting range: up to November 2022

# Fit the model using training data
fit <- lm(y ~ px_numeric + I(px_numeric^2), data = d[is.tr, ])

# Predict for 2022
pred <- predict(fit, newdata = d[is.te, ], interval = "prediction")
d$y_fit <- NA
d$lwr <- NA
d$upr <- NA
d$y_fit[is.te] <- pred[, "fit"]
d$lwr[is.te] <- pred[, "lwr"]
d$upr[is.te] <- pred[, "upr"]

for (mon in 1:11) {
  # 予測時間 (Filter data for the current month)
  is.mon <- format(d$px, "%m") == sprintf("%02d", mon)

  # 訓練データ (Training data before 2022)
  d.tr <- d[is.mon & is.tr, ]

  # テストデータ (Testing data for 2022)
  d.te <- d[is.mon & is.te, ]

  # フィッティング (Fit the model)
  fit <- lm(y ~ px_numeric + I(px_numeric^2), data = d.tr)

  # 予測 (Prediction with confidence intervals)
  pred <- predict(fit, newdata = d.te, interval = "prediction")

  # 毎月の予測値を保存 (Save predictions)
  d$y_fit[d$px %in% d.te$px] <- pred[, "fit"]
  d$lwr[d$px %in% d.te$px] <- pred[, "lwr"]
  d$upr[d$px %in% d.te$px] <- pred[, "upr"]

  # 月別グラフ (Plot for each month)
  dir.create('./load', showWarnings = F)
  png(paste0('./load/load_mon_', mon, '.png'))

  matplot(d$px[is.mon & is.plot], d$y[is.mon & is.plot], 
          main = paste0(mon, "月のアイスクリーム月間世帯支出"),
          type = "n", pch = 16, col = 2, xaxt = "n", 
          xlim = range(d$px[is.plot]), ylim = c(0, 2000))

  px.ax <- seq(d$px[1], d$px[nrow(d)], by = "2 years")
  abline(v = px.ax, col = "lightgray", lty = 2)
  abline(v = as.Date("2022-01-01"), col = 3, lty = 2)  # 予測開始線

  matlines(d$px[is.plot], d$y[is.plot], type = "o", pch = 1, col = "lightgray") 
  matlines(d.te$px, pred[, "fit"], type = "o", pch = 1, lty = 2, col = 4)
  matlines(d.te$px, pred[, "lwr"], lty = 3, col = "lightgray")
  matlines(d.te$px, pred[, "upr"], lty = 3, col = "lightgray")
  matlines(d$px[is.mon & is.plot], d$y[is.mon & is.plot], type = "o", pch = 16, col = 2) 

  # 凡例 (Legend)
  legend("topleft", bg = 0,
         lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, "lightgray"),
         legend = c("原系列", "予測値", "95%予測区間"))
  legend("topright", bg = 0,
         legend = sprintf("MAPE: %.1f%%", 
                          mean(abs((pred[, "fit"] - d.te$y)/d.te$y)) * 100))
  dev.off()
}

# 全期間グラフ (Aggregate Graph with Years on X-Axis)
matplot(d$px[is.plot], d$y[is.plot], 
        main = "アイスクリーム月間世帯支出",
        type = "o", pch = 16, col = 2, xaxt = "n",  # Suppress default x-axis
        xlim = range(d$px[is.plot]), ylim = c(0, 2000))

# Add custom x-axis labels (Years)
px.ax <- seq(min(d$px), max(d$px), by = "2 years")
axis(1, at = px.ax, labels = format(px.ax, "%Y"))

# Add vertical lines for reference
abline(v = px.ax, col = "lightgray", lty = 2)
abline(v = as.Date("2022-01-01"), col = 3, lty = 2)  # Prediction start line

# Add predicted values and confidence intervals
matlines(d$px[is.te], d$lwr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$upr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$y_fit[is.te], type = "o", pch = 1, lty = 2, col = 4)

# 凡例 (Legend)
legend("topleft", bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, "lightgray"),
       legend = c("原系列", "予測値", "95%予測区間"))
legend("topright", bg = 0,
       legend = sprintf("MAPE: %.1f%%", 
                        mean(abs((d$y_fit[is.te] - d$y[is.te]) / d$y[is.te])) * 100))

```

機械学習

2024-12-01