機械学習

# パッケージのインストール
if (!require(keras3))
{
  install.packages("keras3")
  library(keras3)
  install_keras() # r-kerasという仮想環境にKerasがインストールされる。
  # install_keras(tensorflow = "gpu") # GPU演算用（NVIDIA GPUとcuDNNが必要）
}

if (!require(plotly)) install.packages("plotly")

set.seed(5) # 乱数シード

n <- 24*7*2 # 時間数（全14日）
x <- 1:n    # 時刻
x2 <- x^2
x3 <- 100*sin(2*pi*x/24)

f <- function(x) 100 + 0.1*x + 0.02*x2 + x3

y <- f(x) + rnorm(n, sd = 5)

d <- data.frame(y, x, x2, x3)

d

ii <- 1:(24*11)  # 11日分のインデックス
d.tr <- d[ ii, ] # 訓練データを抽出（前11日）
d.te <- d[-ii, ] # 試験データを抽出（後3日）

vline <- list(type = "line",
              line = list(color = "blue", dash = "dash"),
              x0 = ii[length(ii)], x1 = ii[length(ii)],
              y0 = 0, y1 = max(d$y))

plot_ly(type = "scatter", mode = "markers") |>
  add_trace(x = d$x, y = d$y,  mode = 'markers', name = "観測値") |>
  add_trace(x = x,   y = f(x), mode = 'lines',   name = "真値") |>
  layout(title = "青破線の左側：訓練領域，右側：予測領域", shapes = list(vline))

# モデル候補
FML <- vector("list", 4)
FML[[1]] <- formula("y ~ x")
FML[[2]] <- formula("y ~ x + x2")
FML[[3]] <- formula("y ~ x + x2 + x3")
FML[[4]] <- formula("y ~ poly(x, 4)")

# 選択モデル番号
ID.MODEL <- 3

# 回帰モデルから計画行列を作成
x.tr <- model.matrix(FML[[ID.MODEL]], d.tr)
x.te <- model.matrix(FML[[ID.MODEL]], d.te)
nc <- ncol(x.tr) # カラム数

me.tr <- apply(x.tr, 2, mean) # 訓練データの説明変数ごとの平均
sd.tr <- apply(x.tr, 2, sd)   # 訓練データの説明変数ごとの標準偏差
x.tr <- scale(x.tr, center = me.tr, scale = sd.tr) # 標準化訓練データ
x.te <- scale(x.te, center = me.tr, scale = sd.tr) # 標準化試験データ
x.tr[, 1] <- 1 # 切片は標準化できない（NaNなる）ので1で置換
x.te[, 1] <- 1 # 切片は標準化できない（NaNなる）ので1で置換

# 深層学習モデル
model <- 
  keras_model_sequential(input_shape = c(nc))    |> # 入力層
  layer_dense(units = nc*2, activation = "relu") |> # 中間層（ReLU）
  layer_dense(units = nc*4, activation = "relu") |> # 中間層（ReLU）
  layer_dense(units = nc*2, activation = "relu") |> # 中間層（ReLU）
  layer_dense(units = 1,    activation = "linear")  # 出力層（線形）

# モデル概要
summary(model)

## Model: "sequential"
## ┌───────────────────────────────────┬──────────────────────────┬───────────────
## │ Layer (type)                      │ Output Shape             │       Param # 
## ├───────────────────────────────────┼──────────────────────────┼───────────────
## │ dense (Dense)                     │ (None, 8)                │            40 
## ├───────────────────────────────────┼──────────────────────────┼───────────────
## │ dense_1 (Dense)                   │ (None, 16)               │           144 
## ├───────────────────────────────────┼──────────────────────────┼───────────────
## │ dense_2 (Dense)                   │ (None, 8)                │           136 
## ├───────────────────────────────────┼──────────────────────────┼───────────────
## │ dense_3 (Dense)                   │ (None, 1)                │             9 
## └───────────────────────────────────┴──────────────────────────┴───────────────
##  Total params: 329 (1.29 KB)
##  Trainable params: 329 (1.29 KB)
##  Non-trainable params: 0 (0.00 B)

# 高速演算のためのコンパイル（PCが素早く理解できる機械語に翻訳）
compile(model,
        loss      = "mse", # mean-squared-erros（２乗誤差平均）
        optimizer = optimizer_adam(learning_rate = 0.1), # 最適化アルゴリズム 
        metrics   = c("mean_absolute_error")) # 評価指標：絶対値誤差平均

# コールバック設定
callbacks <- list(
  # 早期停止（検証データでの損失値の改善が20エポック以上なかったら停止）
  callback_early_stopping(patience = 20, monitor = "val_loss"),
  
  # 検証データでの損失が改善されない限りモデルを上書きしない設定
  # （early_stoppingとセットで使用する）
  callback_model_checkpoint(filepath = "bestmodel.keras",
                            monitor = "val_loss", save_best_only = T),
  
  # 検証データでの損失が改善せず停滞した時（判定：5エポック）
  # に局所解を抜け出すため学習率を0.1倍に下げる設定。 
  callback_reduce_lr_on_plateau(monitor = "val_loss", 
                                factor = 0.1, patience = 5)
)

# フィッティング
fit.dp <- fit(model,                  # 深層学習モデル
              x.tr,                   # 計画行列 
              d.tr$y,                 # 目的変数
              verbose    = 0,         # 1:出力表示（低速），0:出力表示抑制
              batch_size = 2^6,       # バッチサイズ（要調整）
              epochs     = 100,       # エポック数（早期停止設定時設定不要） 
              validation_split = 0.2, # 検証用データ割合（訓練には不使用）
              callbacks  = callbacks) # コールバック設定

plot(fit.dp)

# 予測
yhat <- predict(model, x.te)

## 3/3 - 0s - 27ms/step

# 重回帰モデルとの比較
fit <- lm(FML[[ID.MODEL]], data = d.tr)
yhat.lm <- predict(fit, newdata = d.te)

# 予測結果のグラフ
plot_ly(type = "scatter", mode = "markers") |>
  add_trace(x = d$x,    y = d$y,     mode = "markers", name = "観測値") |>
  add_trace(x = d.te$x, y = yhat,    mode = "markers", name = "予測値（DNN）") |>
  add_trace(x = d.te$x, y = yhat.lm, mode = "lines",   name = "予測値（重回帰）") |>
  layout(shapes = list(vline))

# 精度評価関数
get.accuracy <- function(yhat, y)
{
  data.frame(MBE  = mean(yhat - y),
             MAE  = mean(abs(yhat - y)),
             MAPE = mean(abs((yhat - y) / y)) * 100,
             RMSE = sqrt(mean((yhat - y)^2))
  )
}

get.accuracy(yhat = yhat, y = d.te$y)

# Kerasパッケージでの精度指標
evaluate(model, x.te, d.te$y)

## 3/3 - 0s - 5ms/step - loss: 40941.9922 - mean_absolute_error: 194.8728

## $loss
## [1] 40941.99
## 
## $mean_absolute_error
## [1] 194.8728

get.accuracy(yhat = yhat.lm, y = d.te$y)

機械学習

演習課題

23150124

2024-11-08