このページは公開されるので個人情報などは記載しないこと。
課題が完成したらknitしPublishする。

課題名

rm(list = ls()) # 全オブジェクト削除
# ここにRコードを記入する。
#例）
plot(1:9)

#重回帰

n <- 100                         # 全時間
x <- 1:n                         # 時刻
t <- 2 * x                       # トレンド（係数：2）
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動 
y <- t + e                       # 合成変動 

matplot(x, y, type = 'o', pch = 16, col = 2)

n.tr <- n * 0.8   # 訓練時間（80%）
n.te <- n - n.tr  # テスト時間（20%）

ii.tr <- 1:n.tr 
ii.te <- (n.tr + 1):n

fit <- lm(y ~ x, data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot <- function(x, y, pred)
{
  matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5)
  abline(v = n.tr+1, col = 3, lty = 2)
  matlines(x[ii.te], pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
  matlines(x[ii.te], pred[, 'lwr'], lty = 3, col = 'lightgray')
  matlines(x[ii.te], pred[, 'upr'], lty = 3, col = 'lightgray')
  legend('topleft', bg = 0,
        lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
        legend = c('原系列', '予測値', '95%予測区間'))
}

myplot(x, y, pred)

t <- 0.05 * x^2 - 2 * x          # 2次のトレンド
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動 
y <- t + e                       # 合成変動 

fit <- lm(y ~ poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot(x, y, pred)

T <- 10                          # 周期
c <- 30 * sin(2 * pi * x / T)    # 周期変動（振幅：30，周期：10）
t <- 2 * x                       # トレンド（係数：2）
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動 
y <- t + c + e                   # 合成変動 

fit <- lm(y ~  poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot(x, y, pred)

dir.create('./ts_reg_periodic', showWarnings = F)

yhat <- rep(NA, n)

for (j in 1:T)
{
  # j <- 1
  ii <- seq(j, n.tr, T)
  ii.pred <- ii[length(ii)] + seq(T, T*2, T)

  fit <- lm(y ~  x, data = data.frame(x = x[ii], y = y[ii]))
  pred <- predict(fit, newdata = data.frame(x = x[ii.pred]), interval = 'prediction')
  yhat[ii.pred] <- pred[, 'fit']

  png(paste0('./ts_reg_periodic/ts_reg_periodic', j, '.png'))
  matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5, ylim = c(0, 250))
  abline(v = n.tr+1, col = 3, lty = 2)
  matlines(x[ii], y[ii], type = 'o', pch = 1, col = 1, cex = 1.5)
  matlines(x, yhat, type = 'o', pch = 1, col = 4)
  matpoints(x[ii.pred], yhat[ii.pred], pch = 1, col = 1)
  dev.off()
  #Sys.sleep(2)
}

#system('bash -c "convert -delay 100 -loop 0 ./ts_reg_periodic/ts_reg_periodic{1..10}.png ./ts_reg_periodic/ts_reg_periodic.gif"')

d0 <- read.csv('https://stats.dip.jp/01_ds/data/load_amedas_y2017-2022_tokyo.csv')

head(d0)

##         date            datetime dow       name    event    md gr year month
## 1 2017-01-01 2017-01-01 00:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 2 2017-01-01 2017-01-01 01:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 3 2017-01-01 2017-01-01 02:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 4 2017-01-01 2017-01-01 03:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 5 2017-01-01 2017-01-01 04:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 6 2017-01-01 2017-01-01 05:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
##   day su mo tu we th fr sa wd ho bw af sp ab    mw     time hr mi    hm rm   ws
## 1   1  1  0  0  0  0  0  0  0  1  0  0  1  0 27830 00:00:00  0  0 77.43  0 2.32
## 2   1  1  0  0  0  0  0  0  0  1  0  0  1  0 26340 01:00:00  1  0 71.00  0 2.52
## 3   1  1  0  0  0  0  0  0  0  1  0  0  1  0 25200 02:00:00  2  0 68.86  0 1.52
## 4   1  1  0  0  0  0  0  0  0  1  0  0  1  0 24380 03:00:00  3  0 71.43  0 1.40
## 5   1  1  0  0  0  0  0  0  0  1  0  0  1  0 23890 04:00:00  4  0 74.86  0 1.97
## 6   1  1  0  0  0  0  0  0  0  1  0  0  1  0 23940 05:00:00  5  0 73.71  0 2.25
##     tp tp3h tp5h tp3d tp7d
## 1 4.87 5.31 5.53 5.50 6.74
## 2 4.49 5.01 5.20 5.53 6.73
## 3 4.07 4.48 4.90 5.54 6.72
## 4 3.91 4.16 4.60 5.56 6.71
## 5 3.19 3.72 4.11 5.58 6.69
## 6 3.14 3.41 3.76 5.61 6.68

d0$px <- as.POSIXct(d0$datetime)
d0$gw <- d0$mw / 1000 # 測定値 (単位変換：MW -> GW)
d0$gwfit <- NA
d0$gwlwr <- NA
d0$gwupr <- NA

# Suffixとしてtr: training term, te: test term, al: all term を使用する。

# 訓練データの取得期間フラグ（ここでは予測日の前後１ヶ月間をとった）
is.67 <- '06-01' <= d0$md & d0$md <= '07-31'

# 訓練期間フラグ
is.tr <- '2017-06-01' <= d0$date & d0$date <= '2022-06-30'

# テスト期間フラグ
is.te <- '2022-07-01' <= d0$date & d0$date <= '2022-07-03'

# グラフ表示期間フラグ
is.plot <- '2022-06-28' <= d0$date & d0$date <= '2022-07-03'
d.plot <- d0[is.plot, ] # グラフ表用データ

matplot(d0$px, d0$gw, 
        main = '電力需要',
        xlab = '年', ylab = '電力需要［GW］',
        type = 'l', col = 2, ylim = c(20, 70))

for (hr in 0:23)
{
# 予測時間
#hr <- 10
is.hr <- d0$hr == hr

# 訓練データ
d.tr <- d0[is.hr & is.67 & is.tr, ]

# テストデータ（予測区間の正解データとして精度評価に利用）
d.te <- d0[is.hr & is.te, ]

# フィッティング
# （ここでは，トレンドがなく気温の2次多項式と曜日ダミーのモデルとした）
fit <- lm(gw ~ tp + I(tp^2) + dow, data = d.tr)
summary(fit)

# 予測（予測区間付き）
pred <- predict(fit, newdata = d.te, interval = 'prediction')

# 毎時の予測値を保存
d0$gwfit[d0$px %in% d.te$px] <- pred[, 'fit']
d0$gwlwr[d0$px %in% d.te$px] <- pred[, 'lwr']
d0$gwupr[d0$px %in% d.te$px] <- pred[, 'upr']

# 時間別グラフ
dir.create('./load', showWarnings = F)
png(paste0('./load/load_at', hr, '.png'))

matplot(d0$px[is.hr & is.plot], d0$gw[is.hr & is.plot], 
        main = paste0(hr, '時の電力需要'),
        xlab = format(d0$px[1], '%Y年'), ylab = '電力需要［GW］',
        type = 'n', pch = 16, col = 2, xaxt = 'n', 
        xlim = range(d0$px[is.plot]), ylim = c(20, 70))

px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線
axis(side = 1, at = px.ax, labels = format(px.ax, '%-m/%-d(%a)')) 

matlines(d0$px[is.plot], d0$gw[is.plot], type = 'o', pch = 1, col = 'lightgray') 
matlines(d.te$px, pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
matlines(d.te$px, pred[, 'lwr'], lty = 3, col = 'lightgray')
matlines(d.te$px, pred[, 'upr'], lty = 3, col = 'lightgray')
matlines(d0$px[is.hr & is.plot], d0$gw[is.hr & is.plot], type = 'o', pch = 16, col = 2) 

# 凡例
legend('topleft', bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
       legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
       legend = sprintf('MAPE: %2.1f%%', 
                        mean(abs((pred[, 'fit'] - d.te$gw)/d.te$gw))*100))
dev.off()
}

#system('bash -c "convert -delay 100 -loop 0 ./load/load_at{0..23}.png ./load/load_at.gif"')

#cairo_pdf('load_0-23.pdf')

matplot(d0$px[is.plot], d0$gw[is.plot], 
        main = '電力需要',
        xlab = format(d0$px[1], '%Y年'), ylab = '電力需要［GW］',
        type = 'o', pch = 16, col = 2, xaxt = 'n',
        xlim = range(d0$px[is.plot]), ylim = c(20, 70))

px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
axis(side = 1, at = px.ax, labels = format(px.ax, '%-m/%-d(%a)')) 
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線

matlines(d0$px[is.te], d0$gwlwr[is.te], lty = 3, col = 'lightgray')
matlines(d0$px[is.te], d0$gwupr[is.te], lty = 3, col = 'lightgray')
matlines(d0$px[is.te], d0$gwfit[is.te], type = 'o', pch = 1, lty = 2, col = 4)

# 凡例
legend('topleft', bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
       legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
       legend = sprintf('MAPE: %2.1f%%', 
                        mean(abs((d0$gwfit[is.te] - d0$gw[is.te])/d0$gw[is.te]))*100))

#dev.off()

#演習課題

d0 <- read.csv('https://stats.dip.jp/01_ds/data/e-stat_expenditure.csv', skip = 11)

# タイムスタンプと支出金額のカラムを探しデータを抽出
j.time <- grep('時間軸.月次..コード', colnames(d0))
j.ice  <- grep('アイスクリーム',      colnames(d0))

d1 <- d0[, c(j.time, j.ice)]

# 時間コード（10桁）からPOSIX準拠時間を作成
yyyy <- substr(paste(d1[, 1]), 1, 4)
mm   <- substr(paste(d1[, 1]), 7, 8)
px <- as.POSIXct(paste(yyyy, mm, '01 12:00:00'), format = '%Y%m%d %H:%M:%S')

d <- data.frame(px, date = format(px, '%Y-%m-%d'), y = d1[, 2])

# 暦，気象とマージ
d <- merge(d, read.csv('https://stats.dip.jp/01_ds/data/calendar.csv'))
d <- merge(d, read.csv('https://stats.dip.jp/01_ds/data/amedas_tokyo.csv')) 
#View(d)

# グラフ
MAIN <- 'アイスクリーム月間世帯支出'
matplot(d$px, d$y, type = 'o', pch = 16, lty =1, col = 2,
        main = MAIN, xlab = '年', ylab = '円')

n <- 100                         # 全時間
x <- 1:n                         # 時刻
t <- 2 * x                       # トレンド（係数：2）
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動 
y <- t + e                       # 合成変動 

matplot(x, y, type = 'o', pch = 16, col = 2)

n.tr <- n * 0.8   # 訓練時間（80%）
n.te <- n - n.tr  # テスト時間（20%）

ii.tr <- 1:n.tr 
ii.te <- (n.tr + 1):n

fit <- lm(y ~ x, data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot <- function(x, y, pred)
{
  matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5)
  abline(v = n.tr+1, col = 3, lty = 2)
  matlines(x[ii.te], pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
  matlines(x[ii.te], pred[, 'lwr'], lty = 3, col = 'lightgray')
  matlines(x[ii.te], pred[, 'upr'], lty = 3, col = 'lightgray')
  legend('topleft', bg = 0,
        lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
        legend = c('原系列', '予測値', '95%予測区間'))
}

myplot(x, y, pred)

n.tr <- n * 0.8   # 訓練時間（80%）
n.te <- n - n.tr  # テスト時間（20%）

ii.tr <- 1:n.tr 
ii.te <- (n.tr + 1):n

fit <- lm(y ~ x, data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot <- function(x, y, pred)
{
  matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5)
  abline(v = n.tr+1, col = 3, lty = 2)
  matlines(x[ii.te], pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
  matlines(x[ii.te], pred[, 'lwr'], lty = 3, col = 'lightgray')
  matlines(x[ii.te], pred[, 'upr'], lty = 3, col = 'lightgray')
  legend('topleft', bg = 0,
        lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
        legend = c('原系列', '予測値', '95%予測区間'))
}

myplot(x, y, pred)

T <- 10                          # 周期
c <- 30 * sin(2 * pi * x / T)    # 周期変動（振幅：30，周期：10）
t <- 2 * x                       # トレンド（係数：2）
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動 
y <- t + c + e                   # 合成変動 

fit <- lm(y ~  poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')

myplot(x, y, pred)

d0 <- read.csv('https://stats.dip.jp/01_ds/data/load_amedas_y2017-2022_tokyo.csv')

head(d0)

##         date            datetime dow       name    event    md gr year month
## 1 2017-01-01 2017-01-01 00:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 2 2017-01-01 2017-01-01 01:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 3 2017-01-01 2017-01-01 02:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 4 2017-01-01 2017-01-01 03:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 5 2017-01-01 2017-01-01 04:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
## 6 2017-01-01 2017-01-01 05:00:00  日 祝日：元日 年末年始 01-01  6 2017     1
##   day su mo tu we th fr sa wd ho bw af sp ab    mw     time hr mi    hm rm   ws
## 1   1  1  0  0  0  0  0  0  0  1  0  0  1  0 27830 00:00:00  0  0 77.43  0 2.32
## 2   1  1  0  0  0  0  0  0  0  1  0  0  1  0 26340 01:00:00  1  0 71.00  0 2.52
## 3   1  1  0  0  0  0  0  0  0  1  0  0  1  0 25200 02:00:00  2  0 68.86  0 1.52
## 4   1  1  0  0  0  0  0  0  0  1  0  0  1  0 24380 03:00:00  3  0 71.43  0 1.40
## 5   1  1  0  0  0  0  0  0  0  1  0  0  1  0 23890 04:00:00  4  0 74.86  0 1.97
## 6   1  1  0  0  0  0  0  0  0  1  0  0  1  0 23940 05:00:00  5  0 73.71  0 2.25
##     tp tp3h tp5h tp3d tp7d
## 1 4.87 5.31 5.53 5.50 6.74
## 2 4.49 5.01 5.20 5.53 6.73
## 3 4.07 4.48 4.90 5.54 6.72
## 4 3.91 4.16 4.60 5.56 6.71
## 5 3.19 3.72 4.11 5.58 6.69
## 6 3.14 3.41 3.76 5.61 6.68

d0$px <- as.POSIXct(d0$datetime)
d0$gw <- d0$mw / 1000 # 測定値 (単位変換：MW -> GW)
d0$gwfit <- NA
d0$gwlwr <- NA
d0$gwupr <- NA

# Suffixとしてtr: training term, te: test term, al: all term を使用する。

# 訓練データの取得期間フラグ（ここでは予測日の前後１ヶ月間をとった）
is.67 <- '06-01' <= d0$md & d0$md <= '07-31'

# 訓練期間フラグ
is.tr <- '2017-06-01' <= d0$date & d0$date <= '2022-06-30'

# テスト期間フラグ
is.te <- '2022-07-01' <= d0$date & d0$date <= '2022-07-03'

# グラフ表示期間フラグ
is.plot <- '2022-06-28' <= d0$date & d0$date <= '2022-07-03'
d.plot <- d0[is.plot, ] # グラフ表用データ

matplot(d0$px, d0$gw, 
        main = '電力需要',
        xlab = '年', ylab = '電力需要［GW］',
        type = 'l', col = 2, ylim = c(20, 70))

#cairo_pdf('load_0-23.pdf')

matplot(d0$px[is.plot], d0$gw[is.plot], 
        main = '電力需要',
        xlab = format(d0$px[1], '%Y年'), ylab = '電力需要［GW］',
        type = 'o', pch = 16, col = 2, xaxt = 'n',
        xlim = range(d0$px[is.plot]), ylim = c(20, 70))

px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
axis(side = 1, at = px.ax, labels = format(px.ax, '%-m/%-d(%a)')) 
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線

matlines(d0$px[is.te], d0$gwlwr[is.te], lty = 3, col = 'lightgray')

## Warning in min(x): min の引数に有限な値がありません: Inf を返します

## Warning in max(x): max の引数に有限な値がありません: -Inf を返します

matlines(d0$px[is.te], d0$gwupr[is.te], lty = 3, col = 'lightgray')

## Warning in min(x): min の引数に有限な値がありません: Inf を返します

## Warning in min(x): max の引数に有限な値がありません: -Inf を返します

matlines(d0$px[is.te], d0$gwfit[is.te], type = 'o', pch = 1, lty = 2, col = 4)

## Warning in min(x): min の引数に有限な値がありません: Inf を返します

## Warning in min(x): max の引数に有限な値がありません: -Inf を返します

# 凡例
legend('topleft', bg = 0,
       lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
       legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
       legend = sprintf('MAPE: %2.1f%%', 
                        mean(abs((d0$gwfit[is.te] - d0$gw[is.te])/d0$gw[is.te]))*100))

#dev.off()

機械学習

演習課題

2023-12-01

課題名