n <- 100 # 全時間数
x <- 1:n # 時刻
t <- 2 * x # トレンド(係数:2)
e <- rnorm(n, mean = 0, sd = 10) # 不規則変動
y <- t + e # 合成変動
matplot(x, y, type = 'o', pch = 16, col = 2)
n.tr <- n * 0.8 # 訓練時間(80%)
n.te <- n - n.tr # テスト時間(20%)
ii.tr <- 1:n.tr
ii.te <- (n.tr + 1):n
fit <- lm(y ~ x, data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')
myplot <- function(x, y, pred)
{
matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5)
abline(v = n.tr+1, col = 3, lty = 2)
matlines(x[ii.te], pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
matlines(x[ii.te], pred[, 'lwr'], lty = 3, col = 'lightgray')
matlines(x[ii.te], pred[, 'upr'], lty = 3, col = 'lightgray')
legend('topleft', bg = 0,
lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
legend = c('原系列', '予測値', '95%予測区間'))
}
myplot(x, y, pred)
t <- 0.05 * x^2 - 2 * x # 2次のトレンド
y <- t + e # 合成変動
fit <- lm(y ~ poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')
myplot(x, y, pred)
T <- 10 # 周期
c <- 30 * sin(2 * pi * x / T) # 周期変動(振幅:30,周期:10)
t <- 2 * x # トレンド(係数:2)
y <- t + c + e # 合成変動
fit <- lm(y ~ poly(x, 2, raw = T), data = data.frame(x = x[ii.tr], y = y[ii.tr]))
#summary(fit)
pred <- predict(fit, newdata = data.frame(x = x[ii.te]), interval = 'prediction')
myplot(x, y, pred)
dir.create('./ts_reg_periodic', showWarnings = F)
yhat <- rep(NA, n)
for (j in 1:T)
{
# j <- 1
ii <- seq(j, n.tr, T)
ii.pred <- ii[length(ii)] + seq(T, T*2, T)
fit <- lm(y ~ x, data = data.frame(x = x[ii], y = y[ii]))
pred <- predict(fit, newdata = data.frame(x = x[ii.pred]), interval = 'prediction')
yhat[ii.pred] <- pred[, 'fit']
png(paste0('./ts_reg_periodic/ts_reg_periodic', j, '.png'))
matplot(x, y, type = 'o', pch = 16, col = 2, cex = 0.5, ylim = c(0, 250))
abline(v = n.tr+1, col = 3, lty = 2)
matlines(x[ii], y[ii], type = 'o', pch = 1, col = 1, cex = 1.5)
matlines(x, yhat, type = 'o', pch = 1, col = 4)
matpoints(x[ii.pred], yhat[ii.pred], pch = 1, col = 1)
dev.off()
#Sys.sleep(2)
}
#system('bash -c 'convert -delay 100 -loop 0 ./ts_reg_periodic/ts_reg_periodic{1..10}.png ./ts_reg_periodic/ts_reg_periodic.gif'')
d0 <- read.csv('https://stats.dip.jp/01_ds/data/load/load_amedas_y2017-2022_tokyo.csv')
head(d0)
## date datetime dow name event md gr year month
## 1 2017-01-01 2017-01-01 00:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 2 2017-01-01 2017-01-01 01:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 3 2017-01-01 2017-01-01 02:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 4 2017-01-01 2017-01-01 03:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 5 2017-01-01 2017-01-01 04:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## 6 2017-01-01 2017-01-01 05:00:00 日 祝日:元日 年末年始 01-01 6 2017 1
## day su mo tu we th fr sa wd ho bw af sp ab mw time hr mi hm rm ws
## 1 1 1 0 0 0 0 0 0 0 1 0 0 1 0 27830 00:00:00 0 0 77.43 0 2.32
## 2 1 1 0 0 0 0 0 0 0 1 0 0 1 0 26340 01:00:00 1 0 71.00 0 2.52
## 3 1 1 0 0 0 0 0 0 0 1 0 0 1 0 25200 02:00:00 2 0 68.86 0 1.52
## 4 1 1 0 0 0 0 0 0 0 1 0 0 1 0 24380 03:00:00 3 0 71.43 0 1.40
## 5 1 1 0 0 0 0 0 0 0 1 0 0 1 0 23890 04:00:00 4 0 74.86 0 1.97
## 6 1 1 0 0 0 0 0 0 0 1 0 0 1 0 23940 05:00:00 5 0 73.71 0 2.25
## tp tp3h tp5h tp3d tp7d
## 1 4.87 5.31 5.53 5.50 6.74
## 2 4.49 5.01 5.20 5.53 6.73
## 3 4.07 4.48 4.90 5.54 6.72
## 4 3.91 4.16 4.60 5.56 6.71
## 5 3.19 3.72 4.11 5.58 6.69
## 6 3.14 3.41 3.76 5.61 6.68
d0$px <- as.POSIXct(d0$datetime)
d0$gw <- d0$mw / 1000 # 測定値 (単位変換:MW -> GW)
d0$gwfit <- NA
d0$gwlwr <- NA
d0$gwupr <- NA
# 訓練データの取得期間フラグ(ここでは予測日の前後1ヶ月間をとった)
is.67 <- '06-01' <= d0$md & d0$md <= '07-31'
# 訓練期間フラグ
is.tr <- '2017-06-01' <= d0$date & d0$date <= '2022-06-30'
# テスト期間フラグ
is.te <- '2022-07-01' <= d0$date & d0$date <= '2022-07-03'
# グラフ表示期間フラグ
is.plot <- '2022-06-28' <= d0$date & d0$date <= '2022-07-03'
d.plot <- d0[is.plot, ] # グラフ表用データ
matplot(d0$px, d0$gw,
main = '電力需要',
xlab = '年', ylab = '電力需要[GW]',
type = 'l', col = 2, ylim = c(20, 70))
for (hr in 0:23)
{
# 予測時間
#hr <- 10
is.hr <- d0$hr == hr
# 訓練データ
d.tr <- d0[is.hr & is.67 & is.tr, ]
# テストデータ(予測区間の正解データとして精度評価に利用)
d.te <- d0[is.hr & is.te, ]
# フィッティング
# (ここでは,トレンドがなく気温の2次多項式と曜日ダミーのモデルとした)
fit <- lm(gw ~ tp + I(tp^2) + dow, data = d.tr)
summary(fit)
# 予測(予測区間付き)
pred <- predict(fit, newdata = d.te, interval = 'prediction')
# 毎時の予測値を保存
d0$gwfit[d0$px %in% d.te$px] <- pred[, 'fit']
d0$gwlwr[d0$px %in% d.te$px] <- pred[, 'lwr']
d0$gwupr[d0$px %in% d.te$px] <- pred[, 'upr']
# 時間別グラフ
dir.create('./load', showWarnings = F)
png(paste0('./load/load_at', hr, '.png'))
matplot(d0$px[is.hr & is.plot], d0$gw[is.hr & is.plot],
main = paste0(hr, '時の電力需要'),
type = 'n', pch = 16, col = 2, xaxt = 'n',
xlim = range(d0$px[is.plot]), ylim = c(20, 70))
px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線
matlines(d0$px[is.plot], d0$gw[is.plot], type = 'o', pch = 1, col = 'lightgray')
matlines(d.te$px, pred[, 'fit'], type = 'o', pch = 1, lty = 2, col = 4)
matlines(d.te$px, pred[, 'lwr'], lty = 3, col = 'lightgray')
matlines(d.te$px, pred[, 'upr'], lty = 3, col = 'lightgray')
matlines(d0$px[is.hr & is.plot], d0$gw[is.hr & is.plot], type = 'o', pch = 16, col = 2)
# 凡例
legend('topleft', bg = 0,
lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
legend = sprintf('MAPE: %2.1f%%',
mean(abs((pred[, 'fit'] - d.te$gw)/d.te$gw))*100))
dev.off()
}
#system('bash -c 'convert -delay 100 -loop 0 ./load/load_at{0..23}.png ./load/load_at.gif'')
#cairo_pdf('load_0-23.pdf')
matplot(d0$px[is.plot], d0$gw[is.plot],
main = '電力需要',
type = 'o', pch = 16, col = 2, xaxt = 'n',
xlim = range(d0$px[is.plot]), ylim = c(20, 70))
px.ax <- seq(d0$px[1], d0$px[nrow(d0)], by = 60*60*24)
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-07-01'), col = 3, lty = 2) # 予測開始線
matlines(d0$px[is.te], d0$gwlwr[is.te], lty = 3, col = 'lightgray')
matlines(d0$px[is.te], d0$gwupr[is.te], lty = 3, col = 'lightgray')
matlines(d0$px[is.te], d0$gwfit[is.te], type = 'o', pch = 1, lty = 2, col = 4)
# 凡例
legend('topleft', bg = 0,
lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'),
legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0,
legend = sprintf('MAPE: %2.1f%%',
mean(abs((d0$gwfit[is.te] - d0$gw[is.te])/d0$gw[is.te]))*100))
d0 <- read.csv('https://stats.dip.jp/01_ds/data/e-stat_expenditure.csv', skip = 11)
# タイムスタンプと支出金額のカラムを探しデータを抽出
j.time <- grep('時間軸.月次..コード', colnames(d0))
j.ice <- grep('アイスクリーム', colnames(d0))
d1 <- d0[, c(j.time, j.ice)]
# 時間コード(10桁)からPOSIX準拠時間を作成
# Extract year and month for date creation
yyyy <- substr(paste(d1[, 1]), 1, 4)
mm <- substr(paste(d1[, 1]), 7, 8)
# Create 'px' as a Date column
px <- as.Date(paste(yyyy, mm, "01"), format = '%Y %m %d')
# Combine the data into a new data frame
d0 <- data.frame(px, date = format(px, '%Y-%m-%d'), y = d1[, 2])
d.ca <- read.csv('https://stats.dip.jp/01_ds/data/calendar/calendar.csv')
d.amedas <- read.csv('https://stats.dip.jp/01_ds/data/amedas/amedas_Tokyo_1h.csv')
d.amedas$date <- format(as.POSIXct(d.amedas$HS), "%Y-%m-%d") # カラムdateを作成
# 暦,気象とマージ
d0 |> merge(d.ca, by = "date") |> merge(d.amedas, by = "date") -> d
head(d)
## date px y dow name event md gr year
## 1 2014-01-01 2014-01-01 396 水 祝日(元日) 年末年始(01-01) 01-01 6 2014
## 2 2014-01-01 2014-01-01 396 水 祝日(元日) 年末年始(01-01) 01-01 6 2014
## 3 2014-01-01 2014-01-01 396 水 祝日(元日) 年末年始(01-01) 01-01 6 2014
## 4 2014-01-01 2014-01-01 396 水 祝日(元日) 年末年始(01-01) 01-01 6 2014
## 5 2014-01-01 2014-01-01 396 水 祝日(元日) 年末年始(01-01) 01-01 6 2014
## 6 2014-01-01 2014-01-01 396 水 祝日(元日) 年末年始(01-01) 01-01 6 2014
## month day wk su mo tu we th fr sa wd.x ho bw af sp ab HS
## 1 1 1 1 0 0 0 1 0 0 0 0 1 0 0 1 0 2014-01-01 13:00:00
## 2 1 1 1 0 0 0 1 0 0 0 0 1 0 0 1 0 2014-01-01 23:00:00
## 3 1 1 1 0 0 0 1 0 0 0 0 1 0 0 1 0 2014-01-01 15:00:00
## 4 1 1 1 0 0 0 1 0 0 0 0 1 0 0 1 0 2014-01-01 21:00:00
## 5 1 1 1 0 0 0 1 0 0 0 0 1 0 0 1 0 2014-01-01 00:00:00
## 6 1 1 1 0 0 0 1 0 0 0 0 1 0 0 1 0 2014-01-01 05:00:00
## HE hpa hpa0 pr hm hm1h ws ws1h ws_max rm rm1h tp
## 1 2014-01-01 14:00:00 997.8 1002.1 0 36 36.50 3.9 3.97 8.1 10 60.00 15.4
## 2 2014-01-02 00:00:00 1002.5 1006.9 0 47 46.00 1.0 1.35 2.3 0 0.00 8.0
## 3 2014-01-01 16:00:00 998.8 1003.1 0 36 35.50 3.9 3.88 6.3 10 55.98 14.3
## 4 2014-01-01 22:00:00 1002.1 1006.6 0 58 54.83 0.6 0.73 1.0 0 0.00 6.4
## 5 2014-01-01 01:00:00 1003.5 1008.0 0 68 67.17 1.0 1.83 1.9 0 0.00 5.3
## 6 2014-01-01 06:00:00 1001.9 1006.3 0 48 50.33 3.4 3.00 4.8 0 0.00 7.2
## tp1h tp5h tp3d tp7d wd.y wd_max missing
## 1 15.02 13.20 6.68 6.40 南南西 南南西
## 2 8.43 8.09 7.29 6.54 北 北東
## 3 14.53 14.60 6.85 6.47 南 南
## 4 7.17 9.89 7.16 6.55 北西 西
## 5 4.82 6.27 5.81 6.13 北北西 北西
## 6 7.03 5.20 6.03 6.16 南 南南西
# Convert 'px' to numeric for regression
d$px_numeric <- as.numeric(d$px)
# Define training, testing, and plotting periods
is.tr <- d$px < as.Date("2022-01-01") # Training: before 2022
is.te <- d$px >= as.Date("2022-01-01") & d$px <= as.Date("2022-11-30") # Testing: 2022
is.plot <- d$px <= as.Date("2022-11-30") # Plotting range: up to November 2022
# Fit the model using training data
fit <- lm(y ~ px_numeric + I(px_numeric^2), data = d[is.tr, ])
# Predict for 2022
pred <- predict(fit, newdata = d[is.te, ], interval = "prediction")
d$y_fit <- NA
d$lwr <- NA
d$upr <- NA
d$y_fit[is.te] <- pred[, "fit"]
d$lwr[is.te] <- pred[, "lwr"]
d$upr[is.te] <- pred[, "upr"]
# Plotting with custom x-axis labels
MAIN <- "アイスクリーム月間世帯支出"
matplot(d$px[is.plot], d$y[is.plot],
type = "o", pch = 16, col = 2, xaxt = "n", # Suppress default x-axis
main = MAIN, xlab = "年", ylab = "円",
ylim = c(500, 2000))
# Add custom x-axis labels (Years)
px.ax <- seq(min(d$px), max(d$px), by = "2 years")
axis(1, at = px.ax, labels = format(px.ax, "%Y"))
# Add vertical lines for reference
abline(v = px.ax, col = "lightgray", lty = 2)
abline(v = as.Date("2022-01-01"), col = 3, lty = 2) # Prediction start line
# Add predicted values and confidence intervals
matlines(d$px[is.te], d$lwr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$upr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$y_fit[is.te], type = "o", pch = 1, lty = 2, col = 4)
# Add legends
legend("topleft", bg = 0,
lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, "lightgray"),
legend = c("原系列", "予測値", "95%予測区間"))
legend("topright", bg = 0,
legend = sprintf("MAPE: %.1f%%",
mean(abs((d$y_fit[is.te] - d$y[is.te]) / d$y[is.te])) * 100))
# Convert 'px' to numeric for regression
d$px_numeric <- as.numeric(d$px)
# Define training, testing, and plotting periods
is.tr <- d$px < as.Date("2022-01-01") # Training: before 2022
is.te <- d$px >= as.Date("2022-01-01") & d$px <= as.Date("2022-11-30") # Testing: 2022
is.plot <- d$px <= as.Date("2022-11-30") # Plotting range: up to November 2022
# Fit the model using training data
fit <- lm(y ~ px_numeric + I(px_numeric^2), data = d[is.tr, ])
# Predict for 2022
pred <- predict(fit, newdata = d[is.te, ], interval = "prediction")
d$y_fit <- NA
d$lwr <- NA
d$upr <- NA
d$y_fit[is.te] <- pred[, "fit"]
d$lwr[is.te] <- pred[, "lwr"]
d$upr[is.te] <- pred[, "upr"]
for (mon in 1:11) {
# 予測時間 (Filter data for the current month)
is.mon <- format(d$px, "%m") == sprintf("%02d", mon)
# 訓練データ (Training data before 2022)
d.tr <- d[is.mon & is.tr, ]
# テストデータ (Testing data for 2022)
d.te <- d[is.mon & is.te, ]
# フィッティング (Fit the model)
fit <- lm(y ~ px_numeric + I(px_numeric^2), data = d.tr)
# 予測 (Prediction with confidence intervals)
pred <- predict(fit, newdata = d.te, interval = "prediction")
# 毎月の予測値を保存 (Save predictions)
d$y_fit[d$px %in% d.te$px] <- pred[, "fit"]
d$lwr[d$px %in% d.te$px] <- pred[, "lwr"]
d$upr[d$px %in% d.te$px] <- pred[, "upr"]
# 月別グラフ (Plot for each month)
dir.create('./load', showWarnings = F)
png(paste0('./load/load_mon_', mon, '.png'))
matplot(d$px[is.mon & is.plot], d$y[is.mon & is.plot],
main = paste0(mon, "月のアイスクリーム月間世帯支出"),
type = "n", pch = 16, col = 2, xaxt = "n",
xlim = range(d$px[is.plot]), ylim = c(0, 2000))
px.ax <- seq(d$px[1], d$px[nrow(d)], by = "2 years")
abline(v = px.ax, col = "lightgray", lty = 2)
abline(v = as.Date("2022-01-01"), col = 3, lty = 2) # 予測開始線
matlines(d$px[is.plot], d$y[is.plot], type = "o", pch = 1, col = "lightgray")
matlines(d.te$px, pred[, "fit"], type = "o", pch = 1, lty = 2, col = 4)
matlines(d.te$px, pred[, "lwr"], lty = 3, col = "lightgray")
matlines(d.te$px, pred[, "upr"], lty = 3, col = "lightgray")
matlines(d$px[is.mon & is.plot], d$y[is.mon & is.plot], type = "o", pch = 16, col = 2)
# 凡例 (Legend)
legend("topleft", bg = 0,
lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, "lightgray"),
legend = c("原系列", "予測値", "95%予測区間"))
legend("topright", bg = 0,
legend = sprintf("MAPE: %.1f%%",
mean(abs((pred[, "fit"] - d.te$y)/d.te$y)) * 100))
dev.off()
}
# 全期間グラフ (Aggregate Graph with Years on X-Axis)
matplot(d$px[is.plot], d$y[is.plot],
main = "アイスクリーム月間世帯支出",
type = "o", pch = 16, col = 2, xaxt = "n", # Suppress default x-axis
xlim = range(d$px[is.plot]), ylim = c(0, 2000))
# Add custom x-axis labels (Years)
px.ax <- seq(min(d$px), max(d$px), by = "2 years")
axis(1, at = px.ax, labels = format(px.ax, "%Y"))
# Add vertical lines for reference
abline(v = px.ax, col = "lightgray", lty = 2)
abline(v = as.Date("2022-01-01"), col = 3, lty = 2) # Prediction start line
# Add predicted values and confidence intervals
matlines(d$px[is.te], d$lwr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$upr[is.te], lty = 3, col = "lightgray")
matlines(d$px[is.te], d$y_fit[is.te], type = "o", pch = 1, lty = 2, col = 4)
# 凡例 (Legend)
legend("topleft", bg = 0,
lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, "lightgray"),
legend = c("原系列", "予測値", "95%予測区間"))
legend("topright", bg = 0,
legend = sprintf("MAPE: %.1f%%",
mean(abs((d$y_fit[is.te] - d$y[is.te]) / d$y[is.te])) * 100))
```