機械学習

d0 <- read.csv('https://stats.dip.jp/01_ds/data/e-stat_expenditure.csv', skip = 11)

# タイムスタンプと支出金額のカラムを探しデータを抽出
j.time <- grep('時間軸.月次..コード', colnames(d0))
j.ice  <- grep('アイスクリーム',      colnames(d0))

d1 <- d0[, c(j.time, j.ice)]

# 時間コード（10桁）からPOSIX準拠時間を作成
yyyy <- substr(paste(d1[, 1]), 1, 4)
mm   <- substr(paste(d1[, 1]), 7, 8)
px <- as.POSIXlt(paste(yyyy, mm, '01 12:00:00'), format = '%Y%m%d %H:%M:%S')

d <- data.frame(px, date = format(px, '%Y-%m-%d'), y = d1[, 2])

# 暦，気象とマージ
d <- merge(d, read.csv('https://stats.dip.jp/01_ds/data/calendar/calendar.csv'))
d <- merge(d, read.csv('https://stats.dip.jp/01_ds/data/amedas_tokyo.csv'))
#d <- merge(d, read.csv('https://stats.dip.jp/01_ds/data/amedas/amedas_Tokyo_1h.csv'))
#View(d)

# グラフ
MAIN <- 'アイスクリーム月間世帯支出'
matplot(d$px, d$y, type = 'o', pch = 16, lty = 1, col = 2, main = MAIN, xlab = '年', ylab = '円')
grid()

library(DT)
datatable(d)

d$yfit <- NA
d$ylwr <- NA
d$yupr <- NA


is.tr <- '2014-06-01' <= d$date & d$date <= '2021-12-31'

is.te <- '2022-01-01' <= d$date & d$date <= '2022-11-30'

is.plot <- '2014-06-01' <= d$date & d$date <= '2022-11-30'
d.plot <- d0[is.plot, ]

for (mon in 1:11)
{
is.mon <- d$month == mon
  
d.tr <- d[is.mon & is.tr, ]
  
d.te <- d[is.mon & is.te, ]
  
fit <- lm(y ~ year, data = d.tr)
  
summary(fit)
  
pred <- predict(fit, newdata = d.te, interval = 'prediction')
  
d$yfit[d$px %in% d.te$px] <- pred[, 'fit']
d$ylwr[d$px %in% d.te$px] <- pred[, 'lwr']
d$yupr[d$px %in% d.te$px] <- pred[, 'upr']
  
  
matplot(d$px[is.mon & is.plot], d$y[is.mon & is.plot], main = paste(MAIN, '（', mon, '月）'),
        xlab = '年', ylab = '円', type = 'o', pch = 16, col = 2, ylim = c(300, 2000))
  
abline(v = as.POSIXct('2022-01-01'), col = 3, lty = 2)
  
matlines(d.te$px, pred[,'fit'], type = 'o', pch = 1, lty = 2, col = 4)
matlines(d.te$px, pred[,'lwr'], lty = 3, col = 'lightgray')
matlines(d.te$px, pred[,'upr'], lty = 3, col = 'lightgray')
  
  
legend('topleft', bg = 0, lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'), legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0, legend = sprintf('MAPE: %2.1f%%', mean(abs((pred[, 'fit'] - d.te$gw)/d.te$gw))*100))
dev.off()
}

matplot(d$px[is.plot], d$y[is.plot], 
        main = "アイスクリーム月間世帯支出",
        type = "o", pch = 16, col = 2, xaxt = "n",
        xlim = range(d$px[is.plot]), ylim = c(0, 2000))

px.ax <- seq(d$px[1], d$px[nrow(d)], by = "2 years")
abline(v = px.ax, col = 'lightgray', lty = 2)
abline(v = as.POSIXct('2022-01-01'), col = 3, lty = 2)

matlines(d$px[is.te], d$ylwr[is.te], lty = 3, col = 'lightgray')
matlines(d$px[is.te], d$yupr[is.te], lty = 3, col = 'lightgray')
matlines(d$px[is.te], d$yfit[is.te], type = 'o', pch = 1, lty = 2, col = 4)

legend('topleft', bg = 0, lty = c(1, 2, 3), pch = c(16, 1, NA), col = c(2, 4, 'lightgray'), legend = c('原系列', '予測値', '95%予測区間'))
legend('topright', bg = 0, legend = sprintf("MAPE: %.1f%%",
                                            mean(abs((d$yfit[is.te] - d$y[is.te]) / d$y[is.te])) * 100))

機械学習

演習課題

23150124

2024-12-03