d <- read.csv('https://stats.dip.jp/01_ds/data/heart.data.csv')[, -1]
DT::datatable(round(d,1))
COL <- c(rgb(255, 0, 0, 105, max = 255), # 赤
rgb( 0, 0, 255, 105, max = 255), # 青
rgb( 0, 155, 0, 105, max = 255), # 緑
rgb(100, 100, 100, 55, max = 255)) # 灰
library(e1071)
KERNEL <- c('linear',
'polynomial',
'sigmoid',
'radial')
k <- 1
cv <- tune('svm', heart.disease ~ biking, data = d,
kernel = KERNEL[k], type = 'eps-regression',
ranges = list(#gamma = 2^(-4:4),
epsilon = seq(0, 1, 0.1),
#coef0 = 2^(-4:4),
cost = 2^(-4:4)))
x.new <- seq(0, 100, 0.1)
yhat <- predict(cv$best.model, newdata = data.frame(biking = x.new))
epsilon <- cv$best.model$epsilon * cv$best.model$y.scale$`scaled:scale`
upr <- yhat + epsilon
lwr <- yhat - epsilon
matplot(x = d$biking, y = d$heart.disease,
type = 'p', pch = 1, col = COL[1],
main = paste0('SVR(カーネル:', KERNEL[k], ',ε:', round(epsilon, 2), ')'),
xlab = '自転車通勤の割合(%)',
ylab = '心臓病の人口割合(%)')
# サポートベクター
sv <- d[cv$best.model$index, c('biking', 'heart.disease')]
matpoints(x = sv[, 1], y = sv[, 2], pch = 16, cex = 0.5, col = 1)
# 境界線とマージン(ε)
matlines(x = x.new, y = yhat, col = COL[2], lwd = 2)
matlines(x = x.new, y = upr, col = COL[2], lty = 3)
matlines(x = x.new, y = lwr, col = COL[2], lty = 3)
arrows(0, yhat[1], 0, upr[1], code = 3, length = 0.1)
arrows(0, yhat[1], 0, lwr[1], code = 3, length = 0.1)
library(latex2exp)
text(1, (yhat[1]+upr[1])/2, labels = TeX('$\\epsilon$'))
text(1, (yhat[1]+lwr[1])/2, labels = TeX('$\\epsilon$'))
legend('topright',
col = c(COL[1], 1, COL[2], COL[2]), pch = c(1, 16, NA, NA),
lwd = c(1, 0.5, 2, 1),
lty = c(NA, NA, 1, 3),
border = F, bg = 'white',
legend = c('人口割合', 'サポートベクター', '予測値', '境界線'))

n <- 24*7*2
t <- 1:n + rnorm(n)
set.seed(5)
f <- function(t) 100 + 0.1*t + 0.02*t^2 + 100*sin(2*pi*t/24)
y <- f(t) + rnorm(n, sd = 5)
d <- data.frame(t, y)
matplot(x = d$t, y = d$y, type = 'o', pch = 16, col = COL[1],
main = 'テストデータ', xlab = '時間', ylab = '値')
grid()
matlines(x = t, y = f(t), col = COL[2])
legend('topleft',
col = COL[1:2], pch = c(16, NA), lty = c(NA, 1),
legend = c('観測値', '真値'))
library(plotly)
## 要求されたパッケージ ggplot2 をロード中です
##
## 次のパッケージを付け加えます: 'plotly'
## 以下のオブジェクトは 'package:ggplot2' からマスクされています:
##
## last_plot
## 以下のオブジェクトは 'package:latex2exp' からマスクされています:
##
## TeX
## 以下のオブジェクトは 'package:stats' からマスクされています:
##
## filter
## 以下のオブジェクトは 'package:graphics' からマスクされています:
##
## layout

plot_ly(type = "scatter", mode = "markers") |>
add_trace(x = d$t, y = d$y, mode = 'markers', name = "観測値", marker = list(color = COL[1])) |>
add_trace(x = t, y = f(t), mode = 'lines', name = "真値", line = list(color = COL[2])) |>
layout(title = "テストデータ",
xaxis = list(title = "時間"),
yaxis = list(title = "値"))
d <- read.csv('https://stats.dip.jp/01_ds/data/heart.data.csv')[, -1]
DT::datatable(round(d,1))
# カラーパレット
COL <- c(rgb(255, 0, 0, 105, max = 255), # 赤
rgb( 0, 0, 255, 105, max = 255), # 青
rgb( 0, 155, 0, 105, max = 255), # 緑
rgb(100, 100, 100, 55, max = 255)) # 灰
library(e1071)
KERNEL <- c('linear',
'polynomial',
'sigmoid',
'radial')
k <- 1
cv <- tune('svm', heart.disease ~ biking, data = d,
kernel = KERNEL[k], type = 'eps-regression',
ranges = list(#gamma = 2^(-4:4),
epsilon = seq(0, 1, 0.1),
#coef0 = 2^(-4:4),
cost = 2^(-4:4)))
cv
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 0.6 4
##
## - best performance: 2.615244
x.new <- seq(0, 100, 0.1)
yhat <- predict(cv$best.model, newdata = data.frame(biking = x.new))
epsilon <- cv$best.model$epsilon * cv$best.model$y.scale$`scaled:scale`
upr <- yhat + epsilon
lwr <- yhat - epsilon
matplot(x = d$biking, y = d$heart.disease,
type = 'p', pch = 1, col = COL[1],
main = paste0('SVR(カーネル:', KERNEL[k], ',ε:', round(epsilon, 2), ')'),
xlab = '自転車通勤の割合(%)',
ylab = '心臓病の人口割合(%)')
sv <- d[cv$best.model$index, c('biking', 'heart.disease')]
matpoints(x = sv[, 1], y = sv[, 2], pch = 16, cex = 0.5, col = 1)
matlines(x = x.new, y = yhat, col = COL[2], lwd = 2)
matlines(x = x.new, y = upr, col = COL[2], lty = 3)
matlines(x = x.new, y = lwr, col = COL[2], lty = 3)
arrows(0, yhat[1], 0, upr[1], code = 3, length = 0.1)
arrows(0, yhat[1], 0, lwr[1], code = 3, length = 0.1)
library(latex2exp)
text(1, (yhat[1]+upr[1])/2, labels = TeX('$\\epsilon$'))
text(1, (yhat[1]+lwr[1])/2, labels = TeX('$\\epsilon$'))
legend('topright',
col = c(COL[1], 1, COL[2], COL[2]), pch = c(1, 16, NA, NA),
lwd = c(1, 0.5, 2, 1),
lty = c(NA, NA, 1, 3),
border = F, bg = 'white',
legend = c('人口割合', 'サポートベクター', '予測値', '境界線'))

d <- read.csv('https://stats.dip.jp/01_ds/data/heart.data.csv')[, -1]
DT::datatable(round(d,1))
library(e1071)
KERNEL <- c('linear',
'polynomial',
'sigmoid',
'radial')
k <- 1
cv <- tune('svm', heart.disease ~ smoking, data = d,
kernel = KERNEL[k], type = 'eps-regression',
ranges = list(#gamma = 2^(-4:4),
epsilon = seq(0, 1.5, 0.1),
#coef0 = 2^(-4:4),
cost = 2^(-4:4)))
cv
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## epsilon cost
## 1.5 0.0625
##
## - best performance: 18.9422
x.new <- seq(0, 100, 0.1)
yhat <- predict(cv$best.model, newdata = data.frame(smoking = x.new))
epsilon <- cv$best.model$epsilon * cv$best.model$y.scale$`scaled:scale`
upr <- yhat + epsilon
lwr <- yhat - epsilon
matplot(x = d$smoking, y = d$heart.disease,
type = 'p', pch = 1, col = COL[1],
main = paste0('SVR(カーネル:', KERNEL[k], ',ε:', round(epsilon, 2), ')'),
xlab = '喫煙者の割合(%)',
ylab = '心臓病の人口割合(%)')
# サポートベクター
sv <- d[cv$best.model$index, c('smoking', 'heart.disease')]
matpoints(x = sv[, 1], y = sv[, 2], pch = 16, cex = 0.5, col = 1)
# 境界線とマージン(ε)
matlines(x = x.new, y = yhat, col = COL[2], lwd = 2)
matlines(x = x.new, y = upr, col = COL[2], lty = 3)
matlines(x = x.new, y = lwr, col = COL[2], lty = 3)
arrows(0, yhat[1], 0, upr[1], code = 3, length = 0.1)
arrows(0, yhat[1], 0, lwr[1], code = 3, length = 0.1)
library(latex2exp)
text(1, (yhat[1]+upr[1])/2, labels = TeX('$\\epsilon$'))
text(1, (yhat[1]+lwr[1])/2, labels = TeX('$\\epsilon$'))
legend('topright',
col = c(COL[1], 1, COL[2], COL[2]), pch = c(1, 16, NA, NA),
lwd = c(1, 0.5, 2, 1),
lty = c(NA, NA, 1, 3),
border = F, bg = 'white',
legend = c('人口割合', 'サポートベクター', '予測値', '境界線'))

n <- 24*7*2
t <- 1:n + rnorm(n)
set.seed(5)
f <- function(t) 100 + 0.1*t + 0.02*t^2 + 100*sin(2*pi*t/24)
y <- f(t) + rnorm(n, sd = 5)
d <- data.frame(t, y)
matplot(x = d$t, y = d$y, type = 'o', pch = 16, col = COL[1],
main = 'テストデータ', xlab = '時間', ylab = '値')
grid()
matlines(x = t, y = f(t), col = COL[2])
legend('topleft',
col = COL[1:2], pch = c(16, NA), lty = c(NA, 1),
legend = c('観測値', '真値'))

library(plotly)
plot_ly(type = "scatter", mode = "markers") |>
add_trace(x = d$t, y = d$y, mode = 'markers', name = "観測値", marker = list(color = COL[1])) |>
add_trace(x = t, y = f(t), mode = 'lines', name = "真値", line = list(color = COL[2])) |>
layout(title = "テストデータ",
xaxis = list(title = "時間"),
yaxis = list(title = "値"))
library(e1071)
KERNEL <- c('linear',
'polynomial',
'sigmoid',
'radial')
k <- 4
cv <- tune('svm', y ~ t, data = d,
kernel = KERNEL[k], type = 'eps-regression',
ranges = list(gamma = 10^(1:2),
epsilon = 10^(-2:0),
#coef0 = 2^(-4:4),
cost = 2^(-4:4)))
cv
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## gamma epsilon cost
## 100 0.01 16
##
## - best performance: 37.04775
x.new <- seq(0, n, 0.1)
yhat <- predict(cv$best.model, newdata = data.frame(t = x.new))
epsilon <- cv$best.model$epsilon * cv$best.model$y.scale$`scaled:scale`
upr <- yhat + epsilon
lwr <- yhat - epsilon
library(latex2exp)
matplot(x = d$t, y = d$y,
type = 'p', pch = 1, col = COL[1],
main = paste0('SVR(カーネル:', KERNEL[k], ',ε:', round(epsilon, 2), ')'),
xlab = TeX('$t$'),
ylab = TeX('$y$'))
sv <- d[cv$best.model$index, c('t', 'y')]
matpoints(x = sv[, 1], y = sv[, 2], pch = 16, cex = 0.5, col = 1)
matlines(x = x.new, y = yhat, col = COL[2], lwd = 2)
matlines(x = x.new, y = upr, col = COL[2], lty = 3)
matlines(x = x.new, y = lwr, col = COL[2], lty = 3)
arrows(0, yhat[1], 0, upr[1], code = 3, length = 0.1)
arrows(0, yhat[1], 0, lwr[1], code = 3, length = 0.1)
library(latex2exp)
text(1, (yhat[1]+upr[1])/2, labels = TeX('$\\epsilon$'))
text(1, (yhat[1]+lwr[1])/2, labels = TeX('$\\epsilon$'))
legend('topleft',
col = c(COL[1], 1, COL[2], COL[2]), pch = c(1, 16, NA, NA),
lwd = c(1, 0.5, 2, 1),
lty = c(NA, NA, 1, 3),
border = F, bg = 'white',
legend = c('データ', 'サポートベクター', '予測値', '境界線'))
