library(DT)

# Read the Titanic passenger table (Japanese column names) from the course
# site, then keep only the rows that have no missing values.
d <- read.csv(file = "https://stats.dip.jp/01_ds/data/titanic_data_jp.csv")
d <- na.omit(d)

# Number of complete rows that remain after dropping NAs.
n <- nrow(d)

# Interactive preview of the cleaned data, five rows per page.
datatable(data = d, options = list(pageLength = 5))
# Semi-transparent plotting palette, in order: red, blue, green, grey.
# Channel values are on a 0-255 scale; `alpha` is the opacity of each colour.
# `maxColorValue` is spelled out in full instead of relying on partial
# argument matching of `max`.
COL <- rgb(red   = c(255,   0,   0, 100),
           green = c(  0,   0, 155, 100),
           blue  = c(  0, 255,   0, 100),
           alpha = c(105, 105, 105,  55),
           maxColorValue = 255)
library(rpart)
library(rpart.plot)

# Classification tree for survival (生死) using age, sex, cabin class, fare,
# siblings/spouses, parents/children and port of embarkation as predictors.
# cp = 0.09 is the complexity threshold used while growing the tree.
tree <- rpart(生死 ~ 年齢 + 性別 + 客室等級 + 運賃 + 兄弟配偶者数 + 親子数 + 乗船地,
              data = d, method = "class", cp = 0.09)

# Two renderings of the same fitted tree: one with node style `type = 5`,
# one with the default node style and `branch.type = 5` wide branches.
rpart.plot(tree, type = 5)
rpart.plot(tree, branch.type = 5)

# Cross-validated error against cp. Drawn once: the plot is built from the
# cp table stored in the fitted object, so repeating the call only redraws
# the same figure.
plotcp(tree)

# NOTE(review): cp = 0.9 is far above the cp used for fitting, so this
# presumably prunes the tree back to (or near) the root -- confirm that 0.9
# rather than 0.09 is intended.
tree2 <- prune(tree, cp = 0.9)
rpart.plot(tree2, branch.type = 5)

# NOTE(review): cp = 0.09 equals the cp used when growing `tree`, so tree3
# is expected to be the same tree as `tree` -- confirm intent.
tree3 <- prune(tree, cp = 0.09)
rpart.plot(tree3, branch.type = 5)
# Predict class probabilities for one hypothetical passenger, passing the
# new observation inline as a one-row data frame.
# rules = TRUE (spelled out: `T` is an ordinary, reassignable variable) also
# reports the decision rule that produced the prediction.
rpart.predict(tree, rules = TRUE,
              newdata = data.frame(年齢 = 5,
                                   性別 = "女性",
                                   客室等級 = "3等",
                                   運賃 = 10,
                                   兄弟配偶者数 = 5,
                                   親子数 = 7,
                                   乗船地 = "S"))
# Recorded output (shown verbatim; columns are the two outcome classes):
## 死亡 生存
## 1 0.2452107 0.7547893 because 性別 is 女性
# Same hypothetical passenger, this time stored in a named one-row data
# frame so it can be reused as `newdata`.
d.new <- data.frame(年齢 = 5,
                    性別 = "女性",
                    客室等級 = "3等",
                    運賃 = 10,
                    兄弟配偶者数 = 5,
                    親子数 = 7,
                    乗船地 = "S")

# rules = TRUE (spelled out: `T` is an ordinary, reassignable variable) also
# reports the decision rule that produced the prediction.
rpart.predict(tree, rules = TRUE, newdata = d.new)
# Recorded output (shown verbatim; columns are the two outcome classes):
## 死亡 生存
## 1 0.2452107 0.7547893 because 性別 is 女性
# Corner coordinates of a 2 x 2 square of points.
xs <- c(1, 2, 1, 2)
ys <- c(1, 1, 2, 2)

# Toy data set: four copies of the square, each shifted by its own (x, y)
# offset and labelled with z = 1, 2, 3, 4 respectively.
d0 <- data.frame(
  x = rep(xs, times = 4) + rep(c(0, 0, 5, 5), each = 4),
  y = rep(ys, times = 4) + rep(c(0, 5, 3, 9), each = 4),
  z = rep(c(1, 2, 3, 4), each = 4)
)
# Scatter plot of the toy points as large open circles in the first palette
# colour, with each point's z value printed on top of it.
matplot(x = d0$x, y = d0$y, col = COL[1], cex = 2, pch = 1)
text(x = d0$x, y = d0$y, labels = d0$z)
# Tree for z on the toy data. z is numeric, so rpart fits a regression
# ("anova") tree; this is stated explicitly here. minsplit = 2 and
# minbucket = 1 allow splitting down to single-observation leaves.
tree <- rpart(formula = z ~ x + y,
              data = d0,
              method = "anova",
              control = rpart.control(minsplit = 2, minbucket = 1))

# Draw the fitted tree with the default rpart.plot settings.
rpart.plot(x = tree)
# Redraw the toy points, then overlay three dotted guide lines in the second
# palette colour: x = 4 (for y in 0..12), y = 8 (for x in 4..8) and
# y = 4 (for x in 0..4).
# NOTE(review): these presumably mark the split boundaries of the fitted
# tree -- confirm against the tree plot.
matplot(x = d0$x, y = d0$y, col = COL[1], cex = 2, pch = 1)
segments(x0 = c(4, 4, 0), y0 = c(0, 8, 4),
         x1 = c(4, 8, 4), y1 = c(12, 8, 4),
         lty = 3, col = COL[2])
text(x = d0$x, y = d0$y, labels = d0$z)

# Cross-validated error against cp for the toy-data tree.
plotcp(tree)
# Tree with age (年齢) as the response and the remaining passenger attributes
# as predictors, fitted with rpart's default method and control settings.
# NOTE(review): presumably a regression tree, assuming 年齢 is read as
# numeric -- confirm.
tree <- rpart(
  formula = 年齢 ~ 性別 + 客室等級 + 運賃 + 兄弟配偶者数 + 親子数 + 乗船地,
  data = d
)

# Two renderings of the same tree: node style `type = 5`, then the default
# node style with `branch.type = 5` wide branches.
rpart.plot(x = tree, type = 5)
rpart.plot(x = tree, branch.type = 5)
```