# Make sure every package used by this script is installed AND attached.
# The previous `if (!require(pkg)) install.packages(pkg)` pattern installed a
# missing package but never loaded it, so the first run on a fresh machine
# failed at the first call into that package.
for (pkg in c("rpart", "rpart.plot", "DT", "kableExtra")) {
  # requireNamespace() only checks availability; it does not attach.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() stops with an error if the package still cannot be loaded,
  # instead of silently returning FALSE the way require() does.
  library(pkg, character.only = TRUE)
}
# Download the bike-rental data set and browse it as an interactive table,
# five rows per page.
d <- read.csv("https://stats.dip.jp/01_ds/data/bike_rental.csv")
DT::datatable(d, options = list(pageLength = 5))

# Regression tree (method = "anova") for the rental count, using the calendar
# and weather columns as predictors. cp = 0.01 is the complexity threshold
# passed to rpart for growing the tree.
tree <- rpart(
  formula = レンタル数 ~ 季節 + 月 + 祝日 + 曜日 + 休日 + 天気 + 気温 + 湿度 + 風速,
  data = d,
  method = "anova",
  cp = 0.01
)

# Draw the fitted tree.
rpart.plot(tree)

# Turn the fitted tree into a rule table (one row per rule) and render it.
# cover = TRUE adds the percentage of observations covered by each rule.
# Spelled out as TRUE because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
r <- rpart.rules(tree, cover = TRUE)
kable(r)
## | ノード | レンタル数 | 条件 | cover |
## |---:|---:|:---|---:|
## | 8  | 1663 | when 気温 < 4.1 & 季節 is 春 or 冬 | 9% |
## | 18 | 2261 | when 気温 is 4.1 to 12.3 & 湿度 >= 68 & 季節 is 春 or 冬 | 6% |
## | 38 | 2723 | when 気温 is 4.1 to 7.8 & 湿度 < 68 & 季節 is 春 or 冬 | 7% |
## | 6  | 3382 | when 気温 >= 12.3 & 湿度 >= 85 | 4% |
## | 39 | 3941 | when 気温 is 7.8 to 12.3 & 湿度 < 68 & 季節 is 春 or 冬 | 4% |
## | 5  | 4176 | when 気温 < 12.3 & 季節 is 秋 | 14% |
## | 28 | 4626 | when 気温 >= 12.3 & 湿度 is 69 to 85 & 風速 >= 12 | 8% |
## | 60 | 4966 | when 気温 is 12.3 to 17.1 & 湿度 < 69 & 月 < 9 | 6% |
## | 29 | 5644 | when 気温 >= 12.3 & 湿度 is 69 to 85 & 風速 < 12 | 10% |
## | 61 | 5885 | when 気温 >= 17.1 & 湿度 < 69 & 月 < 9 | 24% |
## | 31 | 6615 | when 気温 >= 12.3 & 湿度 < 69 & 月 >= 9 | 7% |
# Print the complexity-parameter (CP) table of the fitted regression tree.
# The `##` lines below are the output recorded from one run. In that output
# the smallest xerror (0.48405) is on row 5: CP = 0.015319, 4 splits.
# NOTE(review): xerror/xstd presumably come from rpart's cross-validation and
# may differ slightly between runs - confirm before quoting exact values.
printcp(tree)
##
## Regression tree:
## rpart(formula = レンタル数 ~ 季節 + 月 + 祝日 + 曜日 +
## 休日 + 天気 + 気温 + 湿度 + 風速, data = d, method = "anova",
## cp = 0.01)
##
## Variables actually used in tree construction:
## [1] 気温 季節 月 湿度 風速
##
## Root node error: 2739535392/731 = 3747654
##
## n= 731
##
## CP nsplit rel error xerror xstd
## 1 0.390509 0 1.00000 1.00184 0.040333
## 2 0.070302 1 0.60949 0.61165 0.026904
## 3 0.052880 2 0.53919 0.54164 0.026469
## 4 0.022313 3 0.48631 0.49647 0.021846
## 5 0.015319 4 0.46400 0.48405 0.020973
## 6 0.012892 5 0.44868 0.50022 0.021689
## 7 0.012226 6 0.43579 0.49500 0.021629
## 8 0.010671 7 0.42356 0.49870 0.021881
## 9 0.010087 8 0.41289 0.49680 0.022405
## 10 0.010000 10 0.39271 0.49642 0.022635
# Plot the same CP table graphically to help choose a pruning threshold.
plotcp(tree)

# Prune hard: with cp = 0.1 only the first split of the recorded CP table
# (CP = 0.390509) exceeds the threshold, so tree2 is a very small tree.
# NOTE(review): branch.type = 5 selects one of rpart.plot's wide-branch
# styles; see ?prp for what the branch width encodes.
tree2 <- prune(tree, cp = 0.1)
rpart.plot(tree2, branch.type = 5)

# Prune at cp = 0.01, the same value the tree was grown with. The rule table
# recorded for tree3 matches the one recorded for the unpruned tree, so this
# step appears to leave the tree unchanged.
tree3 <- prune(tree, cp = 0.01)
rpart.plot(tree3, branch.type = 5)

# Rule table for tree3. cover = TRUE adds the share of observations per rule;
# TRUE is used instead of the reassignable shorthand `T`.
kable(rpart.rules(tree3, cover = TRUE))
## | ノード | レンタル数 | 条件 | cover |
## |---:|---:|:---|---:|
## | 8  | 1663 | when 気温 < 4.1 & 季節 is 春 or 冬 | 9% |
## | 18 | 2261 | when 気温 is 4.1 to 12.3 & 湿度 >= 68 & 季節 is 春 or 冬 | 6% |
## | 38 | 2723 | when 気温 is 4.1 to 7.8 & 湿度 < 68 & 季節 is 春 or 冬 | 7% |
## | 6  | 3382 | when 気温 >= 12.3 & 湿度 >= 85 | 4% |
## | 39 | 3941 | when 気温 is 7.8 to 12.3 & 湿度 < 68 & 季節 is 春 or 冬 | 4% |
## | 5  | 4176 | when 気温 < 12.3 & 季節 is 秋 | 14% |
## | 28 | 4626 | when 気温 >= 12.3 & 湿度 is 69 to 85 & 風速 >= 12 | 8% |
## | 60 | 4966 | when 気温 is 12.3 to 17.1 & 湿度 < 69 & 月 < 9 | 6% |
## | 29 | 5644 | when 気温 >= 12.3 & 湿度 is 69 to 85 & 風速 < 12 | 10% |
## | 61 | 5885 | when 気温 >= 17.1 & 湿度 < 69 & 月 < 9 | 24% |
## | 31 | 6615 | when 気温 >= 12.3 & 湿度 < 69 & 月 >= 9 | 7% |
気温が12.3℃以上、湿度が69%未満、9月以降(月 >= 9)のときにレンタル数が最大(平均 6615)になる。
# Download the iris data set and browse it as an interactive table,
# five rows per page.
d2 <- read.csv("https://stats.dip.jp/01_ds/data/iris.csv")
DT::datatable(d2, options = list(pageLength = 5))

# Classification tree (method = "class") predicting Species from the four
# sepal/petal measurements, grown with complexity threshold cp = 0.005.
tree4 <- rpart(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
               data = d2, method = "class", cp = 0.005)
rpart.plot(tree4, type = 5)

# Print the tree as rules. cover = TRUE adds the share of observations per
# rule; TRUE is used instead of the reassignable shorthand `T`.
rpart.rules(tree4, cover = TRUE)
rpart.plot(tree4, branch.type = 5)

# Print the complexity-parameter (CP) table of the iris classification tree.
# The `##` lines below are the output recorded from one run. In that output
# only Petal.Length and Petal.Width are used for splitting, and the smallest
# xerror (0.08) is on the last row: CP = 0.005, 2 splits.
# NOTE(review): xerror/xstd presumably come from rpart's cross-validation and
# may differ slightly between runs - confirm before quoting exact values.
printcp(tree4)
##
## Classification tree:
## rpart(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length +
## Petal.Width, data = d2, method = "class", cp = 0.005)
##
## Variables actually used in tree construction:
## [1] Petal.Length Petal.Width
##
## Root node error: 100/150 = 0.66667
##
## n= 150
##
## CP nsplit rel error xerror xstd
## 1 0.500 0 1.00 1.19 0.049592
## 2 0.440 1 0.50 0.67 0.060888
## 3 0.005 2 0.06 0.08 0.027520
# Plot the same CP table graphically to help choose a pruning threshold.
plotcp(tree4)

# Prune at cp = 0.005, the same value tree4 was grown with and the last row
# of its recorded CP table, then plot the result.
tree5 <- prune(tree4, cp = 0.005)
rpart.plot(tree5, branch.type = 5)

# Rule table for tree5. cover = TRUE adds the share of observations per rule;
# TRUE is used instead of the reassignable shorthand `T`.
kable(rpart.rules(tree5, cover = TRUE))
## | ノード | Species | [setosa versicolor virginica] | 条件 | cover |
## |---:|:---|:---|:---|---:|
## | 2 | setosa     | [1.00 .00 .00] | when Petal.Length < 2.5 | 33% |
## | 6 | versicolor | [ .00 .91 .09] | when Petal.Length >= 2.5 & Petal.Width < 1.8 | 36% |
## | 7 | virginica  | [ .00 .02 .98] | when Petal.Length >= 2.5 & Petal.Width >= 1.8 | 31% |
# One new flower to classify, described by the same four measurements the
# tree was trained on.
d.new <- data.frame(
  Sepal.Length = 4.0,
  Sepal.Width  = 3.0,
  Petal.Length = 3.0,
  Petal.Width  = 1.0
)

# Predict with the iris tree; the recorded output below shows one probability
# per species for the new observation.
rpart.predict(tree4, newdata = d.new)
## setosa versicolor virginica
## 1 0 0.9074074 0.09259259
予測の結果、アイリスの種類は90%以上の確率でversicolorである