#### Load packages
library(rpart)
# rpart implements classification and regression decision trees; we will call
# its rpart() and predict() functions.
library(rpart.plot)
# rpart.plot contains various functions for visualizing decision trees; we
# will call its prp() function.
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
## 键入'rattle()'去轻摇、晃动、翻滚你的数据。
# rattle provides data-mining tools and an interactive graphical interface; we
# will call its fancyRpartPlot() function to visualize the decision tree.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# We will use the pipe operator (%>%) that dplyr makes available.
library(ggplot2)
# We will call ggplot() and related functions from it.
library(sampling) 
# sampling contains various sampling functions; here we will call its
# strata() function.
library(vip) 
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
library(pdp) 
# Open the kyphosis data in the spreadsheet viewer. View() needs an
# interactive session with a display, so it is skipped when the script runs
# non-interactively (e.g. via Rscript) instead of aborting the whole run.
if (interactive()) {
  View(kyphosis)
}

# Baseline classification tree: Kyphosis explained by Age, Number and Start,
# using rpart's default settings.
fit <- rpart(Kyphosis ~ Age + Number + Start, data = kyphosis)

# Same formula, but with class priors of 0.65 / 0.35 and the "information"
# splitting criterion.
fit2 <- rpart(
  Kyphosis ~ Age + Number + Start,
  data = kyphosis,
  parms = list(prior = c(0.65, 0.35), split = "information")
)

# Same formula with the complexity parameter raised to cp = 0.05.
fit3 <- rpart(
  Kyphosis ~ Age + Number + Start,
  data = kyphosis,
  control = rpart.control(cp = 0.05)
)
# Draw the baseline tree and the prior/information tree with base graphics,
# labelling every node; use.n = TRUE is passed through to text() for each tree.
# Optional, run beforehand: par(mfrow = c(1, 2), xpd = NA)
# (otherwise on some devices the text is clipped).
invisible(lapply(list(fit, fit2), function(tree) {
  plot(tree)
  text(tree, use.n = TRUE)
}))

# Print the fitted baseline tree as text: one line per node showing the split,
# n, loss, predicted class and class probabilities.
fit 
## n= 81 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 81 17 absent (0.79012346 0.20987654)  
##    2) Start>=8.5 62  6 absent (0.90322581 0.09677419)  
##      4) Start>=14.5 29  0 absent (1.00000000 0.00000000) *
##      5) Start< 14.5 33  6 absent (0.81818182 0.18181818)  
##       10) Age< 55 12  0 absent (1.00000000 0.00000000) *
##       11) Age>=55 21  6 absent (0.71428571 0.28571429)  
##         22) Age>=111 14  2 absent (0.85714286 0.14285714) *
##         23) Age< 111 7  3 present (0.42857143 0.57142857) *
##    3) Start< 8.5 19  8 present (0.42105263 0.57894737) *
# Draw the baseline tree with rattle's fancyRpartPlot(), passing empty strings
# for both the title and the subtitle.
fancyRpartPlot(fit, main = "", sub = "", type = 2, digits = 3)

# Variable-importance plot for the three predictors of the baseline tree.
# The former `bar = TRUE` argument is left out: current vip releases select
# the geometry through `geom` (default "col", i.e. bars) and no longer have a
# `bar` parameter, while older releases defaulted to bar = TRUE, so the
# resulting bar chart is the same either way.
vip(fit, num_features = 3L)

# Detailed report for the baseline tree: the CP table, variable importance,
# and the primary and surrogate splits considered at each node.
summary(fit)
## Call:
## rpart(formula = Kyphosis ~ Age + Number + Start, data = kyphosis)
##   n= 81 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.17647059      0 1.0000000 1.000000 0.2155872
## 2 0.01960784      1 0.8235294 1.117647 0.2243268
## 3 0.01000000      4 0.7647059 1.117647 0.2243268
## 
## Variable importance
##  Start    Age Number 
##     64     24     12 
## 
## Node number 1: 81 observations,    complexity param=0.1764706
##   predicted class=absent   expected loss=0.2098765  P(node) =1
##     class counts:    64    17
##    probabilities: 0.790 0.210 
##   left son=2 (62 obs) right son=3 (19 obs)
##   Primary splits:
##       Start  < 8.5  to the right, improve=6.762330, (0 missing)
##       Number < 5.5  to the left,  improve=2.866795, (0 missing)
##       Age    < 39.5 to the left,  improve=2.250212, (0 missing)
##   Surrogate splits:
##       Number < 6.5  to the left,  agree=0.802, adj=0.158, (0 split)
## 
## Node number 2: 62 observations,    complexity param=0.01960784
##   predicted class=absent   expected loss=0.09677419  P(node) =0.7654321
##     class counts:    56     6
##    probabilities: 0.903 0.097 
##   left son=4 (29 obs) right son=5 (33 obs)
##   Primary splits:
##       Start  < 14.5 to the right, improve=1.0205280, (0 missing)
##       Age    < 55   to the left,  improve=0.6848635, (0 missing)
##       Number < 4.5  to the left,  improve=0.2975332, (0 missing)
##   Surrogate splits:
##       Number < 3.5  to the left,  agree=0.645, adj=0.241, (0 split)
##       Age    < 16   to the left,  agree=0.597, adj=0.138, (0 split)
## 
## Node number 3: 19 observations
##   predicted class=present  expected loss=0.4210526  P(node) =0.2345679
##     class counts:     8    11
##    probabilities: 0.421 0.579 
## 
## Node number 4: 29 observations
##   predicted class=absent   expected loss=0  P(node) =0.3580247
##     class counts:    29     0
##    probabilities: 1.000 0.000 
## 
## Node number 5: 33 observations,    complexity param=0.01960784
##   predicted class=absent   expected loss=0.1818182  P(node) =0.4074074
##     class counts:    27     6
##    probabilities: 0.818 0.182 
##   left son=10 (12 obs) right son=11 (21 obs)
##   Primary splits:
##       Age    < 55   to the left,  improve=1.2467530, (0 missing)
##       Start  < 12.5 to the right, improve=0.2887701, (0 missing)
##       Number < 3.5  to the right, improve=0.1753247, (0 missing)
##   Surrogate splits:
##       Start  < 9.5  to the left,  agree=0.758, adj=0.333, (0 split)
##       Number < 5.5  to the right, agree=0.697, adj=0.167, (0 split)
## 
## Node number 10: 12 observations
##   predicted class=absent   expected loss=0  P(node) =0.1481481
##     class counts:    12     0
##    probabilities: 1.000 0.000 
## 
## Node number 11: 21 observations,    complexity param=0.01960784
##   predicted class=absent   expected loss=0.2857143  P(node) =0.2592593
##     class counts:    15     6
##    probabilities: 0.714 0.286 
##   left son=22 (14 obs) right son=23 (7 obs)
##   Primary splits:
##       Age    < 111  to the right, improve=1.71428600, (0 missing)
##       Start  < 12.5 to the right, improve=0.79365080, (0 missing)
##       Number < 3.5  to the right, improve=0.07142857, (0 missing)
## 
## Node number 22: 14 observations
##   predicted class=absent   expected loss=0.1428571  P(node) =0.1728395
##     class counts:    12     2
##    probabilities: 0.857 0.143 
## 
## Node number 23: 7 observations
##   predicted class=present  expected loss=0.4285714  P(node) =0.08641975
##     class counts:     3     4
##    probabilities: 0.429 0.571
# Class probabilities (absent / present) for each of the 81 observations; no
# newdata is supplied, so these are predictions for the rows used in fitting.
predict(fit, type = "prob")
##       absent   present
## 1  0.4210526 0.5789474
## 2  0.8571429 0.1428571
## 3  0.4210526 0.5789474
## 4  0.4210526 0.5789474
## 5  1.0000000 0.0000000
## 6  1.0000000 0.0000000
## 7  1.0000000 0.0000000
## 8  1.0000000 0.0000000
## 9  1.0000000 0.0000000
## 10 0.4285714 0.5714286
## 11 0.4285714 0.5714286
## 12 1.0000000 0.0000000
## 13 0.4210526 0.5789474
## 14 1.0000000 0.0000000
## 15 1.0000000 0.0000000
## 16 1.0000000 0.0000000
## 17 1.0000000 0.0000000
## 18 0.8571429 0.1428571
## 19 1.0000000 0.0000000
## 20 1.0000000 0.0000000
## 21 1.0000000 0.0000000
## 22 0.4210526 0.5789474
## 23 0.4285714 0.5714286
## 24 0.4210526 0.5789474
## 25 0.4210526 0.5789474
## 26 1.0000000 0.0000000
## 27 0.4210526 0.5789474
## 28 0.4285714 0.5714286
## 29 1.0000000 0.0000000
## 30 1.0000000 0.0000000
## 31 1.0000000 0.0000000
## 32 0.8571429 0.1428571
## 33 0.8571429 0.1428571
## 34 1.0000000 0.0000000
## 35 0.8571429 0.1428571
## 36 1.0000000 0.0000000
## 37 1.0000000 0.0000000
## 38 0.4210526 0.5789474
## 39 1.0000000 0.0000000
## 40 0.4285714 0.5714286
## 41 0.4210526 0.5789474
## 42 1.0000000 0.0000000
## 43 0.4210526 0.5789474
## 44 0.4210526 0.5789474
## 45 1.0000000 0.0000000
## 46 0.8571429 0.1428571
## 47 1.0000000 0.0000000
## 48 0.8571429 0.1428571
## 49 0.4210526 0.5789474
## 50 0.8571429 0.1428571
## 51 0.4285714 0.5714286
## 52 1.0000000 0.0000000
## 53 0.4210526 0.5789474
## 54 1.0000000 0.0000000
## 55 1.0000000 0.0000000
## 56 1.0000000 0.0000000
## 57 1.0000000 0.0000000
## 58 0.4210526 0.5789474
## 59 1.0000000 0.0000000
## 60 0.4285714 0.5714286
## 61 0.4210526 0.5789474
## 62 0.4210526 0.5789474
## 63 0.4210526 0.5789474
## 64 1.0000000 0.0000000
## 65 1.0000000 0.0000000
## 66 1.0000000 0.0000000
## 67 1.0000000 0.0000000
## 68 0.8571429 0.1428571
## 69 1.0000000 0.0000000
## 70 1.0000000 0.0000000
## 71 0.8571429 0.1428571
## 72 0.8571429 0.1428571
## 73 1.0000000 0.0000000
## 74 0.8571429 0.1428571
## 75 1.0000000 0.0000000
## 76 1.0000000 0.0000000
## 77 0.8571429 0.1428571
## 78 1.0000000 0.0000000
## 79 0.8571429 0.1428571
## 80 0.4210526 0.5789474
## 81 1.0000000 0.0000000
# Predicted class label for every observation (no newdata supplied); kept in
# t_pred for the accuracy calculation later in the script.
t_pred <- predict(fit, type = "class")

# Cross-validated predictions from the baseline tree; the result is only
# printed, not stored.
xpred.rpart(fit)
##    0.58823529 0.05882353     0.01400280
## 1           1          2   7.862057e-67
## 2           1          1  2.052268e-289
## 3           1          1  4.277148e-314
## 4           1          2  2.803072e-309
## 5           1          1  1.379807e-309
## 6           1          1  1.249431e-310
## 7           1          1  1.195108e-310
## 8           1          1  3.259386e-311
## 9           1          1  1.792662e-310
## 10          1          1  1.575370e-310
## 11          1          1  6.518771e-311
## 12          1          1  7.605233e-311
## 13          1          2  5.975540e-311
## 14          1          1  2.172924e-311
## 15          1          1  1.379807e-309
## 16          1          1  6.835427e-304
## 17          1          1  1.390671e-309
## 18          1          1  1.254927e-321
## 19          1          1  2.781342e-309
## 20          1          1  8.224288e-317
## 21          1          1  4.778312e-299
## 22          1          2  3.030718e+180
## 23          1          1  8.224288e-317
## 24          1          2  8.344027e-309
## 25          1          2  2.173140e-312
## 26          1          1  5.389869e-312
## 27          1          2  5.389869e-312
## 28          1          1  5.562706e-309
## 29          1          1  2.781342e-309
## 30          1          1  8.804498e+199
## 31          1          1  1.379807e-309
## 32          1          1  1.385345e-309
## 33          1          1  4.674283e+180
## 34          1          1  8.814426e-280
## 35          1          2 -5.727786e+250
## 36          1          1  2.483463e-265
## 37          1          1  5.389869e-312
## 38          1          1  5.389869e-312
## 39          1          1  5.389869e-312
## 40          1          1  5.389869e-312
## 41          1          1  1.485397e-313
## 42          1          1  3.370629e+160
## 43          1          2  3.225804e-319
## 44          1          2  3.259386e-311
## 45          1          1  3.212612e-319
## 46          1          1  3.259386e-311
## 47          1          1  3.259386e-311
## 48          1          1  3.212612e-319
## 49          1          2  3.212612e-319
## 50          1          1  3.212612e-319
## 51          1          1  3.212612e-319
## 52          1          1  1.320051e-309
## 53          1          2  1.086462e-310
## 54          1          1  1.320051e-309
## 55          1          1  5.432309e-312
## 56          1          1  1.280580e+214
## 57          1          1  4.782145e+180
## 58          1          2  7.068615e-304
## 59          1          1  7.072175e-304
## 60          1          1  6.953356e-309
## 61          1          2  1.390671e-309
## 62          1          1  5.562685e-309
## 63          1          2  1.807873e-308
## 64          1          1  4.172013e-309
## 65          1          1  1.809502e-308
## 66          1          1  2.803072e-309
## 67          1          1  4.782145e+180
## 68          1          1  1.809502e-308
## 69          1          1   0.000000e+00
## 70          1          1  2.225074e-308
## 71          1          1  1.839967e+223
## 72          1          1  5.562685e-309
## 73          1          1  1.251604e-308
## 74          1          1  1.062714e-314
## 75          1          1  2.781342e-309
## 76          1          1  8.344027e-309
## 77          1          1  1.112537e-308
## 78          1          1  1.390671e-308
## 79          1          1  1.807873e-308
## 80          1          1  1.946940e-308
## 81          1          1  2.225074e-308
# Partial dependence of the baseline tree on Start. It is computed once and
# reused for both the plot object and the printed table, instead of calling
# partial() twice with identical arguments.
# NOTE(review): per the pdp documentation, for classification models partial()
# defaults to the centred-logit scale for the first class (prob = FALSE,
# which.class = 1), which would explain yhat values outside [0, 1] -- confirm
# that this is the intended scale.
pd_start <- partial(fit, pred.var = "Start")

# ggplot object of the partial-dependence curve; it is stored here but not
# drawn in this part of the script.
p1 <- autoplot(pd_start)

pd_start
##    Start       yhat
## 1      1 -0.1592269
## 2      2 -0.1592269
## 3      3 -0.1592269
## 4      4 -0.1592269
## 5      5 -0.1592269
## 6      6 -0.1592269
## 7      7 -0.1592269
## 8      8 -0.1592269
## 9     10  6.7835065
## 10    11  6.7835065
## 11    12  6.7835065
## 12    13  6.7835065
## 13    14  6.7835065
## 14    15 18.0218267
## 15    16 18.0218267
## 16    18 18.0218267
# Accuracy: share of observations whose predicted class equals the observed
# Kyphosis label. t_pred comes from predict(fit) without newdata, so this is
# the accuracy on the data the tree was fitted to.
mean(t_pred == kyphosis$Kyphosis)
## [1] 0.8395062