library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.4.2
## Loading required package: rpart
## Warning: package 'rpart' was built under R version 3.4.2
library(rpart)
# Read the data set from a CSV file picked through a file-selection dialog;
# the first row of the file supplies the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
# NOTE(review): file.choose() expects an interactive session, so this script
# presumably cannot run unattended (e.g. via Rscript) -- consider a fixed path.
Heart1 <- read.csv(file.choose(), header = TRUE)

# Preview the first rows to confirm the columns were parsed as expected.
head(Heart1)
##   SEX dth AGE FVC SPF T2D Smoke
## 1   2   1  47 254 124   0 FALSE
## 2   2   0  63 355 138   0 FALSE
## 3   2   0  44 492 105   0 FALSE
## 4   1   0  47 409 126   0  TRUE
## 5   2   0  64 361 152   0 FALSE
## 6   2   0  41 469 118   0  TRUE
# Compact structural overview: dimensions plus each column's type and
# leading values.
utils::str(Heart1)
## 'data.frame':    2537 obs. of  7 variables:
##  $ SEX  : int  2 2 2 1 2 2 1 1 1 1 ...
##  $ dth  : int  1 0 0 0 0 0 1 0 1 0 ...
##  $ AGE  : int  47 63 44 47 64 41 59 42 58 45 ...
##  $ FVC  : int  254 355 492 409 361 469 569 592 613 576 ...
##  $ SPF  : int  124 138 105 126 152 118 145 110 118 110 ...
##  $ T2D  : int  0 0 0 0 0 0 1 0 0 0 ...
##  $ Smoke: logi  FALSE FALSE FALSE TRUE FALSE TRUE ...
# Fit a classification tree for the outcome `dth` (coerced to a factor)
# using all six remaining columns of Heart1 as candidate predictors.
# NOTE(review): the xerror/xstd columns of the CP table come from rpart's
# internal cross-validation, which presumably draws random folds -- consider
# calling set.seed() beforehand if those numbers must be reproducible.
tree.0 <- rpart(
  formula = as.factor(dth) ~ SEX + AGE + Smoke + FVC + SPF + T2D,
  data = Heart1,
  method = "class"
)

# Show the fitted tree as an indented list of nodes and splits.
print(tree.0)
## n= 2537 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 2537 817 0 (0.6779661 0.3220339)  
##    2) AGE< 53.5 1691 346 0 (0.7953873 0.2046127) *
##    3) AGE>=53.5 846 375 1 (0.4432624 0.5567376)  
##      6) SEX>=1.5 511 248 0 (0.5146771 0.4853229)  
##       12) AGE< 58.5 242  90 0 (0.6280992 0.3719008) *
##       13) AGE>=58.5 269 111 1 (0.4126394 0.5873606)  
##         26) SPF< 134.5 77  33 0 (0.5714286 0.4285714) *
##         27) SPF>=134.5 192  67 1 (0.3489583 0.6510417) *
##      7) SEX< 1.5 335 112 1 (0.3343284 0.6656716) *
# Detailed report for the fitted tree: CP table, variable importance, and
# the primary/surrogate splits considered at every node.
summary(object = tree.0)
## Call:
## rpart(formula = as.factor(dth) ~ SEX + AGE + Smoke + FVC + SPF + 
##     T2D, data = Heart1, method = "class")
##   n= 2537 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.11750306      0 1.0000000 1.0000000 0.02880666
## 2 0.03794370      1 0.8824969 0.8886169 0.02786411
## 3 0.01346389      3 0.8066095 0.8592411 0.02758064
## 4 0.01000000      4 0.7931457 0.8543452 0.02753191
## 
## Variable importance
##   AGE   FVC   SPF   SEX Smoke 
##    70    13    10     6     1 
## 
## Node number 1: 2537 observations,    complexity param=0.1175031
##   predicted class=0  expected loss=0.3220339  P(node) =1
##     class counts:  1720   817
##    probabilities: 0.678 0.322 
##   left son=2 (1691 obs) right son=3 (846 obs)
##   Primary splits:
##       AGE < 53.5  to the left,  improve=139.83540, (0 missing)
##       SPF < 128.5 to the left,  improve= 62.23731, (0 missing)
##       FVC < 338.5 to the right, improve= 32.76665, (0 missing)
##       SEX < 1.5   to the right, improve= 23.96653, (0 missing)
##       T2D < 0.5   to the left,  improve= 19.99269, (0 missing)
##   Surrogate splits:
##       FVC < 380.5 to the right, agree=0.710, adj=0.130, (0 split)
##       SPF < 148.5 to the left,  agree=0.702, adj=0.106, (0 split)
## 
## Node number 2: 1691 observations
##   predicted class=0  expected loss=0.2046127  P(node) =0.6665353
##     class counts:  1345   346
##    probabilities: 0.795 0.205 
## 
## Node number 3: 846 observations,    complexity param=0.0379437
##   predicted class=1  expected loss=0.4432624  P(node) =0.3334647
##     class counts:   375   471
##    probabilities: 0.443 0.557 
##   left son=6 (511 obs) right son=7 (335 obs)
##   Primary splits:
##       SEX < 1.5   to the right, improve=13.162900, (0 missing)
##       AGE < 61.5  to the left,  improve=12.991040, (0 missing)
##       SPF < 157   to the left,  improve=12.033400, (0 missing)
##       T2D < 0.5   to the left,  improve= 4.432442, (0 missing)
##       FVC < 284.5 to the right, improve= 3.455516, (0 missing)
##   Surrogate splits:
##       FVC   < 461   to the left,  agree=0.837, adj=0.588, (0 split)
##       Smoke < 0.5   to the left,  agree=0.660, adj=0.140, (0 split)
##       SPF   < 124.5 to the right, agree=0.617, adj=0.033, (0 split)
## 
## Node number 6: 511 observations,    complexity param=0.0379437
##   predicted class=0  expected loss=0.4853229  P(node) =0.201419
##     class counts:   263   248
##    probabilities: 0.515 0.485 
##   left son=12 (242 obs) right son=13 (269 obs)
##   Primary splits:
##       AGE   < 58.5  to the left,  improve=11.82794000, (0 missing)
##       SPF   < 157   to the left,  improve= 9.51256200, (0 missing)
##       FVC   < 338.5 to the right, improve= 9.15322700, (0 missing)
##       T2D   < 0.5   to the left,  improve= 3.10593000, (0 missing)
##       Smoke < 0.5   to the right, improve= 0.01480172, (0 missing)
##   Surrogate splits:
##       FVC   < 366   to the right, agree=0.616, adj=0.190, (0 split)
##       SPF   < 135.5 to the left,  agree=0.605, adj=0.165, (0 split)
##       Smoke < 0.5   to the right, agree=0.528, adj=0.004, (0 split)
## 
## Node number 7: 335 observations
##   predicted class=1  expected loss=0.3343284  P(node) =0.1320457
##     class counts:   112   223
##    probabilities: 0.334 0.666 
## 
## Node number 12: 242 observations
##   predicted class=0  expected loss=0.3719008  P(node) =0.09538825
##     class counts:   152    90
##    probabilities: 0.628 0.372 
## 
## Node number 13: 269 observations,    complexity param=0.01346389
##   predicted class=1  expected loss=0.4126394  P(node) =0.1060307
##     class counts:   111   158
##    probabilities: 0.413 0.587 
##   left son=26 (77 obs) right son=27 (192 obs)
##   Primary splits:
##       SPF   < 134.5 to the left,  improve=5.4401830, (0 missing)
##       FVC   < 338.5 to the right, improve=4.4143120, (0 missing)
##       AGE   < 64.5  to the left,  improve=2.2616840, (0 missing)
##       T2D   < 0.5   to the left,  improve=1.3989210, (0 missing)
##       Smoke < 0.5   to the left,  improve=0.2828306, (0 missing)
##   Surrogate splits:
##       FVC < 507.5 to the right, agree=0.717, adj=0.013, (0 split)
## 
## Node number 26: 77 observations
##   predicted class=0  expected loss=0.4285714  P(node) =0.03035081
##     class counts:    44    33
##    probabilities: 0.571 0.429 
## 
## Node number 27: 192 observations
##   predicted class=1  expected loss=0.3489583  P(node) =0.07567994
##     class counts:    67   125
##    probabilities: 0.349 0.651
# Draw the fitted tree on the active graphics device with default settings.
rpart.plot(x = tree.0)