library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.4.2
## Loading required package: rpart
## Warning: package 'rpart' was built under R version 3.4.2
library(rpart)
# Read the data set from a CSV file selected through a file-picker prompt.
# The first row of the file is treated as column names.
# `TRUE` is written out in full: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
# NOTE(review): file.choose() prompts the user, so this line presumably
# cannot run unattended -- swap in a fixed path if batch execution is needed.
Heart1 <- read.csv(file.choose(), header = TRUE)
# Show the first few rows as a quick sanity check of the import.
head(Heart1)
## SEX dth AGE FVC SPF T2D Smoke
## 1 2 1 47 254 124 0 FALSE
## 2 2 0 63 355 138 0 FALSE
## 3 2 0 44 492 105 0 FALSE
## 4 1 0 47 409 126 0 TRUE
## 5 2 0 64 361 152 0 FALSE
## 6 2 0 41 469 118 0 TRUE
# Compact overview of the data frame: dimensions plus each column's type
# and leading values.
str(object = Heart1)
## 'data.frame': 2537 obs. of 7 variables:
## $ SEX : int 2 2 2 1 2 2 1 1 1 1 ...
## $ dth : int 1 0 0 0 0 0 1 0 1 0 ...
## $ AGE : int 47 63 44 47 64 41 59 42 58 45 ...
## $ FVC : int 254 355 492 409 361 469 569 592 613 576 ...
## $ SPF : int 124 138 105 126 152 118 145 110 118 110 ...
## $ T2D : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Smoke: logi FALSE FALSE FALSE TRUE FALSE TRUE ...
# Fit a classification tree with dth (coerced to a factor) as the response
# and the six remaining columns of Heart1 as candidate predictors.
tree.0 <- rpart(
  formula = as.factor(dth) ~ SEX + AGE + Smoke + FVC + SPF + T2D,
  data = Heart1,
  method = "class"
)
# Auto-print the fitted tree (one line per node).
tree.0
## n= 2537
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2537 817 0 (0.6779661 0.3220339)
## 2) AGE< 53.5 1691 346 0 (0.7953873 0.2046127) *
## 3) AGE>=53.5 846 375 1 (0.4432624 0.5567376)
## 6) SEX>=1.5 511 248 0 (0.5146771 0.4853229)
## 12) AGE< 58.5 242 90 0 (0.6280992 0.3719008) *
## 13) AGE>=58.5 269 111 1 (0.4126394 0.5873606)
## 26) SPF< 134.5 77 33 0 (0.5714286 0.4285714) *
## 27) SPF>=134.5 192 67 1 (0.3489583 0.6510417) *
## 7) SEX< 1.5 335 112 1 (0.3343284 0.6656716) *
# Detailed report for the fitted tree: complexity-parameter table, variable
# importance, and the primary/surrogate splits considered at each node.
summary(object = tree.0)
## Call:
## rpart(formula = as.factor(dth) ~ SEX + AGE + Smoke + FVC + SPF +
## T2D, data = Heart1, method = "class")
## n= 2537
##
## CP nsplit rel error xerror xstd
## 1 0.11750306 0 1.0000000 1.0000000 0.02880666
## 2 0.03794370 1 0.8824969 0.8886169 0.02786411
## 3 0.01346389 3 0.8066095 0.8592411 0.02758064
## 4 0.01000000 4 0.7931457 0.8543452 0.02753191
##
## Variable importance
## AGE FVC SPF SEX Smoke
## 70 13 10 6 1
##
## Node number 1: 2537 observations, complexity param=0.1175031
## predicted class=0 expected loss=0.3220339 P(node) =1
## class counts: 1720 817
## probabilities: 0.678 0.322
## left son=2 (1691 obs) right son=3 (846 obs)
## Primary splits:
## AGE < 53.5 to the left, improve=139.83540, (0 missing)
## SPF < 128.5 to the left, improve= 62.23731, (0 missing)
## FVC < 338.5 to the right, improve= 32.76665, (0 missing)
## SEX < 1.5 to the right, improve= 23.96653, (0 missing)
## T2D < 0.5 to the left, improve= 19.99269, (0 missing)
## Surrogate splits:
## FVC < 380.5 to the right, agree=0.710, adj=0.130, (0 split)
## SPF < 148.5 to the left, agree=0.702, adj=0.106, (0 split)
##
## Node number 2: 1691 observations
## predicted class=0 expected loss=0.2046127 P(node) =0.6665353
## class counts: 1345 346
## probabilities: 0.795 0.205
##
## Node number 3: 846 observations, complexity param=0.0379437
## predicted class=1 expected loss=0.4432624 P(node) =0.3334647
## class counts: 375 471
## probabilities: 0.443 0.557
## left son=6 (511 obs) right son=7 (335 obs)
## Primary splits:
## SEX < 1.5 to the right, improve=13.162900, (0 missing)
## AGE < 61.5 to the left, improve=12.991040, (0 missing)
## SPF < 157 to the left, improve=12.033400, (0 missing)
## T2D < 0.5 to the left, improve= 4.432442, (0 missing)
## FVC < 284.5 to the right, improve= 3.455516, (0 missing)
## Surrogate splits:
## FVC < 461 to the left, agree=0.837, adj=0.588, (0 split)
## Smoke < 0.5 to the left, agree=0.660, adj=0.140, (0 split)
## SPF < 124.5 to the right, agree=0.617, adj=0.033, (0 split)
##
## Node number 6: 511 observations, complexity param=0.0379437
## predicted class=0 expected loss=0.4853229 P(node) =0.201419
## class counts: 263 248
## probabilities: 0.515 0.485
## left son=12 (242 obs) right son=13 (269 obs)
## Primary splits:
## AGE < 58.5 to the left, improve=11.82794000, (0 missing)
## SPF < 157 to the left, improve= 9.51256200, (0 missing)
## FVC < 338.5 to the right, improve= 9.15322700, (0 missing)
## T2D < 0.5 to the left, improve= 3.10593000, (0 missing)
## Smoke < 0.5 to the right, improve= 0.01480172, (0 missing)
## Surrogate splits:
## FVC < 366 to the right, agree=0.616, adj=0.190, (0 split)
## SPF < 135.5 to the left, agree=0.605, adj=0.165, (0 split)
## Smoke < 0.5 to the right, agree=0.528, adj=0.004, (0 split)
##
## Node number 7: 335 observations
## predicted class=1 expected loss=0.3343284 P(node) =0.1320457
## class counts: 112 223
## probabilities: 0.334 0.666
##
## Node number 12: 242 observations
## predicted class=0 expected loss=0.3719008 P(node) =0.09538825
## class counts: 152 90
## probabilities: 0.628 0.372
##
## Node number 13: 269 observations, complexity param=0.01346389
## predicted class=1 expected loss=0.4126394 P(node) =0.1060307
## class counts: 111 158
## probabilities: 0.413 0.587
## left son=26 (77 obs) right son=27 (192 obs)
## Primary splits:
## SPF < 134.5 to the left, improve=5.4401830, (0 missing)
## FVC < 338.5 to the right, improve=4.4143120, (0 missing)
## AGE < 64.5 to the left, improve=2.2616840, (0 missing)
## T2D < 0.5 to the left, improve=1.3989210, (0 missing)
## Smoke < 0.5 to the left, improve=0.2828306, (0 missing)
## Surrogate splits:
## FVC < 507.5 to the right, agree=0.717, adj=0.013, (0 split)
##
## Node number 26: 77 observations
## predicted class=0 expected loss=0.4285714 P(node) =0.03035081
## class counts: 44 33
## probabilities: 0.571 0.429
##
## Node number 27: 192 observations
## predicted class=1 expected loss=0.3489583 P(node) =0.07567994
## class counts: 67 125
## probabilities: 0.349 0.651
# Draw the fitted tree using rpart.plot's default settings.
rpart.plot(x = tree.0)
