Criando uma árvore de regressão
# Fit a tree that predicts `condition` from every other column of `dados`,
# then print the resulting node listing.
modelo <- rpart(formula = condition ~ ., data = dados)
print(modelo)
## n= 297
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 297 73.804710 0.46127950
## 2) thal< 0.5 164 28.652440 0.22560980
## 4) ca< 0.5 115 11.530430 0.11304350
## 8) trestbps< 155.5 108 8.250000 0.08333333
## 16) age< 59.5 88 2.897727 0.03409091 *
## 17) age>=59.5 20 4.200000 0.30000000 *
## 9) trestbps>=155.5 7 1.714286 0.57142860 *
## 5) ca>=0.5 49 12.244900 0.48979590
## 10) cp< 2.5 29 5.310345 0.24137930
## 20) chol>=237.5 18 1.777778 0.11111110 *
## 21) chol< 237.5 11 2.727273 0.45454550 *
## 11) cp>=2.5 20 2.550000 0.85000000 *
## 3) thal>=0.5 133 24.812030 0.75187970
## 6) cp< 2.5 44 10.977270 0.47727270
## 12) ca< 0.5 27 5.629630 0.29629630 *
## 13) ca>=0.5 17 3.058824 0.76470590 *
## 7) cp>=2.5 89 8.876404 0.88764040
## 14) oldpeak< 0.55 21 4.952381 0.61904760
## 28) thalach< 149 7 1.428571 0.28571430 *
## 29) thalach>=149 14 2.357143 0.78571430 *
## 15) oldpeak>=0.55 68 1.941176 0.97058820 *
# Print the detailed fit report for `modelo`: the complexity-parameter (CP)
# table, variable importance, and the primary/surrogate splits of each node.
summary(modelo)
## Call:
## rpart(formula = condition ~ ., data = dados)
## n= 297
##
## CP nsplit rel error xerror xstd
## 1 0.27559547 0 1.0000000 1.0096678 0.009449153
## 2 0.06718206 1 0.7244045 0.7961621 0.055240707
## 3 0.06608123 2 0.6572225 0.7752732 0.059725392
## 4 0.05940749 3 0.5911412 0.7433409 0.060997661
## 5 0.03101183 4 0.5317337 0.6468543 0.062591822
## 6 0.02686613 5 0.5007219 0.6607396 0.064795336
## 7 0.02122018 6 0.4738558 0.6318627 0.063699779
## 8 0.01580748 7 0.4526356 0.6231714 0.064170324
## 9 0.01561245 8 0.4368281 0.6184456 0.063017165
## 10 0.01091115 9 0.4212157 0.6274092 0.063541535
## 11 0.01000000 10 0.4103045 0.6303617 0.064251571
##
## Variable importance
## thal cp thalach oldpeak ca exang sex trestbps
## 24 18 13 11 9 9 6 4
## age slope chol
## 3 2 1
##
## Node number 1: 297 observations, complexity param=0.2755955
## mean=0.4612795, MSE=0.2485007
## left son=2 (164 obs) right son=3 (133 obs)
## Primary splits:
## thal < 0.5 to the left, improve=0.2755955, (0 missing)
## cp < 2.5 to the left, improve=0.2570845, (0 missing)
## ca < 0.5 to the left, improve=0.2338015, (0 missing)
## thalach < 147.5 to the right, improve=0.1779688, (0 missing)
## exang < 0.5 to the left, improve=0.1775404, (0 missing)
## Surrogate splits:
## thalach < 150.5 to the right, agree=0.680, adj=0.286, (0 split)
## cp < 2.5 to the left, agree=0.673, adj=0.271, (0 split)
## exang < 0.5 to the left, agree=0.670, adj=0.263, (0 split)
## sex < 0.5 to the left, agree=0.663, adj=0.248, (0 split)
## oldpeak < 1.55 to the left, agree=0.663, adj=0.248, (0 split)
##
## Node number 2: 164 observations, complexity param=0.06608123
## mean=0.2256098, MSE=0.17471
## left son=4 (115 obs) right son=5 (49 obs)
## Primary splits:
## ca < 0.5 to the left, improve=0.1702161, (0 missing)
## cp < 2.5 to the left, improve=0.1411009, (0 missing)
## age < 54.5 to the left, improve=0.1098348, (0 missing)
## thalach < 119.5 to the right, improve=0.1073298, (0 missing)
## oldpeak < 2.1 to the left, improve=0.1073298, (0 missing)
## Surrogate splits:
## age < 64.5 to the left, agree=0.738, adj=0.122, (0 split)
## thalach < 134 to the right, agree=0.726, adj=0.082, (0 split)
## cp < 0.5 to the right, agree=0.707, adj=0.020, (0 split)
## oldpeak < 1.7 to the left, agree=0.707, adj=0.020, (0 split)
##
## Node number 3: 133 observations, complexity param=0.06718206
## mean=0.7518797, MSE=0.1865566
## left son=6 (44 obs) right son=7 (89 obs)
## Primary splits:
## cp < 2.5 to the left, improve=0.1998366, (0 missing)
## ca < 0.5 to the left, improve=0.1875884, (0 missing)
## oldpeak < 0.7 to the left, improve=0.1478252, (0 missing)
## thalach < 144.5 to the right, improve=0.1243218, (0 missing)
## exang < 0.5 to the left, improve=0.1065615, (0 missing)
## Surrogate splits:
## thalach < 172 to the right, agree=0.722, adj=0.159, (0 split)
## exang < 0.5 to the left, agree=0.692, adj=0.068, (0 split)
## age < 66.5 to the right, agree=0.684, adj=0.045, (0 split)
## trestbps < 106.5 to the left, agree=0.684, adj=0.045, (0 split)
##
## Node number 4: 115 observations, complexity param=0.02122018
## mean=0.1130435, MSE=0.1002647
## left son=8 (108 obs) right son=9 (7 obs)
## Primary splits:
## trestbps < 155.5 to the left, improve=0.13582740, (0 missing)
## age < 58.5 to the left, improve=0.13092180, (0 missing)
## thalach < 161.5 to the right, improve=0.10106150, (0 missing)
## oldpeak < 1.7 to the left, improve=0.07821590, (0 missing)
## chol < 326 to the left, improve=0.05117016, (0 missing)
##
## Node number 5: 49 observations, complexity param=0.05940749
## mean=0.4897959, MSE=0.2498959
## left son=10 (29 obs) right son=11 (20 obs)
## Primary splits:
## cp < 2.5 to the left, improve=0.3580718, (0 missing)
## sex < 0.5 to the left, improve=0.2247166, (0 missing)
## slope < 0.5 to the left, improve=0.1868534, (0 missing)
## thalach < 119.5 to the right, improve=0.1736111, (0 missing)
## exang < 0.5 to the left, improve=0.1401667, (0 missing)
## Surrogate splits:
## thalach < 125.5 to the right, agree=0.755, adj=0.4, (0 split)
## exang < 0.5 to the left, agree=0.755, adj=0.4, (0 split)
## trestbps < 115 to the right, agree=0.714, adj=0.3, (0 split)
## oldpeak < 0.85 to the left, agree=0.714, adj=0.3, (0 split)
## slope < 0.5 to the left, agree=0.714, adj=0.3, (0 split)
##
## Node number 6: 44 observations, complexity param=0.03101183
## mean=0.4772727, MSE=0.2494835
## left son=12 (27 obs) right son=13 (17 obs)
## Primary splits:
## ca < 0.5 to the left, improve=0.20850530, (0 missing)
## thalach < 143.5 to the right, improve=0.19056250, (0 missing)
## slope < 0.5 to the left, improve=0.12937000, (0 missing)
## chol < 207.5 to the left, improve=0.11053600, (0 missing)
## oldpeak < 1.95 to the left, improve=0.09307568, (0 missing)
## Surrogate splits:
## cp < 1.5 to the left, agree=0.705, adj=0.235, (0 split)
## thalach < 125.5 to the right, agree=0.705, adj=0.235, (0 split)
## oldpeak < 1.95 to the left, agree=0.682, adj=0.176, (0 split)
## age < 67.5 to the left, agree=0.659, adj=0.118, (0 split)
## chol < 190.5 to the right, agree=0.636, adj=0.059, (0 split)
##
## Node number 7: 89 observations, complexity param=0.02686613
## mean=0.8876404, MSE=0.09973488
## left son=14 (21 obs) right son=15 (68 obs)
## Primary splits:
## oldpeak < 0.55 to the left, improve=0.22338400, (0 missing)
## ca < 0.5 to the left, improve=0.10664000, (0 missing)
## chol < 236.5 to the left, improve=0.09308414, (0 missing)
## slope < 0.5 to the left, improve=0.06381013, (0 missing)
## restecg < 0.5 to the left, improve=0.06286748, (0 missing)
## Surrogate splits:
## thalach < 146.5 to the right, agree=0.831, adj=0.286, (0 split)
## slope < 0.5 to the left, agree=0.798, adj=0.143, (0 split)
## trestbps < 109 to the left, agree=0.787, adj=0.095, (0 split)
##
## Node number 8: 108 observations, complexity param=0.01561245
## mean=0.08333333, MSE=0.07638889
## left son=16 (88 obs) right son=17 (20 obs)
## Primary splits:
## age < 59.5 to the left, improve=0.13966940, (0 missing)
## thalach < 160.5 to the right, improve=0.07613280, (0 missing)
## oldpeak < 1.7 to the left, improve=0.07438017, (0 missing)
## chol < 323 to the left, improve=0.03716086, (0 missing)
## slope < 0.5 to the left, improve=0.02951594, (0 missing)
## Surrogate splits:
## thalach < 119 to the right, agree=0.861, adj=0.25, (0 split)
## cp < 0.5 to the right, agree=0.824, adj=0.05, (0 split)
##
## Node number 9: 7 observations
## mean=0.5714286, MSE=0.244898
##
## Node number 10: 29 observations, complexity param=0.01091115
## mean=0.2413793, MSE=0.1831153
## left son=20 (18 obs) right son=21 (11 obs)
## Primary splits:
## chol < 237.5 to the right, improve=0.15164630, (0 missing)
## age < 55.5 to the left, improve=0.12000500, (0 missing)
## cp < 1.5 to the right, improve=0.12000500, (0 missing)
## sex < 0.5 to the left, improve=0.09103397, (0 missing)
## trestbps < 139 to the right, improve=0.07556080, (0 missing)
## Surrogate splits:
## cp < 1.5 to the right, agree=0.690, adj=0.182, (0 split)
## trestbps < 153 to the left, agree=0.690, adj=0.182, (0 split)
## sex < 0.5 to the left, agree=0.655, adj=0.091, (0 split)
## thalach < 128 to the right, agree=0.655, adj=0.091, (0 split)
##
## Node number 11: 20 observations
## mean=0.85, MSE=0.1275
##
## Node number 12: 27 observations
## mean=0.2962963, MSE=0.2085048
##
## Node number 13: 17 observations
## mean=0.7647059, MSE=0.1799308
##
## Node number 14: 21 observations, complexity param=0.01580748
## mean=0.6190476, MSE=0.2358277
## left son=28 (7 obs) right son=29 (14 obs)
## Primary splits:
## thalach < 149 to the left, improve=0.2355769, (0 missing)
## chol < 237.5 to the left, improve=0.2315705, (0 missing)
## ca < 0.5 to the left, improve=0.1262238, (0 missing)
## restecg < 1 to the left, improve=0.1201923, (0 missing)
## age < 50 to the right, improve=0.1201923, (0 missing)
## Surrogate splits:
## ca < 2.5 to the right, agree=0.762, adj=0.286, (0 split)
## age < 62.5 to the right, agree=0.714, adj=0.143, (0 split)
## trestbps < 137.5 to the right, agree=0.714, adj=0.143, (0 split)
## oldpeak < 0.05 to the right, agree=0.714, adj=0.143, (0 split)
## slope < 0.5 to the right, agree=0.714, adj=0.143, (0 split)
##
## Node number 15: 68 observations
## mean=0.9705882, MSE=0.02854671
##
## Node number 16: 88 observations
## mean=0.03409091, MSE=0.03292872
##
## Node number 17: 20 observations
## mean=0.3, MSE=0.21
##
## Node number 20: 18 observations
## mean=0.1111111, MSE=0.09876543
##
## Node number 21: 11 observations
## mean=0.4545455, MSE=0.2479339
##
## Node number 28: 7 observations
## mean=0.2857143, MSE=0.2040816
##
## Node number 29: 14 observations
## mean=0.7857143, MSE=0.1683673
# Draw the fitted tree and label its nodes.
# xpd = TRUE clips drawing to the figure region instead of the plot region,
# so node labels that spill past the plot area are not cut off.
# par() returns the previous settings; they are restored after drawing so
# later plots are not affected by this change.
par_anterior <- par(xpd = TRUE)
plot(modelo, compress = TRUE)
text(modelo, use.n = TRUE)  # use.n = TRUE adds observation counts to labels
par(par_anterior)

Predição para um novo paciente que chegou
# Four hypothetical patients that are identical except for the 5th value
# (180, 250, 300, 450). Values are positional and must follow the order of
# the predictor columns of `dados`.
# NOTE(review): the 4th value is 12 in every row; if that position is
# `trestbps` (resting blood pressure) this looks like a typo (120?) --
# confirm with the author before relying on these rows.
novopaciente <- rbind.data.frame(
  c(25, 0, 0, 12, 180, 1, 0, 130, 1, 1.1, 0, 0, 0),
  c(25, 0, 0, 12, 250, 1, 0, 130, 1, 1.1, 0, 0, 0),
  c(25, 0, 0, 12, 300, 1, 0, 130, 1, 1.1, 0, 0, 0),
  c(25, 0, 0, 12, 450, 1, 0, 130, 1, 1.1, 0, 0, 0)
)

# Predictor names: every column of `dados` except the response `condition`.
# Selecting by name avoids assuming the response sits at position 14.
nomes <- setdiff(colnames(dados), "condition")

# Fail early if the hand-typed rows do not match the number of predictors.
stopifnot(length(nomes) == ncol(novopaciente))
nomes
## [1] "age" "sex" "cp" "trestbps" "chol" "fbs"
## [7] "restecg" "thalach" "exang" "oldpeak" "slope" "ca"
## [13] "thal"
# Label the new rows with the predictor names so predict() can match them
# to the variables used by the tree, then print the predicted values.
novopaciente <- setNames(novopaciente, nomes)
predicoes <- predict(modelo, newdata = novopaciente, type = "vector")
print(predicoes)
## 1 2 3 4
## 0.03409091 0.03409091 0.03409091 0.03409091