Árbol de decisión

Abraham Castañon - A01747966 Angie Zerón - A00834060

library(tree)
library(dplyr)
library(mlbench)
data(BreastCancer)
?BreastCancer
train=sample(seq(length(BreastCancer$Class)),length(BreastCancer$Class)*0.7,replace=FALSE)
modelo <- tree(BreastCancer$Class ~ ., data = BreastCancer, method = "class")
summary(modelo)
## 
## Classification tree:
## tree(formula = BreastCancer$Class ~ ., data = BreastCancer, method = "class")
## Variables actually used in tree construction:
## [1] "Cell.size"       "Bare.nuclei"     "Epith.c.size"    "Cl.thickness"   
## [5] "Normal.nucleoli"
## Number of terminal nodes:  12 
## Residual mean deviance:  0.09421 = 63.22 / 671 
## Misclassification error rate: 0.02635 = 18 / 683
plot(modelo, cex = 0.8)  
text(modelo, pretty = 0, cex = 0.4)

Ver los valores del árbol

modelo
## node), split, n, deviance, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 683 884.400 benign ( 0.650073 0.349927 )  
##    2) Cell.size: 1,2 418 108.900 benign ( 0.971292 0.028708 )  
##      4) Bare.nuclei: 1,2,3 395  25.130 benign ( 0.994937 0.005063 )  
##        8) Epith.c.size: 1,2,3,4 389   0.000 benign ( 1.000000 0.000000 ) *
##        9) Epith.c.size: 5,6,8,10 6   7.638 benign ( 0.666667 0.333333 ) *
##      5) Bare.nuclei: 4,5,6,7,8,10 23  31.490 benign ( 0.565217 0.434783 )  
##       10) Cl.thickness: 1,2,3 11   0.000 benign ( 1.000000 0.000000 ) *
##       11) Cl.thickness: 4,5,6,7,9,10 12  10.810 malignant ( 0.166667 0.833333 ) *
##    3) Cell.size: 3,4,5,6,7,8,9,10 265 217.900 malignant ( 0.143396 0.856604 )  
##      6) Cell.size: 3,4 90 120.300 malignant ( 0.388889 0.611111 )  
##       12) Bare.nuclei: 1,2 30  27.030 benign ( 0.833333 0.166667 )  
##         24) Normal.nucleoli: 1,2,6,8 22   0.000 benign ( 1.000000 0.000000 ) *
##         25) Normal.nucleoli: 3,9,10 8  10.590 malignant ( 0.375000 0.625000 ) *
##       13) Bare.nuclei: 3,4,5,7,8,9,10 60  54.070 malignant ( 0.166667 0.833333 )  
##         26) Cl.thickness: 3,4,6 12  16.300 benign ( 0.583333 0.416667 ) *
##         27) Cl.thickness: 1,2,5,7,8,9,10 48  22.440 malignant ( 0.062500 0.937500 )  
##           54) Normal.nucleoli: 2,7,8 6   8.318 malignant ( 0.500000 0.500000 ) *
##           55) Normal.nucleoli: 1,3,4,5,6,9,10 42   0.000 malignant ( 0.000000 1.000000 ) *
##      7) Cell.size: 5,6,7,8,9,10 175  30.350 malignant ( 0.017143 0.982857 )  
##       14) Normal.nucleoli: 2,4,7 31  19.710 malignant ( 0.096774 0.903226 )  
##         28) Cl.thickness: 5,6 7   9.561 malignant ( 0.428571 0.571429 ) *
##         29) Cl.thickness: 1,3,4,7,8,10 24   0.000 malignant ( 0.000000 1.000000 ) *
##       15) Normal.nucleoli: 1,3,5,6,8,9,10 144   0.000 malignant ( 0.000000 1.000000 ) *

Predecir el otro 30% de los datos

set.seed(123)
test_indices <- sample(seq(length(BreastCancer$Class)), length(BreastCancer$Class) * 0.30, replace = FALSE)
test_data <- BreastCancer[test_indices, ]
tree_pred <- predict(modelo, test_data, type = "class")
summary(tree_pred)
##    benign malignant 
##       130        79

Matriz de confusion

true_labels <- BreastCancer$Class[test_indices]
mat.conf <- table(tree_pred, true_labels)
mat.conf
##            true_labels
## tree_pred   benign malignant
##   benign       127         3
##   malignant      2        77
(127+77)/(130+79)*100
## [1] 97.60766

Nuestro modelo de árbol de decisón puede predecir con un 98% de efectividad datos de este dataset