Abraham Castañon - A01747966 Angie Zerón - A00834060
library(tree)
library(dplyr)
library(mlbench)
data(BreastCancer)
?BreastCancer
train=sample(seq(length(BreastCancer$Class)),length(BreastCancer$Class)*0.7,replace=FALSE)
modelo <- tree(BreastCancer$Class ~ ., data = BreastCancer, method = "class")
summary(modelo)
##
## Classification tree:
## tree(formula = BreastCancer$Class ~ ., data = BreastCancer, method = "class")
## Variables actually used in tree construction:
## [1] "Cell.size" "Bare.nuclei" "Epith.c.size" "Cl.thickness"
## [5] "Normal.nucleoli"
## Number of terminal nodes: 12
## Residual mean deviance: 0.09421 = 63.22 / 671
## Misclassification error rate: 0.02635 = 18 / 683
plot(modelo, cex = 0.8)
text(modelo, pretty = 0, cex = 0.4)
Ver los valores del árbol
modelo
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 683 884.400 benign ( 0.650073 0.349927 )
## 2) Cell.size: 1,2 418 108.900 benign ( 0.971292 0.028708 )
## 4) Bare.nuclei: 1,2,3 395 25.130 benign ( 0.994937 0.005063 )
## 8) Epith.c.size: 1,2,3,4 389 0.000 benign ( 1.000000 0.000000 ) *
## 9) Epith.c.size: 5,6,8,10 6 7.638 benign ( 0.666667 0.333333 ) *
## 5) Bare.nuclei: 4,5,6,7,8,10 23 31.490 benign ( 0.565217 0.434783 )
## 10) Cl.thickness: 1,2,3 11 0.000 benign ( 1.000000 0.000000 ) *
## 11) Cl.thickness: 4,5,6,7,9,10 12 10.810 malignant ( 0.166667 0.833333 ) *
## 3) Cell.size: 3,4,5,6,7,8,9,10 265 217.900 malignant ( 0.143396 0.856604 )
## 6) Cell.size: 3,4 90 120.300 malignant ( 0.388889 0.611111 )
## 12) Bare.nuclei: 1,2 30 27.030 benign ( 0.833333 0.166667 )
## 24) Normal.nucleoli: 1,2,6,8 22 0.000 benign ( 1.000000 0.000000 ) *
## 25) Normal.nucleoli: 3,9,10 8 10.590 malignant ( 0.375000 0.625000 ) *
## 13) Bare.nuclei: 3,4,5,7,8,9,10 60 54.070 malignant ( 0.166667 0.833333 )
## 26) Cl.thickness: 3,4,6 12 16.300 benign ( 0.583333 0.416667 ) *
## 27) Cl.thickness: 1,2,5,7,8,9,10 48 22.440 malignant ( 0.062500 0.937500 )
## 54) Normal.nucleoli: 2,7,8 6 8.318 malignant ( 0.500000 0.500000 ) *
## 55) Normal.nucleoli: 1,3,4,5,6,9,10 42 0.000 malignant ( 0.000000 1.000000 ) *
## 7) Cell.size: 5,6,7,8,9,10 175 30.350 malignant ( 0.017143 0.982857 )
## 14) Normal.nucleoli: 2,4,7 31 19.710 malignant ( 0.096774 0.903226 )
## 28) Cl.thickness: 5,6 7 9.561 malignant ( 0.428571 0.571429 ) *
## 29) Cl.thickness: 1,3,4,7,8,10 24 0.000 malignant ( 0.000000 1.000000 ) *
## 15) Normal.nucleoli: 1,3,5,6,8,9,10 144 0.000 malignant ( 0.000000 1.000000 ) *
Predecir el otro 30% de los datos
set.seed(123)
test_indices <- sample(seq(length(BreastCancer$Class)), length(BreastCancer$Class) * 0.30, replace = FALSE)
test_data <- BreastCancer[test_indices, ]
tree_pred <- predict(modelo, test_data, type = "class")
summary(tree_pred)
## benign malignant
## 130 79
Matriz de confusion
true_labels <- BreastCancer$Class[test_indices]
mat.conf <- table(tree_pred, true_labels)
mat.conf
## true_labels
## tree_pred benign malignant
## benign 127 3
## malignant 2 77
(127+77)/(130+79)*100
## [1] 97.60766
Nuestro modelo de árbol de decisón puede predecir con un 98% de efectividad datos de este dataset