Árbol de decisión: es un algoritmo de aprendizaje supervisado que presenta un modelo gráfico con las decisiones y sus consecuencias.

Instalar paquetes y llamar librerías

# install.packages("cluster")  #Analisis de Agrupamiento
library(cluster)
# install.packages("ggplot2") #Graficar
library(ggplot2)
# install.packages("data.table") #Manejo de muchos datos
library(data.table)
# install.packages("factoextra") #Gráfica optimización de numeros cluster
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# install.packages("rpart") # Fits the decision tree (rpart() below)
library(rpart)
# install.packages("rpart.plot") # Draws the fitted tree (rpart.plot() below)
library(rpart.plot)
library(factoextra)

Abrir la base de datos

# Locate the breast-cancer CSV. The original author's absolute path is tried
# first so existing behavior is unchanged on that machine; a copy in the
# working directory is used as a fallback so the report can be knitted
# elsewhere.
rutas_candidatas <- c(
  "/Users/mariajoseflores/Downloads/cancer_de_mama.csv",
  "cancer_de_mama.csv"
)
# Subsetting an empty character vector with [1] yields NA, checked below.
ruta_datos <- rutas_candidatas[file.exists(rutas_candidatas)][1]
if (is.na(ruta_datos)) {
  stop(
    "No se encontró 'cancer_de_mama.csv' en: ",
    paste(rutas_candidatas, collapse = ", "),
    call. = FALSE
  )
}
cancer <- read.csv(ruta_datos)

Entender la base de datos

# Per-column summary of the full dataset (quartiles, mean, min and max for
# the numeric columns)
summary(cancer)
##   diagnosis          radius_mean      texture_mean   perimeter_mean  
##  Length:569         Min.   : 6.981   Min.   : 9.71   Min.   : 43.79  
##  Class :character   1st Qu.:11.700   1st Qu.:16.17   1st Qu.: 75.17  
##  Mode  :character   Median :13.370   Median :18.84   Median : 86.24  
##                     Mean   :14.127   Mean   :19.29   Mean   : 91.97  
##                     3rd Qu.:15.780   3rd Qu.:21.80   3rd Qu.:104.10  
##                     Max.   :28.110   Max.   :39.28   Max.   :188.50  
##    area_mean      smoothness_mean   compactness_mean  concavity_mean   
##  Min.   : 143.5   Min.   :0.05263   Min.   :0.01938   Min.   :0.00000  
##  1st Qu.: 420.3   1st Qu.:0.08637   1st Qu.:0.06492   1st Qu.:0.02956  
##  Median : 551.1   Median :0.09587   Median :0.09263   Median :0.06154  
##  Mean   : 654.9   Mean   :0.09636   Mean   :0.10434   Mean   :0.08880  
##  3rd Qu.: 782.7   3rd Qu.:0.10530   3rd Qu.:0.13040   3rd Qu.:0.13070  
##  Max.   :2501.0   Max.   :0.16340   Max.   :0.34540   Max.   :0.42680  
##  concave_points_mean symmetry_mean    fractal_dimension_mean   radius_se     
##  Min.   :0.00000     Min.   :0.1060   Min.   :0.04996        Min.   :0.1115  
##  1st Qu.:0.02031     1st Qu.:0.1619   1st Qu.:0.05770        1st Qu.:0.2324  
##  Median :0.03350     Median :0.1792   Median :0.06154        Median :0.3242  
##  Mean   :0.04892     Mean   :0.1812   Mean   :0.06280        Mean   :0.4052  
##  3rd Qu.:0.07400     3rd Qu.:0.1957   3rd Qu.:0.06612        3rd Qu.:0.4789  
##  Max.   :0.20120     Max.   :0.3040   Max.   :0.09744        Max.   :2.8730  
##    texture_se      perimeter_se       area_se        smoothness_se     
##  Min.   :0.3602   Min.   : 0.757   Min.   :  6.802   Min.   :0.001713  
##  1st Qu.:0.8339   1st Qu.: 1.606   1st Qu.: 17.850   1st Qu.:0.005169  
##  Median :1.1080   Median : 2.287   Median : 24.530   Median :0.006380  
##  Mean   :1.2169   Mean   : 2.866   Mean   : 40.337   Mean   :0.007041  
##  3rd Qu.:1.4740   3rd Qu.: 3.357   3rd Qu.: 45.190   3rd Qu.:0.008146  
##  Max.   :4.8850   Max.   :21.980   Max.   :542.200   Max.   :0.031130  
##  compactness_se      concavity_se     concave_points_se   symmetry_se      
##  Min.   :0.002252   Min.   :0.00000   Min.   :0.000000   Min.   :0.007882  
##  1st Qu.:0.013080   1st Qu.:0.01509   1st Qu.:0.007638   1st Qu.:0.015160  
##  Median :0.020450   Median :0.02589   Median :0.010930   Median :0.018730  
##  Mean   :0.025478   Mean   :0.03189   Mean   :0.011796   Mean   :0.020542  
##  3rd Qu.:0.032450   3rd Qu.:0.04205   3rd Qu.:0.014710   3rd Qu.:0.023480  
##  Max.   :0.135400   Max.   :0.39600   Max.   :0.052790   Max.   :0.078950  
##  fractal_dimension_se  radius_worst   texture_worst   perimeter_worst 
##  Min.   :0.0008948    Min.   : 7.93   Min.   :12.02   Min.   : 50.41  
##  1st Qu.:0.0022480    1st Qu.:13.01   1st Qu.:21.08   1st Qu.: 84.11  
##  Median :0.0031870    Median :14.97   Median :25.41   Median : 97.66  
##  Mean   :0.0037949    Mean   :16.27   Mean   :25.68   Mean   :107.26  
##  3rd Qu.:0.0045580    3rd Qu.:18.79   3rd Qu.:29.72   3rd Qu.:125.40  
##  Max.   :0.0298400    Max.   :36.04   Max.   :49.54   Max.   :251.20  
##    area_worst     smoothness_worst  compactness_worst concavity_worst 
##  Min.   : 185.2   Min.   :0.07117   Min.   :0.02729   Min.   :0.0000  
##  1st Qu.: 515.3   1st Qu.:0.11660   1st Qu.:0.14720   1st Qu.:0.1145  
##  Median : 686.5   Median :0.13130   Median :0.21190   Median :0.2267  
##  Mean   : 880.6   Mean   :0.13237   Mean   :0.25427   Mean   :0.2722  
##  3rd Qu.:1084.0   3rd Qu.:0.14600   3rd Qu.:0.33910   3rd Qu.:0.3829  
##  Max.   :4254.0   Max.   :0.22260   Max.   :1.05800   Max.   :1.2520  
##  concave_points_worst symmetry_worst   fractal_dimension_worst
##  Min.   :0.00000      Min.   :0.1565   Min.   :0.05504        
##  1st Qu.:0.06493      1st Qu.:0.2504   1st Qu.:0.07146        
##  Median :0.09993      Median :0.2822   Median :0.08004        
##  Mean   :0.11461      Mean   :0.2901   Mean   :0.08395        
##  3rd Qu.:0.16140      3rd Qu.:0.3179   3rd Qu.:0.09208        
##  Max.   :0.29100      Max.   :0.6638   Max.   :0.20750
# Column types and a preview of the first values of each column
str(cancer)
## 'data.frame':    569 obs. of  31 variables:
##  $ diagnosis              : chr  "M" "M" "M" "M" ...
##  $ radius_mean            : num  18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean           : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num  1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : num  0.1184 0.0847 0.1096 0.1425 0.1003 ...
##  $ compactness_mean       : num  0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num  0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave_points_mean    : num  0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num  0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num  0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num  1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num  0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num  8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num  153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num  0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num  0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num  0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave_points_se      : num  0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num  0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num  0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num  25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num  17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num  184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num  2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num  0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num  0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num  0.712 0.242 0.45 0.687 0.4 ...
##  $ concave_points_worst   : num  0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num  0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num  0.1189 0.089 0.0876 0.173 0.0768 ...
# First rows of the data
head(cancer)
##   diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean
## 1         M       17.99        10.38         122.80    1001.0         0.11840
## 2         M       20.57        17.77         132.90    1326.0         0.08474
## 3         M       19.69        21.25         130.00    1203.0         0.10960
## 4         M       11.42        20.38          77.58     386.1         0.14250
## 5         M       20.29        14.34         135.10    1297.0         0.10030
## 6         M       12.45        15.70          82.57     477.1         0.12780
##   compactness_mean concavity_mean concave_points_mean symmetry_mean
## 1          0.27760         0.3001             0.14710        0.2419
## 2          0.07864         0.0869             0.07017        0.1812
## 3          0.15990         0.1974             0.12790        0.2069
## 4          0.28390         0.2414             0.10520        0.2597
## 5          0.13280         0.1980             0.10430        0.1809
## 6          0.17000         0.1578             0.08089        0.2087
##   fractal_dimension_mean radius_se texture_se perimeter_se area_se
## 1                0.07871    1.0950     0.9053        8.589  153.40
## 2                0.05667    0.5435     0.7339        3.398   74.08
## 3                0.05999    0.7456     0.7869        4.585   94.03
## 4                0.09744    0.4956     1.1560        3.445   27.23
## 5                0.05883    0.7572     0.7813        5.438   94.44
## 6                0.07613    0.3345     0.8902        2.217   27.19
##   smoothness_se compactness_se concavity_se concave_points_se symmetry_se
## 1      0.006399        0.04904      0.05373           0.01587     0.03003
## 2      0.005225        0.01308      0.01860           0.01340     0.01389
## 3      0.006150        0.04006      0.03832           0.02058     0.02250
## 4      0.009110        0.07458      0.05661           0.01867     0.05963
## 5      0.011490        0.02461      0.05688           0.01885     0.01756
## 6      0.007510        0.03345      0.03672           0.01137     0.02165
##   fractal_dimension_se radius_worst texture_worst perimeter_worst area_worst
## 1             0.006193        25.38         17.33          184.60     2019.0
## 2             0.003532        24.99         23.41          158.80     1956.0
## 3             0.004571        23.57         25.53          152.50     1709.0
## 4             0.009208        14.91         26.50           98.87      567.7
## 5             0.005115        22.54         16.67          152.20     1575.0
## 6             0.005082        15.47         23.75          103.40      741.6
##   smoothness_worst compactness_worst concavity_worst concave_points_worst
## 1           0.1622            0.6656          0.7119               0.2654
## 2           0.1238            0.1866          0.2416               0.1860
## 3           0.1444            0.4245          0.4504               0.2430
## 4           0.2098            0.8663          0.6869               0.2575
## 5           0.1374            0.2050          0.4000               0.1625
## 6           0.1791            0.5249          0.5355               0.1741
##   symmetry_worst fractal_dimension_worst
## 1         0.4601                 0.11890
## 2         0.2750                 0.08902
## 3         0.3613                 0.08758
## 4         0.6638                 0.17300
## 5         0.2364                 0.07678
## 6         0.3985                 0.12440

Crear árbol de decisión

# Keep only the four mean-size measurements and the response column
cancer <- subset(
  cancer,
  select = c(radius_mean, texture_mean, perimeter_mean, area_mean, diagnosis)
)

# Store the response as a factor instead of plain character
cancer[["diagnosis"]] <- factor(cancer[["diagnosis"]])

# Inspect the reduced data frame
str(cancer)
## 'data.frame':    569 obs. of  5 variables:
##  $ radius_mean   : num  18 20.6 19.7 11.4 20.3 ...
##  $ texture_mean  : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean: num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean     : num  1001 1326 1203 386 1297 ...
##  $ diagnosis     : Factor w/ 2 levels "B","M": 2 2 2 2 2 2 2 2 2 2 ...
# Fit a classification tree predicting diagnosis from every other column.
# method = "class" is stated explicitly rather than left for rpart to guess
# from the response type; the response is a factor, so the fitted tree is
# the same.
arbol_cancer <- rpart(diagnosis ~ ., data = cancer, method = "class")

# Plot the fitted tree
rpart.plot(arbol_cancer)

Conclusión

El árbol muestra que los tumores con área pequeña suelen ser benignos, mientras que los de mayor área y textura más alta tienden a ser malignos.

LS0tCnRpdGxlOiAiQ2FuY2VyIgphdXRob3I6ICJNYXJpYSBKb3NlIEZsb3JlcyIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgdGhlbWU6IGNvc21vCiAgICAKLS0tCgohW10oaHR0cHM6Ly9pZXhlLmVkdS5teC93cC1jb250ZW50L3VwbG9hZHMvMjAyMi8wMi9JTUdfMDkwNS5naWYpCgrDgXJib2wgZGUgZGVjaXNpw7NuOiBlcyB1biBhbGdvcml0bW8gZGUgYXByZW5kaXphamUgc3VwZXJ2aXNhZG8gcXVlIHByZXNlbnRhIHVuIG3Ds2RlbG8gZ3LDoWZpY28gY29uIGxhcyBkZWNpc2lvbmVzIHkgc3VzIGNvbnNlY3VlbmNpYXMgCgojIDxzcGFuIHN0eWxlID0gImNvbG9yOmJsdWU7Ij4gSW5zdGFsYXIgcGFxdWV0ZXMgeSBsbGFtYXIgbGlicmVyw61hcyA8L3NwYW4+CmBgYHtyfQojIGluc3RhbGwucGFja2FnZXMoImNsdXN0ZXIiKSAgI0FuYWxpc2lzIGRlIEFncnVwYW1pZW50bwpsaWJyYXJ5KGNsdXN0ZXIpCiMgaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpICNHcmFmaWNhcgpsaWJyYXJ5KGdncGxvdDIpCiMgaW5zdGFsbC5wYWNrYWdlcygiZGF0YS50YWJsZSIpICNNYW5lam8gZGUgbXVjaG9zIGRhdG9zCmxpYnJhcnkoZGF0YS50YWJsZSkKIyBpbnN0YWxsLnBhY2thZ2VzKCJmYWN0b2V4dHJhIikgI0dyw6FmaWNhIG9wdGltaXphY2nDs24gZGUgbnVtZXJvcyBjbHVzdGVyCmxpYnJhcnkoZmFjdG9leHRyYSkKbGlicmFyeShkcGx5cikKIyBpbnN0YWxsLnBhY2thZ2VzKCJycGFydCIpICNHcsOhZmljYSBvcHRpbWl6YWNpw7NuIGRlIG51bWVyb3MgY2x1c3RlcgpsaWJyYXJ5KHJwYXJ0KQojIGluc3RhbGwucGFja2FnZXMoInJwYXJ0LnBsb3QiKSAjR3LDoWZpY2Egb3B0aW1pemFjacOzbiBkZSBudW1lcm9zIGNsdXN0ZXIKbGlicmFyeShycGFydC5wbG90KQpsaWJyYXJ5KGZhY3RvZXh0cmEpCmBgYAoKIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjpibHVlOyI+IEFicmlyIGxhIGJhc2UgZGUgZGF0b3MgPC9zcGFuPgpgYGB7cn0KY2FuY2VyIDwtIHJlYWQuY3N2KCIvVXNlcnMvbWFyaWFqb3NlZmxvcmVzL0Rvd25sb2Fkcy9jYW5jZXJfZGVfbWFtYS5jc3YiKQpgYGAKCiMgPHNwYW4gc3R5bGUgPSAiY29sb3I6Ymx1ZTsiPiAgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcyA8L3NwYW4+CmBgYHtyfQpzdW1tYXJ5KGNhbmNlcikKc3RyKGNhbmNlcikKaGVhZChjYW5jZXIpCmBgYAojIDxzcGFuIHN0eWxlID0gImNvbG9yOmJsdWU7Ij4gIENyZWFyIMOhcmJvbCBkZSBkZWNpc2nDs24gPC9zcGFuPgpgYGB7cn0KIyBTZWxlY2Npb25hciB2YXJpYWJsZXMKY2FuY2VyIDwtIGNhbmNlcltjKCJyYWRpdXNfbWVhbiIsICJ0ZXh0dXJlX21lYW4iLCAicGVyaW1ldGVyX21lYW4iLCAiYXJlYV9tZWFuIiwgImRpYWdub3NpcyIpXQoKIyBDb252ZXJ0aXIgZGlhZ25vc2lzIGEgZmFjdG9yCmNhbmNlciRkaWFnbm9z
aXMgPC0gYXMuZmFjdG9yKGNhbmNlciRkaWFnbm9zaXMpCgojIFZlciBlc3RydWN0dXJhCnN0cihjYW5jZXIpCgojIENyZWFyIMOhcmJvbCBkZSBkZWNpc2nDs24KYXJib2xfY2FuY2VyIDwtIHJwYXJ0KGRpYWdub3NpcyB+IC4sIGRhdGEgPSBjYW5jZXIpCgojIEdyYWZpY2FyIMOhcmJvbApycGFydC5wbG90KGFyYm9sX2NhbmNlcikKCmBgYAoKIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjpibHVlOyI+ICBDb25jbHVzacOzbiA8L3NwYW4+CkVsIMOhcmJvbCBtdWVzdHJhIHF1ZSBsb3MgdHVtb3JlcyBjb24gw6FyZWEgcGVxdWXDsWEgc3VlbGVuIHNlciBiZW5pZ25vcywgbWllbnRyYXMgcXVlIGxvcyBkZSBtYXlvciDDoXJlYSB5IHRleHR1cmEgbcOhcyBhbHRhIHRpZW5kZW4gYSBzZXIgbWFsaWdub3MuCg==