JuveYell

Importar la base de datos

# Import the breast-cancer data set into `bd`.
# The earlier interactive file.choose() call was removed: its return value was
# never used, and it needs a user at the keyboard to answer the dialog.
# NOTE(review): the path below is absolute and machine-specific; consider a
# path relative to the project directory so the report renders elsewhere.
bd <- read.csv("C:\\Users\\Migue\\OneDrive\\Documentos\\R\\Primer bloque\\cancer_de_mama\\cancer_de_mama.csv")

Entender la base de datos

# Per-column descriptive statistics of the raw data frame.
summary(object = bd)
##   diagnosis          radius_mean      texture_mean   perimeter_mean  
##  Length:569         Min.   : 6.981   Min.   : 9.71   Min.   : 43.79  
##  Class :character   1st Qu.:11.700   1st Qu.:16.17   1st Qu.: 75.17  
##  Mode  :character   Median :13.370   Median :18.84   Median : 86.24  
##                     Mean   :14.127   Mean   :19.29   Mean   : 91.97  
##                     3rd Qu.:15.780   3rd Qu.:21.80   3rd Qu.:104.10  
##                     Max.   :28.110   Max.   :39.28   Max.   :188.50  
##    area_mean      smoothness_mean   compactness_mean  concavity_mean   
##  Min.   : 143.5   Min.   :0.05263   Min.   :0.01938   Min.   :0.00000  
##  1st Qu.: 420.3   1st Qu.:0.08637   1st Qu.:0.06492   1st Qu.:0.02956  
##  Median : 551.1   Median :0.09587   Median :0.09263   Median :0.06154  
##  Mean   : 654.9   Mean   :0.09636   Mean   :0.10434   Mean   :0.08880  
##  3rd Qu.: 782.7   3rd Qu.:0.10530   3rd Qu.:0.13040   3rd Qu.:0.13070  
##  Max.   :2501.0   Max.   :0.16340   Max.   :0.34540   Max.   :0.42680  
##  concave.points_mean symmetry_mean    fractal_dimension_mean   radius_se     
##  Min.   :0.00000     Min.   :0.1060   Min.   :0.04996        Min.   :0.1115  
##  1st Qu.:0.02031     1st Qu.:0.1619   1st Qu.:0.05770        1st Qu.:0.2324  
##  Median :0.03350     Median :0.1792   Median :0.06154        Median :0.3242  
##  Mean   :0.04892     Mean   :0.1812   Mean   :0.06280        Mean   :0.4052  
##  3rd Qu.:0.07400     3rd Qu.:0.1957   3rd Qu.:0.06612        3rd Qu.:0.4789  
##  Max.   :0.20120     Max.   :0.3040   Max.   :0.09744        Max.   :2.8730  
##    texture_se      perimeter_se       area_se        smoothness_se     
##  Min.   :0.3602   Min.   : 0.757   Min.   :  6.802   Min.   :0.001713  
##  1st Qu.:0.8339   1st Qu.: 1.606   1st Qu.: 17.850   1st Qu.:0.005169  
##  Median :1.1080   Median : 2.287   Median : 24.530   Median :0.006380  
##  Mean   :1.2169   Mean   : 2.866   Mean   : 40.337   Mean   :0.007041  
##  3rd Qu.:1.4740   3rd Qu.: 3.357   3rd Qu.: 45.190   3rd Qu.:0.008146  
##  Max.   :4.8850   Max.   :21.980   Max.   :542.200   Max.   :0.031130  
##  compactness_se      concavity_se     concave.points_se   symmetry_se      
##  Min.   :0.002252   Min.   :0.00000   Min.   :0.000000   Min.   :0.007882  
##  1st Qu.:0.013080   1st Qu.:0.01509   1st Qu.:0.007638   1st Qu.:0.015160  
##  Median :0.020450   Median :0.02589   Median :0.010930   Median :0.018730  
##  Mean   :0.025478   Mean   :0.03189   Mean   :0.011796   Mean   :0.020542  
##  3rd Qu.:0.032450   3rd Qu.:0.04205   3rd Qu.:0.014710   3rd Qu.:0.023480  
##  Max.   :0.135400   Max.   :0.39600   Max.   :0.052790   Max.   :0.078950  
##  fractal_dimension_se  radius_worst   texture_worst   perimeter_worst 
##  Min.   :0.0008948    Min.   : 7.93   Min.   :12.02   Min.   : 50.41  
##  1st Qu.:0.0022480    1st Qu.:13.01   1st Qu.:21.08   1st Qu.: 84.11  
##  Median :0.0031870    Median :14.97   Median :25.41   Median : 97.66  
##  Mean   :0.0037949    Mean   :16.27   Mean   :25.68   Mean   :107.26  
##  3rd Qu.:0.0045580    3rd Qu.:18.79   3rd Qu.:29.72   3rd Qu.:125.40  
##  Max.   :0.0298400    Max.   :36.04   Max.   :49.54   Max.   :251.20  
##    area_worst     smoothness_worst  compactness_worst concavity_worst 
##  Min.   : 185.2   Min.   :0.07117   Min.   :0.02729   Min.   :0.0000  
##  1st Qu.: 515.3   1st Qu.:0.11660   1st Qu.:0.14720   1st Qu.:0.1145  
##  Median : 686.5   Median :0.13130   Median :0.21190   Median :0.2267  
##  Mean   : 880.6   Mean   :0.13237   Mean   :0.25427   Mean   :0.2722  
##  3rd Qu.:1084.0   3rd Qu.:0.14600   3rd Qu.:0.33910   3rd Qu.:0.3829  
##  Max.   :4254.0   Max.   :0.22260   Max.   :1.05800   Max.   :1.2520  
##  concave.points_worst symmetry_worst   fractal_dimension_worst
##  Min.   :0.00000      Min.   :0.1565   Min.   :0.05504        
##  1st Qu.:0.06493      1st Qu.:0.2504   1st Qu.:0.07146        
##  Median :0.09993      Median :0.2822   Median :0.08004        
##  Mean   :0.11461      Mean   :0.2901   Mean   :0.08395        
##  3rd Qu.:0.16140      3rd Qu.:0.3179   3rd Qu.:0.09208        
##  Max.   :0.29100      Max.   :0.6638   Max.   :0.20750

Filtrar la base de datos

# Working copy of the data: the diagnosis plus the 30 tumour measurements.
Cancer_de_mama <- bd[, c(
  "diagnosis",
  "radius_mean", "texture_mean", "perimeter_mean", "area_mean",
  "smoothness_mean", "compactness_mean", "concavity_mean",
  "concave.points_mean", "symmetry_mean", "fractal_dimension_mean",
  "radius_se", "texture_se", "perimeter_se", "area_se",
  "smoothness_se", "compactness_se", "concavity_se",
  "concave.points_se", "symmetry_se", "fractal_dimension_se",
  "radius_worst", "texture_worst", "perimeter_worst", "area_worst",
  "smoothness_worst", "compactness_worst", "concavity_worst",
  "concave.points_worst", "symmetry_worst", "fractal_dimension_worst"
)]

# `diagnosis` is a character column coded "B"/"M". The previous test `== 0`
# never matched, so every row was labelled "Benigno" and the factor ended up
# with a single level. Compare against "M" to obtain both classes.
Cancer_de_mama$diagnosis <- factor(
  ifelse(Cancer_de_mama$diagnosis == "M", "Maligno", "Benigno"),
  levels = c("Benigno", "Maligno")
)

# radius_mean and smoothness_mean are continuous measurements, so they stay
# numeric. Converting them to factors produced hundreds of one-off levels.

# NOTE: the str() listing printed after this chunk comes from the run before
# this fix; re-knit the document to refresh it.
str(Cancer_de_mama)
## 'data.frame':    569 obs. of  31 variables:
##  $ diagnosis              : Factor w/ 1 level "Benigno": 1 1 1 1 1 1 1 1 1 1 ...
##  $ radius_mean            : Factor w/ 456 levels "6.981","7.691",..: 371 427 407 99 419 160 377 234 193 161 ...
##  $ texture_mean           : num  10.4 17.8 21.2 20.4 14.3 ...
##  $ perimeter_mean         : num  122.8 132.9 130 77.6 135.1 ...
##  $ area_mean              : num  1001 1326 1203 386 1297 ...
##  $ smoothness_mean        : Factor w/ 474 levels "0.05263","0.06251",..: 445 121 404 472 333 464 252 447 463 446 ...
##  $ compactness_mean       : num  0.2776 0.0786 0.1599 0.2839 0.1328 ...
##  $ concavity_mean         : num  0.3001 0.0869 0.1974 0.2414 0.198 ...
##  $ concave.points_mean    : num  0.1471 0.0702 0.1279 0.1052 0.1043 ...
##  $ symmetry_mean          : num  0.242 0.181 0.207 0.26 0.181 ...
##  $ fractal_dimension_mean : num  0.0787 0.0567 0.06 0.0974 0.0588 ...
##  $ radius_se              : num  1.095 0.543 0.746 0.496 0.757 ...
##  $ texture_se             : num  0.905 0.734 0.787 1.156 0.781 ...
##  $ perimeter_se           : num  8.59 3.4 4.58 3.44 5.44 ...
##  $ area_se                : num  153.4 74.1 94 27.2 94.4 ...
##  $ smoothness_se          : num  0.0064 0.00522 0.00615 0.00911 0.01149 ...
##  $ compactness_se         : num  0.049 0.0131 0.0401 0.0746 0.0246 ...
##  $ concavity_se           : num  0.0537 0.0186 0.0383 0.0566 0.0569 ...
##  $ concave.points_se      : num  0.0159 0.0134 0.0206 0.0187 0.0188 ...
##  $ symmetry_se            : num  0.03 0.0139 0.0225 0.0596 0.0176 ...
##  $ fractal_dimension_se   : num  0.00619 0.00353 0.00457 0.00921 0.00511 ...
##  $ radius_worst           : num  25.4 25 23.6 14.9 22.5 ...
##  $ texture_worst          : num  17.3 23.4 25.5 26.5 16.7 ...
##  $ perimeter_worst        : num  184.6 158.8 152.5 98.9 152.2 ...
##  $ area_worst             : num  2019 1956 1709 568 1575 ...
##  $ smoothness_worst       : num  0.162 0.124 0.144 0.21 0.137 ...
##  $ compactness_worst      : num  0.666 0.187 0.424 0.866 0.205 ...
##  $ concavity_worst        : num  0.712 0.242 0.45 0.687 0.4 ...
##  $ concave.points_worst   : num  0.265 0.186 0.243 0.258 0.163 ...
##  $ symmetry_worst         : num  0.46 0.275 0.361 0.664 0.236 ...
##  $ fractal_dimension_worst: num  0.1189 0.089 0.0876 0.173 0.0768 ...
# Total number of missing cells in the working data frame.
sum(
  is.na(Cancer_de_mama)
)
## [1] 0
# Drop any row that contains at least one missing value.
Cancer_de_mama <- na.omit(object = Cancer_de_mama)

Crear el árbol de decisión

# One-time setup, if the package is missing:
# install.packages("rpart.plot")
library(rpart.plot)
## Loading required package: rpart

# Fit a decision tree for `diagnosis` using every other column of `bd` as a
# candidate predictor, then print its text representation.
arbol <- rpart(
  formula = diagnosis ~ .,
  data = bd
)
print(arbol)
## n= 569 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 569 212 B (0.62741652 0.37258348)  
##    2) radius_worst< 16.795 379  33 B (0.91292876 0.08707124)  
##      4) concave.points_worst< 0.1358 333   5 B (0.98498498 0.01501502) *
##      5) concave.points_worst>=0.1358 46  18 M (0.39130435 0.60869565)  
##       10) texture_worst< 25.67 19   4 B (0.78947368 0.21052632) *
##       11) texture_worst>=25.67 27   3 M (0.11111111 0.88888889) *
##    3) radius_worst>=16.795 190  11 M (0.05789474 0.94210526) *
# rpart.plot is already attached earlier in this document, so the duplicated
# install hint and library() call were removed from this spot.

# Draw the fitted tree with rpart.plot's default layout.
rpart.plot(arbol)

# Draw the same tree with prp(); every node label is prefixed with
# "fracción" followed by a line break.
# NOTE(review): per the rpart.plot documentation, extra = 7 shows the
# probability of the second class without the fitted class — confirm this is
# the intended display.
prp(arbol, extra = 7, prefix = "fracción\n")

# Plotting dependencies.
# The earlier `?ggplot` help lookup was removed: it ran before ggplot2 was
# attached, so it only reported "No documentation for 'ggplot'".
# The package is named "ggplot2" (there is no "ggplot"):
# install.packages("ggplot2")
library("ggplot2")

library("tidyverse")
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ✔ purrr   0.3.4      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Scatter plot of the two variables used by the tree's first splits,
# coloured by diagnosis.
ggplot(
  data = bd,
  mapping = aes(x = radius_worst, y = concave.points_worst)
) +
  geom_point(mapping = aes(color = diagnosis)) +
  theme_bw()

Conclusiones

Los árboles de decisión muestran los posibles resultados de una serie de decisiones relacionadas. Permiten que un individuo o una organización compare posibles acciones entre sí según sus costos, probabilidades y beneficios. En este ejercicio se construye un árbol de decisión que clasifica los tumores de mama como benignos o malignos de acuerdo con el peor radio del tumor (radius_worst), sus peores puntos cóncavos (concave.points_worst) y su peor textura (texture_worst). De acuerdo con el análisis, los tumores con un peor radio menor a 16.8 son en su mayoría benignos (91 %), y si además sus peores puntos cóncavos son menores a 0.14, casi todos son benignos (98.5 %); en cambio, los tumores con un peor radio mayor o igual a 16.8 son en su mayoría malignos (94 %).

Con esto en mente, se muestran los posibles escenarios de acuerdo con factores positivos y negativos, y como resultado se tiene que, en su mayoría, aquellos con valores más altos de peor radio y de peores puntos cóncavos tienen mayor probabilidad de tener un tumor maligno que aquellos con valores más bajos.

¡Recuerda prevenir!

LS0tDQp0aXRsZTogPHNwYW4gc3R5bGU9IkNvbG9yOlBpbmsiPiAiQ2FuY2VyIGRlIE1hbWEiDQphdXRob3I6ICJKaW1lbmEgTWlndWVsIC0gQTAxMzY1ODE5Ig0KZGF0ZTogIjIwMjItMDktMDYiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KLS0tDQoNCjxkaXY+DQo8cCBzdHlsZSA9ICd0ZXh0LWFsaWduOmNlbnRlcjsnPg0KPGltZyBzcmM9Imh0dHBzOi8vd3d3LmVsc2V2aWVyLmNvbS9fX2RhdGEvYXNzZXRzL2ltYWdlLzAwMTkvMTIxMTQ0Ni8wYzg0NjUxM2NkY2M3NmM3ZWIxOTZhMzBhNzI2ZmYwZmM1OGE0ODliLmpwZyIgYWx0PSJKdXZlWWVsbCIgd2lkdGg9IjMwMHB4Ij4NCjwvcD4NCjwvZGl2Pg0KDQojIyBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zDQoNCmBgYHtyfQ0KZmlsZS5jaG9vc2UoKQ0KYmQ8LXJlYWQuY3N2KCJDOlxcVXNlcnNcXE1pZ3VlXFxPbmVEcml2ZVxcRG9jdW1lbnRvc1xcUlxcUHJpbWVyIGJsb3F1ZVxcY2FuY2VyX2RlX21hbWFcXGNhbmNlcl9kZV9tYW1hLmNzdiIpDQpgYGANCg0KIyMgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcw0KDQpgYGB7cn0NCnN1bW1hcnkoYmQpDQpgYGANCg0KDQojIyBGaWx0cmFyIGxhIGJhc2UgZGUgZGF0b3MNCg0KYGBge3J9DQpDYW5jZXJfZGVfbWFtYSA8LSBiZFssYygiZGlhZ25vc2lzIiwgInJhZGl1c19tZWFuIiwgInRleHR1cmVfbWVhbiIsICJwZXJpbWV0ZXJfbWVhbiIsICJhcmVhX21lYW4iLCAic21vb3RobmVzc19tZWFuIiwgImNvbXBhY3RuZXNzX21lYW4iLCAiY29uY2F2aXR5X21lYW4iLCAiY29uY2F2ZS5wb2ludHNfbWVhbiIsICJzeW1tZXRyeV9tZWFuIiwgImZyYWN0YWxfZGltZW5zaW9uX21lYW4iLCAicmFkaXVzX3NlIiwgInRleHR1cmVfc2UiLCAicGVyaW1ldGVyX3NlIiwgImFyZWFfc2UiLCAic21vb3RobmVzc19zZSIsICJjb21wYWN0bmVzc19zZSIsICJjb25jYXZpdHlfc2UiLCAiY29uY2F2ZS5wb2ludHNfc2UiLCAic3ltbWV0cnlfc2UiLCAiZnJhY3RhbF9kaW1lbnNpb25fc2UiLCAicmFkaXVzX3dvcnN0IiwgInRleHR1cmVfd29yc3QiLCJwZXJpbWV0ZXJfd29yc3QiLCAiYXJlYV93b3JzdCIsICJzbW9vdGhuZXNzX3dvcnN0IiwgImNvbXBhY3RuZXNzX3dvcnN0IiwgImNvbmNhdml0eV93b3JzdCIsICJjb25jYXZlLnBvaW50c193b3JzdCIsICJzeW1tZXRyeV93b3JzdCIsICJmcmFjdGFsX2RpbWVuc2lvbl93b3JzdCIpXQ0KDQpDYW5jZXJfZGVfbWFtYSRkaWFnbm9zaXM8LSBhcy5mYWN0b3IgKGlmZWxzZShDYW5jZXJfZGVfbWFtYSRkaWFnbm9zaXM9PTAsICJNYWxpZ25vIiwgIkJlbmlnbm8iKSkNCkNhbmNlcl9kZV9tYW1hJHJhZGl1c19tZWFuPC0gYXMuZmFjdG9yKENhbmNlcl9kZV9tYW1hJHJhZGl1c19tZWFuKQ0KQ2FuY2VyX2RlX21hbWEkc21vb3RobmVzc19tZWFuPC0gYXMuZmFjdG9yKENhbmNlcl9kZV9tYW1hJHNtb290aG5l
c3NfbWVhbikNCnN0cihDYW5jZXJfZGVfbWFtYSkNCg0Kc3VtKGlzLm5hKENhbmNlcl9kZV9tYW1hKSkNCg0KQ2FuY2VyX2RlX21hbWE8LSBuYS5vbWl0KENhbmNlcl9kZV9tYW1hKQ0KYGBgDQoNCg0KIyMgQ3JlYXIgZWwgw6FyYm9sIGRlIGRlY2lzacOzbg0KDQpgYGB7cn0NCiNpbnN0YWxsLnBhY2thZ2VzKCJycGFydC5wbG90IikNCmxpYnJhcnkocnBhcnQucGxvdCkNCmFyYm9sPC1ycGFydChmb3JtdWxhID0gZGlhZ25vc2lzIH4gLiwgZGF0YT1iZCkNCmFyYm9sDQojaW5zdGFsbC5wYWNrYWdlcygicnBhcnQucGxvdCIpDQpsaWJyYXJ5KHJwYXJ0LnBsb3QpDQoNCnJwYXJ0LnBsb3QoYXJib2wpDQoNCnBycChhcmJvbCxleHRyYT03LHByZWZpeD0iZnJhY2Npw7NuXG4iKQ0KDQo/Z2dwbG90DQoNCiNpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QiKQ0KbGlicmFyeSgiZ2dwbG90MiIpDQpsaWJyYXJ5KCJ0aWR5dmVyc2UiKQ0KDQpnZ3Bsb3QoZGF0YT1iZCwgbWFwcGluZyA9IGFlcyhyYWRpdXNfd29yc3QsIGNvbmNhdmUucG9pbnRzX3dvcnN0KSkrIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gZGlhZ25vc2lzKSkgKyB0aGVtZV9idygpDQpgYGANCg0KIyMgQ29uY2x1c2lvbmVzDQoNCkxvcyBfw6FyYm9sZXMgZGUgZGVjaXNpb25lc18gbm9zIG11ZXN0cmFuIGxvcyBwb3NpYmxlcyByZXN1bHRhZG9zIGRlIHVuYSBzZXJpZSBkZSBkZWNpc2lvbmVzIHJlbGFjaW9uYWRhcy4gUGVybWl0ZSBxdWUgdW4gaW5kaXZpZHVvIG8gdW5hIG9yZ2FuaXphY2nDs24gY29tcGFyZW4gcG9zaWJsZXMgYWNjaW9uZXMgZW50cmUgc8OtIHNlZ8O6biBzdXMgY29zdG9zLCBwcm9iYWJpbGlkYWRlcyB5IGJlbmVmaWNpb3MuIFBhcmEgZXN0ZSBlamVyY2ljaW8gc2UgbXVlc3RyYW4gbG9zIMOhcmJvbGVzIGRlIGRlY2lzaW9uZXMgcGFyYSBwZXJzb25hcyBjb24gY8OhbmNlciBkZSBtYW1hIGRlIGFjdWVyZG8gYWwgcmFkaW8gZGUgc3UgdHVtb3IsIHNpIHNvbiBiZW5pZ25vcyBvIG1hbGlnbm9zLCBhIHB1bnRvcyBjw7NuY2F2b3MgeSBhIHN1IHRleHR1cmEuIEFjb3JkZSBhIGxvcyBhbsOhbGlzaXMsIHNlIG11ZXN0cmEgcXVlIGFxdWVsbG9zIGNvbiB1biByYWRpbyBtZW5vciBhIDE3IGNtIHNvbiBiZW5pZ25vcyB5IHF1ZSBzaSBlc3RvcyB0aWVuZW4gcHVudG9zIGPDs25jYXZvcyBtZW5vcmVzIGEgLjE0IG1tIHNlIG11ZXN0cmFuIGNvbW8gYmVuaWdub3MsIG1pZW50cmFzIHF1ZSBhcXVlbGxvcyBxdWUgdGVuZ2FuIHVuIHJhZGlvIG1heW9yIGEgMTcgY20gc29uIG1hbGlnbm9zLg0KDQpDb24gZXN0byBlbiBtZW50ZSwgc2UgbXVlc3RyYW4gbG9zIHBvc2libGVzIGVzY2VuYXJpb3MgZGUgYWN1ZXJkbyBhIGZhY3RvcmVzIHBvc2l0aXZvcyB5IG5lZ2F0aXZvcywgeSBjb21vIHJlc3VsdGFkbyBzZSB0aWVuZSBxdWUgZW4gc3UgbWF5b3LDrWEgYXF1ZWxsb3MgY29uIHVuIHBlb3IgcmFkaW8geSBwZW9yZXMgcHVudG9zIGPDs25jYXZvcyB0aWVuZW4gbWF5b3IgcHJvYmFi
aWxpZGFkIGRlIHRlbmVyIHVuIHR1bW9yIG1hbGlnbm8gZGUgYXF1ZWxsb3MgcXVlIG5vLiANCg0KX1/CoVJlY3VlcmRhIHByZXZlbmlyIV9f