#install.packages("rpart")
library(rpart)
#install.packages("rpart.plot")
library(rpart.plot)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages("caret") # si no lo tienes instalado
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
#install.packages("neuralnet")
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(readr)
df <- read_csv('/Users/karlalopez/Downloads/cancer_de_mama.csv')
## Rows: 569 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): diagnosis
## dbl (30): radius_mean, texture_mean, perimeter_mean, area_mean, smoothness_m...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
arbol <- rpart(diagnosis ~ radius_mean + texture_mean + area_mean + smoothness_mean,
               data = df, method = "class",
               control = rpart.control(cp = 0.01, minsplit = 20))
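# Optional sanity check (a sketch, not part of the original run): diagnosis comes
# in as character ("B"/"M"); rpart with method = "class" treats it as a two-class factor.
table(df$diagnosis)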
# 3. View the complexity table
printcp(arbol)
##
## Classification tree:
## rpart(formula = diagnosis ~ radius_mean + texture_mean + area_mean +
## smoothness_mean, data = df, method = "class", control = rpart.control(cp = 0.01,
## minsplit = 20))
##
## Variables actually used in tree construction:
## [1] area_mean smoothness_mean texture_mean
##
## Root node error: 212/569 = 0.37258
##
## n= 569
##
## CP nsplit rel error xerror xstd
## 1 0.716981 0 1.00000 1.00000 0.054401
## 2 0.039308 1 0.28302 0.28302 0.034558
## 3 0.010000 4 0.16509 0.23585 0.031855
# 4. Choose the best cp (lowest cross-validation error)
mejor_cp <- arbol$cptable[which.min(arbol$cptable[,"xerror"]), "CP"]
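# Optional visual check (a sketch, not in the original run): plotcp() plots the
# cross-validated error against cp, which makes the choice above easy to eyeball.
plotcp(arbol)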
# 5. Prune the tree with the best cp
arbol_podado <- prune(arbol, cp = mejor_cp)
# 6. Plot the pruned tree (clearer and more robust)
prp(arbol_podado,
    extra = 104,           # show class, probability, and % of obs in each node
    box.palette = "Blues", # nice colors
    branch.lty = 3,        # dotted branches
    shadow.col = "gray",   # shadow for the boxes
    split.cex = 1.2,       # text size at the splits
    fallen.leaves = TRUE,  # align leaves at the bottom
    main = "Decision tree - Breast cancer diagnosis")
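
# Optional in-sample check (a sketch; run before df is transformed below,
# while it still holds the original diagnosis labels). pred_arbol is illustrative.
pred_arbol <- predict(arbol_podado, df, type = "class")
table(Real = df$diagnosis, Pred = pred_arbol)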
# 2) Prepare (B/M -> 0/1) and keep only numeric variables
df <- df %>%
  mutate(diagnosis_num = ifelse(diagnosis == "M", 1, 0)) %>%
  select(diagnosis_num, where(is.numeric)) %>%
  na.omit()
# 3) Scale to [0, 1]
scale01 <- function(x) (x - min(x)) / (max(x) - min(x))
x_cols <- setdiff(names(df), "diagnosis_num")
df[x_cols] <- lapply(df[x_cols], scale01)
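# Quick check (a sketch): after min-max scaling, every predictor should span
# exactly [0, 1], since (x - min)/(max - min) hits 0 and 1 at the extremes.
stopifnot(all(sapply(df[x_cols], min) == 0),
          all(sapply(df[x_cols], max) == 1))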
# 4) Train/test split with sample()
set.seed(123)
idx <- sample(seq_len(nrow(df)), size = 0.7 * nrow(df))
train <- df[idx, ]
test <- df[-idx, ]
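# Quick check (a sketch): class proportions should look similar in both partitions,
# since the split above is a simple random sample rather than a stratified one.
prop.table(table(train$diagnosis_num))
prop.table(table(test$diagnosis_num))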
# 5) Formula: output ~ inputs
form <- as.formula(paste("diagnosis_num ~", paste(x_cols, collapse = " + ")))
# 6) Train the network
red <- neuralnet(
  form, data = train,
  hidden = 5,            # you can try 3, 5, 8...
  act.fct = "logistic",
  linear.output = FALSE,
  stepmax = 1e6
)
# 7) Plot as in class
plot(red, rep = "best")

# 8) Prediction on TEST
prediccion <- compute(red, test[, x_cols])
probabilidad <- prediccion$net.result         # matrix of probabilities
resultado <- ifelse(probabilidad > 0.5, 1, 0) # 1 = Malignant, 0 = Benign
# 9) Quick table and accuracy
print(table(Real = test$diagnosis_num, Pred = as.vector(resultado)))
##     Pred
## Real  0  1
##    0 94  4
##    1  2 71
cat("Accuracy:", mean(as.vector(resultado) == test$diagnosis_num), "\n")
## Accuracy: 0.9649123
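# Since caret is already loaded, confusionMatrix() reports sensitivity, specificity,
# and kappa in one call — a sketch assuming "1" (malignant) as the positive class.
confusionMatrix(factor(as.vector(resultado), levels = c(0, 1)),
                factor(test$diagnosis_num, levels = c(0, 1)),
                positive = "1")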
# 10) (Optional) quick "test" of 3 cases as in class
prueba <- test[1:3, x_cols]
pred_prueba <- compute(red, prueba)
prob_prueba <- pred_prueba$net.result
resultado_prueba <- ifelse(prob_prueba > 0.5, 1, 0)
prob_prueba
## [,1]
## [1,] 1.0000000
## [2,] 1.0000000
## [3,] 0.9999996
resultado_prueba
## [,1]
## [1,] 1
## [2,] 1
## [3,] 1
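# For readability (a sketch): map the 0/1 predictions back to the original labels.
ifelse(resultado_prueba == 1, "M", "B")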