#install.packages("rpart")
library(rpart)
#install.packages("rpart.plot")
library(rpart.plot)
library(dplyr) 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages("caret")   # si no lo tienes instalado
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
#install.packages("neuralnet")
library(neuralnet)
## 
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
## 
##     compute
library(readr) 
df<-read_csv('/Users/karlalopez/Downloads/cancer_de_mama.csv')
## Rows: 569 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): diagnosis
## dbl (30): radius_mean, texture_mean, perimeter_mean, area_mean, smoothness_m...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
arbol <- rpart(diagnosis ~ radius_mean + texture_mean + area_mean + smoothness_mean,
               data = df, method = "class", 
               control = rpart.control(cp = 0.01, minsplit = 20))

# 3. Inspect the complexity table
printcp(arbol)
## 
## Classification tree:
## rpart(formula = diagnosis ~ radius_mean + texture_mean + area_mean + 
##     smoothness_mean, data = df, method = "class", control = rpart.control(cp = 0.01, 
##     minsplit = 20))
## 
## Variables actually used in tree construction:
## [1] area_mean       smoothness_mean texture_mean   
## 
## Root node error: 212/569 = 0.37258
## 
## n= 569 
## 
##         CP nsplit rel error  xerror     xstd
## 1 0.716981      0   1.00000 1.00000 0.054401
## 2 0.039308      1   0.28302 0.28302 0.034558
## 3 0.010000      4   0.16509 0.23585 0.031855
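# The cp table above can also be read off a plot: plotcp() (from rpart)
# draws the cross-validated error against cp with the usual 1-SE line.
# A minimal visual check, nothing beyond what printcp() already reports:
plotcp(arbol)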
# 4. Pick the best cp (lowest cross-validation error)
mejor_cp <- arbol$cptable[which.min(arbol$cptable[,"xerror"]), "CP"]

# 5. Prune the tree with the best cp
arbol_podado <- prune(arbol, cp = mejor_cp)

# 6. Plot the pruned tree (clearer and more robust)
prp(arbol_podado, 
    extra = 104,            # show class, probability, and % of obs in each node
    box.palette = "Blues",  # node colors
    branch.lty = 3,         # dotted branches
    shadow.col = "gray",    # shadow behind the boxes
    split.cex = 1.2,        # split-label text size
    fallen.leaves = TRUE,   # align the leaves at the bottom of the plot
    main = "Decision tree - Breast cancer diagnosis")
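
# The pruned tree is never scored in the original; as a quick sanity check
# one could look at its (optimistic, in-sample) confusion table. A sketch
# using predict.rpart on the same data the tree was fit on:
pred_arbol <- predict(arbol_podado, newdata = df, type = "class")
table(Real = df$diagnosis, Pred = pred_arbol)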
# 2) Recode the outcome (B/M -> 0/1) and keep only the numeric variables
df <- df %>%
  mutate(diagnosis_num = ifelse(diagnosis == "M", 1, 0)) %>%
  select(diagnosis_num, where(is.numeric)) %>%
  na.omit()

# 3) Scale to [0,1]
scale01 <- function(x) (x - min(x)) / (max(x) - min(x))
x_cols <- setdiff(names(df), "diagnosis_num")
df[x_cols] <- lapply(df[x_cols], scale01)

# 4) Train/test split with sample()
set.seed(123)
idx  <- sample(seq_len(nrow(df)), size = 0.7 * nrow(df))
train <- df[idx, ]
test  <- df[-idx, ]
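
# Note that scale01 above used the min/max of the full data set, so a little
# information from the test rows leaks into the training scale. In a stricter
# workflow one would split first and then scale both sets with train-only
# ranges; a sketch (not what this document runs):
# rng <- sapply(train[x_cols], range)   # 2 x p matrix: row 1 = min, row 2 = max
# test[x_cols] <- Map(function(x, lo, hi) (x - lo) / (hi - lo),
#                     test[x_cols], rng[1, ], rng[2, ])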

# 5) Formula: output ~ inputs
form <- as.formula(paste("diagnosis_num ~", paste(x_cols, collapse = " + ")))
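
# An equivalent, slightly more idiomatic base-R way to build the same
# formula (shown for reference only):
# form <- reformulate(x_cols, response = "diagnosis_num")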

# 6) Train the network
red <- neuralnet(
  form, data = train,
  hidden = 5,            # try 3, 5, 8, ...
  act.fct = "logistic",
  linear.output = FALSE,
  stepmax = 1e6
)
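
# neuralnet() also accepts a vector for hidden, one entry per hidden layer;
# a hypothetical two-layer variant of the same fit would be (not run here):
# red2 <- neuralnet(form, data = train, hidden = c(5, 3),
#                   act.fct = "logistic", linear.output = FALSE, stepmax = 1e6)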

# 7) Plot as in class
plot(red, rep = "best")

# 8) Predictions on the TEST set
prediccion <- compute(red, test[, x_cols])
probabilidad <- prediccion$net.result           # matrix of probabilities
resultado <- ifelse(probabilidad > 0.5, 1, 0)   # 1 = malignant, 0 = benign

# 9) Quick confusion table and accuracy
print(table(Real = test$diagnosis_num, Pred = as.vector(resultado)))
##     Pred
## Real  0  1
##    0 94  4
##    1  2 71
cat("Accuracy:", mean(as.vector(resultado) == test$diagnosis_num), "\n")
## Accuracy: 0.9649123
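# caret is attached above but never used; its confusionMatrix() would add
# sensitivity, specificity, and related metrics to the plain accuracy.
# A sketch on the same predictions:
caret::confusionMatrix(
  factor(as.vector(resultado), levels = c(0, 1)),
  factor(test$diagnosis_num,  levels = c(0, 1)),
  positive = "1"
)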
# 10) (Optional) "test" on 3 cases as in class
prueba <- test[1:3, x_cols]
pred_prueba <- compute(red, prueba)
prob_prueba <- pred_prueba$net.result
resultado_prueba <- ifelse(prob_prueba > 0.5, 1, 0)
prob_prueba
##           [,1]
## [1,] 1.0000000
## [2,] 1.0000000
## [3,] 0.9999996
resultado_prueba
##      [,1]
## [1,]    1
## [2,]    1
## [3,]    1
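# To read those 0/1 predictions back as the original labels, one small
# (hypothetical) recoding step would do:
ifelse(resultado_prueba == 1, "M", "B")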