Librerías

library(caret)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
## Loading required package: lattice
library(ggplot2) # Crear gráficos
library(datasets) # Usar la base de datos "Iris"
library(lattice) # Crear gráficos
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.2.3
library(mlbench)

Cargar la base de datos

# Bring the BreastCancer dataset into the workspace (mlbench is attached above).
data("BreastCancer")

# Keep a working copy under a short name; all later steps operate on `df`.
df <- data.frame(BreastCancer)

Análisis exploratorio

# Per-column overview: level counts for the factor columns and the number of
# missing values (NA's) where there are any.
summary(df)
##       Id             Cl.thickness   Cell.size     Cell.shape  Marg.adhesion
##  Length:699         1      :145   1      :384   1      :353   1      :407  
##  Class :character   5      :130   10     : 67   2      : 59   2      : 58  
##  Mode  :character   3      :108   3      : 52   10     : 58   3      : 58  
##                     4      : 80   2      : 45   3      : 56   10     : 55  
##                     10     : 69   4      : 40   4      : 44   4      : 33  
##                     2      : 50   5      : 30   5      : 34   8      : 25  
##                     (Other):117   (Other): 81   (Other): 95   (Other): 63  
##   Epith.c.size  Bare.nuclei   Bl.cromatin  Normal.nucleoli    Mitoses   
##  2      :386   1      :402   2      :166   1      :443     1      :579  
##  3      : 72   10     :132   3      :165   10     : 61     2      : 35  
##  4      : 48   2      : 30   1      :152   3      : 44     3      : 33  
##  1      : 47   5      : 30   7      : 73   2      : 36     10     : 14  
##  6      : 41   3      : 28   4      : 40   8      : 24     4      : 12  
##  5      : 39   (Other): 61   5      : 34   6      : 22     7      :  9  
##  (Other): 66   NA's   : 16   (Other): 69   (Other): 69     (Other): 17  
##        Class    
##  benign   :458  
##  malignant:241  
##                 
##                 
##                 
##                 
## 
# Column types and the first few values of each column.
str(df)
## 'data.frame':    699 obs. of  11 variables:
##  $ Id             : chr  "1000025" "1002945" "1015425" "1016277" ...
##  $ Cl.thickness   : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 5 5 3 6 4 8 1 2 2 4 ...
##  $ Cell.size      : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 1 1 2 ...
##  $ Cell.shape     : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 4 1 8 1 10 1 2 1 1 ...
##  $ Marg.adhesion  : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 1 5 1 1 3 8 1 1 1 1 ...
##  $ Epith.c.size   : Ord.factor w/ 10 levels "1"<"2"<"3"<"4"<..: 2 7 2 3 2 7 2 2 2 2 ...
##  $ Bare.nuclei    : Factor w/ 10 levels "1","2","3","4",..: 1 10 2 4 1 10 10 1 1 1 ...
##  $ Bl.cromatin    : Factor w/ 10 levels "1","2","3","4",..: 3 3 3 3 3 9 3 3 1 2 ...
##  $ Normal.nucleoli: Factor w/ 10 levels "1","2","3","4",..: 1 2 1 7 1 7 1 1 1 1 ...
##  $ Mitoses        : Factor w/ 9 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 5 1 ...
##  $ Class          : Factor w/ 2 levels "benign","malignant": 1 1 1 1 1 2 1 1 1 1 ...
# Data cleaning

# Drop the sample identifier so it is not used as a predictor.
df$Id <- NULL


# Type conversion

# Convert the nine score columns from factors to numbers.
# as.numeric() applied directly to a factor returns the internal level index,
# not the label. Mitoses has only 9 levels yet includes the label "10", so its
# indices cannot all equal its labels and a direct conversion would silently
# record the highest scores too low. Converting through as.character() keeps
# the real score for every column.
columnas_numericas <- c(
  "Cl.thickness", "Cell.size", "Cell.shape", "Marg.adhesion", "Epith.c.size",
  "Bare.nuclei", "Bl.cromatin", "Normal.nucleoli", "Mitoses"
)
df[columnas_numericas] <- lapply(
  df[columnas_numericas],
  function(columna) as.numeric(as.character(columna))
)

# The outcome must stay a factor so the models are fitted as classifiers.
df$Class <- as.factor(df$Class)



# Remove the 16 rows with missing values (all in Bare.nuclei per summary(df)).
df <- na.omit(df)
# Drawn after na.omit(), so this plot reflects the cleaned data.
plot_missing(df)

# Distribution of every numeric column.
plot_histogram(df)

# Correlation heatmap of the cleaned data.
plot_correlation(df)

La variable objetivo (Class) debe ser un factor para que caret entrene modelos de clasificación.

Partición de datos

# Fix the RNG state so the split below is reproducible.
set.seed(123)

# Row indices for the training set: 80% of the rows, sampled with respect to
# the Class outcome. list = FALSE returns the indices as a matrix rather than
# a list, which can be used directly for row subsetting.
renglones_entrenamiento <- createDataPartition(
  y = df$Class,
  p = 0.8,
  list = FALSE
)

# Training rows are the sampled indices; test rows are everything else.
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]

Modelo SVM lineal (svmLinear)

# SVM with a linear kernel (caret method "svmLinear").
# Scaling and centering are requested through preProcess, resampling is
# 10-fold cross-validation, and the tuning grid has a single row (C = 1), so
# the cost is fixed rather than tuned.
modelo1 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Predicted classes for the training rows and for the held-out test rows.
resultado_entrenamiento1 <- predict(modelo1, newdata = entrenamiento)
resultado_prueba1 <- predict(modelo1, newdata = prueba)

Matriz de confusión

# Confusion matrix of the svmLinear predictions against the true classes of
# the training set.
mcre1 <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento$Class
)

# Print the table and its summary statistics.
mcre1
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       347         7
##   malignant      9       185
##                                          
##                Accuracy : 0.9708         
##                  95% CI : (0.953, 0.9832)
##     No Information Rate : 0.6496         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.936          
##                                          
##  Mcnemar's Test P-Value : 0.8026         
##                                          
##             Sensitivity : 0.9747         
##             Specificity : 0.9635         
##          Pos Pred Value : 0.9802         
##          Neg Pred Value : 0.9536         
##              Prevalence : 0.6496         
##          Detection Rate : 0.6332         
##    Detection Prevalence : 0.6460         
##       Balanced Accuracy : 0.9691         
##                                          
##        'Positive' Class : benign         
## 
# Confusion matrix of the svmLinear predictions against the true classes of
# the held-out test set.
# NOTE(review): no `positive` argument is given and the report treats "benign"
# as the positive class; confirm that is the intended reading of
# sensitivity/specificity.
mcrp1 <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba$Class
)

# Print the table and its summary statistics.
mcrp1
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        87         2
##   malignant      1        45
##                                           
##                Accuracy : 0.9778          
##                  95% CI : (0.9364, 0.9954)
##     No Information Rate : 0.6519          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9508          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9886          
##             Specificity : 0.9574          
##          Pos Pred Value : 0.9775          
##          Neg Pred Value : 0.9783          
##              Prevalence : 0.6519          
##          Detection Rate : 0.6444          
##    Detection Prevalence : 0.6593          
##       Balanced Accuracy : 0.9730          
##                                           
##        'Positive' Class : benign          
## 

Modelo SVM radial (svmRadial)

# SVM with a radial kernel (caret method "svmRadial").
# Same preprocessing and 10-fold CV as the other models; the single-row grid
# fixes both kernel width and cost (sigma = 1, C = 1), so nothing is tuned.
modelo2 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Predicted classes for the training rows and for the held-out test rows.
resultado_entrenamiento2 <- predict(modelo2, newdata = entrenamiento)
resultado_prueba2 <- predict(modelo2, newdata = prueba)

Matriz de confusión

# Confusion matrix of the svmRadial predictions against the true classes of
# the training set.
mcre2 <- confusionMatrix(
  data = resultado_entrenamiento2,
  reference = entrenamiento$Class
)

# Print the table and its summary statistics.
mcre2
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       354         0
##   malignant      2       192
##                                           
##                Accuracy : 0.9964          
##                  95% CI : (0.9869, 0.9996)
##     No Information Rate : 0.6496          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.992           
##                                           
##  Mcnemar's Test P-Value : 0.4795          
##                                           
##             Sensitivity : 0.9944          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9897          
##              Prevalence : 0.6496          
##          Detection Rate : 0.6460          
##    Detection Prevalence : 0.6460          
##       Balanced Accuracy : 0.9972          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix of the svmRadial predictions against the true classes of
# the held-out test set.
# NOTE(review): no `positive` argument is given and the report treats "benign"
# as the positive class; confirm that is the intended reading of
# sensitivity/specificity.
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba$Class
)

# Print the table and its summary statistics.
mcrp2
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        82         0
##   malignant      6        47
##                                           
##                Accuracy : 0.9556          
##                  95% CI : (0.9058, 0.9835)
##     No Information Rate : 0.6519          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.9049          
##                                           
##  Mcnemar's Test P-Value : 0.04123         
##                                           
##             Sensitivity : 0.9318          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.8868          
##              Prevalence : 0.6519          
##          Detection Rate : 0.6074          
##    Detection Prevalence : 0.6074          
##       Balanced Accuracy : 0.9659          
##                                           
##        'Positive' Class : benign          
## 

Modelo SVM polinomial (svmPoly)

# SVM with a polynomial kernel (caret method "svmPoly").
# Same preprocessing and 10-fold CV as the other models; the single-row grid
# fixes degree = 1, scale = 1 and C = 1, so nothing is tuned.
# NOTE(review): the results reported for this model are identical to the
# svmLinear ones; presumably degree = 1 makes the kernel effectively linear.
# Consider a higher degree if a genuinely polynomial model is wanted.
modelo3 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Predicted classes for the training rows and for the held-out test rows.
resultado_entrenamiento3 <- predict(modelo3, newdata = entrenamiento)
resultado_prueba3 <- predict(modelo3, newdata = prueba)

Matriz de confusión

# Confusion matrix of the svmPoly predictions against the true classes of the
# training set.
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento$Class
)

# Print the table and its summary statistics.
mcre3
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       347         7
##   malignant      9       185
##                                          
##                Accuracy : 0.9708         
##                  95% CI : (0.953, 0.9832)
##     No Information Rate : 0.6496         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.936          
##                                          
##  Mcnemar's Test P-Value : 0.8026         
##                                          
##             Sensitivity : 0.9747         
##             Specificity : 0.9635         
##          Pos Pred Value : 0.9802         
##          Neg Pred Value : 0.9536         
##              Prevalence : 0.6496         
##          Detection Rate : 0.6332         
##    Detection Prevalence : 0.6460         
##       Balanced Accuracy : 0.9691         
##                                          
##        'Positive' Class : benign         
## 
# Confusion matrix of the svmPoly predictions against the true classes of the
# held-out test set.
# NOTE(review): no `positive` argument is given and the report treats "benign"
# as the positive class; confirm that is the intended reading of
# sensitivity/specificity.
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba$Class
)

# Print the table and its summary statistics.
mcrp3
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        87         2
##   malignant      1        45
##                                           
##                Accuracy : 0.9778          
##                  95% CI : (0.9364, 0.9954)
##     No Information Rate : 0.6519          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9508          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9886          
##             Specificity : 0.9574          
##          Pos Pred Value : 0.9775          
##          Neg Pred Value : 0.9783          
##              Prevalence : 0.6519          
##          Detection Rate : 0.6444          
##    Detection Prevalence : 0.6593          
##       Balanced Accuracy : 0.9730          
##                                           
##        'Positive' Class : benign          
## 

Modelo árbol de decisión

# Decision tree (caret method "rpart").
# Same preprocessing and 10-fold CV as the other models. No explicit grid is
# given: tuneLength = 10 asks caret to evaluate ten candidate values of the
# method's tuning parameter and keep the best one.
modelo4 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Predicted classes for the training rows and for the held-out test rows.
resultado_entrenamiento4 <- predict(modelo4, newdata = entrenamiento)
resultado_prueba4 <- predict(modelo4, newdata = prueba)

Matriz de confusión

# Confusion matrix of the decision-tree predictions against the true classes
# of the training set.
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento$Class
)

# Print the table and its summary statistics.
mcre4
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       345         9
##   malignant     11       183
##                                           
##                Accuracy : 0.9635          
##                  95% CI : (0.9442, 0.9776)
##     No Information Rate : 0.6496          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.92            
##                                           
##  Mcnemar's Test P-Value : 0.8231          
##                                           
##             Sensitivity : 0.9691          
##             Specificity : 0.9531          
##          Pos Pred Value : 0.9746          
##          Neg Pred Value : 0.9433          
##              Prevalence : 0.6496          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.6460          
##       Balanced Accuracy : 0.9611          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix of the decision-tree predictions against the true classes
# of the held-out test set.
# NOTE(review): no `positive` argument is given and the report treats "benign"
# as the positive class; confirm that is the intended reading of
# sensitivity/specificity.
mcrp4 <- confusionMatrix(
  data = resultado_prueba4,
  reference = prueba$Class
)

# Print the table and its summary statistics.
mcrp4
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        87         5
##   malignant      1        42
##                                           
##                Accuracy : 0.9556          
##                  95% CI : (0.9058, 0.9835)
##     No Information Rate : 0.6519          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9001          
##                                           
##  Mcnemar's Test P-Value : 0.2207          
##                                           
##             Sensitivity : 0.9886          
##             Specificity : 0.8936          
##          Pos Pred Value : 0.9457          
##          Neg Pred Value : 0.9767          
##              Prevalence : 0.6519          
##          Detection Rate : 0.6444          
##    Detection Prevalence : 0.6815          
##       Balanced Accuracy : 0.9411          
##                                           
##        'Positive' Class : benign          
## 

Modelo de red neuronal (nnet)

# Neural network (caret method "nnet").
# Same preprocessing and 10-fold CV as the other models. No tuneGrid or
# tuneLength is given, so caret chooses the candidate settings itself.
# trace = FALSE is forwarded through train()'s `...` to the underlying nnet
# fit to silence its per-iteration optimiser log, which is otherwise printed
# for every candidate model on every fold.
# NOTE(review): many of the logged fits end with "stopped after 100
# iterations"; consider raising maxit if convergence matters.
modelo5 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)
## # weights:  12
## initial  value 376.477149 
## iter  10 value 42.437195
## iter  20 value 37.383594
## iter  30 value 37.366404
## iter  40 value 36.703788
## iter  50 value 32.802253
## iter  60 value 32.783185
## iter  70 value 32.780576
## iter  80 value 32.780352
## iter  90 value 32.780267
## iter 100 value 32.779932
## final  value 32.779932 
## stopped after 100 iterations
## # weights:  34
## initial  value 398.967389 
## iter  10 value 41.580064
## iter  20 value 32.819841
## iter  30 value 30.580631
## iter  40 value 30.171660
## iter  50 value 29.018950
## iter  60 value 26.270274
## iter  70 value 25.101933
## iter  80 value 24.289118
## iter  90 value 23.970739
## iter 100 value 23.966875
## final  value 23.966875 
## stopped after 100 iterations
## # weights:  56
## initial  value 392.380535 
## iter  10 value 36.130217
## iter  20 value 16.520603
## iter  30 value 7.379376
## iter  40 value 4.979779
## iter  50 value 1.881996
## iter  60 value 1.413954
## iter  70 value 1.397568
## iter  80 value 1.389585
## iter  90 value 1.386721
## iter 100 value 1.386333
## final  value 1.386333 
## stopped after 100 iterations
## # weights:  12
## initial  value 350.410635 
## iter  10 value 74.916798
## iter  20 value 58.878074
## iter  30 value 49.960077
## iter  40 value 49.661904
## final  value 49.661892 
## converged
## # weights:  34
## initial  value 349.539635 
## iter  10 value 115.137963
## iter  20 value 45.339839
## iter  30 value 41.959631
## iter  40 value 38.444785
## iter  50 value 37.928941
## iter  60 value 37.672511
## iter  70 value 37.662808
## final  value 37.662804 
## converged
## # weights:  56
## initial  value 385.799685 
## iter  10 value 41.550941
## iter  20 value 37.775498
## iter  30 value 37.351510
## iter  40 value 37.203503
## iter  50 value 36.881480
## iter  60 value 36.816477
## iter  70 value 36.815808
## iter  80 value 36.815763
## iter  80 value 36.815763
## final  value 36.815763 
## converged
## # weights:  12
## initial  value 411.250561 
## iter  10 value 128.481791
## iter  20 value 48.775343
## iter  30 value 48.108500
## iter  40 value 45.635617
## iter  50 value 42.789717
## iter  60 value 42.603333
## iter  70 value 39.693478
## iter  80 value 39.677257
## iter  90 value 39.671032
## iter 100 value 39.658964
## final  value 39.658964 
## stopped after 100 iterations
## # weights:  34
## initial  value 396.907245 
## iter  10 value 36.935107
## iter  20 value 27.845747
## iter  30 value 24.064004
## iter  40 value 22.507661
## iter  50 value 22.161343
## iter  60 value 21.912002
## iter  70 value 21.436296
## iter  80 value 21.176512
## iter  90 value 21.115843
## iter 100 value 20.994476
## final  value 20.994476 
## stopped after 100 iterations
## # weights:  56
## initial  value 470.209121 
## iter  10 value 36.854692
## iter  20 value 29.775951
## iter  30 value 26.512592
## iter  40 value 23.490629
## iter  50 value 21.090972
## iter  60 value 14.528117
## iter  70 value 14.147383
## iter  80 value 13.015758
## iter  90 value 12.840415
## iter 100 value 12.812813
## final  value 12.812813 
## stopped after 100 iterations
## # weights:  12
## initial  value 355.508036 
## iter  10 value 52.186983
## iter  20 value 49.731412
## iter  30 value 42.751850
## iter  40 value 42.589636
## iter  50 value 40.970318
## iter  60 value 39.558646
## iter  70 value 39.543586
## iter  80 value 39.515130
## iter  90 value 39.503451
## iter 100 value 39.499587
## final  value 39.499587 
## stopped after 100 iterations
## # weights:  34
## initial  value 345.293855 
## iter  10 value 38.850419
## iter  20 value 35.299467
## iter  30 value 29.063227
## iter  40 value 25.318280
## iter  50 value 22.886781
## iter  60 value 21.721975
## iter  70 value 21.281768
## iter  80 value 21.101518
## iter  90 value 21.071842
## iter 100 value 21.066335
## final  value 21.066335 
## stopped after 100 iterations
## # weights:  56
## initial  value 318.370065 
## iter  10 value 40.055119
## iter  20 value 24.416365
## iter  30 value 13.343905
## iter  40 value 12.497316
## iter  50 value 12.101419
## iter  60 value 11.714407
## iter  70 value 10.361632
## iter  80 value 10.093093
## iter  90 value 9.858132
## iter 100 value 9.611812
## final  value 9.611812 
## stopped after 100 iterations
## # weights:  12
## initial  value 383.405588 
## iter  10 value 70.986072
## iter  20 value 54.771996
## iter  30 value 53.597555
## iter  40 value 53.544775
## iter  40 value 53.544774
## iter  40 value 53.544774
## final  value 53.544774 
## converged
## # weights:  34
## initial  value 437.860910 
## iter  10 value 52.917571
## iter  20 value 47.485871
## iter  30 value 43.339800
## iter  40 value 42.347250
## iter  50 value 41.987058
## iter  60 value 41.759375
## iter  70 value 41.757165
## iter  70 value 41.757165
## final  value 41.757165 
## converged
## # weights:  56
## initial  value 432.938183 
## iter  10 value 53.241057
## iter  20 value 42.793023
## iter  30 value 40.750430
## iter  40 value 40.140010
## iter  50 value 40.065188
## iter  60 value 40.059476
## iter  70 value 40.058576
## final  value 40.058575 
## converged
## # weights:  12
## initial  value 370.559464 
## iter  10 value 56.330575
## iter  20 value 45.487568
## iter  30 value 40.718222
## iter  40 value 39.656038
## iter  50 value 39.640343
## iter  60 value 39.639341
## iter  70 value 39.638382
## iter  80 value 39.638099
## iter  90 value 39.638035
## iter 100 value 39.637985
## final  value 39.637985 
## stopped after 100 iterations
## # weights:  34
## initial  value 371.184689 
## iter  10 value 42.069486
## iter  20 value 36.888133
## iter  30 value 35.233768
## iter  40 value 31.583862
## iter  50 value 29.951261
## iter  60 value 27.882599
## iter  70 value 27.350586
## iter  80 value 27.191210
## iter  90 value 27.150219
## iter 100 value 26.998852
## final  value 26.998852 
## stopped after 100 iterations
## # weights:  56
## initial  value 324.010663 
## iter  10 value 35.285669
## iter  20 value 18.983575
## iter  30 value 8.864095
## iter  40 value 8.493163
## iter  50 value 8.373325
## iter  60 value 5.023711
## iter  70 value 4.414094
## iter  80 value 4.245991
## iter  90 value 4.032582
## iter 100 value 2.658630
## final  value 2.658630 
## stopped after 100 iterations
## # weights:  12
## initial  value 376.965522 
## iter  10 value 57.458884
## iter  20 value 48.268436
## iter  30 value 45.450565
## iter  40 value 43.475756
## iter  50 value 39.525989
## iter  60 value 39.499126
## iter  70 value 39.494123
## iter  80 value 39.484240
## iter  90 value 39.481187
## iter 100 value 39.480996
## final  value 39.480996 
## stopped after 100 iterations
## # weights:  34
## initial  value 461.169324 
## iter  10 value 37.143796
## iter  20 value 28.209150
## iter  30 value 19.639361
## iter  40 value 17.612668
## iter  50 value 17.529577
## iter  60 value 17.528701
## final  value 17.528675 
## converged
## # weights:  56
## initial  value 291.377968 
## iter  10 value 34.872750
## iter  20 value 27.020476
## iter  30 value 21.365333
## iter  40 value 18.322587
## iter  50 value 14.852247
## iter  60 value 14.005387
## iter  70 value 13.813872
## iter  80 value 13.634998
## iter  90 value 13.585523
## iter 100 value 13.561454
## final  value 13.561454 
## stopped after 100 iterations
## # weights:  12
## initial  value 350.760977 
## iter  10 value 62.985282
## iter  20 value 52.810932
## iter  30 value 52.778785
## final  value 52.777991 
## converged
## # weights:  34
## initial  value 325.655243 
## iter  10 value 53.894733
## iter  20 value 47.675190
## iter  30 value 45.276035
## iter  40 value 43.160908
## iter  50 value 41.825807
## iter  60 value 41.617544
## iter  70 value 41.610425
## iter  80 value 41.555818
## iter  90 value 41.553218
## iter  90 value 41.553218
## iter  90 value 41.553218
## final  value 41.553218 
## converged
## # weights:  56
## initial  value 290.669028 
## iter  10 value 85.091956
## iter  20 value 49.886743
## iter  30 value 44.222060
## iter  40 value 40.735343
## iter  50 value 39.664872
## iter  60 value 39.313892
## iter  70 value 38.909658
## iter  80 value 38.818642
## iter  90 value 38.811901
## iter 100 value 38.811641
## final  value 38.811641 
## stopped after 100 iterations
## # weights:  12
## initial  value 429.343504 
## iter  10 value 48.000220
## iter  20 value 43.121829
## iter  30 value 41.515601
## iter  40 value 37.669596
## iter  50 value 37.396040
## iter  60 value 37.383183
## iter  70 value 37.341077
## iter  80 value 37.328979
## iter  90 value 37.320610
## iter 100 value 37.293722
## final  value 37.293722 
## stopped after 100 iterations
## # weights:  34
## initial  value 476.188548 
## iter  10 value 48.622911
## iter  20 value 32.034715
## iter  30 value 28.036310
## iter  40 value 24.999756
## iter  50 value 23.129264
## iter  60 value 22.975342
## iter  70 value 22.913586
## iter  80 value 22.892855
## iter  90 value 22.858722
## iter 100 value 22.833042
## final  value 22.833042 
## stopped after 100 iterations
## # weights:  56
## initial  value 378.900773 
## iter  10 value 36.248260
## iter  20 value 20.146364
## iter  30 value 11.327904
## iter  40 value 9.748409
## iter  50 value 9.432211
## iter  60 value 9.055354
## iter  70 value 8.966362
## iter  80 value 8.935854
## iter  90 value 8.917093
## iter 100 value 8.902201
## final  value 8.902201 
## stopped after 100 iterations
## # weights:  12
## initial  value 344.806105 
## iter  10 value 53.314981
## iter  20 value 51.864827
## iter  30 value 48.439608
## iter  40 value 47.074809
## iter  50 value 45.171586
## iter  60 value 45.010562
## iter  70 value 44.965893
## iter  80 value 44.919082
## iter  90 value 44.853411
## iter 100 value 44.822481
## final  value 44.822481 
## stopped after 100 iterations
## # weights:  34
## initial  value 324.352768 
## iter  10 value 33.754090
## iter  20 value 26.301046
## iter  30 value 21.302920
## iter  40 value 20.470135
## iter  50 value 20.022692
## iter  60 value 19.903231
## iter  70 value 19.895252
## iter  80 value 19.894923
## iter  90 value 19.894423
## final  value 19.894412 
## converged
## # weights:  56
## initial  value 301.048227 
## iter  10 value 31.006045
## iter  20 value 26.243828
## iter  30 value 20.745469
## iter  40 value 18.745179
## iter  50 value 18.149268
## iter  60 value 17.213758
## iter  70 value 17.126430
## iter  80 value 17.035218
## iter  90 value 16.949961
## iter 100 value 16.783764
## final  value 16.783764 
## stopped after 100 iterations
## # weights:  12
## initial  value 441.401693 
## iter  10 value 52.177398
## iter  20 value 47.978570
## iter  30 value 46.688911
## final  value 46.686683 
## converged
## # weights:  34
## initial  value 400.891990 
## iter  10 value 38.167635
## iter  20 value 36.493121
## iter  30 value 36.422004
## iter  40 value 36.355602
## iter  50 value 36.351007
## final  value 36.351006 
## converged
## # weights:  56
## initial  value 354.480594 
## iter  10 value 40.204286
## iter  20 value 36.276416
## iter  30 value 34.831402
## iter  40 value 34.638085
## iter  50 value 34.632346
## final  value 34.630418 
## converged
## # weights:  12
## initial  value 382.278414 
## iter  10 value 51.491238
## iter  20 value 42.900868
## iter  30 value 36.397646
## iter  40 value 36.365532
## iter  50 value 36.354776
## iter  60 value 36.352952
## iter  70 value 36.351207
## iter  80 value 36.350648
## iter  90 value 36.350404
## iter 100 value 36.350228
## final  value 36.350228 
## stopped after 100 iterations
## # weights:  34
## initial  value 385.229559 
## iter  10 value 48.761838
## iter  20 value 34.609357
## iter  30 value 22.131388
## iter  40 value 17.743321
## iter  50 value 16.603891
## iter  60 value 16.337974
## iter  70 value 16.142630
## iter  80 value 16.099927
## iter  90 value 16.066784
## iter 100 value 16.036454
## final  value 16.036454 
## stopped after 100 iterations
## # weights:  56
## initial  value 517.073810 
## iter  10 value 208.699725
## iter  20 value 22.379999
## iter  30 value 16.405697
## iter  40 value 14.761128
## iter  50 value 14.203133
## iter  60 value 13.855094
## iter  70 value 13.643576
## iter  80 value 13.000648
## iter  90 value 10.708189
## iter 100 value 10.152099
## final  value 10.152099 
## stopped after 100 iterations
## # weights:  12
## initial  value 292.895306 
## iter  10 value 52.551393
## iter  20 value 47.795990
## iter  30 value 43.326449
## iter  40 value 42.498683
## iter  50 value 42.483639
## iter  60 value 42.481460
## iter  70 value 42.480221
## iter  80 value 42.478425
## iter  90 value 42.476811
## iter 100 value 42.476437
## final  value 42.476437 
## stopped after 100 iterations
## # weights:  34
## initial  value 370.482153 
## iter  10 value 38.851578
## iter  20 value 33.185345
## iter  30 value 26.837372
## iter  40 value 23.550785
## iter  50 value 22.282349
## iter  60 value 21.741716
## iter  70 value 21.617553
## iter  80 value 21.606068
## iter  90 value 21.594047
## iter 100 value 21.590763
## final  value 21.590763 
## stopped after 100 iterations
## # weights:  56
## initial  value 331.163435 
## iter  10 value 45.763128
## iter  20 value 29.174052
## iter  30 value 22.347069
## iter  40 value 16.843208
## iter  50 value 15.554057
## iter  60 value 15.069029
## iter  70 value 14.686885
## iter  80 value 14.392322
## iter  90 value 14.005408
## iter 100 value 13.840468
## final  value 13.840468 
## stopped after 100 iterations
## # weights:  12
## initial  value 445.911318 
## iter  10 value 50.664074
## iter  20 value 49.103146
## iter  30 value 48.764748
## final  value 48.764740 
## converged
## # weights:  34
## initial  value 539.171803 
## iter  10 value 71.174738
## iter  20 value 44.888487
## iter  30 value 40.773068
## iter  40 value 39.291940
## iter  50 value 38.948505
## iter  60 value 38.805208
## iter  70 value 38.789757
## iter  80 value 38.777548
## iter  90 value 38.777242
## final  value 38.777240 
## converged
## # weights:  56
## initial  value 300.526929 
## iter  10 value 92.670644
## iter  20 value 45.607100
## iter  30 value 38.482565
## iter  40 value 37.620450
## iter  50 value 37.333679
## iter  60 value 37.305632
## iter  70 value 37.304888
## final  value 37.304880 
## converged
## # weights:  12
## initial  value 342.113667 
## iter  10 value 39.784475
## iter  20 value 37.502481
## iter  30 value 37.235549
## iter  40 value 37.094419
## iter  50 value 36.355221
## iter  60 value 36.235421
## iter  70 value 35.963827
## iter  80 value 35.822521
## iter  90 value 35.822367
## iter 100 value 35.821303
## final  value 35.821303 
## stopped after 100 iterations
## # weights:  34
## initial  value 379.416717 
## iter  10 value 42.835820
## iter  20 value 38.625630
## iter  30 value 33.091455
## iter  40 value 32.424501
## iter  50 value 31.958173
## iter  60 value 31.780375
## iter  70 value 31.571448
## iter  80 value 31.478999
## iter  90 value 31.464766
## iter 100 value 31.375798
## final  value 31.375798 
## stopped after 100 iterations
## # weights:  56
## initial  value 369.465817 
## iter  10 value 36.101780
## iter  20 value 17.316087
## iter  30 value 10.213775
## iter  40 value 7.378137
## iter  50 value 6.626386
## iter  60 value 6.505362
## iter  70 value 6.344177
## iter  80 value 5.833663
## iter  90 value 5.743775
## iter 100 value 5.674803
## final  value 5.674803 
## stopped after 100 iterations
## # weights:  12
## initial  value 321.197058 
## iter  10 value 37.992700
## iter  20 value 35.792204
## iter  30 value 35.389851
## iter  40 value 35.128294
## iter  50 value 34.925662
## iter  60 value 34.886890
## iter  70 value 34.877191
## iter  80 value 34.875236
## iter  90 value 34.873956
## iter 100 value 34.872360
## final  value 34.872360 
## stopped after 100 iterations
## # weights:  34
## initial  value 389.682359 
## iter  10 value 33.677993
## iter  20 value 21.377728
## iter  30 value 14.878733
## iter  40 value 10.094345
## iter  50 value 9.445055
## iter  60 value 9.423292
## iter  70 value 9.419223
## iter  80 value 9.418886
## iter  90 value 9.418811
## iter 100 value 9.418768
## final  value 9.418768 
## stopped after 100 iterations
## # weights:  56
## initial  value 266.773981 
## iter  10 value 23.397408
## iter  20 value 12.453893
## iter  30 value 9.921559
## iter  40 value 9.334908
## iter  50 value 9.183171
## iter  60 value 9.172849
## iter  70 value 9.127395
## iter  80 value 9.044680
## iter  90 value 8.967422
## iter 100 value 8.951409
## final  value 8.951409 
## stopped after 100 iterations
## # weights:  12
## initial  value 320.530859 
## iter  10 value 60.495799
## iter  20 value 46.020850
## iter  30 value 44.525762
## final  value 44.523218 
## converged
## # weights:  34
## initial  value 407.705422 
## iter  10 value 54.162248
## iter  20 value 34.360603
## iter  30 value 34.038423
## iter  40 value 34.035667
## final  value 34.035016 
## converged
## # weights:  56
## initial  value 454.017135 
## iter  10 value 69.049091
## iter  20 value 33.996446
## iter  30 value 31.515000
## iter  40 value 31.231004
## iter  50 value 30.903611
## iter  60 value 30.710978
## iter  70 value 30.627870
## iter  80 value 30.624795
## iter  80 value 30.624795
## iter  80 value 30.624795
## final  value 30.624795 
## converged
## # weights:  12
## initial  value 400.796335 
## iter  10 value 36.480916
## iter  20 value 32.950616
## iter  30 value 32.894397
## iter  40 value 32.885858
## iter  50 value 32.882953
## iter  60 value 32.875174
## iter  70 value 32.873837
## iter  80 value 32.873014
## iter  90 value 32.870765
## iter 100 value 32.499482
## final  value 32.499482 
## stopped after 100 iterations
## # weights:  34
## initial  value 382.700891 
## iter  10 value 35.650965
## iter  20 value 23.665512
## iter  30 value 17.281386
## iter  40 value 10.154502
## iter  50 value 6.686735
## iter  60 value 6.244087
## iter  70 value 6.211796
## iter  80 value 6.196180
## iter  90 value 6.162809
## iter 100 value 6.152142
## final  value 6.152142 
## stopped after 100 iterations
## # weights:  56
## initial  value 297.860371 
## iter  10 value 27.305970
## iter  20 value 15.483042
## iter  30 value 9.690338
## iter  40 value 8.129648
## iter  50 value 6.973134
## iter  60 value 6.906713
## iter  70 value 6.869101
## iter  80 value 6.840714
## iter  90 value 6.822533
## iter 100 value 6.807764
## final  value 6.807764 
## stopped after 100 iterations
## # weights:  12
## initial  value 339.456790 
## iter  10 value 158.046015
## iter  20 value 65.717796
## iter  30 value 46.787679
## iter  40 value 38.271274
## iter  50 value 36.848776
## iter  60 value 36.575094
## iter  70 value 36.226332
## iter  80 value 36.046096
## iter  90 value 36.039025
## iter 100 value 35.937393
## final  value 35.937393 
## stopped after 100 iterations
## # weights:  34
## initial  value 396.446083 
## iter  10 value 49.591717
## iter  20 value 33.545530
## iter  30 value 29.656480
## iter  40 value 27.447190
## iter  50 value 26.319019
## iter  60 value 24.899691
## iter  70 value 24.678813
## iter  80 value 24.516495
## iter  90 value 24.149603
## iter 100 value 23.599254
## final  value 23.599254 
## stopped after 100 iterations
## # weights:  56
## initial  value 325.372656 
## iter  10 value 34.301235
## iter  20 value 20.901145
## iter  30 value 13.020419
## iter  40 value 12.256197
## iter  50 value 12.250246
## iter  60 value 12.250019
## iter  70 value 12.249909
## iter  80 value 12.033268
## iter  90 value 11.818461
## iter 100 value 11.811133
## final  value 11.811133 
## stopped after 100 iterations
## # weights:  12
## initial  value 327.906693 
## iter  10 value 61.219539
## iter  20 value 49.734517
## iter  30 value 49.118216
## iter  40 value 49.101690
## iter  40 value 49.101690
## iter  40 value 49.101690
## final  value 49.101690 
## converged
## # weights:  34
## initial  value 323.934935 
## iter  10 value 46.152240
## iter  20 value 40.840436
## iter  30 value 39.749908
## iter  40 value 39.514367
## iter  50 value 39.426968
## final  value 39.426886 
## converged
## # weights:  56
## initial  value 449.227090 
## iter  10 value 81.409576
## iter  20 value 46.841838
## iter  30 value 43.443981
## iter  40 value 41.657814
## iter  50 value 40.991857
## iter  60 value 40.891983
## iter  70 value 40.873833
## iter  80 value 40.872856
## final  value 40.872681 
## converged
## # weights:  12
## initial  value 326.766631 
## iter  10 value 44.353548
## iter  20 value 37.431520
## iter  30 value 33.556132
## iter  40 value 30.479939
## iter  50 value 30.467985
## iter  60 value 30.463700
## iter  70 value 30.458655
## iter  80 value 30.458144
## iter  90 value 30.457322
## final  value 30.457314 
## converged
## # weights:  34
## initial  value 428.667035 
## iter  10 value 43.686421
## iter  20 value 30.592240
## iter  30 value 26.969605
## iter  40 value 24.266306
## iter  50 value 23.839056
## iter  60 value 22.804244
## iter  70 value 22.668008
## iter  80 value 22.540945
## iter  90 value 22.469185
## iter 100 value 22.427527
## final  value 22.427527 
## stopped after 100 iterations
## # weights:  56
## initial  value 408.874223 
## iter  10 value 34.951565
## iter  20 value 12.870980
## iter  30 value 6.483366
## iter  40 value 6.307095
## iter  50 value 6.206916
## iter  60 value 6.150471
## iter  70 value 6.133738
## iter  80 value 6.117790
## iter  90 value 6.088511
## iter 100 value 6.066790
## final  value 6.066790 
## stopped after 100 iterations
## # weights:  12
## initial  value 321.754451 
## iter  10 value 42.735297
## iter  20 value 36.553903
## iter  30 value 36.425583
## iter  40 value 36.361319
## iter  50 value 36.346991
## iter  60 value 36.339317
## iter  70 value 36.331902
## iter  80 value 36.324998
## iter  90 value 36.321886
## iter 100 value 36.318710
## final  value 36.318710 
## stopped after 100 iterations
## # weights:  34
## initial  value 300.446391 
## iter  10 value 41.961751
## iter  20 value 30.257550
## iter  30 value 28.183516
## iter  40 value 22.689068
## iter  50 value 20.829629
## iter  60 value 19.144823
## iter  70 value 16.434577
## iter  80 value 15.876475
## iter  90 value 15.736186
## iter 100 value 15.618940
## final  value 15.618940 
## stopped after 100 iterations
## # weights:  56
## initial  value 282.021100 
## iter  10 value 32.901459
## iter  20 value 16.152079
## iter  30 value 12.074062
## iter  40 value 10.867879
## iter  50 value 10.565675
## iter  60 value 10.464206
## iter  70 value 10.422227
## iter  80 value 10.405932
## iter  90 value 10.394482
## iter 100 value 10.390740
## final  value 10.390740 
## stopped after 100 iterations
## # weights:  12
## initial  value 338.542762 
## iter  10 value 58.364417
## iter  20 value 47.889755
## iter  30 value 46.959061
## iter  40 value 46.913157
## final  value 46.913156 
## converged
## # weights:  34
## initial  value 389.067606 
## iter  10 value 64.563624
## iter  20 value 47.862307
## iter  30 value 41.700191
## iter  40 value 38.501372
## iter  50 value 37.391377
## iter  60 value 37.049787
## iter  70 value 36.855211
## iter  80 value 36.830464
## iter  90 value 36.830243
## final  value 36.830243 
## converged
## # weights:  56
## initial  value 314.006605 
## iter  10 value 39.786364
## iter  20 value 36.825762
## iter  30 value 36.042459
## iter  40 value 35.790063
## iter  50 value 35.749193
## iter  60 value 35.630582
## iter  70 value 34.800295
## iter  80 value 34.775821
## final  value 34.775630 
## converged
## # weights:  12
## initial  value 317.842554 
## iter  10 value 55.219614
## iter  20 value 42.411436
## iter  30 value 41.698575
## iter  40 value 36.533028
## iter  50 value 36.474701
## iter  60 value 36.466201
## iter  70 value 36.462348
## iter  80 value 36.458830
## iter  90 value 36.457076
## iter 100 value 36.456465
## final  value 36.456465 
## stopped after 100 iterations
## # weights:  34
## initial  value 315.805100 
## iter  10 value 35.696766
## iter  20 value 25.008868
## iter  30 value 19.314407
## iter  40 value 18.470075
## iter  50 value 18.313588
## iter  60 value 18.124856
## iter  70 value 17.847942
## iter  80 value 17.804029
## iter  90 value 17.675273
## iter 100 value 17.511005
## final  value 17.511005 
## stopped after 100 iterations
## # weights:  56
## initial  value 336.033457 
## iter  10 value 29.843576
## iter  20 value 19.062788
## iter  30 value 12.266330
## iter  40 value 9.990162
## iter  50 value 9.510100
## iter  60 value 7.360044
## iter  70 value 5.808873
## iter  80 value 5.238995
## iter  90 value 5.006630
## iter 100 value 3.884940
## final  value 3.884940 
## stopped after 100 iterations
## # weights:  12
## initial  value 373.448951 
## iter  10 value 45.158611
## iter  20 value 44.096037
## iter  30 value 43.343767
## iter  40 value 42.568253
## iter  50 value 42.525430
## final  value 42.525369 
## converged
## # weights:  34
## initial  value 303.311657 
## iter  10 value 39.258404
## iter  20 value 35.907574
## iter  30 value 32.926632
## iter  40 value 31.719819
## iter  50 value 31.054626
## iter  60 value 30.293635
## iter  70 value 28.886476
## iter  80 value 27.435616
## iter  90 value 26.566727
## iter 100 value 26.357336
## final  value 26.357336 
## stopped after 100 iterations
## # weights:  56
## initial  value 297.717687 
## iter  10 value 34.910317
## iter  20 value 19.097253
## iter  30 value 12.162892
## iter  40 value 11.962253
## iter  50 value 11.882074
## iter  60 value 11.825032
## iter  70 value 11.803696
## iter  80 value 11.799958
## iter  90 value 11.789507
## iter 100 value 11.633687
## final  value 11.633687 
## stopped after 100 iterations
## # weights:  12
## initial  value 323.344070 
## iter  10 value 59.196471
## iter  20 value 54.572127
## iter  30 value 54.313230
## final  value 54.265527 
## converged
## # weights:  34
## initial  value 422.110821 
## iter  10 value 46.135214
## iter  20 value 42.100740
## iter  30 value 41.349736
## iter  40 value 40.946203
## iter  50 value 40.871524
## iter  60 value 40.864045
## iter  70 value 40.862853
## iter  80 value 40.859037
## iter  90 value 40.856820
## final  value 40.856812 
## converged
## # weights:  56
## initial  value 306.653715 
## iter  10 value 61.734533
## iter  20 value 46.398463
## iter  30 value 42.976429
## iter  40 value 40.535781
## iter  50 value 38.903458
## iter  60 value 38.436156
## iter  70 value 38.371343
## iter  80 value 37.583306
## iter  90 value 37.167963
## iter 100 value 37.139218
## final  value 37.139218 
## stopped after 100 iterations
## # weights:  12
## initial  value 302.848084 
## iter  10 value 86.222643
## iter  20 value 62.549374
## iter  30 value 50.117327
## iter  40 value 42.790245
## iter  50 value 42.746444
## iter  60 value 42.704572
## iter  70 value 42.700133
## iter  80 value 42.697799
## iter  90 value 42.695362
## iter 100 value 42.693661
## final  value 42.693661 
## stopped after 100 iterations
## # weights:  34
## initial  value 357.848719 
## iter  10 value 50.353275
## iter  20 value 33.751623
## iter  30 value 32.033219
## iter  40 value 29.597684
## iter  50 value 28.331533
## iter  60 value 28.273034
## iter  70 value 28.207846
## iter  80 value 28.122741
## iter  90 value 28.036842
## iter 100 value 27.937599
## final  value 27.937599 
## stopped after 100 iterations
## # weights:  56
## initial  value 401.661057 
## iter  10 value 36.655848
## iter  20 value 16.011617
## iter  30 value 6.895899
## iter  40 value 6.428734
## iter  50 value 6.185441
## iter  60 value 6.111802
## iter  70 value 6.021786
## iter  80 value 5.957063
## iter  90 value 4.807588
## iter 100 value 4.566685
## final  value 4.566685 
## stopped after 100 iterations
## # weights:  12
## initial  value 341.667320 
## iter  10 value 50.797393
## iter  20 value 39.826114
## iter  30 value 39.597097
## iter  40 value 39.548037
## iter  50 value 39.525868
## iter  60 value 39.514783
## iter  70 value 39.503536
## iter  80 value 39.500868
## iter  90 value 39.494332
## iter 100 value 39.491669
## final  value 39.491669 
## stopped after 100 iterations
## # weights:  34
## initial  value 330.883225 
## iter  10 value 33.931619
## iter  20 value 20.963873
## iter  30 value 14.558637
## iter  40 value 13.880806
## iter  50 value 13.768648
## iter  60 value 13.766768
## iter  70 value 13.766501
## iter  80 value 13.766430
## final  value 13.766410 
## converged
## # weights:  56
## initial  value 366.759875 
## iter  10 value 31.998240
## iter  20 value 19.326931
## iter  30 value 13.284252
## iter  40 value 13.139520
## iter  50 value 13.128890
## final  value 13.128861 
## converged
## # weights:  12
## initial  value 360.723364 
## iter  10 value 79.764384
## iter  20 value 58.554909
## iter  30 value 51.915182
## iter  40 value 48.699427
## final  value 48.688574 
## converged
## # weights:  34
## initial  value 421.298943 
## iter  10 value 85.137440
## iter  20 value 41.359032
## iter  30 value 38.548152
## iter  40 value 36.875606
## iter  50 value 36.830442
## final  value 36.830432 
## converged
## # weights:  56
## initial  value 304.019070 
## iter  10 value 43.255238
## iter  20 value 38.254552
## iter  30 value 36.850424
## iter  40 value 36.648594
## iter  50 value 36.550726
## iter  60 value 36.327745
## iter  70 value 36.271464
## iter  80 value 36.254873
## iter  90 value 36.253762
## final  value 36.253760 
## converged
## # weights:  12
## initial  value 328.582102 
## iter  10 value 46.350500
## iter  20 value 40.087911
## iter  30 value 39.780897
## iter  40 value 39.642761
## iter  50 value 39.626044
## iter  60 value 39.620422
## iter  70 value 39.617573
## iter  80 value 39.616729
## iter  90 value 39.616403
## iter 100 value 39.616162
## final  value 39.616162 
## stopped after 100 iterations
## # weights:  34
## initial  value 420.985148 
## iter  10 value 40.782041
## iter  20 value 31.436978
## iter  30 value 29.810226
## iter  40 value 29.752897
## iter  50 value 29.731764
## iter  60 value 29.722300
## iter  70 value 29.708030
## iter  80 value 29.701166
## iter  90 value 29.695962
## iter 100 value 29.687023
## final  value 29.687023 
## stopped after 100 iterations
## # weights:  56
## initial  value 305.807773 
## iter  10 value 32.868515
## iter  20 value 28.672268
## iter  30 value 23.994374
## iter  40 value 22.396665
## iter  50 value 22.173441
## iter  60 value 22.095568
## iter  70 value 21.691679
## iter  80 value 20.198882
## iter  90 value 19.546455
## iter 100 value 19.371765
## final  value 19.371765 
## stopped after 100 iterations
## # weights:  34
## initial  value 326.919377 
## iter  10 value 36.157512
## iter  20 value 30.097667
## iter  30 value 23.574816
## iter  40 value 23.268963
## iter  50 value 23.077785
## iter  60 value 20.578788
## iter  70 value 19.367780
## iter  80 value 19.204978
## iter  90 value 18.867884
## iter 100 value 18.771249
## final  value 18.771249 
## stopped after 100 iterations
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)

resultado_prueba5 <- predict(modelo5, prueba)

Matriz de confusión

mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Class)

mcre5 
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       351         0
##   malignant      5       192
##                                          
##                Accuracy : 0.9909         
##                  95% CI : (0.9788, 0.997)
##     No Information Rate : 0.6496         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.9801         
##                                          
##  Mcnemar's Test P-Value : 0.07364        
##                                          
##             Sensitivity : 0.9860         
##             Specificity : 1.0000         
##          Pos Pred Value : 1.0000         
##          Neg Pred Value : 0.9746         
##              Prevalence : 0.6496         
##          Detection Rate : 0.6405         
##    Detection Prevalence : 0.6405         
##       Balanced Accuracy : 0.9930         
##                                          
##        'Positive' Class : benign         
## 
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Class)

mcrp5
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        86         4
##   malignant      2        43
##                                           
##                Accuracy : 0.9556          
##                  95% CI : (0.9058, 0.9835)
##     No Information Rate : 0.6519          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9011          
##                                           
##  Mcnemar's Test P-Value : 0.6831          
##                                           
##             Sensitivity : 0.9773          
##             Specificity : 0.9149          
##          Pos Pred Value : 0.9556          
##          Neg Pred Value : 0.9556          
##              Prevalence : 0.6519          
##          Detection Rate : 0.6370          
##    Detection Prevalence : 0.6667          
##       Balanced Accuracy : 0.9461          
##                                           
##        'Positive' Class : benign          
## 

Modelo random forest

modelo6 <- train(Class ~ ., data = entrenamiento, method = "rf", preProcess = c("scale", "center"), trControl = trainControl(method="cv", number = 10), tuneGrid = expand.grid(mtry =c(2,4,6)))


resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)

Matriz de confusión

mcre6 <- confusionMatrix(resultado_entrenamiento6, entrenamiento$Class)
mcre6 
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       356         0
##   malignant      0       192
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9933, 1)
##     No Information Rate : 0.6496     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.6496     
##          Detection Rate : 0.6496     
##    Detection Prevalence : 0.6496     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : benign     
## 
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$Class)
mcrp6
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        85         1
##   malignant      3        46
##                                           
##                Accuracy : 0.9704          
##                  95% CI : (0.9259, 0.9919)
##     No Information Rate : 0.6519          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9354          
##                                           
##  Mcnemar's Test P-Value : 0.6171          
##                                           
##             Sensitivity : 0.9659          
##             Specificity : 0.9787          
##          Pos Pred Value : 0.9884          
##          Neg Pred Value : 0.9388          
##              Prevalence : 0.6519          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.6370          
##       Balanced Accuracy : 0.9723          
##                                           
##        'Positive' Class : benign          
## 

Resumen de resultados

resumen <- data.frame(
  
  
  "svmLinear" = c(mcre1$overall["Accuracy"], mcre1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "NeuralNet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "RandomForest" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)

rownames(resumen) <- c("Precision de entrenamiento", "Precision de prueba")
resumen
##                            svmLinear svmRadial   svmPoly     rpart NeuralNet
## Precision de entrenamiento 0.9708029 0.9963504 0.9708029 0.9635036 0.9908759
## Precision de prueba        0.9708029 0.9555556 0.9777778 0.9555556 0.9555556
##                            RandomForest
## Precision de entrenamiento    1.0000000
## Precision de prueba           0.9703704

Conclusión

El modelo que utiliza la técnica de redes neuronales muestra claros signos de sobreajuste. Este fenómeno se hace evidente al observar que el modelo alcanza niveles de precisión excepcionalmente altos durante la etapa de entrenamiento, pero dicha precisión experimenta una notable reducción cuando el modelo es sometido a pruebas con datos nuevos. Este comportamiento indica que, aunque el modelo es muy eficaz para predecir resultados dentro del conjunto de datos con el que fue entrenado, su capacidad para generalizar y predecir con precisión sobre conjuntos de datos no vistos es limitada.

De acuerdo con el análisis detallado de los resultados obtenidos de diferentes modelos, se ha determinado que los modelos basados en Máquina de Vectores de Soporte Lineal y Máquina de Vectores de Soporte con Kernel Polinómico (Máquina de Vectores Poly) destacan sobre el resto. Estos modelos no solo han demostrado tener una capacidad superior para mantener una alta precisión tanto en las fases de entrenamiento como de prueba, sino que también han evitado el problema del sobreajuste. Este equilibrio entre la precisión en el entrenamiento y en la prueba sugiere que ambos modelos tienen una robusta capacidad de generalización, haciéndolos adecuados para predecir resultados de manera confiable en datos nuevos o desconocidos. La eficacia de estos modelos en mantener la consistencia de su rendimiento a través de diferentes conjuntos de datos resalta su valor y los posiciona como las opciones preferentes para tareas de predicción en este contexto.

---
title: "Machine Learning - Breast Cancer"
author: "Gabriel Medina - A01275763"
date: "2024-02-20"
output:
  html_document:
    code_folding: hide
    toc: yes
    toc_float: yes
    code_download: yes
    theme: spacelab
    highlight: tango
  pdf_document:
    toc: yes
---

![](/Users/gabrielmedina/Downloads/M2/cancer.gif)


## <span style="color: Purple;"> Librerías</span>


```{r}
library(caret) # Model training (train), data partitioning and confusionMatrix
library(ggplot2) # Plotting
library(datasets) # Base R example datasets (original note mentioned "Iris"; this report uses BreastCancer from mlbench)
library(lattice) # Plotting
library(DataExplorer) # Exploratory plots: plot_missing, plot_histogram, plot_correlation
library(mlbench) # Provides the BreastCancer dataset
```


## <span style="color: Purple;"> Crear base de datos</span>


```{r}

# Load the BreastCancer dataset that ships with mlbench and keep a plain
# data.frame copy (`df`) that the rest of the report works on.
data("BreastCancer", package = "mlbench")
df <- data.frame(BreastCancer)
```

## <span style="color: Purple;">Análisis exploratorio</span>
```{r}

# Quick look at the raw data: per-column summary and structure.
summary(df)
str(df)

# Data cleaning ----

# Drop the sample identifier so it is never used as a predictor.
df$Id <- NULL

# Convert the nine score columns to numeric *through their labels*.
# as.numeric() applied directly to a factor returns the internal level codes,
# not the printed values, so a column whose levels are not exactly 1..10 would
# be silently mis-coded.
# NOTE(review): Mitoses appears to have no "9" level in mlbench, which would
# turn the score "10" into 9 -- confirm with levels(BreastCancer$Mitoses).
columnas_numericas <- c(
  "Cl.thickness", "Cell.size", "Cell.shape", "Marg.adhesion", "Epith.c.size",
  "Bare.nuclei", "Bl.cromatin", "Normal.nucleoli", "Mitoses"
)
df[columnas_numericas] <- lapply(
  df[columnas_numericas],
  function(columna) as.numeric(as.character(columna))
)

# The outcome is kept as a factor (it is the response of every model below).
df$Class <- as.factor(df$Class)

# Remove the rows with missing values (summary() reports 16 NAs in Bare.nuclei).
df <- na.omit(df)

# Exploratory plots on the cleaned data; plot_missing() now serves as a check
# that no missing values remain.
plot_missing(df)
plot_histogram(df)
plot_correlation(df)
```

La variable objetivo (`Class`) debe ser un factor para que `caret` entrene modelos de clasificación.

## <span style="color: Purple;"> Partición de datos</span>

```{r}
# Fix the RNG seed so the split (and the resampling that follows) is
# reproducible when the document is knitted top to bottom.
set.seed(123)

# Stratified split on the outcome: 80% of the rows go to training,
# the remaining 20% are held out for testing.
renglones_entrenamiento <- createDataPartition(
  y = df[["Class"]],
  p = 0.8,
  list = FALSE
)
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
```


## <span style="color: Purple;"> Modelo svmlineal</span>

```{r}

# Linear-kernel SVM. Predictors are scaled and centred, performance is
# estimated with 10-fold cross-validation, and the cost is fixed at C = 1
# (svmLinear only exposes C as a tuning parameter).
modelo1 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento1 <- predict(modelo1, newdata = entrenamiento)
resultado_prueba1 <- predict(modelo1, newdata = prueba)


```

#### <span style="color: Purple;"> Matriz de confusión</span>

```{r}

# Confusion matrix of the linear SVM on the training set (in-sample fit).
# NOTE(review): no `positive` class is given, so caret uses the first factor
# level; the knitted output reports 'Positive' Class : benign.
mcre1 <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento[["Class"]]
)
mcre1

# Confusion matrix of the linear SVM on the held-out test set.
mcrp1 <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba[["Class"]]
)
mcrp1
```

## <span style="color: Purple;"> Modelo svmradial</span>
```{r}


# Radial-kernel SVM with scaled/centred predictors and 10-fold CV.
# The grid has a single row (sigma = 1, C = 1), so cross-validation only
# estimates performance here; nothing is actually tuned. The original author
# flagged these values as ones to change.
modelo2 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento2 <- predict(modelo2, newdata = entrenamiento)
resultado_prueba2 <- predict(modelo2, newdata = prueba)


```

#### <span style="color: Purple;"> Matriz de confusión</span>

```{r}
# Confusion matrix of the radial SVM on the training set (in-sample fit).
mcre2 <- confusionMatrix(
  data = resultado_entrenamiento2,
  reference = entrenamiento[["Class"]]
)
mcre2

# Confusion matrix of the radial SVM on the held-out test set.
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba[["Class"]]
)
mcrp2
```



## <span style="color: Purple;"> Modelo svmPoly</span>

```{r}


# Polynomial-kernel SVM with scaled/centred predictors and 10-fold CV.
# The grid has a single row (degree = 1, scale = 1, C = 1), so nothing is
# tuned; the original author flagged these values as ones to change.
modelo3 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento3 <- predict(modelo3, newdata = entrenamiento)
resultado_prueba3 <- predict(modelo3, newdata = prueba)


```


#### <span style="color: Purple;"> Matriz de confusión</span>

```{r}
# Confusion matrix of the polynomial SVM on the training set (in-sample fit).
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento[["Class"]]
)
mcre3

# Confusion matrix of the polynomial SVM on the held-out test set.
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba[["Class"]]
)
mcrp3
```


## <span style="color: Purple;"> Modelo árbol de decisión</span>

```{r}


# Decision tree (rpart) with scaled/centred predictors and 10-fold CV.
# tuneLength = 10 lets caret evaluate ten candidate values of the method's
# tuning parameter and keep the best one.
modelo4 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento4 <- predict(modelo4, newdata = entrenamiento)
resultado_prueba4 <- predict(modelo4, newdata = prueba)


```

#### <span style="color: Purple;"> Matriz de confusión</span>

```{r}
# Confusion matrix of the decision tree on the training set (in-sample fit).
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento[["Class"]]
)
mcre4

# Confusion matrix of the decision tree on the held-out test set.
mcrp4 <- confusionMatrix(
  data = resultado_prueba4,
  reference = prueba[["Class"]]
)
mcrp4
```


## <span style="color: Purple;"> Modelo neural net</span>
```{r}

# Single-hidden-layer neural network (nnet) with scaled/centred predictors
# and 10-fold CV over caret's default tuning grid.
# trace = FALSE is forwarded to nnet() and silences the per-iteration
# optimisation log, which otherwise prints hundreds of lines into the knitted
# report (one log per resample and candidate). It does not change the fit.
# NOTE(review): the earlier knitted log shows many fits ending with
# "stopped after 100 iterations"; consider raising `maxit` if convergence
# matters.
modelo5 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)

resultado_prueba5 <- predict(modelo5, prueba)


```

#### <span style="color: Purple;"> Matriz de confusión</span>

```{r}

# Confusion matrix of the neural network on the training set (in-sample fit).
mcre5 <- confusionMatrix(
  data = resultado_entrenamiento5,
  reference = entrenamiento[["Class"]]
)
mcre5

# Confusion matrix of the neural network on the held-out test set.
mcrp5 <- confusionMatrix(
  data = resultado_prueba5,
  reference = prueba[["Class"]]
)
mcrp5
```


## <span style="color: Purple;"> Modelo random forest</span>

```{r}

# Random forest with scaled/centred predictors and 10-fold CV.
# Three values of mtry (number of predictors tried at each split) are
# compared: 2, 4 and 6.
modelo6 <- train(
  Class ~ .,
  data = entrenamiento,
  method = "rf",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = expand.grid(mtry = c(2, 4, 6))
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento6 <- predict(modelo6, newdata = entrenamiento)
resultado_prueba6 <- predict(modelo6, newdata = prueba)


```

#### <span style="color: Purple;"> Matriz de confusión</span>

```{r}
# Confusion matrix of the random forest on the training set (in-sample fit).
mcre6 <- confusionMatrix(
  data = resultado_entrenamiento6,
  reference = entrenamiento[["Class"]]
)
mcre6

# Confusion matrix of the random forest on the held-out test set.
mcrp6 <- confusionMatrix(
  data = resultado_prueba6,
  reference = prueba[["Class"]]
)
mcrp6
```



## <span style="color: Purple;"> Resumen de resultados</span>

```{r}


# Side-by-side accuracy of the six models. For each column the first value is
# the accuracy on the training set (mcre*) and the second the accuracy on the
# held-out test set (mcrp*).
resumen <- data.frame(
  # Fixed: the test entry previously repeated mcre1 (training accuracy), so
  # the svmLinear column showed the same number in both rows.
  "svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "NeuralNet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "RandomForest" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)

# Label the two rows (training accuracy, test accuracy) and print the table.
rownames(resumen) <- c("Precision de entrenamiento", "Precision de prueba")
resumen
```


## <span style="color: Purple;"> Conclusión</span>


El modelo que utiliza la técnica de redes neuronales muestra claros signos de sobreajuste. Este fenómeno se hace evidente al observar que el modelo alcanza niveles de precisión excepcionalmente altos durante la etapa de entrenamiento, pero dicha precisión experimenta una notable reducción cuando el modelo es sometido a pruebas con datos nuevos. Este comportamiento indica que, aunque el modelo es muy eficaz para predecir resultados dentro del conjunto de datos con el que fue entrenado, su capacidad para generalizar y predecir con precisión sobre conjuntos de datos no vistos es limitada.

De acuerdo con el análisis detallado de los resultados obtenidos de diferentes modelos, se ha determinado que los modelos basados en Máquina de Vectores de Soporte Lineal y Máquina de Vectores de Soporte con Kernel Polinómico (Máquina de Vectores Poly) destacan sobre el resto. Estos modelos no solo han demostrado tener una capacidad superior para mantener una alta precisión tanto en las fases de entrenamiento como de prueba, sino que también han evitado el problema del sobreajuste. Este equilibrio entre la precisión en el entrenamiento y en la prueba sugiere que ambos modelos tienen una robusta capacidad de generalización, haciéndolos adecuados para predecir resultados de manera confiable en datos nuevos o desconocidos. La eficacia de estos modelos en mantener la consistencia de su rendimiento a través de diferentes conjuntos de datos resalta su valor y los posiciona como las opciones preferentes para tareas de predicción en este contexto.