Teoría

El paquete CARET (Classification and Regression Training) ofrece una amplia variedad de algoritmos para el aprendizaje automático.

Instalar paquetes y llamar librerías

library(caret) # Algoritmos de aprendizaje automatico
## Loading required package: ggplot2
## Loading required package: lattice
library(datasets) # Usar la base de datos "Iris"
library(ggplot2) # Gráficar con un mejor diseño
library(lattice) # Crear gráficos
library(DataExplorer)

Crear base de datos

# Working copy of the built-in iris data set as a data frame.
df <- as.data.frame(iris)

Análisis exploratorio

# Per-column summary: quartiles and mean for numeric columns, counts per level for factors.
summary(df)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Compact view of the data frame's structure: dimensions, column types and first values.
str(df)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# create_report(df)

Nota: La variable que queremos predecir debe tener formato de factor

Partir los datos 80/20

# Use 80 percent of the rows for training and the remaining 20 percent for testing.
# The fixed seed makes the random split reproducible; createDataPartition()
# draws the sample within each level of Species so class proportions are kept.
set.seed(123)
renglones_entrenamiento <- createDataPartition(df$Species, p = 0.8, list = FALSE)
# Subset df (not iris) so the rows come from the same object the indices
# were computed on; the split stays correct even if df is modified upstream.
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]

Distintos tipos de métodos para modelar

Los métodos más utilizados para modelar en aprendizaje automático son:

  • SVM: Support Vector Machine o Máquina de Vectores de Soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc.
  • Árbol de Decisión: rpart.
  • Redes Neuronales: nnet.
  • Random Forest o Bosques Aleatorios: rf.

1. Modelo con el método svmLinear

# Linear-kernel SVM. Predictors are scaled and centered, performance is
# estimated with 10-fold cross-validation, and the cost is pinned to C = 1.
modeloLinear <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1) # the only tuning column supplied for svmLinear
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento1 <- predict(modeloLinear, newdata = entrenamiento)
resultado_prueba1 <- predict(modeloLinear, newdata = prueba)

# Confusion matrix: training-set predictions versus the true species.
mcre <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento[["Species"]]
)
mcre # display the training-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix: test-set predictions versus the true species.
mcrp <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba[["Species"]]
)
mcrp # display the test-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

2. Modelo con el método svmRadial

# Radial-kernel SVM with the same preprocessing and 10-fold cross-validation;
# the tuning grid is a single point (sigma = 1, C = 1).
modeloRadial <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento2 <- predict(modeloRadial, newdata = entrenamiento)
resultado_prueba2 <- predict(modeloRadial, newdata = prueba)

# Confusion matrix: training-set predictions versus the true species.
mcre2 <- confusionMatrix(
  data = resultado_entrenamiento2,
  reference = entrenamiento[["Species"]]
)
mcre2 # display the training-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix: test-set predictions versus the true species.
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba[["Species"]]
)
mcrp2 # display the test-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

3. Modelo con el método svmPoly

# Polynomial-kernel SVM with the same preprocessing and 10-fold cross-validation;
# the tuning grid is a single point (degree = 1, scale = 1, C = 1).
modeloPoly <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento3 <- predict(modeloPoly, newdata = entrenamiento)
resultado_prueba3 <- predict(modeloPoly, newdata = prueba)

# Confusion matrix: training-set predictions versus the true species.
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento[["Species"]]
)
mcre3 # display the training-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix: test-set predictions versus the true species.
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba[["Species"]]
)
mcrp3 # display the test-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

4. Modelo con el método rpart

# Decision tree (rpart) with the same preprocessing and 10-fold cross-validation.
# No explicit grid is given: tuneLength = 10 asks caret to evaluate ten
# candidate tuning values.
modeloRpart <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Class predictions on the training rows and on the held-out test rows.
resultado_entrenamiento4 <- predict(modeloRpart, newdata = entrenamiento)
resultado_prueba4 <- predict(modeloRpart, newdata = prueba)

# Confusion matrix: training-set predictions versus the true species.
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento[["Species"]]
)
mcre4 # display the training-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         3
##   virginica       0          1        37
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           0.9250
## Specificity                 1.0000            0.9625           0.9875
## Pos Pred Value              1.0000            0.9286           0.9737
## Neg Pred Value              1.0000            0.9872           0.9634
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3083
## Detection Prevalence        0.3333            0.3500           0.3167
## Balanced Accuracy           1.0000            0.9688           0.9563
# Confusion matrix: test-set predictions versus the true species.
mcrp4 <- confusionMatrix(
  data = resultado_prueba4,
  reference = prueba[["Species"]]
)
mcrp4 # display the test-set confusion matrix and statistics
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

5. Modelo con el método nnet

# Neural network (nnet) with scaled and centered predictors and 10-fold
# cross-validation; caret's default tuning grid is used.
# trace = FALSE is forwarded through train()'s `...` to nnet() and silences
# the per-iteration optimizer log, which otherwise prints one block for every
# fold/parameter combination. It only affects printing, not the fitted model.
modeloNnet <- train(
  Species ~ .,
  data = entrenamiento,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)
## # weights:  11
## initial  value 130.530132 
## iter  10 value 50.031494
## iter  20 value 48.622939
## iter  30 value 46.051782
## iter  40 value 45.435982
## iter  50 value 45.023331
## iter  60 value 41.544637
## iter  70 value 18.345218
## iter  80 value 4.630621
## iter  90 value 3.573146
## iter 100 value 2.696218
## final  value 2.696218 
## stopped after 100 iterations
## # weights:  27
## initial  value 132.517409 
## iter  10 value 22.263231
## iter  20 value 2.574680
## iter  30 value 0.008513
## final  value 0.000051 
## converged
## # weights:  43
## initial  value 136.160730 
## iter  10 value 3.642258
## iter  20 value 0.051614
## iter  30 value 0.013220
## iter  40 value 0.001249
## final  value 0.000086 
## converged
## # weights:  11
## initial  value 124.472165 
## iter  10 value 57.985437
## iter  20 value 43.232595
## final  value 43.170440 
## converged
## # weights:  27
## initial  value 118.611044 
## iter  10 value 30.413305
## iter  20 value 21.077103
## iter  30 value 20.192922
## iter  40 value 20.153936
## final  value 20.153924 
## converged
## # weights:  43
## initial  value 131.301286 
## iter  10 value 26.646865
## iter  20 value 17.682102
## iter  30 value 17.633586
## iter  40 value 17.623573
## iter  50 value 17.364993
## iter  60 value 17.295129
## iter  70 value 17.290694
## final  value 17.290666 
## converged
## # weights:  11
## initial  value 115.622911 
## iter  10 value 33.350769
## iter  20 value 4.676969
## iter  30 value 3.131052
## iter  40 value 2.922591
## iter  50 value 2.825976
## iter  60 value 2.769974
## iter  70 value 2.741299
## iter  80 value 2.741136
## iter  90 value 2.739093
## final  value 2.739035 
## converged
## # weights:  27
## initial  value 139.822975 
## iter  10 value 37.447376
## iter  20 value 1.445699
## iter  30 value 0.316497
## iter  40 value 0.287713
## iter  50 value 0.260591
## iter  60 value 0.236249
## iter  70 value 0.224761
## iter  80 value 0.215415
## iter  90 value 0.194816
## iter 100 value 0.189471
## final  value 0.189471 
## stopped after 100 iterations
## # weights:  43
## initial  value 123.298044 
## iter  10 value 4.177632
## iter  20 value 0.257205
## iter  30 value 0.224601
## iter  40 value 0.200241
## iter  50 value 0.193031
## iter  60 value 0.182082
## iter  70 value 0.164800
## iter  80 value 0.149792
## iter  90 value 0.144373
## iter 100 value 0.142810
## final  value 0.142810 
## stopped after 100 iterations
## # weights:  11
## initial  value 123.243079 
## iter  10 value 49.923348
## iter  20 value 49.909994
## iter  30 value 49.907880
## final  value 49.906719 
## converged
## # weights:  27
## initial  value 117.894759 
## iter  10 value 9.481781
## iter  20 value 0.026637
## iter  30 value 0.001156
## final  value 0.000052 
## converged
## # weights:  43
## initial  value 131.870976 
## iter  10 value 17.010430
## iter  20 value 0.698814
## iter  30 value 0.001401
## final  value 0.000067 
## converged
## # weights:  11
## initial  value 141.804121 
## iter  10 value 63.315182
## iter  20 value 44.532148
## iter  30 value 42.998412
## final  value 42.994034 
## converged
## # weights:  27
## initial  value 129.180442 
## iter  10 value 44.217928
## iter  20 value 19.729677
## iter  30 value 18.527378
## iter  40 value 18.411074
## iter  50 value 18.393711
## iter  60 value 18.393129
## final  value 18.393125 
## converged
## # weights:  43
## initial  value 143.533117 
## iter  10 value 21.063126
## iter  20 value 17.843661
## iter  30 value 17.106737
## iter  40 value 16.985544
## iter  50 value 16.981278
## iter  60 value 16.980626
## final  value 16.980585 
## converged
## # weights:  11
## initial  value 123.091645 
## iter  10 value 49.148390
## iter  20 value 35.943210
## iter  30 value 10.736283
## iter  40 value 2.021433
## iter  50 value 1.687393
## iter  60 value 1.640809
## iter  70 value 1.636953
## iter  80 value 1.613389
## iter  90 value 1.611928
## iter 100 value 1.611136
## final  value 1.611136 
## stopped after 100 iterations
## # weights:  27
## initial  value 113.416728 
## iter  10 value 6.236444
## iter  20 value 0.187917
## iter  30 value 0.166748
## iter  40 value 0.155642
## iter  50 value 0.144249
## iter  60 value 0.141208
## iter  70 value 0.138463
## iter  80 value 0.136774
## iter  90 value 0.134567
## iter 100 value 0.132971
## final  value 0.132971 
## stopped after 100 iterations
## # weights:  43
## initial  value 124.153763 
## iter  10 value 6.673362
## iter  20 value 0.166533
## iter  30 value 0.154159
## iter  40 value 0.149227
## iter  50 value 0.136832
## iter  60 value 0.125718
## iter  70 value 0.121478
## iter  80 value 0.115540
## iter  90 value 0.113390
## iter 100 value 0.110992
## final  value 0.110992 
## stopped after 100 iterations
## # weights:  11
## initial  value 128.347385 
## iter  10 value 55.157651
## iter  20 value 47.800562
## iter  30 value 47.763719
## iter  40 value 47.763542
## iter  50 value 47.762534
## final  value 47.762465 
## converged
## # weights:  27
## initial  value 115.590774 
## iter  10 value 5.054265
## iter  20 value 1.048058
## iter  30 value 0.000979
## final  value 0.000072 
## converged
## # weights:  43
## initial  value 123.951869 
## iter  10 value 13.178443
## iter  20 value 0.965118
## iter  30 value 0.002392
## final  value 0.000078 
## converged
## # weights:  11
## initial  value 123.195822 
## iter  10 value 53.656490
## iter  20 value 43.803131
## iter  30 value 43.734766
## final  value 43.734347 
## converged
## # weights:  27
## initial  value 123.651803 
## iter  10 value 29.880588
## iter  20 value 19.921143
## iter  30 value 19.707388
## iter  40 value 19.705704
## final  value 19.705624 
## converged
## # weights:  43
## initial  value 148.336280 
## iter  10 value 27.474145
## iter  20 value 18.301737
## iter  30 value 18.138015
## iter  40 value 18.086240
## iter  50 value 18.084155
## iter  60 value 18.083934
## final  value 18.083909 
## converged
## # weights:  11
## initial  value 122.563728 
## iter  10 value 32.122176
## iter  20 value 10.269949
## iter  30 value 4.526292
## iter  40 value 3.900620
## iter  50 value 3.805816
## iter  60 value 3.743349
## iter  70 value 3.733207
## iter  80 value 3.721238
## iter  90 value 3.713938
## iter 100 value 3.705684
## final  value 3.705684 
## stopped after 100 iterations
## # weights:  27
## initial  value 130.631378 
## iter  10 value 4.944652
## iter  20 value 0.903581
## iter  30 value 0.602599
## iter  40 value 0.449328
## iter  50 value 0.416076
## iter  60 value 0.405323
## iter  70 value 0.397568
## iter  80 value 0.392801
## iter  90 value 0.386606
## iter 100 value 0.380965
## final  value 0.380965 
## stopped after 100 iterations
## # weights:  43
## initial  value 152.884265 
## iter  10 value 11.737646
## iter  20 value 1.402922
## iter  30 value 0.553654
## iter  40 value 0.456488
## iter  50 value 0.433353
## iter  60 value 0.391721
## iter  70 value 0.350673
## iter  80 value 0.322382
## iter  90 value 0.309362
## iter 100 value 0.302224
## final  value 0.302224 
## stopped after 100 iterations
## # weights:  11
## initial  value 133.677265 
## iter  10 value 49.425529
## iter  20 value 45.125104
## iter  30 value 24.714814
## iter  40 value 6.951374
## iter  50 value 3.962940
## iter  60 value 3.585057
## iter  70 value 2.556588
## iter  80 value 2.219301
## iter  90 value 2.033936
## iter 100 value 2.011517
## final  value 2.011517 
## stopped after 100 iterations
## # weights:  27
## initial  value 120.219437 
## iter  10 value 20.105178
## iter  20 value 0.691846
## iter  30 value 0.000424
## final  value 0.000094 
## converged
## # weights:  43
## initial  value 130.013247 
## iter  10 value 6.990719
## iter  20 value 0.117056
## final  value 0.000078 
## converged
## # weights:  11
## initial  value 122.587894 
## iter  10 value 55.646479
## iter  20 value 44.073616
## iter  30 value 44.056707
## final  value 44.056649 
## converged
## # weights:  27
## initial  value 122.488484 
## iter  10 value 30.042105
## iter  20 value 22.364237
## iter  30 value 21.402694
## iter  40 value 21.391770
## final  value 21.391728 
## converged
## # weights:  43
## initial  value 151.848122 
## iter  10 value 27.150882
## iter  20 value 20.889994
## iter  30 value 19.061592
## iter  40 value 18.857339
## iter  50 value 18.636402
## iter  60 value 18.597842
## iter  70 value 18.581420
## final  value 18.581304 
## converged
## # weights:  11
## initial  value 125.447189 
## iter  10 value 42.432302
## iter  20 value 14.708081
## iter  30 value 5.928158
## iter  40 value 4.717183
## iter  50 value 4.261072
## iter  60 value 3.990872
## iter  70 value 3.894029
## iter  80 value 3.877352
## iter  90 value 3.868847
## iter 100 value 3.865924
## final  value 3.865924 
## stopped after 100 iterations
## # weights:  27
## initial  value 141.522247 
## iter  10 value 19.693351
## iter  20 value 2.060082
## iter  30 value 0.713635
## iter  40 value 0.684010
## iter  50 value 0.651024
## iter  60 value 0.599068
## iter  70 value 0.534727
## iter  80 value 0.525302
## iter  90 value 0.477461
## iter 100 value 0.468105
## final  value 0.468105 
## stopped after 100 iterations
## # weights:  43
## initial  value 117.492171 
## iter  10 value 5.474776
## iter  20 value 0.633193
## iter  30 value 0.523049
## iter  40 value 0.506835
## iter  50 value 0.486677
## iter  60 value 0.470314
## iter  70 value 0.423468
## iter  80 value 0.413761
## iter  90 value 0.406423
## iter 100 value 0.383741
## final  value 0.383741 
## stopped after 100 iterations
## # weights:  11
## initial  value 128.494859 
## iter  10 value 67.868204
## iter  20 value 40.370984
## iter  30 value 8.030160
## iter  40 value 3.602779
## iter  50 value 3.354454
## iter  60 value 3.245703
## iter  70 value 3.148407
## iter  80 value 3.017263
## iter  90 value 2.916368
## iter 100 value 2.697585
## final  value 2.697585 
## stopped after 100 iterations
## # weights:  27
## initial  value 121.387618 
## iter  10 value 17.333188
## iter  20 value 6.562404
## iter  30 value 4.218606
## iter  40 value 0.023796
## iter  50 value 0.013835
## iter  60 value 0.007181
## iter  70 value 0.000265
## final  value 0.000094 
## converged
## # weights:  43
## initial  value 131.764022 
## iter  10 value 6.923964
## iter  20 value 0.585918
## iter  30 value 0.001510
## final  value 0.000094 
## converged
## # weights:  11
## initial  value 117.924376 
## iter  10 value 59.153858
## iter  20 value 45.980503
## iter  30 value 43.965813
## final  value 43.965807 
## converged
## # weights:  27
## initial  value 122.524569 
## iter  10 value 28.252379
## iter  20 value 20.308998
## iter  30 value 19.983255
## iter  40 value 19.969846
## final  value 19.969845 
## converged
## # weights:  43
## initial  value 175.722543 
## iter  10 value 24.152694
## iter  20 value 19.351652
## iter  30 value 18.570128
## iter  40 value 18.540253
## iter  50 value 18.531786
## iter  60 value 18.531273
## final  value 18.531272 
## converged
## # weights:  11
## initial  value 125.626851 
## iter  10 value 50.695359
## iter  20 value 28.615271
## iter  30 value 12.424432
## iter  40 value 5.029030
## iter  50 value 4.166888
## iter  60 value 3.979676
## iter  70 value 3.882211
## iter  80 value 3.873043
## iter  90 value 3.872674
## iter 100 value 3.871442
## final  value 3.871442 
## stopped after 100 iterations
## # weights:  27
## initial  value 123.025871 
## iter  10 value 27.020381
## iter  20 value 2.694706
## iter  30 value 1.092737
## iter  40 value 0.872715
## iter  50 value 0.758401
## iter  60 value 0.630276
## iter  70 value 0.571755
## iter  80 value 0.515264
## iter  90 value 0.475373
## iter 100 value 0.452081
## final  value 0.452081 
## stopped after 100 iterations
## # weights:  43
## initial  value 134.385829 
## iter  10 value 5.396493
## iter  20 value 1.952502
## iter  30 value 0.810078
## iter  40 value 0.740163
## iter  50 value 0.700944
## iter  60 value 0.648312
## iter  70 value 0.581811
## iter  80 value 0.540064
## iter  90 value 0.513923
## iter 100 value 0.483298
## final  value 0.483298 
## stopped after 100 iterations
## # weights:  11
## initial  value 124.033991 
## iter  10 value 53.598901
## iter  20 value 53.094417
## iter  30 value 51.710795
## iter  40 value 44.732729
## iter  50 value 17.279075
## iter  60 value 6.529735
## iter  70 value 3.465736
## iter  80 value 3.270944
## iter  90 value 3.153543
## iter 100 value 3.002420
## final  value 3.002420 
## stopped after 100 iterations
## # weights:  27
## initial  value 126.207925 
## iter  10 value 6.867316
## iter  20 value 0.342203
## iter  30 value 0.000889
## final  value 0.000071 
## converged
## # weights:  43
## initial  value 146.268437 
## iter  10 value 7.061711
## iter  20 value 1.073309
## iter  30 value 0.000467
## final  value 0.000066 
## converged
## # weights:  11
## initial  value 120.866935 
## iter  10 value 85.950877
## iter  20 value 60.671406
## iter  30 value 50.749580
## iter  40 value 43.846120
## final  value 43.846095 
## converged
## # weights:  27
## initial  value 126.514320 
## iter  10 value 46.451931
## iter  20 value 22.288378
## iter  30 value 21.611509
## iter  40 value 21.142364
## iter  50 value 20.374688
## iter  60 value 19.975509
## iter  70 value 19.860029
## final  value 19.859991 
## converged
## # weights:  43
## initial  value 113.521981 
## iter  10 value 27.307122
## iter  20 value 19.069629
## iter  30 value 18.496103
## iter  40 value 18.414947
## iter  50 value 18.412091
## iter  60 value 18.411932
## final  value 18.411927 
## converged
## # weights:  11
## initial  value 119.931364 
## iter  10 value 33.212563
## iter  20 value 6.825543
## iter  30 value 4.153607
## iter  40 value 3.996719
## iter  50 value 3.936301
## iter  60 value 3.900913
## iter  70 value 3.868653
## iter  80 value 3.868193
## iter  90 value 3.864798
## iter 100 value 3.860658
## final  value 3.860658 
## stopped after 100 iterations
## # weights:  27
## initial  value 125.980953 
## iter  10 value 3.828376
## iter  20 value 1.757039
## iter  30 value 1.084888
## iter  40 value 0.779504
## iter  50 value 0.534913
## iter  60 value 0.521705
## iter  70 value 0.515783
## iter  80 value 0.504124
## iter  90 value 0.485201
## iter 100 value 0.483827
## final  value 0.483827 
## stopped after 100 iterations
## # weights:  43
## initial  value 143.013185 
## iter  10 value 7.195354
## iter  20 value 1.984745
## iter  30 value 0.713672
## iter  40 value 0.552459
## iter  50 value 0.437450
## iter  60 value 0.403627
## iter  70 value 0.363382
## iter  80 value 0.356303
## iter  90 value 0.346628
## iter 100 value 0.337926
## final  value 0.337926 
## stopped after 100 iterations
## # weights:  11
## initial  value 119.603843 
## iter  10 value 66.519353
## iter  20 value 48.085237
## iter  30 value 10.691129
## iter  40 value 4.343493
## iter  50 value 3.486657
## iter  60 value 2.937962
## iter  70 value 2.185862
## iter  80 value 1.910157
## iter  90 value 1.802781
## iter 100 value 1.791733
## final  value 1.791733 
## stopped after 100 iterations
## # weights:  27
## initial  value 120.493313 
## iter  10 value 14.568437
## iter  20 value 1.413139
## iter  30 value 0.002421
## final  value 0.000049 
## converged
## # weights:  43
## initial  value 131.990396 
## iter  10 value 3.607345
## iter  20 value 0.869522
## iter  30 value 0.000776
## final  value 0.000079 
## converged
## # weights:  11
## initial  value 127.213395 
## iter  10 value 58.997762
## iter  20 value 44.424763
## final  value 43.139243 
## converged
## # weights:  27
## initial  value 117.195869 
## iter  10 value 28.619024
## iter  20 value 19.206476
## iter  30 value 18.621574
## iter  40 value 18.619068
## iter  40 value 18.619068
## iter  40 value 18.619068
## final  value 18.619068 
## converged
## # weights:  43
## initial  value 165.598734 
## iter  10 value 24.205649
## iter  20 value 17.629535
## iter  30 value 17.222776
## iter  40 value 17.168752
## iter  50 value 17.168464
## iter  60 value 17.168428
## iter  60 value 17.168428
## iter  60 value 17.168428
## final  value 17.168428 
## converged
## # weights:  11
## initial  value 115.941037 
## iter  10 value 48.705139
## iter  20 value 47.783092
## iter  30 value 43.562064
## iter  40 value 11.101593
## iter  50 value 4.031437
## iter  60 value 3.116711
## iter  70 value 3.019260
## iter  80 value 2.993105
## iter  90 value 2.981303
## iter 100 value 2.969047
## final  value 2.969047 
## stopped after 100 iterations
## # weights:  27
## initial  value 132.813339 
## iter  10 value 3.715700
## iter  20 value 1.056815
## iter  30 value 0.558748
## iter  40 value 0.530262
## iter  50 value 0.467614
## iter  60 value 0.445847
## iter  70 value 0.424130
## iter  80 value 0.373259
## iter  90 value 0.354379
## iter 100 value 0.342801
## final  value 0.342801 
## stopped after 100 iterations
## # weights:  43
## initial  value 126.886256 
## iter  10 value 3.942342
## iter  20 value 1.736816
## iter  30 value 0.630651
## iter  40 value 0.552680
## iter  50 value 0.489807
## iter  60 value 0.396264
## iter  70 value 0.356221
## iter  80 value 0.340605
## iter  90 value 0.328238
## iter 100 value 0.321360
## final  value 0.321360 
## stopped after 100 iterations
## # weights:  11
## initial  value 128.489378 
## iter  10 value 49.909576
## iter  20 value 49.876540
## iter  30 value 47.945970
## iter  40 value 39.847846
## iter  50 value 8.019855
## iter  60 value 4.613532
## iter  70 value 2.856566
## iter  80 value 1.479799
## iter  90 value 1.304505
## iter 100 value 1.264325
## final  value 1.264325 
## stopped after 100 iterations
## # weights:  27
## initial  value 141.912242 
## iter  10 value 7.102731
## iter  20 value 0.339738
## final  value 0.000079 
## converged
## # weights:  43
## initial  value 128.771330 
## iter  10 value 21.354630
## iter  20 value 2.784172
## iter  30 value 0.013786
## iter  40 value 0.000332
## final  value 0.000076 
## converged
## # weights:  11
## initial  value 120.181179 
## iter  10 value 46.347790
## iter  20 value 43.064428
## iter  30 value 43.054040
## final  value 43.054021 
## converged
## # weights:  27
## initial  value 126.647230 
## iter  10 value 25.682812
## iter  20 value 20.660342
## iter  30 value 19.500529
## iter  40 value 19.121600
## iter  50 value 19.088454
## iter  60 value 19.083697
## final  value 19.083689 
## converged
## # weights:  43
## initial  value 132.234904 
## iter  10 value 29.615687
## iter  20 value 19.279132
## iter  30 value 17.877712
## iter  40 value 17.806996
## iter  50 value 17.793960
## iter  60 value 17.793819
## final  value 17.793686 
## converged
## # weights:  11
## initial  value 121.579687 
## iter  10 value 49.472914
## iter  20 value 48.410085
## iter  30 value 45.340464
## iter  40 value 37.104905
## iter  50 value 8.129207
## iter  60 value 4.703713
## iter  70 value 4.278400
## iter  80 value 3.667245
## iter  90 value 3.604730
## iter 100 value 3.568160
## final  value 3.568160 
## stopped after 100 iterations
## # weights:  27
## initial  value 135.360878 
## iter  10 value 10.436945
## iter  20 value 2.222820
## iter  30 value 0.763058
## iter  40 value 0.725440
## iter  50 value 0.677966
## iter  60 value 0.570628
## iter  70 value 0.518380
## iter  80 value 0.502364
## iter  90 value 0.462332
## iter 100 value 0.455880
## final  value 0.455880 
## stopped after 100 iterations
## # weights:  43
## initial  value 125.924213 
## iter  10 value 3.865138
## iter  20 value 1.025246
## iter  30 value 0.422681
## iter  40 value 0.379135
## iter  50 value 0.353145
## iter  60 value 0.335865
## iter  70 value 0.319622
## iter  80 value 0.303895
## iter  90 value 0.289299
## iter 100 value 0.271561
## final  value 0.271561 
## stopped after 100 iterations
## # weights:  11
## initial  value 114.925820 
## iter  10 value 45.333263
## iter  20 value 21.250608
## iter  30 value 6.082611
## iter  40 value 4.448976
## iter  50 value 3.266614
## iter  60 value 1.880390
## iter  70 value 1.733764
## iter  80 value 1.089267
## iter  90 value 1.045776
## iter 100 value 0.950634
## final  value 0.950634 
## stopped after 100 iterations
## # weights:  27
## initial  value 116.607224 
## iter  10 value 6.159810
## iter  20 value 1.197702
## iter  30 value 0.000196
## final  value 0.000057 
## converged
## # weights:  43
## initial  value 123.125697 
## iter  10 value 4.793414
## iter  20 value 0.073094
## iter  30 value 0.000393
## final  value 0.000088 
## converged
## # weights:  11
## initial  value 120.471214 
## iter  10 value 45.420303
## iter  20 value 43.694661
## iter  30 value 43.690235
## final  value 43.690202 
## converged
## # weights:  27
## initial  value 168.714249 
## iter  10 value 28.073376
## iter  20 value 21.126580
## iter  30 value 20.968508
## iter  40 value 20.968134
## final  value 20.968117 
## converged
## # weights:  43
## initial  value 134.057733 
## iter  10 value 44.240823
## iter  20 value 19.621880
## iter  30 value 18.596469
## iter  40 value 18.220014
## iter  50 value 18.200869
## iter  60 value 18.194706
## final  value 18.194547 
## converged
## # weights:  11
## initial  value 137.081572 
## iter  10 value 53.546736
## iter  20 value 49.263649
## iter  30 value 49.116099
## iter  40 value 49.041348
## iter  50 value 48.683090
## iter  60 value 48.634845
## iter  70 value 48.489442
## iter  80 value 48.480790
## iter  90 value 48.451846
## iter 100 value 48.179017
## final  value 48.179017 
## stopped after 100 iterations
## # weights:  27
## initial  value 143.490043 
## iter  10 value 4.357251
## iter  20 value 1.321252
## iter  30 value 0.645280
## iter  40 value 0.616636
## iter  50 value 0.565996
## iter  60 value 0.521660
## iter  70 value 0.508617
## iter  80 value 0.487870
## iter  90 value 0.483152
## iter 100 value 0.479423
## final  value 0.479423 
## stopped after 100 iterations
## # weights:  43
## initial  value 178.832632 
## iter  10 value 8.121158
## iter  20 value 1.422046
## iter  30 value 0.568662
## iter  40 value 0.518952
## iter  50 value 0.434974
## iter  60 value 0.392568
## iter  70 value 0.345835
## iter  80 value 0.285289
## iter  90 value 0.268178
## iter 100 value 0.253675
## final  value 0.253675 
## stopped after 100 iterations
## # weights:  11
## initial  value 123.307045 
## iter  10 value 43.672929
## iter  20 value 8.049676
## iter  30 value 3.773651
## iter  40 value 3.173208
## iter  50 value 3.060201
## iter  60 value 2.971167
## iter  70 value 2.563371
## iter  80 value 2.471224
## iter  90 value 2.341221
## iter 100 value 2.320048
## final  value 2.320048 
## stopped after 100 iterations
## # weights:  27
## initial  value 129.270569 
## iter  10 value 10.575847
## iter  20 value 2.930770
## iter  30 value 1.689612
## iter  40 value 0.097359
## iter  50 value 0.000123
## iter  50 value 0.000057
## iter  50 value 0.000057
## final  value 0.000057 
## converged
## # weights:  43
## initial  value 119.634242 
## iter  10 value 6.310691
## iter  20 value 1.591412
## iter  30 value 0.028391
## iter  40 value 0.000902
## final  value 0.000069 
## converged
## # weights:  11
## initial  value 120.069235 
## iter  10 value 60.195069
## iter  20 value 51.394914
## iter  30 value 43.991436
## final  value 43.991141 
## converged
## # weights:  27
## initial  value 152.809198 
## iter  10 value 25.471737
## iter  20 value 21.511163
## iter  30 value 21.387357
## iter  40 value 21.386800
## final  value 21.386800 
## converged
## # weights:  43
## initial  value 137.024287 
## iter  10 value 22.447246
## iter  20 value 19.002967
## iter  30 value 18.519064
## iter  40 value 18.404215
## iter  50 value 18.397540
## iter  60 value 18.396716
## final  value 18.396607 
## converged
## # weights:  11
## initial  value 121.726735 
## iter  10 value 50.373336
## iter  20 value 50.105529
## iter  30 value 49.998791
## iter  40 value 49.958270
## iter  50 value 49.774790
## iter  60 value 48.541266
## iter  70 value 18.978385
## iter  80 value 6.743585
## iter  90 value 4.055527
## iter 100 value 3.921743
## final  value 3.921743 
## stopped after 100 iterations
## # weights:  27
## initial  value 146.633351 
## iter  10 value 6.579898
## iter  20 value 0.624311
## iter  30 value 0.562510
## iter  40 value 0.514462
## iter  50 value 0.457198
## iter  60 value 0.403961
## iter  70 value 0.382785
## iter  80 value 0.371306
## iter  90 value 0.358751
## iter 100 value 0.317469
## final  value 0.317469 
## stopped after 100 iterations
## # weights:  43
## initial  value 127.981900 
## iter  10 value 7.369546
## iter  20 value 0.839917
## iter  30 value 0.675447
## iter  40 value 0.617273
## iter  50 value 0.540482
## iter  60 value 0.477520
## iter  70 value 0.443309
## iter  80 value 0.359346
## iter  90 value 0.308424
## iter 100 value 0.292198
## final  value 0.292198 
## stopped after 100 iterations
## # weights:  11
## initial  value 133.510869 
## iter  10 value 66.279276
## iter  20 value 49.065891
## iter  30 value 46.607987
## final  value 46.598156 
## converged
# Class predictions from the neural-network model on both partitions
resultado_entrenamiento5 <- predict(modeloNnet, newdata = entrenamiento)
resultado_prueba5 <- predict(modeloNnet, newdata = prueba)

# Confusion matrix: training-set predictions against the true species
mcre5 <- confusionMatrix(
  data = resultado_entrenamiento5,
  reference = entrenamiento$Species
)
mcre5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         36         0
##   virginica       0          4        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750
# Confusion matrix: test-set predictions against the true species
mcrp5 <- confusionMatrix(
  data = resultado_prueba5,
  reference = prueba$Species
)
mcrp5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0          9         0
##   virginica       0          1        10
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750

6. Modelo con el método rf

# Random forest on the iris training partition, tuned with 10-fold
# cross-validation. The data has only four predictors, so every candidate
# mtry (predictors sampled per split) must lie in 1..4; the earlier grid
# included 6, which randomForest reset with an "invalid mtry" warning.
modeloRf <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rf",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = expand.grid(mtry = c(2, 3, 4))
)
# Class predictions from the random-forest model on both partitions
resultado_entrenamiento6 <- predict(modeloRf, newdata = entrenamiento)
resultado_prueba6 <- predict(modeloRf, newdata = prueba)

# Confusion matrix: training-set predictions against the true species
mcre6 <- confusionMatrix(
  data = resultado_entrenamiento6,
  reference = entrenamiento$Species
)
mcre6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0          0        40
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9697, 1)
##     No Information Rate : 0.3333     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           1.0000
## Specificity                 1.0000            1.0000           1.0000
## Pos Pred Value              1.0000            1.0000           1.0000
## Neg Pred Value              1.0000            1.0000           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3333
## Detection Prevalence        0.3333            0.3333           0.3333
## Balanced Accuracy           1.0000            1.0000           1.0000
# Confusion matrix: test-set predictions against the true species
mcrp6 <- confusionMatrix(
  data = resultado_prueba6,
  reference = prueba$Species
)
mcrp6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Resumen de resultados

# Accuracy summary: one column per model, training accuracy in the first row
# and test accuracy in the second.
# The labels are numbered 1-6 to match the model sections (they were all "1."),
# and check.names = FALSE stops data.frame() from rewriting them into
# syntactic names such as "X1..svmLinear".
datos <- data.frame(
  "1. svmLinear" = c(mcre$overall["Accuracy"], mcrp$overall["Accuracy"]),
  "2. svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "3. svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "4. rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "5. nnet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "6. rf" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"]),
  check.names = FALSE
)
rownames(datos) <- c("Precisión de entrenamiento", "Precisión de prueba")
datos
##                            1. svmLinear 2. svmRadial 3. svmPoly  4. rpart
## Precisión de entrenamiento    0.9916667    0.9916667  0.9916667 0.9666667
## Precisión de prueba           0.9666667    0.9333333  0.9666667 0.9333333
##                              5. nnet     6. rf
## Precisión de entrenamiento 0.9666667 1.0000000
## Precisión de prueba        0.9666667 0.9333333

Conclusión

El modelo con el método de bosques aleatorios (random forest) presenta sobreajuste, ya que tiene una alta precisión en entrenamiento pero más baja en prueba. De acuerdo con el resumen de resultados, el mejor modelo es la Máquina de Vectores de Soporte Lineal.

Ejercicio 1

Instalar librerías

library(mlbench)

Descargar base de datos y borrar la columna ID

# Load the BreastCancer dataset and work on a copy
data("BreastCancer")
can <- BreastCancer

# Keep the "Id" column aside in its own vector
ids <- can[["Id"]]

# Modelling data: every column of the copy except "Id"
cancer <- can[, setdiff(names(can), "Id")]

# Inspect the first rows of the data without the "Id" column
head(cancer)
##   Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size Bare.nuclei
## 1            5         1          1             1            2           1
## 2            5         4          4             5            7          10
## 3            3         1          1             1            2           2
## 4            6         8          8             1            3           4
## 5            4         1          1             3            2           1
## 6            8        10         10             8            7          10
##   Bl.cromatin Normal.nucleoli Mitoses     Class
## 1           3               1       1    benign
## 2           3               2       1    benign
## 3           3               1       1    benign
## 4           3               7       1    benign
## 5           3               1       1    benign
## 6           9               7       1 malignant
# Inspect the first identifiers that were set aside from the modelling data
head(ids)
## [1] "1000025" "1002945" "1015425" "1016277" "1017023" "1017122"
# Convert the nine predictor columns to numeric.
# NOTE(review): in mlbench's BreastCancer these columns are presumably stored
# as factors -- confirm with str(cancer). as.numeric() applied directly to a
# factor returns its internal level codes, which only match the labelled values
# when the levels are exactly 1..k with no gaps. Going through as.character()
# recovers the labelled values in every case; missing values stay NA.
columnas_predictoras <- c(
  "Cl.thickness", "Cell.size", "Cell.shape", "Marg.adhesion", "Epith.c.size",
  "Bare.nuclei", "Bl.cromatin", "Normal.nucleoli", "Mitoses"
)
cancer[columnas_predictoras] <- lapply(
  cancer[columnas_predictoras],
  function(columna) as.numeric(as.character(columna))
)
# The outcome must remain a factor for classification
cancer$Class <- as.factor(cancer$Class)

Partir los datos 80 20

# Split on the outcome: 80 percent of the rows for training, the rest for testing
set.seed(123)
renglones_entrenamiento1 <- createDataPartition(
  cancer$Class,
  p = 0.8,
  list = FALSE
)
entrenamiento1 <- cancer[renglones_entrenamiento1, ]
prueba1 <- cancer[-renglones_entrenamiento1, ]

# Drop rows that contain missing values from each partition
entrenamiento1 <- na.omit(entrenamiento1)
prueba1 <- na.omit(prueba1)

1. Modelo con el método svmLinear

# Linear-kernel SVM with a fixed cost C = 1, evaluated with 10-fold cross-validation
modeloLinear1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Class predictions on both partitions
resultadoEn1 <- predict(modeloLinear1, newdata = entrenamiento1)
resultadoPr1 <- predict(modeloLinear1, newdata = prueba1)

# Confusion matrix: training-set predictions against the true class
mce <- confusionMatrix(data = resultadoEn1, reference = entrenamiento1$Class)
mce
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       347         5
##   malignant     10       186
##                                           
##                Accuracy : 0.9726          
##                  95% CI : (0.9553, 0.9846)
##     No Information Rate : 0.6515          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9401          
##                                           
##  Mcnemar's Test P-Value : 0.3017          
##                                           
##             Sensitivity : 0.9720          
##             Specificity : 0.9738          
##          Pos Pred Value : 0.9858          
##          Neg Pred Value : 0.9490          
##              Prevalence : 0.6515          
##          Detection Rate : 0.6332          
##    Detection Prevalence : 0.6423          
##       Balanced Accuracy : 0.9729          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix: test-set predictions against the true class
mcp <- confusionMatrix(data = resultadoPr1, reference = prueba1$Class)
mcp
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        85         1
##   malignant      2        47
##                                           
##                Accuracy : 0.9778          
##                  95% CI : (0.9364, 0.9954)
##     No Information Rate : 0.6444          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9517          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9770          
##             Specificity : 0.9792          
##          Pos Pred Value : 0.9884          
##          Neg Pred Value : 0.9592          
##              Prevalence : 0.6444          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.6370          
##       Balanced Accuracy : 0.9781          
##                                           
##        'Positive' Class : benign          
## 

2. Modelo con el método svmRadial

# Radial-kernel SVM with fixed sigma = 1 and C = 1, evaluated with 10-fold cross-validation
modeloRadial1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Class predictions on both partitions
resultadoEn2 <- predict(modeloRadial1, newdata = entrenamiento1)
resultadoPr2 <- predict(modeloRadial1, newdata = prueba1)

# Confusion matrix: training-set predictions against the true class
mce2 <- confusionMatrix(data = resultadoEn2, reference = entrenamiento1$Class)
mce2
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       355         0
##   malignant      2       191
##                                           
##                Accuracy : 0.9964          
##                  95% CI : (0.9869, 0.9996)
##     No Information Rate : 0.6515          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.992           
##                                           
##  Mcnemar's Test P-Value : 0.4795          
##                                           
##             Sensitivity : 0.9944          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9896          
##              Prevalence : 0.6515          
##          Detection Rate : 0.6478          
##    Detection Prevalence : 0.6478          
##       Balanced Accuracy : 0.9972          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix: test-set predictions against the true class
mcp2 <- confusionMatrix(data = resultadoPr2, reference = prueba1$Class)
mcp2
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        80         0
##   malignant      7        48
##                                           
##                Accuracy : 0.9481          
##                  95% CI : (0.8961, 0.9789)
##     No Information Rate : 0.6444          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.8904          
##                                           
##  Mcnemar's Test P-Value : 0.02334         
##                                           
##             Sensitivity : 0.9195          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.8727          
##              Prevalence : 0.6444          
##          Detection Rate : 0.5926          
##    Detection Prevalence : 0.5926          
##       Balanced Accuracy : 0.9598          
##                                           
##        'Positive' Class : benign          
## 

3. Modelo con el método svmPoly

# Polynomial-kernel SVM with fixed degree = 1, scale = 1 and C = 1,
# evaluated with 10-fold cross-validation
modeloPoly1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Class predictions on both partitions
resultadoEn3 <- predict(modeloPoly1, newdata = entrenamiento1)
resultadoPr3 <- predict(modeloPoly1, newdata = prueba1)

# Confusion matrix: training-set predictions against the true class
mce3 <- confusionMatrix(data = resultadoEn3, reference = entrenamiento1$Class)
mce3
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       347         5
##   malignant     10       186
##                                           
##                Accuracy : 0.9726          
##                  95% CI : (0.9553, 0.9846)
##     No Information Rate : 0.6515          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9401          
##                                           
##  Mcnemar's Test P-Value : 0.3017          
##                                           
##             Sensitivity : 0.9720          
##             Specificity : 0.9738          
##          Pos Pred Value : 0.9858          
##          Neg Pred Value : 0.9490          
##              Prevalence : 0.6515          
##          Detection Rate : 0.6332          
##    Detection Prevalence : 0.6423          
##       Balanced Accuracy : 0.9729          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix: test-set predictions against the true class
mcp3 <- confusionMatrix(data = resultadoPr3, reference = prueba1$Class)
mcp3
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        85         1
##   malignant      2        47
##                                           
##                Accuracy : 0.9778          
##                  95% CI : (0.9364, 0.9954)
##     No Information Rate : 0.6444          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9517          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9770          
##             Specificity : 0.9792          
##          Pos Pred Value : 0.9884          
##          Neg Pred Value : 0.9592          
##              Prevalence : 0.6444          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.6370          
##       Balanced Accuracy : 0.9781          
##                                           
##        'Positive' Class : benign          
## 

4. Modelo con el método rpart

# Decision tree (rpart) tuned over 10 candidate settings with 10-fold cross-validation
modeloRpart1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Class predictions on both partitions
resultadoEn4 <- predict(modeloRpart1, newdata = entrenamiento1)
resultadoPr4 <- predict(modeloRpart1, newdata = prueba1)

# Confusion matrix: training-set predictions against the true class
mce4 <- confusionMatrix(data = resultadoEn4, reference = entrenamiento1$Class)
mce4
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       347        12
##   malignant     10       179
##                                           
##                Accuracy : 0.9599          
##                  95% CI : (0.9398, 0.9747)
##     No Information Rate : 0.6515          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9114          
##                                           
##  Mcnemar's Test P-Value : 0.8312          
##                                           
##             Sensitivity : 0.9720          
##             Specificity : 0.9372          
##          Pos Pred Value : 0.9666          
##          Neg Pred Value : 0.9471          
##              Prevalence : 0.6515          
##          Detection Rate : 0.6332          
##    Detection Prevalence : 0.6551          
##       Balanced Accuracy : 0.9546          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix: test-set predictions against the true class
mcp4 <- confusionMatrix(data = resultadoPr4, reference = prueba1$Class)
mcp4
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        84         1
##   malignant      3        47
##                                           
##                Accuracy : 0.9704          
##                  95% CI : (0.9259, 0.9919)
##     No Information Rate : 0.6444          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9359          
##                                           
##  Mcnemar's Test P-Value : 0.6171          
##                                           
##             Sensitivity : 0.9655          
##             Specificity : 0.9792          
##          Pos Pred Value : 0.9882          
##          Neg Pred Value : 0.9400          
##              Prevalence : 0.6444          
##          Detection Rate : 0.6222          
##    Detection Prevalence : 0.6296          
##       Balanced Accuracy : 0.9723          
##                                           
##        'Positive' Class : benign          
## 

5. Modelo con el método nnet

# Neural network (nnet) on the breast-cancer training partition, tuned with
# 10-fold cross-validation over caret's default grid (no tuneGrid is given).
# trace = FALSE is forwarded to the underlying nnet fit so the optimizer does
# not print its per-iteration log for every fold and candidate.
modeloNnet1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)
## # weights:  12
## initial  value 394.147413 
## iter  10 value 60.543193
## iter  20 value 54.142196
## iter  30 value 46.469368
## iter  40 value 46.433403
## iter  50 value 45.404509
## iter  60 value 45.024359
## iter  70 value 44.983704
## iter  80 value 44.736752
## iter  90 value 44.732692
## iter 100 value 44.725439
## final  value 44.725439 
## stopped after 100 iterations
## # weights:  34
## initial  value 322.336160 
## iter  10 value 36.276132
## iter  20 value 31.805694
## iter  30 value 29.725559
## iter  40 value 28.336262
## iter  50 value 26.633113
## iter  60 value 23.745956
## iter  70 value 23.018392
## iter  80 value 22.473823
## iter  90 value 22.070307
## iter 100 value 21.554206
## final  value 21.554206 
## stopped after 100 iterations
## # weights:  56
## initial  value 304.004218 
## iter  10 value 33.516332
## iter  20 value 22.851750
## iter  30 value 15.930673
## iter  40 value 14.928930
## iter  50 value 14.801730
## iter  60 value 14.755997
## iter  70 value 14.728261
## iter  80 value 14.717904
## iter  90 value 14.717299
## iter 100 value 14.716479
## final  value 14.716479 
## stopped after 100 iterations
## # weights:  12
## initial  value 358.500872 
## iter  10 value 61.296821
## iter  20 value 49.897824
## iter  30 value 49.233128
## final  value 49.231408 
## converged
## # weights:  34
## initial  value 395.230259 
## iter  10 value 60.759664
## iter  20 value 43.026613
## iter  30 value 39.851141
## iter  40 value 38.630543
## iter  50 value 38.534179
## iter  60 value 38.533512
## final  value 38.533427 
## converged
## # weights:  56
## initial  value 369.143167 
## iter  10 value 40.496293
## iter  20 value 37.403972
## iter  30 value 36.454602
## iter  40 value 36.189127
## iter  50 value 35.501889
## iter  60 value 35.470652
## iter  70 value 35.466120
## final  value 35.466115 
## converged
## # weights:  12
## initial  value 313.195959 
## iter  10 value 61.218011
## iter  20 value 47.876889
## iter  30 value 44.250915
## iter  40 value 42.631862
## iter  50 value 42.597652
## iter  60 value 42.593135
## iter  70 value 42.591990
## iter  80 value 42.591641
## iter  90 value 42.591415
## iter 100 value 42.591246
## final  value 42.591246 
## stopped after 100 iterations
## # weights:  34
## initial  value 373.099919 
## iter  10 value 36.283920
## iter  20 value 26.812603
## iter  30 value 22.459466
## iter  40 value 20.767506
## iter  50 value 20.718382
## iter  60 value 20.624343
## iter  70 value 20.224075
## iter  80 value 20.009327
## iter  90 value 19.916848
## iter 100 value 19.826176
## final  value 19.826176 
## stopped after 100 iterations
## # weights:  56
## initial  value 255.547907 
## iter  10 value 31.729217
## iter  20 value 18.869651
## iter  30 value 9.817343
## iter  40 value 8.504981
## iter  50 value 7.268421
## iter  60 value 6.234593
## iter  70 value 5.654847
## iter  80 value 4.845387
## iter  90 value 4.704591
## iter 100 value 2.453330
## final  value 2.453330 
## stopped after 100 iterations
## # weights:  12
## initial  value 331.054765 
## iter  10 value 43.566042
## iter  20 value 38.996390
## iter  30 value 38.821758
## iter  40 value 38.740589
## iter  50 value 38.737696
## iter  60 value 38.736662
## iter  70 value 38.736359
## iter  80 value 38.736087
## iter  90 value 38.735959
## iter 100 value 38.735809
## final  value 38.735809 
## stopped after 100 iterations
## # weights:  34
## initial  value 491.590869 
## iter  10 value 48.045651
## iter  20 value 34.489216
## iter  30 value 31.505698
## iter  40 value 30.552648
## iter  50 value 29.825068
## iter  60 value 29.060343
## iter  70 value 27.824193
## iter  80 value 27.208299
## iter  90 value 26.867138
## iter 100 value 26.672986
## final  value 26.672986 
## stopped after 100 iterations
## # weights:  56
## initial  value 348.811556 
## iter  10 value 35.427114
## iter  20 value 20.748077
## iter  30 value 11.834131
## iter  40 value 8.743197
## iter  50 value 8.154409
## iter  60 value 8.072431
## iter  70 value 8.045690
## iter  80 value 7.264990
## iter  90 value 7.236527
## iter 100 value 6.927876
## final  value 6.927876 
## stopped after 100 iterations
## # weights:  12
## initial  value 340.491905 
## iter  10 value 61.828609
## iter  20 value 54.883919
## iter  30 value 51.195947
## iter  40 value 50.840079
## iter  40 value 50.840079
## iter  40 value 50.840079
## final  value 50.840079 
## converged
## # weights:  34
## initial  value 480.237605 
## iter  10 value 58.853891
## iter  20 value 48.662227
## iter  30 value 42.714897
## iter  40 value 41.102509
## iter  50 value 40.257094
## iter  60 value 40.140263
## iter  70 value 39.857766
## iter  80 value 39.339472
## final  value 39.337601 
## converged
## # weights:  56
## initial  value 365.549947 
## iter  10 value 43.828863
## iter  20 value 40.505686
## iter  30 value 39.624961
## iter  40 value 38.782947
## iter  50 value 38.494557
## iter  60 value 38.492982
## iter  70 value 38.492867
## iter  70 value 38.492867
## iter  70 value 38.492867
## final  value 38.492867 
## converged
## # weights:  12
## initial  value 308.011807 
## iter  10 value 40.453905
## iter  20 value 39.358880
## iter  30 value 38.545393
## iter  40 value 38.396583
## iter  50 value 38.377044
## iter  60 value 37.730519
## iter  70 value 36.905317
## iter  80 value 36.880123
## iter  90 value 36.841683
## iter 100 value 36.837149
## final  value 36.837149 
## stopped after 100 iterations
## # weights:  34
## initial  value 487.227489 
## iter  10 value 40.979350
## iter  20 value 30.963369
## iter  30 value 27.411526
## iter  40 value 22.780289
## iter  50 value 22.333639
## iter  60 value 22.206556
## iter  70 value 22.111743
## iter  80 value 22.033793
## iter  90 value 21.962832
## iter 100 value 21.890793
## final  value 21.890793 
## stopped after 100 iterations
## # weights:  56
## initial  value 317.535736 
## iter  10 value 38.888003
## iter  20 value 29.904957
## iter  30 value 20.666133
## iter  40 value 17.008939
## iter  50 value 15.725881
## iter  60 value 14.702170
## iter  70 value 14.304975
## iter  80 value 13.807309
## iter  90 value 13.574635
## iter 100 value 13.190153
## final  value 13.190153 
## stopped after 100 iterations
## # weights:  12
## initial  value 390.107852 
## iter  10 value 43.316558
## iter  20 value 42.807953
## iter  30 value 42.608083
## iter  40 value 36.830264
## iter  50 value 36.373112
## iter  60 value 36.264621
## final  value 36.262950 
## converged
## # weights:  34
## initial  value 362.137385 
## iter  10 value 42.602726
## iter  20 value 29.670811
## iter  30 value 26.387324
## iter  40 value 25.766380
## iter  50 value 25.639380
## iter  60 value 25.060787
## iter  70 value 24.365027
## iter  80 value 22.421698
## iter  90 value 22.175931
## iter 100 value 21.887297
## final  value 21.887297 
## stopped after 100 iterations
## # weights:  56
## initial  value 375.675567 
## iter  10 value 39.637970
## iter  20 value 20.810327
## iter  30 value 15.473559
## iter  40 value 13.548321
## iter  50 value 13.173338
## iter  60 value 13.018936
## iter  70 value 12.983444
## iter  80 value 12.908560
## iter  90 value 12.866620
## iter 100 value 12.833836
## final  value 12.833836 
## stopped after 100 iterations
## # weights:  12
## initial  value 321.588421 
## iter  10 value 74.408115
## iter  20 value 50.827938
## iter  30 value 47.279090
## iter  40 value 45.713379
## final  value 45.690883 
## converged
## # weights:  34
## initial  value 332.635433 
## iter  10 value 45.674813
## iter  20 value 39.666584
## iter  30 value 37.770258
## iter  40 value 37.073807
## iter  50 value 36.807724
## iter  60 value 36.598913
## iter  70 value 36.529164
## final  value 36.529162 
## converged
## # weights:  56
## initial  value 325.745999 
## iter  10 value 62.822351
## iter  20 value 38.948278
## iter  30 value 35.413149
## iter  40 value 34.922413
## iter  50 value 34.628027
## iter  60 value 34.518698
## iter  70 value 34.516847
## iter  80 value 34.516417
## iter  80 value 34.516417
## iter  80 value 34.516417
## final  value 34.516417 
## converged
## # weights:  12
## initial  value 315.068046 
## iter  10 value 43.046851
## iter  20 value 42.607236
## iter  30 value 42.586152
## iter  40 value 42.571062
## iter  50 value 42.561588
## iter  60 value 42.560665
## iter  70 value 42.560269
## iter  80 value 42.559913
## iter  90 value 42.559854
## iter 100 value 42.559714
## final  value 42.559714 
## stopped after 100 iterations
## # weights:  34
## initial  value 309.400145 
## iter  10 value 33.832498
## iter  20 value 26.953429
## iter  30 value 24.481258
## iter  40 value 21.459768
## iter  50 value 20.104347
## iter  60 value 19.879081
## iter  70 value 19.823061
## iter  80 value 19.786189
## iter  90 value 19.511058
## iter 100 value 19.449904
## final  value 19.449904 
## stopped after 100 iterations
## # weights:  56
## initial  value 431.318487 
## iter  10 value 33.805467
## iter  20 value 19.455171
## iter  30 value 17.272717
## iter  40 value 16.905189
## iter  50 value 16.680048
## iter  60 value 16.620817
## iter  70 value 16.593537
## iter  80 value 16.542711
## iter  90 value 16.529921
## iter 100 value 16.508314
## final  value 16.508314 
## stopped after 100 iterations
## # weights:  12
## initial  value 342.903404 
## iter  10 value 57.506127
## iter  20 value 53.725365
## iter  30 value 53.712088
## final  value 53.711252 
## converged
## # weights:  34
## initial  value 368.172712 
## iter  10 value 42.654602
## iter  20 value 39.847153
## iter  30 value 37.929920
## iter  40 value 37.404219
## iter  50 value 37.382939
## iter  60 value 37.380168
## iter  70 value 37.328886
## iter  80 value 37.281034
## iter  90 value 37.257866
## iter 100 value 37.216144
## final  value 37.216144 
## stopped after 100 iterations
## # weights:  56
## initial  value 332.181856 
## iter  10 value 34.734426
## iter  20 value 20.468803
## iter  30 value 9.674593
## iter  40 value 6.158383
## iter  50 value 5.792353
## iter  60 value 5.740329
## iter  70 value 5.721949
## iter  80 value 5.719025
## iter  90 value 5.716834
## iter 100 value 5.710523
## final  value 5.710523 
## stopped after 100 iterations
## # weights:  12
## initial  value 317.135348 
## iter  10 value 73.364340
## iter  20 value 53.399308
## iter  30 value 52.175446
## final  value 52.171947 
## converged
## # weights:  34
## initial  value 398.723373 
## iter  10 value 64.977581
## iter  20 value 48.887340
## iter  30 value 43.650374
## iter  40 value 42.028090
## iter  50 value 41.483814
## iter  60 value 41.461357
## iter  70 value 41.444618
## iter  80 value 41.437935
## final  value 41.437900 
## converged
## # weights:  56
## initial  value 357.683012 
## iter  10 value 49.124620
## iter  20 value 42.374416
## iter  30 value 40.827903
## iter  40 value 39.210811
## iter  50 value 38.860911
## iter  60 value 38.440324
## iter  70 value 37.964660
## iter  80 value 37.846478
## final  value 37.845623 
## converged
## # weights:  12
## initial  value 351.339109 
## iter  10 value 71.183041
## iter  20 value 66.470884
## iter  30 value 60.358816
## iter  40 value 54.976726
## iter  50 value 48.017454
## iter  60 value 47.773206
## iter  70 value 47.485641
## iter  80 value 47.213468
## iter  90 value 47.182288
## iter 100 value 47.181943
## final  value 47.181943 
## stopped after 100 iterations
## # weights:  34
## initial  value 283.493167 
## iter  10 value 42.846708
## iter  20 value 36.983391
## iter  30 value 35.516251
## iter  40 value 35.345308
## iter  50 value 35.220919
## iter  60 value 35.105990
## iter  70 value 35.044326
## iter  80 value 35.027312
## iter  90 value 35.013994
## iter 100 value 34.994979
## final  value 34.994979 
## stopped after 100 iterations
## # weights:  56
## initial  value 425.445767 
## iter  10 value 35.699117
## iter  20 value 19.255515
## iter  30 value 10.877347
## iter  40 value 9.728599
## iter  50 value 9.559314
## iter  60 value 9.102881
## iter  70 value 8.910328
## iter  80 value 8.839956
## iter  90 value 8.804698
## iter 100 value 8.763787
## final  value 8.763787 
## stopped after 100 iterations
## # weights:  12
## initial  value 318.403744 
## iter  10 value 53.210068
## iter  20 value 51.057200
## iter  30 value 51.043303
## iter  40 value 51.039758
## iter  50 value 50.885597
## iter  60 value 47.332443
## iter  70 value 42.319783
## iter  80 value 39.106461
## iter  90 value 38.791680
## iter 100 value 38.775813
## final  value 38.775813 
## stopped after 100 iterations
## # weights:  34
## initial  value 360.219844 
## iter  10 value 28.105060
## iter  20 value 21.029891
## iter  30 value 15.648535
## iter  40 value 14.940465
## iter  50 value 14.914287
## iter  60 value 14.913335
## final  value 14.913317 
## converged
## # weights:  56
## initial  value 280.726834 
## iter  10 value 41.009037
## iter  20 value 10.310238
## iter  30 value 1.218406
## iter  40 value 0.057052
## iter  50 value 0.011337
## iter  60 value 0.000180
## final  value 0.000100 
## converged
## # weights:  12
## initial  value 320.289238 
## iter  10 value 68.364562
## iter  20 value 51.529780
## iter  30 value 50.649423
## iter  40 value 50.643287
## iter  40 value 50.643287
## iter  40 value 50.643287
## final  value 50.643287 
## converged
## # weights:  34
## initial  value 360.064513 
## iter  10 value 56.872905
## iter  20 value 46.066484
## iter  30 value 44.033532
## iter  40 value 40.074558
## iter  50 value 37.192276
## iter  60 value 35.960149
## iter  70 value 35.889765
## iter  80 value 35.889390
## final  value 35.889386 
## converged
## # weights:  56
## initial  value 364.347324 
## iter  10 value 40.845841
## iter  20 value 37.773189
## iter  30 value 35.045244
## iter  40 value 33.794059
## iter  50 value 33.247810
## iter  60 value 33.175397
## iter  70 value 33.142778
## iter  80 value 33.140306
## iter  80 value 33.140306
## iter  80 value 33.140306
## final  value 33.140306 
## converged
## # weights:  12
## initial  value 303.688221 
## iter  10 value 64.056735
## iter  20 value 41.271695
## iter  30 value 38.260731
## iter  40 value 36.828003
## iter  50 value 36.720862
## iter  60 value 36.714118
## iter  70 value 36.703038
## iter  80 value 36.699555
## iter  90 value 36.699490
## iter 100 value 36.699387
## final  value 36.699387 
## stopped after 100 iterations
## # weights:  34
## initial  value 346.522052 
## iter  10 value 32.965926
## iter  20 value 20.819338
## iter  30 value 15.435571
## iter  40 value 14.888410
## iter  50 value 14.657076
## iter  60 value 14.577027
## iter  70 value 14.316283
## iter  80 value 14.293576
## iter  90 value 14.271018
## iter 100 value 14.252862
## final  value 14.252862 
## stopped after 100 iterations
## # weights:  56
## initial  value 390.347583 
## iter  10 value 36.669194
## iter  20 value 19.819300
## iter  30 value 15.457732
## iter  40 value 14.122914
## iter  50 value 13.945301
## iter  60 value 13.816932
## iter  70 value 13.310038
## iter  80 value 12.075653
## iter  90 value 9.040337
## iter 100 value 8.255322
## final  value 8.255322 
## stopped after 100 iterations
## # weights:  12
## initial  value 329.495465 
## iter  10 value 48.270304
## iter  20 value 48.206318
## iter  30 value 48.204707
## iter  40 value 47.432546
## iter  50 value 42.593755
## iter  60 value 42.458319
## iter  70 value 42.236709
## iter  80 value 39.416020
## iter  90 value 39.405956
## iter 100 value 39.399959
## final  value 39.399959 
## stopped after 100 iterations
## # weights:  34
## initial  value 385.558229 
## iter  10 value 31.290991
## iter  20 value 25.092976
## iter  30 value 21.229731
## iter  40 value 19.553560
## iter  50 value 19.223480
## iter  60 value 18.393965
## iter  70 value 17.779125
## iter  80 value 17.574581
## iter  90 value 17.509523
## iter 100 value 17.253660
## final  value 17.253660 
## stopped after 100 iterations
## # weights:  56
## initial  value 373.237200 
## iter  10 value 35.021457
## iter  20 value 31.180216
## iter  30 value 24.001199
## iter  40 value 20.346392
## iter  50 value 19.663134
## iter  60 value 19.444009
## iter  70 value 18.844560
## iter  80 value 18.245569
## iter  90 value 17.813923
## iter 100 value 15.951741
## final  value 15.951741 
## stopped after 100 iterations
## # weights:  12
## initial  value 319.047271 
## iter  10 value 48.767269
## iter  20 value 47.830556
## final  value 47.797860 
## converged
## # weights:  34
## initial  value 347.915980 
## iter  10 value 44.037893
## iter  20 value 38.650304
## iter  30 value 38.513667
## iter  40 value 38.495585
## final  value 38.495570 
## converged
## # weights:  56
## initial  value 331.743977 
## iter  10 value 42.646066
## iter  20 value 37.456344
## iter  30 value 35.114814
## iter  40 value 34.576526
## iter  50 value 34.127425
## iter  60 value 33.997874
## iter  70 value 33.988340
## final  value 33.988322 
## converged
## # weights:  12
## initial  value 422.386643 
## iter  10 value 64.637632
## iter  20 value 36.075991
## iter  30 value 35.930359
## iter  40 value 35.896104
## iter  50 value 35.868923
## final  value 35.868618 
## converged
## # weights:  34
## initial  value 351.014612 
## iter  10 value 31.946761
## iter  20 value 23.148517
## iter  30 value 16.425721
## iter  40 value 14.667003
## iter  50 value 14.494814
## iter  60 value 11.372531
## iter  70 value 10.628856
## iter  80 value 10.576171
## iter  90 value 10.528399
## iter 100 value 10.516585
## final  value 10.516585 
## stopped after 100 iterations
## # weights:  56
## initial  value 324.528921 
## iter  10 value 40.376297
## iter  20 value 26.361090
## iter  30 value 17.400570
## iter  40 value 15.127242
## iter  50 value 13.543174
## iter  60 value 12.909027
## iter  70 value 12.742067
## iter  80 value 12.333467
## iter  90 value 9.521729
## iter 100 value 8.401392
## final  value 8.401392 
## stopped after 100 iterations
## # weights:  12
## initial  value 398.182696 
## iter  10 value 35.159280
## iter  20 value 33.543200
## iter  30 value 30.830314
## iter  40 value 30.516709
## iter  50 value 28.955935
## iter  60 value 28.887803
## iter  70 value 28.816507
## iter  80 value 28.740605
## iter  90 value 26.299162
## iter 100 value 26.272198
## final  value 26.272198 
## stopped after 100 iterations
## # weights:  34
## initial  value 309.355153 
## iter  10 value 30.452716
## iter  20 value 29.756748
## iter  30 value 29.586545
## iter  40 value 29.556621
## iter  50 value 29.551982
## final  value 29.551757 
## converged
## # weights:  56
## initial  value 372.346923 
## iter  10 value 26.482081
## iter  20 value 18.362381
## iter  30 value 14.880684
## iter  40 value 14.691510
## iter  50 value 14.667183
## iter  60 value 14.648156
## iter  70 value 14.644948
## iter  80 value 14.644742
## iter  90 value 14.644264
## iter 100 value 14.643933
## final  value 14.643933 
## stopped after 100 iterations
## # weights:  12
## initial  value 346.777626 
## iter  10 value 50.427816
## iter  20 value 46.328126
## iter  30 value 45.727388
## iter  30 value 45.727388
## iter  30 value 45.727388
## final  value 45.727388 
## converged
## # weights:  34
## initial  value 343.369188 
## iter  10 value 61.363794
## iter  20 value 36.405413
## iter  30 value 34.678907
## iter  40 value 33.593571
## iter  50 value 32.260109
## iter  60 value 32.242976
## final  value 32.242952 
## converged
## # weights:  56
## initial  value 341.102870 
## iter  10 value 47.824591
## iter  20 value 34.655579
## iter  30 value 32.260853
## iter  40 value 31.912510
## iter  50 value 31.412203
## iter  60 value 30.281186
## iter  70 value 30.242036
## final  value 30.241831 
## converged
## # weights:  12
## initial  value 368.391389 
## iter  10 value 36.517696
## iter  20 value 35.859395
## iter  30 value 33.305365
## iter  40 value 32.814264
## iter  50 value 32.808823
## iter  60 value 32.806718
## iter  70 value 32.803884
## iter  80 value 32.803549
## iter  90 value 32.803114
## iter 100 value 32.803052
## final  value 32.803052 
## stopped after 100 iterations
## # weights:  34
## initial  value 350.499357 
## iter  10 value 25.846250
## iter  20 value 21.338095
## iter  30 value 16.126191
## iter  40 value 12.673299
## iter  50 value 11.676439
## iter  60 value 11.552721
## iter  70 value 11.514219
## iter  80 value 11.494776
## iter  90 value 11.488949
## iter 100 value 11.485410
## final  value 11.485410 
## stopped after 100 iterations
## # weights:  56
## initial  value 327.530158 
## iter  10 value 27.474454
## iter  20 value 20.787082
## iter  30 value 17.426255
## iter  40 value 15.563857
## iter  50 value 13.997636
## iter  60 value 13.819420
## iter  70 value 13.715219
## iter  80 value 13.549639
## iter  90 value 13.430116
## iter 100 value 13.199589
## final  value 13.199589 
## stopped after 100 iterations
## # weights:  12
## initial  value 312.252031 
## iter  10 value 55.336469
## iter  20 value 47.371755
## iter  30 value 47.191940
## iter  40 value 44.875485
## iter  50 value 44.755262
## final  value 44.755159 
## converged
## # weights:  34
## initial  value 310.756357 
## iter  10 value 37.649744
## iter  20 value 31.165267
## iter  30 value 26.568187
## iter  40 value 25.017729
## iter  50 value 22.890610
## iter  60 value 22.080414
## iter  70 value 21.599482
## iter  80 value 21.461060
## iter  90 value 21.181696
## iter 100 value 20.837140
## final  value 20.837140 
## stopped after 100 iterations
## # weights:  56
## initial  value 443.594346 
## iter  10 value 34.924593
## iter  20 value 22.953366
## iter  30 value 14.560052
## iter  40 value 7.382286
## iter  50 value 6.046032
## iter  60 value 5.703594
## iter  70 value 5.176927
## iter  80 value 4.655616
## iter  90 value 4.043634
## iter 100 value 3.740364
## final  value 3.740364 
## stopped after 100 iterations
## # weights:  12
## initial  value 414.517244 
## iter  10 value 69.175859
## iter  20 value 53.658822
## iter  30 value 51.770031
## iter  40 value 50.883969
## final  value 50.883891 
## converged
## # weights:  34
## initial  value 396.740966 
## iter  10 value 62.949969
## iter  20 value 49.846338
## iter  30 value 44.065881
## iter  40 value 42.972351
## iter  50 value 42.377212
## iter  60 value 42.301867
## final  value 42.301352 
## converged
## # weights:  56
## initial  value 317.075620 
## iter  10 value 76.797987
## iter  20 value 42.819149
## iter  30 value 40.134566
## iter  40 value 39.069267
## iter  50 value 38.949175
## iter  60 value 38.400601
## iter  70 value 38.282147
## iter  80 value 37.720124
## iter  90 value 37.509685
## iter 100 value 37.318342
## final  value 37.318342 
## stopped after 100 iterations
## # weights:  12
## initial  value 355.865391 
## iter  10 value 46.613325
## iter  20 value 45.701628
## iter  30 value 43.143980
## iter  40 value 40.427206
## iter  50 value 39.942106
## iter  60 value 39.891054
## iter  70 value 39.888610
## iter  80 value 39.888485
## iter  90 value 39.888427
## iter 100 value 39.888330
## final  value 39.888330 
## stopped after 100 iterations
## # weights:  34
## initial  value 293.329880 
## iter  10 value 38.228979
## iter  20 value 28.297813
## iter  30 value 25.287696
## iter  40 value 24.902261
## iter  50 value 24.891272
## iter  60 value 24.884055
## iter  70 value 24.879127
## iter  80 value 24.876047
## iter  90 value 24.875210
## iter 100 value 24.875060
## final  value 24.875060 
## stopped after 100 iterations
## # weights:  56
## initial  value 509.573610 
## iter  10 value 35.471812
## iter  20 value 16.165802
## iter  30 value 8.422809
## iter  40 value 3.709503
## iter  50 value 2.587363
## iter  60 value 1.468428
## iter  70 value 0.851013
## iter  80 value 0.764656
## iter  90 value 0.713106
## iter 100 value 0.674609
## final  value 0.674609 
## stopped after 100 iterations
## # weights:  12
## initial  value 333.595956 
## iter  10 value 43.757853
## iter  20 value 39.587140
## iter  30 value 39.520921
## iter  40 value 39.499155
## iter  50 value 39.495459
## iter  60 value 39.492716
## iter  70 value 39.490412
## iter  80 value 39.489591
## iter  90 value 39.465644
## iter 100 value 39.453508
## final  value 39.453508 
## stopped after 100 iterations
## # weights:  34
## initial  value 372.964941 
## iter  10 value 45.266510
## iter  20 value 42.272164
## iter  30 value 42.209604
## iter  40 value 42.150183
## iter  50 value 41.344263
## iter  60 value 40.846887
## iter  70 value 40.770142
## iter  80 value 40.548401
## iter  90 value 40.509528
## iter 100 value 40.359536
## final  value 40.359536 
## stopped after 100 iterations
## # weights:  56
## initial  value 305.810020 
## iter  10 value 36.208655
## iter  20 value 20.314443
## iter  30 value 13.440368
## iter  40 value 9.058683
## iter  50 value 7.616727
## iter  60 value 6.879022
## iter  70 value 6.543357
## iter  80 value 6.337916
## iter  90 value 6.230668
## iter 100 value 5.987368
## final  value 5.987368 
## stopped after 100 iterations
## # weights:  12
## initial  value 335.483060 
## iter  10 value 72.386928
## iter  20 value 50.466871
## iter  30 value 49.061972
## final  value 49.056780 
## converged
## # weights:  34
## initial  value 348.362678 
## iter  10 value 68.512022
## iter  20 value 41.073256
## iter  30 value 40.011444
## iter  40 value 39.470911
## iter  50 value 38.977691
## iter  60 value 38.820267
## iter  70 value 38.675377
## iter  80 value 38.675319
## iter  80 value 38.675319
## iter  80 value 38.675319
## final  value 38.675319 
## converged
## # weights:  56
## initial  value 384.921790 
## iter  10 value 40.574966
## iter  20 value 38.796677
## iter  30 value 37.945496
## iter  40 value 37.465279
## iter  50 value 37.289583
## iter  60 value 37.233719
## iter  70 value 37.233240
## final  value 37.233240 
## converged
## # weights:  12
## initial  value 338.943789 
## iter  10 value 43.764930
## iter  20 value 39.032861
## iter  30 value 36.599245
## iter  40 value 36.533318
## iter  50 value 36.520501
## iter  60 value 36.512538
## iter  70 value 36.508677
## iter  80 value 36.506309
## iter  90 value 36.503712
## iter 100 value 36.503002
## final  value 36.503002 
## stopped after 100 iterations
## # weights:  34
## initial  value 491.113542 
## iter  10 value 48.174633
## iter  20 value 41.278888
## iter  30 value 41.190306
## iter  40 value 41.126299
## iter  50 value 38.624181
## iter  60 value 37.962218
## iter  70 value 35.252369
## iter  80 value 35.223527
## iter  90 value 35.189493
## iter 100 value 34.737566
## final  value 34.737566 
## stopped after 100 iterations
## # weights:  56
## initial  value 474.208280 
## final  value 229.895134 
## converged
## # weights:  12
## initial  value 340.188151 
## iter  10 value 41.795634
## iter  20 value 38.333281
## iter  30 value 36.476338
## iter  40 value 36.263246
## final  value 36.262705 
## converged
## # weights:  34
## initial  value 319.529182 
## iter  10 value 44.529766
## iter  20 value 39.541811
## iter  30 value 35.304975
## iter  40 value 31.014246
## iter  50 value 30.621157
## iter  60 value 30.403328
## iter  70 value 30.251753
## iter  80 value 29.947274
## iter  90 value 29.590208
## iter 100 value 29.370655
## final  value 29.370655 
## stopped after 100 iterations
## # weights:  56
## initial  value 480.718621 
## iter  10 value 34.020456
## iter  20 value 15.086952
## iter  30 value 7.351333
## iter  40 value 5.782868
## iter  50 value 5.688207
## iter  60 value 5.678268
## iter  70 value 5.677826
## iter  80 value 5.677479
## final  value 5.677434 
## converged
## # weights:  12
## initial  value 413.980274 
## iter  10 value 56.517928
## iter  20 value 48.786924
## iter  30 value 48.662451
## final  value 48.662450 
## converged
## # weights:  34
## initial  value 376.393510 
## iter  10 value 67.505338
## iter  20 value 44.207729
## iter  30 value 40.514886
## iter  40 value 40.145211
## iter  50 value 40.135575
## iter  60 value 40.133424
## final  value 40.133415 
## converged
## # weights:  56
## initial  value 410.970691 
## iter  10 value 42.441931
## iter  20 value 38.519634
## iter  30 value 37.692837
## iter  40 value 37.556868
## iter  50 value 37.499477
## iter  60 value 37.133145
## iter  70 value 37.043231
## final  value 37.042858 
## converged
## # weights:  12
## initial  value 352.825990 
## iter  10 value 49.033300
## iter  20 value 44.897291
## iter  30 value 44.852988
## iter  40 value 44.842350
## iter  50 value 44.823315
## iter  60 value 40.213939
## iter  70 value 39.549642
## iter  80 value 39.545032
## iter  90 value 39.542656
## iter 100 value 39.540919
## final  value 39.540919 
## stopped after 100 iterations
## # weights:  34
## initial  value 406.220122 
## iter  10 value 37.660843
## iter  20 value 34.075710
## iter  30 value 30.562463
## iter  40 value 28.419793
## iter  50 value 27.421714
## iter  60 value 25.019091
## iter  70 value 24.121865
## iter  80 value 23.046003
## iter  90 value 22.491483
## iter 100 value 22.182463
## final  value 22.182463 
## stopped after 100 iterations
## # weights:  56
## initial  value 350.962770 
## iter  10 value 36.768229
## iter  20 value 26.609842
## iter  30 value 21.190131
## iter  40 value 12.580102
## iter  50 value 8.549448
## iter  60 value 6.676607
## iter  70 value 3.419745
## iter  80 value 0.765480
## iter  90 value 0.562687
## iter 100 value 0.534073
## final  value 0.534073 
## stopped after 100 iterations
## # weights:  34
## initial  value 347.042730 
## iter  10 value 37.634595
## iter  20 value 28.942849
## iter  30 value 25.719526
## iter  40 value 24.301711
## iter  50 value 24.074027
## iter  60 value 23.990039
## iter  70 value 23.970925
## iter  80 value 23.970792
## final  value 23.970619 
## converged
# Predictions of the nnet model on the training and the test data
resultadoEn5 <- predict(modeloNnet1, newdata = entrenamiento1)
resultadoPr5 <- predict(modeloNnet1, newdata = prueba1)

# Confusion matrix: training predictions against the observed training classes
mce5 <- confusionMatrix(
  data = resultadoEn5,
  reference = entrenamiento1$Class
)
# Show the confusion matrix of the training result
mce5
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       350         0
##   malignant      7       191
##                                           
##                Accuracy : 0.9872          
##                  95% CI : (0.9739, 0.9948)
##     No Information Rate : 0.6515          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.9721          
##                                           
##  Mcnemar's Test P-Value : 0.02334         
##                                           
##             Sensitivity : 0.9804          
##             Specificity : 1.0000          
##          Pos Pred Value : 1.0000          
##          Neg Pred Value : 0.9646          
##              Prevalence : 0.6515          
##          Detection Rate : 0.6387          
##    Detection Prevalence : 0.6387          
##       Balanced Accuracy : 0.9902          
##                                           
##        'Positive' Class : benign          
## 
# Confusion matrix: nnet test predictions against the observed test classes
mcp5 <- confusionMatrix(
  data = resultadoPr5,
  reference = prueba1$Class
)
# Show the confusion matrix of the test result
mcp5
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        83         1
##   malignant      4        47
##                                           
##                Accuracy : 0.963           
##                  95% CI : (0.9157, 0.9879)
##     No Information Rate : 0.6444          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9203          
##                                           
##  Mcnemar's Test P-Value : 0.3711          
##                                           
##             Sensitivity : 0.9540          
##             Specificity : 0.9792          
##          Pos Pred Value : 0.9881          
##          Neg Pred Value : 0.9216          
##              Prevalence : 0.6444          
##          Detection Rate : 0.6148          
##    Detection Prevalence : 0.6222          
##       Balanced Accuracy : 0.9666          
##                                           
##        'Positive' Class : benign          
## 

6. Modelo con el método rf

# Fit a random forest ("rf") that predicts Class from every other column of
# entrenamiento1. Predictors are scaled and centered, the model is resampled
# with 10-fold cross-validation, and mtry is tuned over 2, 4 and 6.
modeloRf1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "rf",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = expand.grid(mtry = c(2, 4, 6))
)

# Predictions of the rf model on the training and the test data
resultadoEn6 <- predict(modeloRf1, newdata = entrenamiento1)
resultadoPr6 <- predict(modeloRf1, newdata = prueba1)

# Confusion matrix: training predictions against the observed training classes
mce6 <- confusionMatrix(
  data = resultadoEn6,
  reference = entrenamiento1$Class
)
# Show the confusion matrix of the training result
mce6
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign       357         0
##   malignant      0       191
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9933, 1)
##     No Information Rate : 0.6515     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0000     
##             Specificity : 1.0000     
##          Pos Pred Value : 1.0000     
##          Neg Pred Value : 1.0000     
##              Prevalence : 0.6515     
##          Detection Rate : 0.6515     
##    Detection Prevalence : 0.6515     
##       Balanced Accuracy : 1.0000     
##                                      
##        'Positive' Class : benign     
## 
mcp6 <- confusionMatrix(resultadoPr6, prueba1$Class)
mcp6 # Matriz de confusión de resultado de la prueba
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  benign malignant
##   benign        85         1
##   malignant      2        47
##                                           
##                Accuracy : 0.9778          
##                  95% CI : (0.9364, 0.9954)
##     No Information Rate : 0.6444          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.9517          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.9770          
##             Specificity : 0.9792          
##          Pos Pred Value : 0.9884          
##          Neg Pred Value : 0.9592          
##              Prevalence : 0.6444          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.6370          
##       Balanced Accuracy : 0.9781          
##                                           
##        'Positive' Class : benign          
## 

Resumen de resultados

datos1 <- data.frame(
  "1. svmLinear1" = c(mce$overall["Accuracy"], mcp$overall["Accuracy"]),
  "1. svmRadial1" = c(mce2$overall["Accuracy"], mcp2$overall["Accuracy"]),
  "1. svmPoly1" = c(mce3$overall["Accuracy"], mcp3$overall["Accuracy"]),
  "1. rpart1" = c(mce4$overall["Accuracy"], mcp4$overall["Accuracy"]),
  "1. nnet1" = c(mce5$overall["Accuracy"], mcp5$overall["Accuracy"]),
  "1. rf1" = c(mce6$overall["Accuracy"], mcp6$overall["Accuracy"])
)
rownames(datos1) <- c("Precisión de entrenamiento", "Precisión de prueba")
datos1
##                            X1..svmLinear1 X1..svmRadial1 X1..svmPoly1
## Precisión de entrenamiento      0.9726277      0.9963504    0.9726277
## Precisión de prueba             0.9777778      0.9481481    0.9777778
##                            X1..rpart1 X1..nnet1   X1..rf1
## Precisión de entrenamiento  0.9598540 0.9872263 1.0000000
## Precisión de prueba         0.9703704 0.9629630 0.9777778
---
title: 'Machine learning: Paquete CARET'
author: "Héctor Guadalupe de la Garza Treviño - A01177960"
date: "2024-02-28"
output: 
  html_document:
    toc: true
    toc_float: true
    code_download: true
---


![](/Users/hectordelagarzatrevino/Library/CloudStorage/GoogleDrive-a01177960@tec.mx/Mi unidad/LIT/Sexto semestre/Inteligencia Artificial con Impacto Empresarial/Modulo 2/Sesion 8/iris-machinelearning.png)

# <span style="color: blue;">Teoría</span>
El paquete *CARET (Classification and Regression Training)* tiene una amplia variedad de algoritmos para el aprendizaje automático.  

# <span style="color: blue;">Instalar paquetes y llamar librerías</span>
```{r}
library(caret) # Algoritmos de aprendizaje automatico
library(datasets) # Usar la base de datos "Iris"
library(ggplot2) # Gráficar con un mejor diseño
library(lattice) # Crear gráficos
library(DataExplorer)
```

# <span style="color: blue;">Crear base de datos</span>
```{r}
# Working copy of the built-in iris data set, held as a data frame.
df <- as.data.frame(iris)
```

# <span style="color: blue;">Análisis exploratorio</span>
```{r}
# Print per-column summary statistics for the data frame.
summary(df)
# Print the compact structure (column names, types, first values).
str(df)
# Disabled call; presumably DataExplorer's automated report generator -- confirm
# before re-enabling.
# create_report(df)
```

**Nota: La variable que queremos predecir debe tener formato de factor**

# <span style="color: blue;">Partir los datos 80 20</span>
```{r}
# Use 80 percent of the rows for training and the remaining 20 for testing.
# The seed makes the random partition reproducible.
set.seed(123)
renglones_entrenamiento <- createDataPartition(
  df$Species,
  p = 0.8,
  list = FALSE
)
# Subset the same object the indices were computed from (df), rather than
# reaching back to the original iris data set.
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
```

# <span style="color: blue;">Distintos tipos de métodos para modelar</span>
Los métodos más utilizados para modelar aprendizaje automático son:  

* **SVM**: *Support Vector Machine* o Máquina de Vectores de Soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc.  
* **Árbol de Decisión**: rpart. 
* **Redes Neuronales**: nnet. 
* **Random Forest** o Bosques Aleatorios: rf. 

# <span style="color: blue;">1. Modelo con el método svmLinear</span>
```{r}
# Fit a linear SVM on the iris training rows with 10-fold cross-validation.
modeloLinear <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1) # grid used for svmLinear
)

# Predicted classes for the training rows and for the test rows
resultado_entrenamiento1 <- predict(modeloLinear, newdata = entrenamiento)
resultado_prueba1 <- predict(modeloLinear, newdata = prueba)

# Confusion matrices: predictions against the actual species
mcre <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento$Species
)
mcre # confusion matrix for the training result
mcrp <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba$Species
)
mcrp # confusion matrix for the test result
```

# <span style="color: blue;">2. Modelo con el método svmRadial</span>
```{r}
# Fit a radial-kernel SVM on the iris training rows with 10-fold
# cross-validation, using a single (sigma, C) candidate.
modeloRadial <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Predicted classes for the training rows and for the test rows
resultado_entrenamiento2 <- predict(modeloRadial, newdata = entrenamiento)
resultado_prueba2 <- predict(modeloRadial, newdata = prueba)

# Confusion matrices: predictions against the actual species
mcre2 <- confusionMatrix(
  data = resultado_entrenamiento2,
  reference = entrenamiento$Species
)
mcre2 # confusion matrix for the training result
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba$Species
)
mcrp2 # confusion matrix for the test result
```

# <span style="color: blue;">3. Modelo con el método svmPoly</span>
```{r}
# Fit a polynomial-kernel SVM on the iris training rows with 10-fold
# cross-validation, using a single (degree, scale, C) candidate.
modeloPoly <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Predicted classes for the training rows and for the test rows
resultado_entrenamiento3 <- predict(modeloPoly, newdata = entrenamiento)
resultado_prueba3 <- predict(modeloPoly, newdata = prueba)

# Confusion matrices: predictions against the actual species
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento$Species
)
mcre3 # confusion matrix for the training result
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba$Species
)
mcrp3 # confusion matrix for the test result
```

# <span style="color: blue;">4. Modelo con el método rpart</span>
```{r}
# Fit an rpart decision tree on the iris training rows with 10-fold
# cross-validation; tuneLength = 10 asks caret for ten candidate settings.
modeloRpart <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Predicted classes for the training rows and for the test rows
resultado_entrenamiento4 <- predict(modeloRpart, newdata = entrenamiento)
resultado_prueba4 <- predict(modeloRpart, newdata = prueba)

# Confusion matrices: predictions against the actual species
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento$Species
)
mcre4 # confusion matrix for the training result
mcrp4 <- confusionMatrix(
  data = resultado_prueba4,
  reference = prueba$Species
)
mcrp4 # confusion matrix for the test result
```

# <span style="color: blue;">5. Modelo con el método nnet</span>
```{r}
# Fit a single-hidden-layer neural network (nnet) on the iris training rows
# with 10-fold cross-validation.
# trace = FALSE is forwarded to the underlying nnet fit so its per-iteration
# optimisation log (printed for every fold and tuning candidate) does not
# flood the knitted report; the fitted model is unaffected.
modeloNnet <- train(
  Species ~ .,
  data = entrenamiento,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)

# Predicted classes for the training rows and for the test rows
resultado_entrenamiento5 <- predict(modeloNnet, entrenamiento)
resultado_prueba5 <- predict(modeloNnet, prueba)

# Confusion matrices: predictions against the actual species
mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Species)
mcre5 # confusion matrix for the training result
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Species)
mcrp5 # confusion matrix for the test result
```

# <span style="color: blue;">6. Modelo con el método rf</span>
```{r}
# Fit a random forest on the iris training rows with 10-fold cross-validation.
# mtry is the number of predictors sampled at each split. The formula
# Species ~ . has only four predictors, so the previous candidate mtry = 6 was
# out of range (randomForest warns and resets it); the grid now stays within
# the valid range.
modeloRf <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rf",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = expand.grid(mtry = c(2, 3, 4))
)

# Predicted classes for the training rows and for the test rows
resultado_entrenamiento6 <- predict(modeloRf, entrenamiento)
resultado_prueba6 <- predict(modeloRf, prueba)

# Confusion matrices: predictions against the actual species
mcre6 <- confusionMatrix(resultado_entrenamiento6, entrenamiento$Species)
mcre6 # confusion matrix for the training result
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$Species)
mcrp6 # confusion matrix for the test result
```

# <span style="color: blue;">Resumen de resultados</span>
```{r}
# Summary table: training accuracy (row 1) and test accuracy (row 2) for each
# of the six iris models.
# check.names = FALSE keeps the column labels exactly as written; without it
# data.frame() rewrites "1. svmLinear" into "X1..svmLinear". The labels are
# also numbered 1-6 to match the model sections (they were all "1.").
# [[ ]] extracts the bare accuracy value without carrying the "Accuracy" name.
datos <- data.frame(
  "1. svmLinear" = c(
    mcre$overall[["Accuracy"]], mcrp$overall[["Accuracy"]]
  ),
  "2. svmRadial" = c(
    mcre2$overall[["Accuracy"]], mcrp2$overall[["Accuracy"]]
  ),
  "3. svmPoly" = c(
    mcre3$overall[["Accuracy"]], mcrp3$overall[["Accuracy"]]
  ),
  "4. rpart" = c(
    mcre4$overall[["Accuracy"]], mcrp4$overall[["Accuracy"]]
  ),
  "5. nnet" = c(
    mcre5$overall[["Accuracy"]], mcrp5$overall[["Accuracy"]]
  ),
  "6. rf" = c(
    mcre6$overall[["Accuracy"]], mcrp6$overall[["Accuracy"]]
  ),
  check.names = FALSE
)
rownames(datos) <- c("Precisión de entrenamiento", "Precisión de prueba")
datos
```

# <span style="color: blue;">Conclusión</span>
El modelo con el método de bosques aleatorios o random forest presenta sobreajuste, ya que tiene una alta precisión en entrenamiento pero baja en prueba. Acorde al resumen de resultados, el mejor modelo es la **Máquina de Vectores de Soporte Lineal**.

# Ejercicio 1

# Instalar librerías
```{r}
library(mlbench)
```

# Descargar base de datos y borrar la columna ID
```{r}
# Load the BreastCancer data set and keep an untouched copy in `can`.
data("BreastCancer")
can <- BreastCancer

# Keep the identifier column on its own, separate from the modelling data
ids <- can[["Id"]]

# Modelling data: every column of `can` except "Id"
cancer <- can[, setdiff(names(can), "Id")]

# Preview the data without the "Id" column
head(cancer)

# Preview the extracted identifiers
head(ids)
```

```{r}
# Convert the predictor columns from factors to numeric values.
# as.numeric() applied directly to a factor returns the internal level codes,
# not the values the labels spell out; whenever a factor lacks one of the
# intermediate levels the codes silently diverge from the real scores.
# Going through as.character() first converts the labels themselves.
columnas_numericas <- c(
  "Cl.thickness", "Cell.size", "Cell.shape", "Marg.adhesion",
  "Epith.c.size", "Bare.nuclei", "Bl.cromatin", "Normal.nucleoli",
  "Mitoses"
)
cancer[columnas_numericas] <- lapply(
  cancer[columnas_numericas],
  function(columna) as.numeric(as.character(columna))
)
# The outcome must be a factor for classification.
cancer$Class <- as.factor(cancer$Class)
```

# <span style="color: blue;">Partir los datos 80 20</span>
```{r}
# Use 80 percent of the rows for training and the remaining 20 for testing.
set.seed(123)
renglones_entrenamiento1 <- createDataPartition(
  y = cancer$Class,
  p = 0.8,
  list = FALSE
)

# Build each split and drop its rows with missing values in one step
entrenamiento1 <- na.omit(cancer[renglones_entrenamiento1, ])
prueba1 <- na.omit(cancer[-renglones_entrenamiento1, ])
```

# <span style="color: blue;">1. Modelo con el método svmLinear</span>
```{r}
# Fit a linear SVM on the breast-cancer training rows with 10-fold
# cross-validation.
modeloLinear1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Predicted classes for the training rows and for the test rows
resultadoEn1 <- predict(modeloLinear1, newdata = entrenamiento1)
resultadoPr1 <- predict(modeloLinear1, newdata = prueba1)

# Confusion matrices: predictions against the actual class
mce <- confusionMatrix(
  data = resultadoEn1,
  reference = entrenamiento1$Class
)
mce # confusion matrix for the training result
mcp <- confusionMatrix(
  data = resultadoPr1,
  reference = prueba1$Class
)
mcp # confusion matrix for the test result
```

# <span style="color: blue;">2. Modelo con el método svmRadial</span>
```{r}
# Fit a radial-kernel SVM on the breast-cancer training rows with 10-fold
# cross-validation, using a single (sigma, C) candidate.
modeloRadial1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Predicted classes for the training rows and for the test rows
resultadoEn2 <- predict(modeloRadial1, newdata = entrenamiento1)
resultadoPr2 <- predict(modeloRadial1, newdata = prueba1)

# Confusion matrices: predictions against the actual class
mce2 <- confusionMatrix(
  data = resultadoEn2,
  reference = entrenamiento1$Class
)
mce2 # confusion matrix for the training result
mcp2 <- confusionMatrix(
  data = resultadoPr2,
  reference = prueba1$Class
)
mcp2 # confusion matrix for the test result
```

# <span style="color: blue;">3. Modelo con el método svmPoly</span>
```{r}
# Fit a polynomial-kernel SVM on the breast-cancer training rows with 10-fold
# cross-validation, using a single (degree, scale, C) candidate.
modeloPoly1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Predicted classes for the training rows and for the test rows
resultadoEn3 <- predict(modeloPoly1, newdata = entrenamiento1)
resultadoPr3 <- predict(modeloPoly1, newdata = prueba1)

# Confusion matrices: predictions against the actual class
mce3 <- confusionMatrix(
  data = resultadoEn3,
  reference = entrenamiento1$Class
)
mce3 # confusion matrix for the training result
mcp3 <- confusionMatrix(
  data = resultadoPr3,
  reference = prueba1$Class
)
mcp3 # confusion matrix for the test result
```

# <span style="color: blue;">4. Modelo con el método rpart</span>
```{r}
# Fit an rpart decision tree on the breast-cancer training rows with 10-fold
# cross-validation; tuneLength = 10 asks caret for ten candidate settings.
modeloRpart1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Predicted classes for the training rows and for the test rows
resultadoEn4 <- predict(modeloRpart1, newdata = entrenamiento1)
resultadoPr4 <- predict(modeloRpart1, newdata = prueba1)

# Confusion matrices: predictions against the actual class
mce4 <- confusionMatrix(
  data = resultadoEn4,
  reference = entrenamiento1$Class
)
mce4 # confusion matrix for the training result
mcp4 <- confusionMatrix(
  data = resultadoPr4,
  reference = prueba1$Class
)
mcp4 # confusion matrix for the test result
```

# <span style="color: blue;">5. Modelo con el método nnet</span>
```{r}
# Fit a single-hidden-layer neural network (nnet) on the breast-cancer
# training rows with 10-fold cross-validation.
# trace = FALSE is forwarded to the underlying nnet fit so its per-iteration
# optimisation log (printed for every fold and tuning candidate) does not
# flood the knitted report; the fitted model is unaffected.
modeloNnet1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)

# Predicted classes for the training rows and for the test rows
resultadoEn5 <- predict(modeloNnet1, entrenamiento1)
resultadoPr5 <- predict(modeloNnet1, prueba1)

# Confusion matrices: predictions against the actual class
mce5 <- confusionMatrix(resultadoEn5, entrenamiento1$Class)
mce5 # confusion matrix for the training result
mcp5 <- confusionMatrix(resultadoPr5, prueba1$Class)
mcp5 # confusion matrix for the test result
```

# <span style="color: blue;">6. Modelo con el método rf</span>
```{r}
# Fit a random forest on the breast-cancer training rows with 10-fold
# cross-validation, trying mtry values 2, 4 and 6.
modeloRf1 <- train(
  Class ~ .,
  data = entrenamiento1,
  method = "rf",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = expand.grid(mtry = c(2, 4, 6))
)

# Predicted classes for the training rows and for the test rows
resultadoEn6 <- predict(modeloRf1, newdata = entrenamiento1)
resultadoPr6 <- predict(modeloRf1, newdata = prueba1)

# Confusion matrices: predictions against the actual class
mce6 <- confusionMatrix(
  data = resultadoEn6,
  reference = entrenamiento1$Class
)
mce6 # confusion matrix for the training result
mcp6 <- confusionMatrix(
  data = resultadoPr6,
  reference = prueba1$Class
)
mcp6 # confusion matrix for the test result
```

# Resumen de resultados
```{r}
# Summary table: training accuracy (row 1) and test accuracy (row 2) for each
# of the six breast-cancer models.
# check.names = FALSE keeps the column labels exactly as written; without it
# data.frame() rewrites "1. svmLinear1" into "X1..svmLinear1". The labels are
# also numbered 1-6 to match the model sections (they were all "1.").
# [[ ]] extracts the bare accuracy value without carrying the "Accuracy" name.
datos1 <- data.frame(
  "1. svmLinear1" = c(
    mce$overall[["Accuracy"]], mcp$overall[["Accuracy"]]
  ),
  "2. svmRadial1" = c(
    mce2$overall[["Accuracy"]], mcp2$overall[["Accuracy"]]
  ),
  "3. svmPoly1" = c(
    mce3$overall[["Accuracy"]], mcp3$overall[["Accuracy"]]
  ),
  "4. rpart1" = c(
    mce4$overall[["Accuracy"]], mcp4$overall[["Accuracy"]]
  ),
  "5. nnet1" = c(
    mce5$overall[["Accuracy"]], mcp5$overall[["Accuracy"]]
  ),
  "6. rf1" = c(
    mce6$overall[["Accuracy"]], mcp6$overall[["Accuracy"]]
  ),
  check.names = FALSE
)
rownames(datos1) <- c("Precisión de entrenamiento", "Precisión de prueba")
datos1
```