Theory

The caret package (Classification And Regression Training) is a comprehensive package that exposes a wide variety of machine-learning algorithms through a single, unified interface.
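
As a quick illustration of that variety, caret can list every model method it supports. A minimal sketch, assuming the caret package loaded in the next section is already attached:

length(names(getModelInfo()))  # how many model methods caret currently supports
head(names(getModelInfo()))    # first few method names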

Install packages and load libraries

#install.packages("ggplot2") # Gráficas
library(ggplot2)
#install.packages("lattice") # Crear gráficos
library(lattice)
#install.packages ("caret") # Algoritmos de aprendizaje automático
library (caret)
#install.packages ("datasets") # Usar bases de datos, en este caso Iris
library(datasets)
#install.packages ("DataExplorer") # Análisis Exploratorio
library (DataExplorer)
#install.packages("kernlab")
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha

Create the data frame

df <- data.frame(iris)

Understand the dataset

summary(df)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
str(df)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
plot_missing(df)

plot_histogram(df)

plot_correlation(df)

Split the dataset

# Typically an 80-20 split
set.seed(123)
renglones_entrenamiento <- createDataPartition(df$Species, p=0.8, list = FALSE)
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
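
createDataPartition performs a stratified split, so each species keeps roughly the same proportion in the training and test sets. A quick check, using the objects created above:

# Class balance should be preserved in both subsets (about 1/3 per species)
prop.table(table(entrenamiento$Species))
prop.table(table(prueba$Species))
nrow(entrenamiento)  # 120 rows (40 per species)
nrow(prueba)         # 30 rows (10 per species)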

Different modeling methods

The methods most commonly used for machine-learning models (a short sketch for looking up each method's tuning parameters follows the list):

  • SVM (Support Vector Machine). Several variants are available: linear (svmLinear), radial (svmRadial), polynomial (svmPoly), etc.

  • Decision tree: rpart

  • Neural networks: nnet

  • Random forest: rf
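
Each of these methods exposes its own tuning parameters, which is what the tuneGrid / tuneLength arguments control in the models below. A minimal way to look them up (the parameters noted in the comments are what modelLookup reports):

# Tuning parameters expected by each method used in this document
modelLookup("svmLinear")  # C
modelLookup("svmRadial")  # sigma, C
modelLookup("svmPoly")    # degree, scale, C
modelLookup("rpart")      # cp
modelLookup("nnet")       # size, decay
modelLookup("rf")         # mtry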

Model 1. Linear SVM

modelo1 <- train(Species ~ ., data=entrenamiento,
                 method = "svmLinear", # change the method here
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(C=1) # change the tuning grid here
                 )

resultado_entrenamiento1 <- predict(modelo1, entrenamiento)
resultado_prueba1 <- predict(modelo1, prueba)

# Confusion matrices
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training-set predictions
mcre1 <- confusionMatrix(resultado_entrenamiento1, entrenamiento$Species)
mcre1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix for the test-set predictions
mcrp1 <- confusionMatrix(resultado_prueba1, prueba$Species)
mcrp1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500
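
The confusion matrices above evaluate predictions on the full training set and on the held-out test set. The cross-validation estimate computed while training can also be read directly from the train object; a minimal sketch, assuming modelo1 from the chunk above:

print(modelo1)      # method, preprocessing, resampling scheme and CV accuracy/kappa
modelo1$results     # accuracy and kappa for each candidate parameter value
modelo1$finalModel  # the underlying kernlab ksvm object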

Model 2. Radial SVM

modelo2 <- train(Species ~ ., data=entrenamiento,
                 method = "svmRadial", # change the method here
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(sigma=1, C=1) # change the tuning grid here
                 )

resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)

# Confusion matrices
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training-set predictions
mcre2 <- confusionMatrix(resultado_entrenamiento2, entrenamiento$Species)
mcre2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix for the test-set predictions
mcrp2 <- confusionMatrix(resultado_prueba2, prueba$Species)
mcrp2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Model 3. Polynomial SVM

modelo3 <- train(Species ~ ., data=entrenamiento,
                 method = "svmPoly", # change the method here
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(degree=1, scale=1, C=1) # change the tuning grid here
                 )

resultado_entrenamiento3 <- predict(modelo3, entrenamiento)
resultado_prueba3 <- predict(modelo3, prueba)

# Confusion matrices
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training-set predictions
mcre3 <- confusionMatrix(resultado_entrenamiento3, entrenamiento$Species)
mcre3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         38         0
##   virginica       0          2        40
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9833         
##                  95% CI : (0.9411, 0.998)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.975          
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9500           1.0000
## Specificity                 1.0000            1.0000           0.9750
## Pos Pred Value              1.0000            1.0000           0.9524
## Neg Pred Value              1.0000            0.9756           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3167           0.3333
## Detection Prevalence        0.3333            0.3167           0.3500
## Balanced Accuracy           1.0000            0.9750           0.9875
# Confusion matrix for the test-set predictions
mcrp3 <- confusionMatrix(resultado_prueba3, prueba$Species)
mcrp3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Model 4. Decision Trees

modelo4 <- train(Species ~ ., data=entrenamiento,
                 method = "rpart", # change the method here
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneLength = 10
                 )

resultado_entrenamiento4 <- predict(modelo4, entrenamiento)
resultado_prueba4 <- predict(modelo4, prueba)

# Confusion matrices
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training-set predictions
mcre4 <- confusionMatrix(resultado_entrenamiento4, entrenamiento$Species)
mcre4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         3
##   virginica       0          1        37
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           0.9250
## Specificity                 1.0000            0.9625           0.9875
## Pos Pred Value              1.0000            0.9286           0.9737
## Neg Pred Value              1.0000            0.9872           0.9634
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3083
## Detection Prevalence        0.3333            0.3500           0.3167
## Balanced Accuracy           1.0000            0.9688           0.9563
# Confusion matrix for the test-set predictions
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$Species)
mcrp4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
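
Because tuneLength = 10 was used, caret evaluated ten values of the complexity parameter cp and kept the one with the best cross-validated accuracy. To see which value was selected (assuming modelo4 from above):

modelo4$bestTune  # cp value chosen by 10-fold cross-validation
plot(modelo4)     # CV accuracy as a function of cp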

Model 5. Neural Networks

modelo5 <- train(Species ~ ., data=entrenamiento,
                 method = "nnet", # change the method here
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 trace = FALSE # passed to nnet to suppress the per-iteration training log
                 )
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)

# Confusion matrices
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training-set predictions
mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Species)
mcre5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         36         0
##   virginica       0          4        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750
# Confusion matrix for the test-set predictions
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Species)
mcrp5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0          9         0
##   virginica       0          1        10
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750
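
No tuneGrid was supplied for the neural network, so caret built its default grid over size (hidden units) and decay (weight decay), with tuneLength defaulting to 3, and picked the best combination by cross-validation. To inspect the chosen values (assuming modelo5 from above):

modelo5$bestTune  # hidden-layer size and weight decay selected by CV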

Model 6. Random Forest

modelo6 <- train(Species ~ ., data=entrenamiento,
                 method = "rf", # change the method here
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = expand.grid(mtry = c(2, 3, 4)) # mtry cannot exceed the 4 available predictors
                 )
resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)

# Confusion matrices
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training-set predictions
mcre6 <- confusionMatrix(resultado_entrenamiento6, entrenamiento$Species)
mcre6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0          0        40
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9697, 1)
##     No Information Rate : 0.3333     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           1.0000
## Specificity                 1.0000            1.0000           1.0000
## Pos Pred Value              1.0000            1.0000           1.0000
## Neg Pred Value              1.0000            1.0000           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3333
## Detection Prevalence        0.3333            0.3333           0.3333
## Balanced Accuracy           1.0000            1.0000           1.0000
# Confusion matrix for the test-set predictions
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$Species)
mcrp6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
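
For the random forest, caret can also report which predictors drive the classification. A small sketch using the fitted modelo6 (importance is rescaled to 0-100 by default):

importancia <- varImp(modelo6)  # variable importance from the underlying randomForest fit
importancia
plot(importancia)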

Results

resultados <- data.frame(
  "svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "nnet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "rf" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)
rownames(resultados) <- c("Training accuracy", "Test accuracy")
resultados
##                   svmLinear svmRadial   svmPoly     rpart      nnet        rf
## Training accuracy 0.9916667 0.9916667 0.9833333 0.9666667 0.9666667 1.0000000
## Test accuracy     0.9666667 0.9333333 0.9333333 0.9333333 0.9666667 0.9333333
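
Comparing training accuracy against test accuracy is one view; caret can also compare the six models on their cross-validation resamples, which is less sensitive to the small 30-row test set. A minimal sketch, assuming all six train objects are still in memory (for a strictly paired comparison, the same resampling indices should be set in trainControl):

comparacion <- resamples(list(svmLinear = modelo1,
                              svmRadial = modelo2,
                              svmPoly   = modelo3,
                              rpart     = modelo4,
                              nnet      = modelo5,
                              rf        = modelo6))
summary(comparacion)  # accuracy and kappa across the 10 folds for each model
bwplot(comparacion)   # lattice box-and-whisker comparison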

Conclusions

According to the results table, none of the methods shows serious overfitting: training and test accuracies stay close, although the random forest fits the training set perfectly while dropping to 93.3% on the test set. Based on test accuracy, we can select the neural network, which ties with the linear SVM at 96.7%.
