Teoría

caret (Classification And REgression Training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.

Obtención y entendimiento de los datos

Librerías

library(ggplot2) # Gráficas con mejor diseño
library(lattice) # Crear gráficos
library(caret) # Algoritmos de aprendizaje automático
library(datasets) # Usar la base de datos "Iris"
library(DataExplorer) # Análisis exploratorio de los datos
library(kernlab)
library(tidyverse)
library(kableExtra)

Obtener la base de datos

# Working copy of the built-in iris data set as a plain data frame.
df <- data.frame(iris)

EDA

# Per-column descriptive statistics (quartiles/mean for numeric columns,
# level counts for the Species factor).
summary(df)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Based on the descriptive statistics, the explanatory variables look reasonably
# well distributed; "Petal.Length" may be the most affected one, since its data
# are concentrated above the mean (median 4.35 vs. mean 3.758).
str(df) # Confirm that the variables come in the required format.
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Optional: DataExplorer's automated report, left disabled here.
#create_report(df)
# Missing-value profile of every column.
plot_missing(df)

# There are no missing records in any field of the data frame.
# Box plots of the variables, grouped by Species.
plot_boxplot(df, by = "Species")

# Histograms of the data frame's columns.
plot_histogram(df)

# Bar charts of the data frame's columns.
plot_bar(df)

# Correlation plot of the data frame's columns.
plot_correlation(df)

Uso de CARET

Partición de datos

# Fix the RNG seed so the random split below is the same on every run.
set.seed(123)
# Row indices for an 80% training sample drawn on the Species outcome;
# list = FALSE returns the indices as a matrix instead of a list.
DataPart <- createDataPartition(df$Species, p = 0.8, list = FALSE)
# Subset `df` (the object the indices were computed from) rather than the
# global `iris`, so the split stays correct if `df` is ever filtered or
# transformed before this point.
train_set <- df[DataPart, ]
test_set <- df[-DataPart, ]

Métodos para Modelar

Los métodos más utilizados para modelar aprendizaje automático son:

  • SVM: Support Vector Machine o Máquina de Vectores de soporte. Hay varios subtipos:
    • Lineal (svmLinear)
    • Radial (svmRadial)
    • Polinómico (svmPoly)
  • Árbol de decisión: rpart
  • Redes Neuronales: nnet
  • Random Forests: rf

SVM Lineal

Entrenamiento de Modelo

# Linear-kernel SVM predicting Species from all other columns.
# Predictors are centered and scaled, performance is estimated with 10-fold
# cross-validation, and the cost parameter is pinned to C = 1 (one-row grid,
# so no tuning search takes place).
modelo_svml <- train(
  Species ~ .,
  data = train_set,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

Resultados de Modelo

# Class predictions of the linear SVM on the training and the held-out rows.
resultados_train_svml <- predict(modelo_svml, newdata = train_set)
resultados_test_svml <- predict(modelo_svml, newdata = test_set)

Matriz de Confusión

# ---- Training set ----
# MCRE: confusion matrix of the training-set results.
MCRE_svml <- confusionMatrix(
  data = resultados_train_svml,
  reference = train_set$Species
)
#MCRE_svml

# ---- Test set ----
# MCRP: confusion matrix of the test-set results.
MCRP_svml <- confusionMatrix(
  data = resultados_test_svml,
  reference = test_set$Species
)
MCRP_svml
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

SVM Radial

Entrenamiento de Modelo

# Radial-kernel (RBF) SVM predicting Species from all other columns.
# Predictors are centered and scaled, performance is estimated with 10-fold
# cross-validation, and both hyper-parameters are pinned (sigma = 1, C = 1)
# through a one-row grid, so no tuning search takes place.
modelo_svmr <- train(
  Species ~ .,
  data = train_set,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

Resultados de Modelo

# Class predictions of the radial SVM on the training and the held-out rows.
resultados_train_svmr <- predict(modelo_svmr, newdata = train_set)
resultados_test_svmr <- predict(modelo_svmr, newdata = test_set)

Matriz de Confusión

# ---- Training set ----
# MCRE: confusion matrix of the training-set results.
MCRE_svmr <- confusionMatrix(
  data = resultados_train_svmr,
  reference = train_set$Species
)
#MCRE_svmr

# ---- Test set ----
# MCRP: confusion matrix of the test-set results.
MCRP_svmr <- confusionMatrix(
  data = resultados_test_svmr,
  reference = test_set$Species
)
MCRP_svmr
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

SVM Polinómico

Entrenamiento de Modelo

# Polynomial-kernel SVM predicting Species from all other columns.
# Predictors are centered and scaled, performance is estimated with 10-fold
# cross-validation, and the hyper-parameters are pinned (degree = 1,
# scale = 1, C = 1) through a one-row grid, so no tuning search takes place.
modelo_svmp <- train(
  Species ~ .,
  data = train_set,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

Resultados de Modelo

# Class predictions of the polynomial SVM on the training and held-out rows.
resultados_train_svmp <- predict(modelo_svmp, newdata = train_set)
resultados_test_svmp <- predict(modelo_svmp, newdata = test_set)

Matriz de Confusión

# ---- Training set ----
# MCRE: confusion matrix of the training-set results.
MCRE_svmp <- confusionMatrix(
  data = resultados_train_svmp,
  reference = train_set$Species
)
#MCRE_svmp

# ---- Test set ----
# MCRP: confusion matrix of the test-set results.
MCRP_svmp <- confusionMatrix(
  data = resultados_test_svmp,
  reference = test_set$Species
)
MCRP_svmp
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

Árbol de decisión

Entrenamiento de Modelo

# Decision tree (caret method "rpart") predicting Species from all other
# columns. Predictors are centered and scaled, performance is estimated with
# 10-fold cross-validation, and tuneLength = 10 asks caret to evaluate ten
# candidate values of the method's tuning parameter.
modelo_ad <- train(
  Species ~ .,
  data = train_set,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

Resultados de Modelo

# Class predictions of the decision tree on the training and held-out rows.
resultados_train_ad <- predict(modelo_ad, newdata = train_set)
resultados_test_ad <- predict(modelo_ad, newdata = test_set)

Matriz de Confusión

# ---- Training set ----
# MCRE: confusion matrix of the training-set results.
MCRE_ad <- confusionMatrix(
  data = resultados_train_ad,
  reference = train_set$Species
)
#MCRE_ad

# ---- Test set ----
# MCRP: confusion matrix of the test-set results.
MCRP_ad <- confusionMatrix(
  data = resultados_test_ad,
  reference = test_set$Species
)
MCRP_ad
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Redes Neuronales

Entrenamiento de Modelo

# Neural network (caret method "nnet") predicting Species from all other
# columns. Predictors are centered and scaled and performance is estimated
# with 10-fold cross-validation. No tuneGrid is supplied, so caret's default
# tuning grid for this method is used.
#
# trace = FALSE is forwarded through train()'s `...` to nnet::nnet() and turns
# off its per-iteration optimizer log. It does not affect the fitted model;
# without it, every resample x tuning-grid fit prints its full convergence
# trace into the report (hundreds of "iter ... value ..." lines).
modelo_rn <- train(
  Species ~ .,
  data = train_set,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)
## # weights:  11
## initial  value 130.530132 
## iter  10 value 50.031494
## iter  20 value 48.622939
## iter  30 value 46.051782
## iter  40 value 45.435982
## iter  50 value 45.023331
## iter  60 value 41.544443
## iter  70 value 18.376424
## iter  80 value 4.629967
## iter  90 value 3.675228
## iter 100 value 3.275824
## final  value 3.275824 
## stopped after 100 iterations
## # weights:  27
## initial  value 132.517409 
## iter  10 value 22.263231
## iter  20 value 2.574680
## iter  30 value 0.008513
## final  value 0.000051 
## converged
## # weights:  43
## initial  value 136.160730 
## iter  10 value 3.642258
## iter  20 value 0.051614
## iter  30 value 0.013220
## iter  40 value 0.001249
## final  value 0.000086 
## converged
## # weights:  11
## initial  value 124.472165 
## iter  10 value 57.985437
## iter  20 value 43.232595
## final  value 43.170440 
## converged
## # weights:  27
## initial  value 118.611044 
## iter  10 value 30.413305
## iter  20 value 21.077103
## iter  30 value 20.192922
## iter  40 value 20.153936
## final  value 20.153924 
## converged
## # weights:  43
## initial  value 131.301286 
## iter  10 value 26.646865
## iter  20 value 17.682102
## iter  30 value 17.633586
## iter  40 value 17.623573
## iter  50 value 17.364993
## iter  60 value 17.295129
## iter  70 value 17.290694
## final  value 17.290666 
## converged
## # weights:  11
## initial  value 115.622911 
## iter  10 value 33.350769
## iter  20 value 4.676969
## iter  30 value 3.131052
## iter  40 value 2.922591
## iter  50 value 2.825976
## iter  60 value 2.769974
## iter  70 value 2.741299
## iter  80 value 2.741136
## iter  90 value 2.739093
## final  value 2.739035 
## converged
## # weights:  27
## initial  value 139.822975 
## iter  10 value 37.447376
## iter  20 value 1.445699
## iter  30 value 0.316497
## iter  40 value 0.287713
## iter  50 value 0.260591
## iter  60 value 0.236249
## iter  70 value 0.224761
## iter  80 value 0.215415
## iter  90 value 0.194816
## iter 100 value 0.189471
## final  value 0.189471 
## stopped after 100 iterations
## # weights:  43
## initial  value 123.298044 
## iter  10 value 4.177632
## iter  20 value 0.257205
## iter  30 value 0.224601
## iter  40 value 0.200241
## iter  50 value 0.193031
## iter  60 value 0.182082
## iter  70 value 0.164800
## iter  80 value 0.149792
## iter  90 value 0.144373
## iter 100 value 0.142810
## final  value 0.142810 
## stopped after 100 iterations
## # weights:  11
## initial  value 123.243079 
## iter  10 value 49.923348
## iter  20 value 49.909994
## iter  30 value 49.907880
## final  value 49.906719 
## converged
## # weights:  27
## initial  value 117.894759 
## iter  10 value 9.481781
## iter  20 value 0.026637
## iter  30 value 0.001156
## final  value 0.000052 
## converged
## # weights:  43
## initial  value 131.870976 
## iter  10 value 17.010430
## iter  20 value 0.698814
## iter  30 value 0.001401
## final  value 0.000067 
## converged
## # weights:  11
## initial  value 141.804121 
## iter  10 value 63.315182
## iter  20 value 44.532148
## iter  30 value 42.998412
## final  value 42.994034 
## converged
## # weights:  27
## initial  value 129.180442 
## iter  10 value 44.217928
## iter  20 value 19.729677
## iter  30 value 18.527378
## iter  40 value 18.411074
## iter  50 value 18.393711
## iter  60 value 18.393129
## final  value 18.393125 
## converged
## # weights:  43
## initial  value 143.533117 
## iter  10 value 21.063126
## iter  20 value 17.843661
## iter  30 value 17.106737
## iter  40 value 16.985544
## iter  50 value 16.981278
## iter  60 value 16.980626
## final  value 16.980585 
## converged
## # weights:  11
## initial  value 123.091645 
## iter  10 value 49.148390
## iter  20 value 35.943210
## iter  30 value 10.736283
## iter  40 value 2.021433
## iter  50 value 1.687392
## iter  60 value 1.640809
## iter  70 value 1.636953
## iter  80 value 1.613389
## iter  90 value 1.611928
## iter 100 value 1.611137
## final  value 1.611137 
## stopped after 100 iterations
## # weights:  27
## initial  value 113.416728 
## iter  10 value 6.236444
## iter  20 value 0.187917
## iter  30 value 0.166748
## iter  40 value 0.155642
## iter  50 value 0.144249
## iter  60 value 0.141208
## iter  70 value 0.138463
## iter  80 value 0.136774
## iter  90 value 0.134567
## iter 100 value 0.132971
## final  value 0.132971 
## stopped after 100 iterations
## # weights:  43
## initial  value 124.153763 
## iter  10 value 6.673362
## iter  20 value 0.166533
## iter  30 value 0.154159
## iter  40 value 0.149227
## iter  50 value 0.136832
## iter  60 value 0.125718
## iter  70 value 0.121478
## iter  80 value 0.115540
## iter  90 value 0.113390
## iter 100 value 0.110992
## final  value 0.110992 
## stopped after 100 iterations
## # weights:  11
## initial  value 128.347385 
## iter  10 value 55.157651
## iter  20 value 47.800562
## iter  30 value 47.763719
## iter  40 value 47.763542
## iter  50 value 47.762534
## final  value 47.762465 
## converged
## # weights:  27
## initial  value 115.590774 
## iter  10 value 5.054265
## iter  20 value 1.048058
## iter  30 value 0.000979
## final  value 0.000072 
## converged
## # weights:  43
## initial  value 123.951869 
## iter  10 value 13.178443
## iter  20 value 0.965118
## iter  30 value 0.002392
## final  value 0.000078 
## converged
## # weights:  11
## initial  value 123.195822 
## iter  10 value 53.656490
## iter  20 value 43.803131
## iter  30 value 43.734766
## final  value 43.734347 
## converged
## # weights:  27
## initial  value 123.651803 
## iter  10 value 29.880588
## iter  20 value 19.921143
## iter  30 value 19.707388
## iter  40 value 19.705704
## final  value 19.705624 
## converged
## # weights:  43
## initial  value 148.336280 
## iter  10 value 27.474145
## iter  20 value 18.301737
## iter  30 value 18.138015
## iter  40 value 18.086240
## iter  50 value 18.084155
## iter  60 value 18.083934
## final  value 18.083909 
## converged
## # weights:  11
## initial  value 122.563728 
## iter  10 value 32.122176
## iter  20 value 10.269949
## iter  30 value 4.526292
## iter  40 value 3.900620
## iter  50 value 3.805816
## iter  60 value 3.743349
## iter  70 value 3.733207
## iter  80 value 3.721238
## iter  90 value 3.713938
## iter 100 value 3.705684
## final  value 3.705684 
## stopped after 100 iterations
## # weights:  27
## initial  value 130.631378 
## iter  10 value 4.944652
## iter  20 value 0.903581
## iter  30 value 0.602599
## iter  40 value 0.449328
## iter  50 value 0.416076
## iter  60 value 0.405323
## iter  70 value 0.397568
## iter  80 value 0.392801
## iter  90 value 0.386606
## iter 100 value 0.380965
## final  value 0.380965 
## stopped after 100 iterations
## # weights:  43
## initial  value 152.884265 
## iter  10 value 11.737646
## iter  20 value 1.402922
## iter  30 value 0.553654
## iter  40 value 0.456488
## iter  50 value 0.433353
## iter  60 value 0.391721
## iter  70 value 0.350673
## iter  80 value 0.322382
## iter  90 value 0.309362
## iter 100 value 0.302224
## final  value 0.302224 
## stopped after 100 iterations
## # weights:  11
## initial  value 133.677265 
## iter  10 value 49.425529
## iter  20 value 45.125104
## iter  30 value 24.714814
## iter  40 value 6.951374
## iter  50 value 3.962940
## iter  60 value 3.585057
## iter  70 value 2.556588
## iter  80 value 2.219301
## iter  90 value 2.033936
## iter 100 value 2.011517
## final  value 2.011517 
## stopped after 100 iterations
## # weights:  27
## initial  value 120.219437 
## iter  10 value 20.105178
## iter  20 value 0.691846
## iter  30 value 0.000424
## final  value 0.000094 
## converged
## # weights:  43
## initial  value 130.013247 
## iter  10 value 6.990719
## iter  20 value 0.117056
## final  value 0.000078 
## converged
## # weights:  11
## initial  value 122.587894 
## iter  10 value 55.646479
## iter  20 value 44.073616
## iter  30 value 44.056707
## final  value 44.056649 
## converged
## # weights:  27
## initial  value 122.488484 
## iter  10 value 30.042105
## iter  20 value 22.364237
## iter  30 value 21.402694
## iter  40 value 21.391770
## final  value 21.391728 
## converged
## # weights:  43
## initial  value 151.848122 
## iter  10 value 27.150882
## iter  20 value 20.889994
## iter  30 value 19.061592
## iter  40 value 18.857339
## iter  50 value 18.636402
## iter  60 value 18.597842
## iter  70 value 18.581420
## final  value 18.581304 
## converged
## # weights:  11
## initial  value 125.447189 
## iter  10 value 42.432302
## iter  20 value 14.708081
## iter  30 value 5.928158
## iter  40 value 4.717183
## iter  50 value 4.261072
## iter  60 value 3.990872
## iter  70 value 3.894028
## iter  80 value 3.877352
## iter  90 value 3.868846
## iter 100 value 3.865924
## final  value 3.865924 
## stopped after 100 iterations
## # weights:  27
## initial  value 141.522247 
## iter  10 value 19.693351
## iter  20 value 2.060082
## iter  30 value 0.713635
## iter  40 value 0.684010
## iter  50 value 0.651024
## iter  60 value 0.599068
## iter  70 value 0.534726
## iter  80 value 0.525302
## iter  90 value 0.477461
## iter 100 value 0.468104
## final  value 0.468104 
## stopped after 100 iterations
## # weights:  43
## initial  value 117.492171 
## iter  10 value 5.474776
## iter  20 value 0.633193
## iter  30 value 0.523049
## iter  40 value 0.506835
## iter  50 value 0.486677
## iter  60 value 0.470314
## iter  70 value 0.423468
## iter  80 value 0.413761
## iter  90 value 0.406423
## iter 100 value 0.383741
## final  value 0.383741 
## stopped after 100 iterations
## # weights:  11
## initial  value 128.494859 
## iter  10 value 67.868204
## iter  20 value 40.370984
## iter  30 value 8.030160
## iter  40 value 3.602779
## iter  50 value 3.354456
## iter  60 value 3.245703
## iter  70 value 3.148381
## iter  80 value 3.017232
## iter  90 value 2.916738
## iter 100 value 2.698927
## final  value 2.698927 
## stopped after 100 iterations
## # weights:  27
## initial  value 121.387618 
## iter  10 value 17.333188
## iter  20 value 6.562404
## iter  30 value 4.218606
## iter  40 value 0.023796
## iter  50 value 0.013835
## iter  60 value 0.007181
## iter  70 value 0.000265
## final  value 0.000094 
## converged
## # weights:  43
## initial  value 131.764022 
## iter  10 value 6.923964
## iter  20 value 0.585918
## iter  30 value 0.001510
## final  value 0.000094 
## converged
## # weights:  11
## initial  value 117.924376 
## iter  10 value 59.153858
## iter  20 value 45.980503
## iter  30 value 43.965813
## final  value 43.965807 
## converged
## # weights:  27
## initial  value 122.524569 
## iter  10 value 28.252379
## iter  20 value 20.308998
## iter  30 value 19.983255
## iter  40 value 19.969846
## final  value 19.969845 
## converged
## # weights:  43
## initial  value 175.722543 
## iter  10 value 24.152694
## iter  20 value 19.351652
## iter  30 value 18.570128
## iter  40 value 18.540253
## iter  50 value 18.531786
## iter  60 value 18.531273
## final  value 18.531272 
## converged
## # weights:  11
## initial  value 125.626851 
## iter  10 value 50.695359
## iter  20 value 28.615271
## iter  30 value 12.424432
## iter  40 value 5.029030
## iter  50 value 4.166888
## iter  60 value 3.979676
## iter  70 value 3.882211
## iter  80 value 3.873043
## iter  90 value 3.872674
## iter 100 value 3.871442
## final  value 3.871442 
## stopped after 100 iterations
## # weights:  27
## initial  value 123.025871 
## iter  10 value 27.020381
## iter  20 value 2.694706
## iter  30 value 1.092737
## iter  40 value 0.872715
## iter  50 value 0.758401
## iter  60 value 0.630276
## iter  70 value 0.571755
## iter  80 value 0.515264
## iter  90 value 0.475373
## iter 100 value 0.452080
## final  value 0.452080 
## stopped after 100 iterations
## # weights:  43
## initial  value 134.385829 
## iter  10 value 5.396493
## iter  20 value 1.952502
## iter  30 value 0.810078
## iter  40 value 0.740163
## iter  50 value 0.700944
## iter  60 value 0.648312
## iter  70 value 0.581811
## iter  80 value 0.540064
## iter  90 value 0.513923
## iter 100 value 0.483298
## final  value 0.483298 
## stopped after 100 iterations
## # weights:  11
## initial  value 124.033991 
## iter  10 value 53.598901
## iter  20 value 53.094417
## iter  30 value 51.710795
## iter  40 value 44.732729
## iter  50 value 17.281237
## iter  60 value 6.529030
## iter  70 value 3.473730
## iter  80 value 3.279187
## iter  90 value 3.156556
## iter 100 value 2.981555
## final  value 2.981555 
## stopped after 100 iterations
## # weights:  27
## initial  value 126.207925 
## iter  10 value 6.867316
## iter  20 value 0.342203
## iter  30 value 0.000889
## final  value 0.000071 
## converged
## # weights:  43
## initial  value 146.268437 
## iter  10 value 7.061711
## iter  20 value 1.073309
## iter  30 value 0.000467
## final  value 0.000066 
## converged
## # weights:  11
## initial  value 120.866935 
## iter  10 value 85.950877
## iter  20 value 60.671406
## iter  30 value 50.749580
## iter  40 value 43.846120
## final  value 43.846095 
## converged
## # weights:  27
## initial  value 126.514320 
## iter  10 value 46.451931
## iter  20 value 22.288378
## iter  30 value 21.611509
## iter  40 value 21.142364
## iter  50 value 20.374688
## iter  60 value 19.975509
## iter  70 value 19.860029
## final  value 19.859991 
## converged
## # weights:  43
## initial  value 113.521981 
## iter  10 value 27.307122
## iter  20 value 19.069629
## iter  30 value 18.496103
## iter  40 value 18.414947
## iter  50 value 18.412091
## iter  60 value 18.411932
## final  value 18.411927 
## converged
## # weights:  11
## initial  value 119.931364 
## iter  10 value 33.212563
## iter  20 value 6.825543
## iter  30 value 4.153607
## iter  40 value 3.996719
## iter  50 value 3.936301
## iter  60 value 3.900913
## iter  70 value 3.868653
## iter  80 value 3.868193
## iter  90 value 3.864798
## iter 100 value 3.860658
## final  value 3.860658 
## stopped after 100 iterations
## # weights:  27
## initial  value 125.980953 
## iter  10 value 3.828376
## iter  20 value 1.757039
## iter  30 value 1.084888
## iter  40 value 0.779504
## iter  50 value 0.534913
## iter  60 value 0.521705
## iter  70 value 0.515783
## iter  80 value 0.504124
## iter  90 value 0.485201
## iter 100 value 0.483827
## final  value 0.483827 
## stopped after 100 iterations
## # weights:  43
## initial  value 143.013185 
## iter  10 value 7.195354
## iter  20 value 1.984745
## iter  30 value 0.713672
## iter  40 value 0.552459
## iter  50 value 0.437450
## iter  60 value 0.403627
## iter  70 value 0.363382
## iter  80 value 0.356303
## iter  90 value 0.346628
## iter 100 value 0.337926
## final  value 0.337926 
## stopped after 100 iterations
## # weights:  11
## initial  value 119.603843 
## iter  10 value 66.519353
## iter  20 value 48.085237
## iter  30 value 10.691129
## iter  40 value 4.343493
## iter  50 value 3.486657
## iter  60 value 2.937962
## iter  70 value 2.185862
## iter  80 value 1.910157
## iter  90 value 1.802781
## iter 100 value 1.791736
## final  value 1.791736 
## stopped after 100 iterations
## # weights:  27
## initial  value 120.493313 
## iter  10 value 14.568437
## iter  20 value 1.413139
## iter  30 value 0.002421
## final  value 0.000049 
## converged
## # weights:  43
## initial  value 131.990396 
## iter  10 value 3.607345
## iter  20 value 0.869522
## iter  30 value 0.000776
## final  value 0.000079 
## converged
## # weights:  11
## initial  value 127.213395 
## iter  10 value 58.997762
## iter  20 value 44.424763
## final  value 43.139243 
## converged
## # weights:  27
## initial  value 117.195869 
## iter  10 value 28.619024
## iter  20 value 19.206476
## iter  30 value 18.621574
## iter  40 value 18.619068
## iter  40 value 18.619068
## iter  40 value 18.619068
## final  value 18.619068 
## converged
## # weights:  43
## initial  value 165.598734 
## iter  10 value 24.205649
## iter  20 value 17.629535
## iter  30 value 17.222776
## iter  40 value 17.168752
## iter  50 value 17.168464
## iter  60 value 17.168428
## iter  60 value 17.168428
## iter  60 value 17.168428
## final  value 17.168428 
## converged
## # weights:  11
## initial  value 115.941037 
## iter  10 value 48.705139
## iter  20 value 47.783092
## iter  30 value 43.562064
## iter  40 value 11.101593
## iter  50 value 4.031437
## iter  60 value 3.116711
## iter  70 value 3.019260
## iter  80 value 2.993105
## iter  90 value 2.981303
## iter 100 value 2.969047
## final  value 2.969047 
## stopped after 100 iterations
## # weights:  27
## initial  value 132.813339 
## iter  10 value 3.715700
## iter  20 value 1.056815
## iter  30 value 0.558748
## iter  40 value 0.530262
## iter  50 value 0.467614
## iter  60 value 0.445847
## iter  70 value 0.424130
## iter  80 value 0.373259
## iter  90 value 0.354379
## iter 100 value 0.342801
## final  value 0.342801 
## stopped after 100 iterations
## # weights:  43
## initial  value 126.886256 
## iter  10 value 3.942342
## iter  20 value 1.736816
## iter  30 value 0.630651
## iter  40 value 0.552680
## iter  50 value 0.489807
## iter  60 value 0.396264
## iter  70 value 0.356221
## iter  80 value 0.340605
## iter  90 value 0.328238
## iter 100 value 0.321359
## final  value 0.321359 
## stopped after 100 iterations
## # weights:  11
## initial  value 128.489378 
## iter  10 value 49.909576
## iter  20 value 49.876540
## iter  30 value 47.945970
## iter  40 value 39.847983
## iter  50 value 8.016537
## iter  60 value 4.619364
## iter  70 value 2.386452
## iter  80 value 1.338165
## iter  90 value 1.178344
## iter 100 value 1.100248
## final  value 1.100248 
## stopped after 100 iterations
## # weights:  27
## initial  value 141.912242 
## iter  10 value 7.102731
## iter  20 value 0.339738
## final  value 0.000079 
## converged
## # weights:  43
## initial  value 128.771330 
## iter  10 value 21.354630
## iter  20 value 2.784172
## iter  30 value 0.013786
## iter  40 value 0.000332
## final  value 0.000076 
## converged
## # weights:  11
## initial  value 120.181179 
## iter  10 value 46.347790
## iter  20 value 43.064428
## iter  30 value 43.054040
## final  value 43.054021 
## converged
## # weights:  27
## initial  value 126.647230 
## iter  10 value 25.682812
## iter  20 value 20.660342
## iter  30 value 19.500529
## iter  40 value 19.121600
## iter  50 value 19.088454
## iter  60 value 19.083697
## final  value 19.083689 
## converged
## # weights:  43
## initial  value 132.234904 
## iter  10 value 29.615687
## iter  20 value 19.279132
## iter  30 value 17.877712
## iter  40 value 17.806996
## iter  50 value 17.793960
## iter  60 value 17.793819
## final  value 17.793686 
## converged
## # weights:  11
## initial  value 121.579687 
## iter  10 value 49.472914
## iter  20 value 48.410085
## iter  30 value 45.340464
## iter  40 value 37.104905
## iter  50 value 8.129202
## iter  60 value 4.703745
## iter  70 value 4.278312
## iter  80 value 3.668066
## iter  90 value 3.605900
## iter 100 value 3.568123
## final  value 3.568123 
## stopped after 100 iterations
## # weights:  27
## initial  value 135.360878 
## iter  10 value 10.436945
## iter  20 value 2.222820
## iter  30 value 0.763058
## iter  40 value 0.725440
## iter  50 value 0.677966
## iter  60 value 0.570628
## iter  70 value 0.518380
## iter  80 value 0.502364
## iter  90 value 0.462332
## iter 100 value 0.455880
## final  value 0.455880 
## stopped after 100 iterations
## # weights:  43
## initial  value 125.924213 
## iter  10 value 3.865138
## iter  20 value 1.025246
## iter  30 value 0.422681
## iter  40 value 0.379135
## iter  50 value 0.353145
## iter  60 value 0.335865
## iter  70 value 0.319622
## iter  80 value 0.303895
## iter  90 value 0.289299
## iter 100 value 0.271561
## final  value 0.271561 
## stopped after 100 iterations
## # weights:  11
## initial  value 114.925820 
## iter  10 value 45.333263
## iter  20 value 21.250608
## iter  30 value 6.082611
## iter  40 value 4.448976
## iter  50 value 3.266614
## iter  60 value 1.880390
## iter  70 value 1.733764
## iter  80 value 1.089267
## iter  90 value 1.045776
## iter 100 value 0.950636
## final  value 0.950636 
## stopped after 100 iterations
## # weights:  27
## initial  value 116.607224 
## iter  10 value 6.159810
## iter  20 value 1.197702
## iter  30 value 0.000196
## final  value 0.000057 
## converged
## # weights:  43
## initial  value 123.125697 
## iter  10 value 4.793414
## iter  20 value 0.073094
## iter  30 value 0.000393
## final  value 0.000088 
## converged
## # weights:  11
## initial  value 120.471214 
## iter  10 value 45.420303
## iter  20 value 43.694661
## iter  30 value 43.690235
## final  value 43.690202 
## converged
## # weights:  27
## initial  value 168.714249 
## iter  10 value 28.073376
## iter  20 value 21.126580
## iter  30 value 20.968508
## iter  40 value 20.968134
## final  value 20.968117 
## converged
## # weights:  43
## initial  value 134.057733 
## iter  10 value 44.240823
## iter  20 value 19.621880
## iter  30 value 18.596469
## iter  40 value 18.220014
## iter  50 value 18.200869
## iter  60 value 18.194706
## final  value 18.194547 
## converged
## # weights:  11
## initial  value 137.081572 
## iter  10 value 53.546736
## iter  20 value 49.263649
## iter  30 value 49.116099
## iter  40 value 49.041348
## iter  50 value 48.683090
## iter  60 value 48.634845
## iter  70 value 48.489442
## iter  80 value 48.480790
## iter  90 value 48.451846
## iter 100 value 48.179024
## final  value 48.179024 
## stopped after 100 iterations
## # weights:  27
## initial  value 143.490043 
## iter  10 value 4.357251
## iter  20 value 1.321252
## iter  30 value 0.645280
## iter  40 value 0.616636
## iter  50 value 0.565996
## iter  60 value 0.521660
## iter  70 value 0.508617
## iter  80 value 0.487870
## iter  90 value 0.483152
## iter 100 value 0.479423
## final  value 0.479423 
## stopped after 100 iterations
## # weights:  43
## initial  value 178.832632 
## iter  10 value 8.121158
## iter  20 value 1.422046
## iter  30 value 0.568662
## iter  40 value 0.518952
## iter  50 value 0.434974
## iter  60 value 0.392568
## iter  70 value 0.345835
## iter  80 value 0.285289
## iter  90 value 0.268178
## iter 100 value 0.253675
## final  value 0.253675 
## stopped after 100 iterations
## # weights:  11
## initial  value 123.307045 
## iter  10 value 43.672929
## iter  20 value 8.049676
## iter  30 value 3.773651
## iter  40 value 3.173208
## iter  50 value 3.060201
## iter  60 value 2.971167
## iter  70 value 2.563371
## iter  80 value 2.471224
## iter  90 value 2.341221
## iter 100 value 2.320048
## final  value 2.320048 
## stopped after 100 iterations
## # weights:  27
## initial  value 129.270569 
## iter  10 value 10.575847
## iter  20 value 2.930770
## iter  30 value 1.689612
## iter  40 value 0.097359
## iter  50 value 0.000123
## iter  50 value 0.000057
## iter  50 value 0.000057
## final  value 0.000057 
## converged
## # weights:  43
## initial  value 119.634242 
## iter  10 value 6.310691
## iter  20 value 1.591412
## iter  30 value 0.028391
## iter  40 value 0.000902
## final  value 0.000069 
## converged
## # weights:  11
## initial  value 120.069235 
## iter  10 value 60.195069
## iter  20 value 51.394914
## iter  30 value 43.991436
## final  value 43.991141 
## converged
## # weights:  27
## initial  value 152.809198 
## iter  10 value 25.471737
## iter  20 value 21.511163
## iter  30 value 21.387357
## iter  40 value 21.386800
## final  value 21.386800 
## converged
## # weights:  43
## initial  value 137.024287 
## iter  10 value 22.447246
## iter  20 value 19.002967
## iter  30 value 18.519064
## iter  40 value 18.404215
## iter  50 value 18.397540
## iter  60 value 18.396716
## final  value 18.396607 
## converged
## # weights:  11
## initial  value 121.726735 
## iter  10 value 50.373336
## iter  20 value 50.105529
## iter  30 value 49.998791
## iter  40 value 49.958270
## iter  50 value 49.774790
## iter  60 value 48.541266
## iter  70 value 18.978222
## iter  80 value 6.742676
## iter  90 value 4.056469
## iter 100 value 3.922763
## final  value 3.922763 
## stopped after 100 iterations
## # weights:  27
## initial  value 146.633351 
## iter  10 value 6.579898
## iter  20 value 0.624311
## iter  30 value 0.562510
## iter  40 value 0.514462
## iter  50 value 0.457198
## iter  60 value 0.403961
## iter  70 value 0.382785
## iter  80 value 0.371306
## iter  90 value 0.358751
## iter 100 value 0.317469
## final  value 0.317469 
## stopped after 100 iterations
## # weights:  43
## initial  value 127.981900 
## iter  10 value 7.369546
## iter  20 value 0.839917
## iter  30 value 0.675447
## iter  40 value 0.617273
## iter  50 value 0.540482
## iter  60 value 0.477520
## iter  70 value 0.443309
## iter  80 value 0.359346
## iter  90 value 0.308424
## iter 100 value 0.292198
## final  value 0.292198 
## stopped after 100 iterations
## # weights:  11
## initial  value 133.510869 
## iter  10 value 66.279276
## iter  20 value 49.065891
## iter  30 value 46.607987
## final  value 46.598156 
## converged

Resultados de Modelo

# Class predictions of the fitted neural-network model (modelo_rn) on the
# training and test partitions; compared with the true species further below.
resultados_train_rn <- predict(modelo_rn, newdata = train_set)
resultados_test_rn <- predict(modelo_rn, newdata = test_set)

Matriz de Confusión

# ---- Training ----
# MCRE: confusion matrix of the training results (predicted vs. true species).
MCRE_rn <- confusionMatrix(
  data = resultados_train_rn,
  reference = train_set[["Species"]]
)
# MCRE_rn  # printing of the training matrix is left commented out

# ---- Test ----
# MCRP: confusion matrix of the test results (predicted vs. true species).
MCRP_rn <- confusionMatrix(
  data = resultados_test_rn,
  reference = test_set[["Species"]]
)
MCRP_rn
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0          9         0
##   virginica       0          1        10
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750

Random Forests

Entrenamiento de Modelo

# Fit a random forest classifier for Species, using every other column of
# train_set as a predictor, with centering/scaling pre-processing and 10-fold
# cross-validation to choose mtry (number of variables tried at each split).
#
# mtry cannot exceed the number of predictors. The previous grid contained 6,
# which randomForest resets to the valid range with a warning, so that
# candidate only duplicated the largest valid value and could be reported as
# the "best" mtry. The grid now contains valid values only.
# NOTE(review): assumes train_set keeps the four numeric iris predictors shown
# by str(df) -- confirm against the train/test split.
modelo_rf <- train(Species ~ ., data = train_set,
                   method = "rf",
                   preProcess = c("scale", "center"),
                   trControl = trainControl(method = "cv", number = 10),
                   tuneGrid = expand.grid(mtry = c(2, 3, 4))
                   )

Resultados de Modelo

# Class predictions of the fitted random forest (modelo_rf) on the training
# and test partitions; compared with the true species further below.
resultados_train_rf <- predict(modelo_rf, newdata = train_set)
resultados_test_rf <- predict(modelo_rf, newdata = test_set)

Matriz de Confusión

# ---- Training ----
# MCRE: confusion matrix of the training results (predicted vs. true species).
MCRE_rf <- confusionMatrix(
  data = resultados_train_rf,
  reference = train_set[["Species"]]
)
# MCRE_rf  # printing of the training matrix is left commented out

# ---- Test ----
# MCRP: confusion matrix of the test results (predicted vs. true species).
MCRP_rf <- confusionMatrix(
  data = resultados_test_rf,
  reference = test_set[["Species"]]
)
MCRP_rf
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Comparativa entre Modelos

# Accuracy comparison across all fitted models: one column per model,
# first row = accuracy on the training set (MCRE_*), second row = accuracy on
# the test set (MCRP_*), both read from the confusion matrices' `overall` slot.
resultados <- data.frame(
  svmLinear = c(
    MCRE_svml$overall[["Accuracy"]],
    MCRP_svml$overall[["Accuracy"]]
  ),
  svmRadial = c(
    MCRE_svmr$overall[["Accuracy"]],
    MCRP_svmr$overall[["Accuracy"]]
  ),
  svmPoly = c(
    MCRE_svmp$overall[["Accuracy"]],
    MCRP_svmp$overall[["Accuracy"]]
  ),
  Arboles = c(
    MCRE_ad$overall[["Accuracy"]],
    MCRP_ad$overall[["Accuracy"]]
  ),
  Redes = c(
    MCRE_rn$overall[["Accuracy"]],
    MCRP_rn$overall[["Accuracy"]]
  ),
  RandomForest = c(
    MCRE_rf$overall[["Accuracy"]],
    MCRP_rf$overall[["Accuracy"]]
  ),
  row.names = c("Precisión de entrenamiento", "Precisión de prueba")
)

resultados
##                            svmLinear svmRadial   svmPoly   Arboles     Redes
## Precisión de entrenamiento 0.9916667 0.9916667 0.9916667 0.9666667 0.9666667
## Precisión de prueba        0.9666667 0.9333333 0.9666667 0.9333333 0.9666667
##                            RandomForest
## Precisión de entrenamiento    1.0000000
## Precisión de prueba           0.9333333
# Long-format summary table: one row per model with its training and test
# accuracy, rendered as a styled kable table.
Metodo <- c(
  "SVM Linear",
  "SVM Radial",
  "SVM Poly",
  "Árboles de decisión",
  "Redes Neuronales",
  "Random Forest"
)

# Accuracy on the training set, in the same order as Metodo.
ACC_Train <- c(
  MCRE_svml$overall["Accuracy"],
  MCRE_svmr$overall["Accuracy"],
  MCRE_svmp$overall["Accuracy"],
  MCRE_ad$overall["Accuracy"],
  MCRE_rn$overall["Accuracy"],
  MCRE_rf$overall["Accuracy"]
)

# Accuracy on the test set, in the same order as Metodo.
ACC_Test <- c(
  MCRP_svml$overall["Accuracy"],
  MCRP_svmr$overall["Accuracy"],
  MCRP_svmp$overall["Accuracy"],
  MCRP_ad$overall["Accuracy"],
  MCRP_rn$overall["Accuracy"],
  MCRP_rf$overall["Accuracy"]
)

# NOTE(review): despite its name, RMSE_df holds accuracies, not RMSE values;
# the name is kept so any later reference to it keeps working.
RMSE_df <- data.frame(Metodo, ACC_Train, ACC_Test)
kable_styling(kbl(RMSE_df))
Metodo ACC_Train ACC_Test
SVM Linear 0.9916667 0.9666667
SVM Radial 0.9916667 0.9333333
SVM Poly 0.9916667 0.9666667
Árboles de decisión 0.9666667 0.9333333
Redes Neuronales 0.9666667 0.9666667
Random Forest 1.0000000 0.9333333