Theory

The caret package (Classification And REgression Training) is a comprehensive package that provides a unified interface to a wide variety of machine-learning algorithms.
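
As a quick orientation (optional, not needed for the rest of the analysis; the object name metodos_caret is illustrative), the catalogue of methods that caret can train can be listed directly:

metodos_caret <- names(getModelInfo()) # all model specifications registered in caret
length(metodos_caret)                  # how many methods are available
head(metodos_caret)                    # a few example method names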

Libraries

library(caret)        # training framework (train, createDataPartition, confusionMatrix)
library(ggplot2)      # plotting
library(datasets)     # the built-in "iris" dataset
library(lattice)      # plotting (required by caret)
library(DataExplorer) # quick exploratory plots (plot_missing, plot_histogram, plot_correlation)
library(mlbench)      # machine-learning benchmark datasets

Dataset

df <- data.frame(iris) # copy the built-in iris data into the working data frame

Exploratory data analysis

summary(df)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
str(df)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# create_report(df)  # optional: generates a full automated DataExplorer report
plot_missing(df)     # proportion of missing values per column

plot_histogram(df)   # histogram of each numeric variable

plot_correlation(df) # correlation heat map of the variables

The target variable, Species, is already a factor with three levels (see the output of str above), so no conversion is needed.
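
A quick optional check of the target variable:

is.factor(df$Species) # should return TRUE
levels(df$Species)    # "setosa" "versicolor" "virginica"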

Partition

set.seed(123)
renglones_entrenamiento <- createDataPartition(df$Species, p = 0.8, list = FALSE) # stratified 80/20 split by species
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
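
createDataPartition samples within each class, so the 80/20 split keeps the three species balanced. An optional check:

table(entrenamiento$Species) # expected: 40 observations per species
table(prueba$Species)        # expected: 10 observations per species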

Different models

The most commonly used methods for machine-learning modelling are listed below (the tuning parameters each method expects can be checked with modelLookup(), as sketched after the list):

  • SVM: Support Vector Machine. There are several subtypes: linear (svmLinear), radial (svmRadial), polynomial (svmPoly), etc.

  • Decision tree: rpart

  • Neural networks: nnet

  • Random forests: rf
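
A sketch (optional) of how to confirm which tuning parameters each of these methods expects, using caret's modelLookup():

modelLookup("svmLinear") # C
modelLookup("svmRadial") # sigma, C
modelLookup("svmPoly")   # degree, scale, C
modelLookup("rpart")     # cp
modelLookup("nnet")      # size, decay
modelLookup("rf")        # mtry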

Linear SVM (svmLinear)

modelo1 <- train(Species ~ ., data= entrenamiento, method = "svmLinear", preProcess= c("scale", "center"), trControl = trainControl(method = "cv", number = 10), tuneGrid = data.frame(C=1)) 

resultado_entrenamiento1 <- predict(modelo1, entrenamiento)
resultado_prueba1 <- predict(modelo1, prueba)

Confusion matrix

mcre1 <- confusionMatrix(resultado_entrenamiento1, entrenamiento$Species)
mcre1 
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
mcrp1 <- confusionMatrix(resultado_prueba1, prueba$Species)
mcrp1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500
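
Besides the confusion matrices, the cross-validation results stored in the train object can be inspected (optional):

modelo1$results  # mean accuracy and Kappa over the 10 folds
modelo1$bestTune # the cost value used (fixed here at C = 1)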

svmRadial model

modelo2 <- train(Species ~ ., data = entrenamiento, method = "svmRadial", preProcess = c("scale", "center"), trControl = trainControl(method = "cv", number = 10), tuneGrid = data.frame(sigma = 1, C = 1)) # sigma and C fixed at 1; see the grid-search sketch below



resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)

Confusion matrix

mcre2 <- confusionMatrix(resultado_entrenamiento2, entrenamiento$Species)
mcre2 
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
mcrp2 <- confusionMatrix(resultado_prueba2, prueba$Species)
mcrp2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
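
Here sigma and C were fixed at 1. A small grid search could be tried instead; the following is only a sketch (the grid values and the names rejilla_radial and modelo2_grid are illustrative):

rejilla_radial <- expand.grid(sigma = c(0.1, 0.5, 1), C = c(0.25, 1, 4))
modelo2_grid <- train(Species ~ ., data = entrenamiento, method = "svmRadial",
                      preProcess = c("scale", "center"),
                      trControl = trainControl(method = "cv", number = 10),
                      tuneGrid = rejilla_radial)
modelo2_grid$bestTune # combination with the best cross-validated accuracy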

svmPoly model

modelo3 <- train(Species ~ ., data = entrenamiento, method = "svmPoly", preProcess = c("scale", "center"), trControl = trainControl(method = "cv", number = 10), tuneGrid = data.frame(degree = 1, scale = 1, C = 1)) # degree, scale and C fixed at 1


resultado_entrenamiento3 <- predict(modelo3, entrenamiento)
resultado_prueba3 <- predict(modelo3, prueba)

Confusion matrix

mcre3 <- confusionMatrix(resultado_entrenamiento3, entrenamiento$Species)
mcre3 
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
mcrp3 <- confusionMatrix(resultado_prueba3, prueba$Species)
mcrp3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

Decision tree

modelo4 <- train(Species ~ ., data = entrenamiento, method = "rpart", preProcess = c("scale", "center"), trControl = trainControl(method="cv", number = 10), tuneLength = 10)

resultado_entrenamiento4 <- predict(modelo4, entrenamiento)
resultado_prueba4 <- predict(modelo4, prueba)

Confusion matrix

mcre4 <- confusionMatrix(resultado_entrenamiento4, entrenamiento$Species)
mcre4 
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         3
##   virginica       0          1        37
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           0.9250
## Specificity                 1.0000            0.9625           0.9875
## Pos Pred Value              1.0000            0.9286           0.9737
## Neg Pred Value              1.0000            0.9872           0.9634
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3083
## Detection Prevalence        0.3333            0.3500           0.3167
## Balanced Accuracy           1.0000            0.9688           0.9563
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$Species)
mcrp4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
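
The complexity parameter chosen by cross-validation, and the fitted tree itself, can be examined; a sketch (rpart.plot is an additional package, not loaded above):

modelo4$bestTune # cp value selected by the 10-fold CV
# library(rpart.plot)                        # optional, if installed
# rpart.plot::rpart.plot(modelo4$finalModel) # draw the final tree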

Neural network model (nnet)

modelo5 <- train(Species ~ ., data = entrenamiento, method = "nnet", preProcess = c("scale", "center"), trControl = trainControl(method = "cv", number = 10), trace = FALSE) # trace = FALSE suppresses nnet's iteration log
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)

Confusion matrix

mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Species)
mcre5 
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         36         0
##   virginica       0          4        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Species)
mcrp5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0          9         0
##   virginica       0          1        10
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9000           1.0000
## Specificity                 1.0000            1.0000           0.9500
## Pos Pred Value              1.0000            1.0000           0.9091
## Neg Pred Value              1.0000            0.9524           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3000           0.3333
## Detection Prevalence        0.3333            0.3000           0.3667
## Balanced Accuracy           1.0000            0.9500           0.9750
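
With no tuneGrid, train tunes nnet's two parameters (size and decay) over a small default grid. An explicit grid could also be supplied; a sketch (grid values and the names rejilla_nnet and modelo5_grid are illustrative):

rejilla_nnet <- expand.grid(size = c(1, 3, 5), decay = c(0, 0.01, 0.1))
modelo5_grid <- train(Species ~ ., data = entrenamiento, method = "nnet",
                      preProcess = c("scale", "center"),
                      trControl = trainControl(method = "cv", number = 10),
                      tuneGrid = rejilla_nnet, trace = FALSE)
modelo5_grid$bestTune # best size/decay combination found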

Random forest

modelo6 <- train(Species ~ ., data = entrenamiento, method = "rf", preProcess = c("scale", "center"), trControl = trainControl(method = "cv", number = 10), tuneGrid = expand.grid(mtry = c(2, 3, 4))) # iris has only 4 predictors, so mtry is capped at 4
resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)

Confusion matrix

mcre6 <- confusionMatrix(resultado_entrenamiento6, entrenamiento$Species)
mcre6 
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0          0        40
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9697, 1)
##     No Information Rate : 0.3333     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           1.0000
## Specificity                 1.0000            1.0000           1.0000
## Pos Pred Value              1.0000            1.0000           1.0000
## Neg Pred Value              1.0000            1.0000           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3333
## Detection Prevalence        0.3333            0.3333           0.3333
## Balanced Accuracy           1.0000            1.0000           1.0000
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$Species)
mcrp6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
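
Random forests also provide a measure of variable importance, available through caret (optional check):

varImp(modelo6) # importance of each predictor according to the random forest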

Summary of results

resumen <- data.frame(
  "svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "NeuralNet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "RandomForest" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)

rownames(resumen) <- c("Training accuracy", "Test accuracy")
resumen
##                   svmLinear svmRadial   svmPoly     rpart NeuralNet
## Training accuracy 0.9916667 0.9916667 0.9916667 0.9666667 0.9666667
## Test accuracy     0.9666667 0.9333333 0.9666667 0.9333333 0.9666667
##                   RandomForest
## Training accuracy    1.0000000
## Test accuracy        0.9333333
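
As a complement to the table above, caret's resamples() can compare the cross-validation results of the six models directly. A sketch (the name comparacion is illustrative; for a strictly paired comparison the same resampling indices should be supplied to trainControl):

comparacion <- resamples(list(svmLinear = modelo1, svmRadial = modelo2,
                              svmPoly = modelo3, rpart = modelo4,
                              nnet = modelo5, rf = modelo6))
summary(comparacion) # cross-validated accuracy and Kappa per model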

Results

The random-forest model shows signs of overfitting: its training accuracy (100%) is noticeably higher than its test accuracy (93.3%).

Based on these results, the most effective model is the linear Support Vector Machine: it reaches the highest test accuracy (96.7%, tied with svmPoly and the neural network) while being the simplest of the models compared.
