Theory

The caret package (Classification And Regression Training) provides a unified interface to a wide variety of machine-learning algorithms for classification and regression.

Install packages and load libraries

#install.packages("ggplot2") #Graficar
library(ggplot2)
#install.packages("lattice") #Crear gráficos
library(lattice)
#install.packages("caret") #Algoritmos de aprendizaje automático
library(caret)
#install.packages("datasets") #Usar bases de datos, iris en este caso
library(datasets)
#install.packages("DataExplorer") #Análisis exploratorio
library(DataExplorer)

Create the data set

df <- data.frame(iris)

Understand the data set

summary(df)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
str(df)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
#create_report(df)
plot_missing(df)

plot_histogram(df)

plot_correlation(df)

NOTE: The variable we want to predict must be a FACTOR
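
A quick check of the target type (a minimal sketch, not in the original code; in iris the Species column is already a factor) with a conversion in case it is not:

# Verify that the target variable is a factor; convert it if it is not
str(df$Species)
if (!is.factor(df$Species)) df$Species <- as.factor(df$Species)
levels(df$Species)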

Split the data set

#Typically an 80-20 split
set.seed(123)
renglones_entrenamiento <- createDataPartition(df$Species, p=0.8, list=FALSE)
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
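
createDataPartition samples within each class, so the split should preserve the class proportions; a small check (not part of the original code) confirms it:

# Class proportions should be roughly one third per species in both subsets
prop.table(table(entrenamiento$Species))
prop.table(table(prueba$Species))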

Different modeling methods

The most commonly used modeling methods are the following (each caret method has its own tuning parameters, which can be looked up as shown after the list):

  • SVM: Support Vector Machine. There are several subtypes: linear (svmLinear), radial (svmRadial), polynomial (svmPoly), etc.
  • Decision tree: rpart
  • Neural networks: nnet
  • Random forest: rf
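
A minimal sketch (not part of the original code) of how to query caret for the tuning parameters each method expects, using modelLookup():

# Tuning parameters that caret expects for each method
modelLookup("svmLinear")   # C
modelLookup("svmRadial")   # sigma, C
modelLookup("svmPoly")     # degree, scale, C
modelLookup("rpart")       # cp
modelLookup("nnet")        # size, decay
modelLookup("rf")          # mtry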

Model 1. Linear SVM

modelo1 <- train(Species ~., data=entrenamiento,
                 method = "svmLinear", # change for each method
                 preProcess=c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(C=1) # change for each method; the parameter is an uppercase C
                 )

resultado_entrenamiento1 <- predict(modelo1, entrenamiento)
resultado_prueba1 <- predict(modelo1, prueba)

# Confusion matrix
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training predictions
mcre1 <- confusionMatrix(resultado_entrenamiento1, entrenamiento$Species)
mcre1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix for the test predictions
mcrp1 <- confusionMatrix(resultado_prueba1, prueba$Species)
mcrp1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500
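
The object returned by confusionMatrix() is a list, so individual metrics can be extracted directly; this is a small sketch of the same idea used later to build the results table:

# Overall accuracy and selected per-class statistics from the test confusion matrix
mcrp1$overall["Accuracy"]
mcrp1$byClass[, c("Sensitivity", "Specificity", "Balanced Accuracy")]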

Model 2. Radial SVM

modelo2 <- train(Species ~., data=entrenamiento,
                 method = "svmRadial", # change for each method
                 preProcess=c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(sigma = 1, C = 1) # change for each method
                 )

resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)

# Confusion matrix
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training predictions
mcre2 <- confusionMatrix(resultado_entrenamiento2, entrenamiento$Species)
mcre2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix for the test predictions
mcrp2 <- confusionMatrix(resultado_prueba2, prueba$Species)
mcrp2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
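
For methods with tuned parameters, the cross-validation results stored in the train object show which combination was selected (a minimal sketch, not part of the original document; plot(modelo2) would also draw the accuracy profile when the grid contains more than one candidate):

# Resampling results for each candidate parameter combination and the chosen one
modelo2$results
modelo2$bestTune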

Model 3. Polynomial SVM

modelo3 <- train(Species ~., data=entrenamiento,
                 method = "svmPoly", # change for each method
                 preProcess=c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = data.frame(degree = 1, scale = 1, C = 1) # change for each method
                 )

resultado_entrenamiento3 <- predict(modelo3, entrenamiento)
resultado_prueba3 <- predict(modelo3, prueba)

# Confusion matrix
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training predictions
mcre3 <- confusionMatrix(resultado_entrenamiento3, entrenamiento$Species)
mcre3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         4
##   virginica       0          0        36
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500
# Confusion matrix for the test predictions
mcrp3 <- confusionMatrix(resultado_prueba3, prueba$Species)
mcrp3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Model 4. Decision tree

modelo4 <- train(Species ~., data=entrenamiento,
                 method = "rpart", # change for each method
                 preProcess=c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneLength = 10 # change for each method; here, 10 candidate cp values
                 )

resultado_entrenamiento4 <- predict(modelo4, entrenamiento)
resultado_prueba4 <- predict(modelo4, prueba)

# Confusion matrix
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training predictions
mcre4 <- confusionMatrix(resultado_entrenamiento4, entrenamiento$Species)
mcre4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         3
##   virginica       0          1        37
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           0.9250
## Specificity                 1.0000            0.9625           0.9875
## Pos Pred Value              1.0000            0.9286           0.9737
## Neg Pred Value              1.0000            0.9872           0.9634
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3083
## Detection Prevalence        0.3333            0.3500           0.3167
## Balanced Accuracy           1.0000            0.9688           0.9563
# Confusion matrix for the test predictions
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$Species)
mcrp4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
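
The final rpart tree selected by cross-validation can be drawn with base R (a minimal sketch, not in the original document; note that, because of the preProcess step, the splits refer to the centered and scaled predictors):

# Plot the classification tree kept by caret
plot(modelo4$finalModel, margin = 0.1)
text(modelo4$finalModel, use.n = TRUE, cex = 0.8)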

Model 5. Neural networks

modelo5 <- train(Species ~., data=entrenamiento,
                 method = "nnet", # change for each method
                 preProcess=c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 trace = FALSE # suppress nnet's per-iteration training log
                 )
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)

# Confusion matrix
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training predictions
mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Species)
#mcre5
# Confusion matrix for the test predictions
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Species)
#mcrp5
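
Since the confusion matrices for this model are not printed above, the selected architecture and its test accuracy can still be read from the fitted objects (a small sketch, not in the original):

# Hidden-layer size and weight decay selected by cross-validation
modelo5$bestTune
# Test-set accuracy of the neural network
mcrp5$overall["Accuracy"]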

Model 6. Random Forest

modelo6 <- train(Species ~., data=entrenamiento,
                 method = "rf", # change for each method
                 preProcess=c("scale", "center"),
                 trControl = trainControl(method="cv", number=10),
                 tuneGrid = expand.grid(mtry = c(2, 3, 4)) # iris has only 4 predictors, so mtry must be <= 4
                 )
resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)

# Confusion matrix
# An evaluation table that breaks down the performance of the classification model.

# Confusion matrix for the training predictions
mcre6 <- confusionMatrix(resultado_entrenamiento6, entrenamiento$Species)
mcre6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0          0        40
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.9697, 1)
##     No Information Rate : 0.3333     
##     P-Value [Acc > NIR] : < 2.2e-16  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           1.0000
## Specificity                 1.0000            1.0000           1.0000
## Pos Pred Value              1.0000            1.0000           1.0000
## Neg Pred Value              1.0000            1.0000           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3333
## Detection Prevalence        0.3333            0.3333           0.3333
## Balanced Accuracy           1.0000            1.0000           1.0000
# Confusion matrix for the test predictions
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$Species)
mcrp6
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000
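
Random forests also provide a variable-importance measure, which caret exposes through varImp() (a minimal sketch, not part of the original analysis):

# Relative importance of each predictor in the final random forest
importancia_rf <- varImp(modelo6)
importancia_rf
plot(importancia_rf)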

Results table

resultados <- data.frame(
  "svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "nnet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "rf" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)

rownames(resultados) <- c("Training accuracy", "Test accuracy")
resultados
##                   svmLinear svmRadial   svmPoly     rpart      nnet        rf
## Training accuracy 0.9916667 0.9916667 0.9666667 0.9666667 0.9666667 1.0000000
## Test accuracy     0.9666667 0.9333333 0.9333333 0.9333333 0.9666667 0.9333333
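
Because all six models were trained with 10-fold cross-validation, caret's resamples() can also summarize their cross-validated accuracies side by side (a sketch; for a strictly fair comparison the same fold indices should be supplied to every model via trainControl(index = ...)):

# Compare the cross-validated performance of all models
comparacion <- resamples(list(svmLinear = modelo1, svmRadial = modelo2,
                              svmPoly = modelo3, rpart = modelo4,
                              nnet = modelo5, rf = modelo6))
summary(comparacion)
bwplot(comparacion)   # lattice box-and-whisker plot of the resampling distributions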

Conclusions

Based on the results table, the training and test accuracies are close for every method, so none of them shows serious overfitting. We can select the neural network (nnet) given its performance on the test set.
