Teoría

El paquete CARET (Classification and regression training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.

Instalar paquetes y llamar librerías

#install.packages("ggplot2") # Gráfica
library(ggplot2)
#install.packages("lattice") # Gráfica
library(lattice)
#install.packages("caret") # Algoritmos de aprendizaje automatico
library(caret)
#install.packages("datasets") # Usar bases de datos, en este caso iris
library(datasets)
# install.packages("DataExplorer") # Análisis Exploratorio
library(DataExplorer)
# install.packages("DataExplorer")
library(DataExplorer)
#install.packages("caret")
library(caret)

Importar la base de datos

# Working copy of the built-in iris dataset as a data frame.
df <- as.data.frame(iris)

Entender la base de datos

# Per-column summary: quartiles/mean for the numeric columns and counts per
# level for the Species factor.
summary(df)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Compact structure of the data frame: number of rows/columns and the type of
# each column.
str(df)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Exploratory plots from DataExplorer. Going by the function names these are,
# in order: missing-value profile, histograms and correlation plot
# (NOTE(review): confirm against the DataExplorer documentation).
# The full report call below is left disabled.
# create_report(df)
plot_missing(df)

plot_histogram(df)

plot_correlation(df)

NOTA: La variable que queremos predecir debe tener formato de FACTOR

Partir la base de datos

# Train/test split; 80% / 20% is the usual choice.
set.seed(123)  # fixed seed so the partition is reproducible
# Row indices of the training set, returned as a one-column matrix
# (list = FALSE) and drawn from df$Species with p = 0.8.
renglones_entrenamiento <- createDataPartition(df$Species, p = 0.8, list = FALSE)
entrenamiento <- df[renglones_entrenamiento, ]
# The trailing comma is required: it makes the negative index act on rows, so
# the test set is every row that is not in the training set.
prueba <- df[-renglones_entrenamiento, ]

Distintos tipos de métodos para modelar

Los métodos más utilizados para modelar aprendizaje automático son:

* SVM: Support Vector Machine o Máquina de Vectores de Soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc.
* Árbol de Decisión: rpart
* Redes Neuronales: nnet
* Bosques Aleatorios: rf

Modelo 1 SVM Lineal

library(caret)
set.seed(123)  # fixed seed ahead of the partition and the cross-validation

# Reload the built-in iris dataset
data(iris)

# Hold out 20% of the rows as the test partition
trainIndex <- createDataPartition(y = iris[["Species"]], p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 1: linear-kernel SVM, scaled and centred predictors, 10-fold CV,
# cost parameter fixed at C = 1
modelo1 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Predicted classes for both partitions
resultado_entrenamiento1 <- predict(modelo1, newdata = entrenamiento)
resultado_prueba1 <- predict(modelo1, newdata = prueba)

# Confusion matrix on the training partition
mcre1 <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento[["Species"]]
)
mcre1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix of model 1 on the held-out test partition
mcrp1 <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba[["Species"]]
)
mcrp1
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

Modelo 2 SVM Radial

library(caret)
set.seed(123)  # fixed seed ahead of the partition and the cross-validation

# Reload the built-in iris dataset
data(iris)

# Hold out 20% of the rows as the test partition
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 2: radial-kernel SVM, scaled and centred predictors, 10-fold CV,
# tuning parameters fixed at sigma = 1 and C = 1.
# Stored as modelo2 so the linear SVM kept in modelo1 is not overwritten.
modelo2 <- train(Species ~ .,
                 data = entrenamiento,
                 method = "svmRadial",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method = "cv", number = 10),
                 tuneGrid = data.frame(sigma = 1, C = 1)
)

# Predicted classes for both partitions
resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)

# Confusion matrix on the training partition
mcre2 <- confusionMatrix(resultado_entrenamiento2, entrenamiento$Species)
mcre2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix of model 2 on the held-out test partition
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba[["Species"]]
)
mcrp2
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         2
##   virginica       0          0         8
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9333          
##                  95% CI : (0.7793, 0.9918)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 8.747e-12       
##                                           
##                   Kappa : 0.9             
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.8000
## Specificity                 1.0000            0.9000           1.0000
## Pos Pred Value              1.0000            0.8333           1.0000
## Neg Pred Value              1.0000            1.0000           0.9091
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.2667
## Detection Prevalence        0.3333            0.4000           0.2667
## Balanced Accuracy           1.0000            0.9500           0.9000

Modelo 3 SVM Polinómico

library(caret)
set.seed(123)  # fixed seed ahead of the partition and the cross-validation

# Reload the built-in iris dataset
data(iris)

# Hold out 20% of the rows as the test partition
trainIndex <- createDataPartition(y = iris[["Species"]], p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 3: polynomial-kernel SVM, scaled and centred predictors, 10-fold CV,
# tuning parameters fixed at degree = 1, scale = 1 and C = 1
modelo3 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Predicted classes for both partitions
resultado_entrenamiento3 <- predict(modelo3, newdata = entrenamiento)
resultado_prueba3 <- predict(modelo3, newdata = prueba)

# Confusion matrix on the training partition
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento[["Species"]]
)
mcre3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         0
##   virginica       0          1        40
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9917          
##                  95% CI : (0.9544, 0.9998)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9875          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           1.0000
## Specificity                 1.0000            1.0000           0.9875
## Pos Pred Value              1.0000            1.0000           0.9756
## Neg Pred Value              1.0000            0.9877           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3333
## Detection Prevalence        0.3333            0.3250           0.3417
## Balanced Accuracy           1.0000            0.9875           0.9938
# Confusion matrix of model 3 on the held-out test partition
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba[["Species"]]
)
mcrp3
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

Modelo 4 Árbol de decisión

library(caret)
set.seed(123)  # fixed seed ahead of the partition and the cross-validation

# Reload the built-in iris dataset
data(iris)

# Hold out 20% of the rows as the test partition
trainIndex <- createDataPartition(y = iris[["Species"]], p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 4: decision tree (rpart), scaled and centred predictors, 10-fold CV.
# tuneLength = 10 asks caret to build the tuning grid itself instead of
# receiving a fixed tuneGrid.
modelo4 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Predicted classes for both partitions
resultado_entrenamiento4 <- predict(modelo4, newdata = entrenamiento)
resultado_prueba4 <- predict(modelo4, newdata = prueba)

# Confusion matrix on the training partition
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento[["Species"]]
)
mcre4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         39         3
##   virginica       0          1        37
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9169, 0.9908)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9750           0.9250
## Specificity                 1.0000            0.9625           0.9875
## Pos Pred Value              1.0000            0.9286           0.9737
## Neg Pred Value              1.0000            0.9872           0.9634
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3250           0.3083
## Detection Prevalence        0.3333            0.3500           0.3167
## Balanced Accuracy           1.0000            0.9688           0.9563
# Confusion matrix of model 4 (decision tree) on the held-out test partition.
# It must be built from resultado_prueba4; using resultado_prueba3 would
# report the polynomial SVM's test performance under the tree's name.
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$Species)
mcrp4
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

Modelo 5 Redes Neuronales

library(caret)
set.seed(123)  # fixed seed ahead of the partition and the cross-validation

# Reload the built-in iris dataset
data(iris)

# Hold out 20% of the rows as the test partition
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 5: neural network (nnet), scaled and centred predictors, 10-fold CV.
# No tuneGrid/tuneLength is given, so caret picks the candidate parameters.
modelo5 <- train(Species ~ .,
                 data = entrenamiento,
                 method = "nnet",
                 preProcess = c("scale", "center"),
                 trControl = trainControl(method = "cv", number = 10),
                 # Forwarded to nnet(): suppresses the per-iteration
                 # optimisation log that is otherwise printed for every fit.
                 trace = FALSE
)
## # weights:  11
## initial  value 137.158154 
## iter  10 value 83.679659
## iter  20 value 49.940615
## final  value 49.908817 
## converged
## # weights:  27
## initial  value 116.394990 
## iter  10 value 6.186241
## iter  20 value 0.260622
## iter  30 value 0.000744
## final  value 0.000077 
## converged
## # weights:  43
## initial  value 147.341272 
## iter  10 value 20.723909
## iter  20 value 1.711652
## iter  30 value 0.002668
## final  value 0.000059 
## converged
## # weights:  11
## initial  value 121.560247 
## iter  10 value 65.949471
## iter  20 value 55.881751
## iter  30 value 44.683010
## iter  40 value 44.159606
## iter  40 value 44.159606
## iter  40 value 44.159606
## final  value 44.159606 
## converged
## # weights:  27
## initial  value 162.871678 
## iter  10 value 31.973292
## iter  20 value 21.643843
## iter  30 value 21.176664
## iter  40 value 21.175187
## final  value 21.175184 
## converged
## # weights:  43
## initial  value 124.403113 
## iter  10 value 22.493781
## iter  20 value 18.970819
## iter  30 value 18.776926
## iter  40 value 18.751512
## iter  50 value 18.594955
## iter  60 value 18.433573
## iter  70 value 18.406314
## final  value 18.406244 
## converged
## # weights:  11
## initial  value 120.330027 
## iter  10 value 48.948789
## iter  20 value 19.465594
## iter  30 value 4.922147
## iter  40 value 3.955106
## iter  50 value 3.905223
## iter  60 value 3.894400
## iter  70 value 3.881836
## iter  80 value 3.874804
## iter  90 value 3.872370
## iter 100 value 3.872335
## final  value 3.872335 
## stopped after 100 iterations
## # weights:  27
## initial  value 126.848219 
## iter  10 value 26.579951
## iter  20 value 2.125995
## iter  30 value 0.503338
## iter  40 value 0.462699
## iter  50 value 0.426509
## iter  60 value 0.411345
## iter  70 value 0.394772
## iter  80 value 0.382673
## iter  90 value 0.374982
## iter 100 value 0.371874
## final  value 0.371874 
## stopped after 100 iterations
## # weights:  43
## initial  value 139.437764 
## iter  10 value 6.381330
## iter  20 value 0.512400
## iter  30 value 0.467261
## iter  40 value 0.445606
## iter  50 value 0.436766
## iter  60 value 0.416541
## iter  70 value 0.388106
## iter  80 value 0.382227
## iter  90 value 0.379333
## iter 100 value 0.371278
## final  value 0.371278 
## stopped after 100 iterations
## # weights:  11
## initial  value 130.106043 
## iter  10 value 27.186408
## iter  20 value 4.702041
## iter  30 value 3.010037
## iter  40 value 2.199278
## iter  50 value 1.884656
## iter  60 value 1.836945
## iter  70 value 1.752174
## iter  80 value 1.745749
## iter  90 value 1.507029
## iter 100 value 1.493314
## final  value 1.493314 
## stopped after 100 iterations
## # weights:  27
## initial  value 114.723677 
## iter  10 value 6.352999
## iter  20 value 0.555935
## final  value 0.000059 
## converged
## # weights:  43
## initial  value 135.420682 
## iter  10 value 7.555059
## iter  20 value 0.331280
## iter  30 value 0.000103
## iter  30 value 0.000051
## iter  30 value 0.000050
## final  value 0.000050 
## converged
## # weights:  11
## initial  value 124.421768 
## iter  10 value 60.982773
## iter  20 value 44.161198
## iter  30 value 43.464615
## iter  30 value 43.464614
## iter  30 value 43.464614
## final  value 43.464614 
## converged
## # weights:  27
## initial  value 123.592678 
## iter  10 value 29.678577
## iter  20 value 20.064880
## iter  30 value 19.879129
## iter  40 value 19.844432
## final  value 19.844390 
## converged
## # weights:  43
## initial  value 140.131749 
## iter  10 value 28.007751
## iter  20 value 18.816789
## iter  30 value 18.340611
## iter  40 value 18.308431
## iter  50 value 18.305748
## final  value 18.305348 
## converged
## # weights:  11
## initial  value 123.494107 
## iter  10 value 105.350381
## iter  20 value 99.435136
## iter  30 value 40.065228
## iter  40 value 8.675195
## iter  50 value 4.168312
## iter  60 value 4.104548
## iter  70 value 3.854797
## iter  80 value 3.850383
## iter  90 value 3.845358
## iter 100 value 3.835212
## final  value 3.835212 
## stopped after 100 iterations
## # weights:  27
## initial  value 122.562474 
## iter  10 value 8.450360
## iter  20 value 1.814922
## iter  30 value 0.716283
## iter  40 value 0.640788
## iter  50 value 0.621339
## iter  60 value 0.612589
## iter  70 value 0.584200
## iter  80 value 0.544549
## iter  90 value 0.528601
## iter 100 value 0.514779
## final  value 0.514779 
## stopped after 100 iterations
## # weights:  43
## initial  value 123.657348 
## iter  10 value 10.614579
## iter  20 value 2.048113
## iter  30 value 0.659304
## iter  40 value 0.544423
## iter  50 value 0.532691
## iter  60 value 0.510665
## iter  70 value 0.466030
## iter  80 value 0.432980
## iter  90 value 0.364387
## iter 100 value 0.341414
## final  value 0.341414 
## stopped after 100 iterations
## # weights:  11
## initial  value 132.593715 
## iter  10 value 48.478232
## iter  20 value 26.680423
## iter  30 value 5.247608
## iter  40 value 2.894683
## iter  50 value 2.713712
## iter  60 value 2.620707
## iter  70 value 2.540290
## iter  80 value 2.455773
## iter  90 value 2.267592
## iter 100 value 2.151796
## final  value 2.151796 
## stopped after 100 iterations
## # weights:  27
## initial  value 120.567002 
## iter  10 value 4.393773
## iter  20 value 0.043939
## iter  30 value 0.000566
## final  value 0.000075 
## converged
## # weights:  43
## initial  value 119.486317 
## iter  10 value 8.852497
## iter  20 value 1.503151
## iter  30 value 0.005087
## iter  40 value 0.000143
## final  value 0.000045 
## converged
## # weights:  11
## initial  value 138.956697 
## iter  10 value 64.167257
## iter  20 value 46.791809
## iter  30 value 44.090079
## final  value 44.057609 
## converged
## # weights:  27
## initial  value 117.449640 
## iter  10 value 25.035090
## iter  20 value 20.291424
## iter  30 value 20.270756
## final  value 20.270746 
## converged
## # weights:  43
## initial  value 131.615496 
## iter  10 value 28.251470
## iter  20 value 19.360745
## iter  30 value 19.289328
## iter  40 value 19.287649
## iter  50 value 19.287597
## final  value 19.287594 
## converged
## # weights:  11
## initial  value 120.067169 
## iter  10 value 52.352023
## iter  20 value 50.228278
## iter  30 value 50.176003
## iter  40 value 50.144595
## iter  50 value 50.095225
## iter  60 value 49.938343
## iter  70 value 49.520427
## iter  80 value 49.433208
## iter  90 value 49.391382
## iter 100 value 49.306617
## final  value 49.306617 
## stopped after 100 iterations
## # weights:  27
## initial  value 121.958571 
## iter  10 value 4.666620
## iter  20 value 0.776850
## iter  30 value 0.707439
## iter  40 value 0.640867
## iter  50 value 0.600758
## iter  60 value 0.593505
## iter  70 value 0.584147
## iter  80 value 0.557207
## iter  90 value 0.538098
## iter 100 value 0.514508
## final  value 0.514508 
## stopped after 100 iterations
## # weights:  43
## initial  value 122.004578 
## iter  10 value 5.297091
## iter  20 value 1.244824
## iter  30 value 0.495554
## iter  40 value 0.483174
## iter  50 value 0.468581
## iter  60 value 0.375798
## iter  70 value 0.362002
## iter  80 value 0.358173
## iter  90 value 0.350027
## iter 100 value 0.341343
## final  value 0.341343 
## stopped after 100 iterations
## # weights:  11
## initial  value 127.875079 
## iter  10 value 33.444226
## iter  20 value 6.390350
## iter  30 value 2.177976
## iter  40 value 1.888232
## iter  50 value 0.770754
## iter  60 value 0.617610
## iter  70 value 0.559419
## iter  80 value 0.543517
## iter  90 value 0.477915
## iter 100 value 0.457584
## final  value 0.457584 
## stopped after 100 iterations
## # weights:  27
## initial  value 128.599893 
## iter  10 value 44.079424
## iter  20 value 2.154037
## iter  30 value 0.029681
## final  value 0.000069 
## converged
## # weights:  43
## initial  value 124.439568 
## iter  10 value 4.443221
## iter  20 value 0.029295
## final  value 0.000092 
## converged
## # weights:  11
## initial  value 140.781917 
## iter  10 value 49.139091
## iter  20 value 42.676198
## final  value 42.671367 
## converged
## # weights:  27
## initial  value 130.168472 
## iter  10 value 25.483245
## iter  20 value 19.394183
## iter  30 value 18.266411
## iter  40 value 18.183916
## iter  50 value 18.183745
## final  value 18.183745 
## converged
## # weights:  43
## initial  value 123.492335 
## iter  10 value 25.604376
## iter  20 value 17.578453
## iter  30 value 17.302413
## iter  40 value 17.239993
## iter  50 value 17.235966
## iter  60 value 17.210698
## iter  70 value 16.990390
## iter  80 value 16.954457
## final  value 16.954456 
## converged
## # weights:  11
## initial  value 141.016346 
## iter  10 value 27.908285
## iter  20 value 4.184545
## iter  30 value 2.072889
## iter  40 value 1.990949
## iter  50 value 1.947868
## iter  60 value 1.913708
## iter  70 value 1.905145
## iter  80 value 1.901705
## iter  90 value 1.899713
## iter 100 value 1.897791
## final  value 1.897791 
## stopped after 100 iterations
## # weights:  27
## initial  value 130.318008 
## iter  10 value 2.534330
## iter  20 value 0.226734
## iter  30 value 0.212995
## iter  40 value 0.196194
## iter  50 value 0.192300
## iter  60 value 0.173589
## iter  70 value 0.165969
## iter  80 value 0.158151
## iter  90 value 0.151971
## iter 100 value 0.146246
## final  value 0.146246 
## stopped after 100 iterations
## # weights:  43
## initial  value 132.276641 
## iter  10 value 27.227544
## iter  20 value 2.208204
## iter  30 value 0.486149
## iter  40 value 0.384802
## iter  50 value 0.322610
## iter  60 value 0.296213
## iter  70 value 0.260001
## iter  80 value 0.186170
## iter  90 value 0.168783
## iter 100 value 0.156405
## final  value 0.156405 
## stopped after 100 iterations
## # weights:  11
## initial  value 124.721231 
## iter  10 value 24.907273
## iter  20 value 3.860256
## iter  30 value 2.694222
## iter  40 value 2.606534
## iter  50 value 2.549742
## iter  60 value 2.480770
## iter  70 value 2.432589
## iter  80 value 2.374387
## iter  90 value 2.304015
## iter 100 value 2.277255
## final  value 2.277255 
## stopped after 100 iterations
## # weights:  27
## initial  value 135.883765 
## iter  10 value 9.588177
## iter  20 value 2.164867
## iter  30 value 0.003083
## iter  40 value 0.000404
## final  value 0.000079 
## converged
## # weights:  43
## initial  value 128.423859 
## iter  10 value 8.362453
## iter  20 value 1.107101
## iter  30 value 0.002056
## final  value 0.000080 
## converged
## # weights:  11
## initial  value 126.814860 
## iter  10 value 45.330853
## iter  20 value 43.371713
## final  value 43.371698 
## converged
## # weights:  27
## initial  value 134.971673 
## iter  10 value 35.878441
## iter  20 value 23.544146
## iter  30 value 21.359427
## iter  40 value 19.924676
## iter  50 value 19.397158
## iter  60 value 19.396984
## final  value 19.396975 
## converged
## # weights:  43
## initial  value 131.139565 
## iter  10 value 26.253238
## iter  20 value 18.403900
## iter  30 value 17.965643
## iter  40 value 17.848649
## iter  50 value 17.842969
## iter  60 value 17.842727
## final  value 17.842722 
## converged
## # weights:  11
## initial  value 121.942596 
## iter  10 value 38.114589
## iter  20 value 5.888252
## iter  30 value 4.098676
## iter  40 value 3.775997
## iter  50 value 3.766825
## iter  60 value 3.726327
## iter  70 value 3.712347
## iter  80 value 3.712293
## iter  90 value 3.711980
## final  value 3.711809 
## converged
## # weights:  27
## initial  value 124.424719 
## iter  10 value 36.553008
## iter  20 value 9.253423
## iter  30 value 1.918811
## iter  40 value 0.813359
## iter  50 value 0.471166
## iter  60 value 0.434232
## iter  70 value 0.412618
## iter  80 value 0.405562
## iter  90 value 0.400759
## iter 100 value 0.390511
## final  value 0.390511 
## stopped after 100 iterations
## # weights:  43
## initial  value 127.087485 
## iter  10 value 3.861857
## iter  20 value 0.789964
## iter  30 value 0.603240
## iter  40 value 0.538274
## iter  50 value 0.482849
## iter  60 value 0.442551
## iter  70 value 0.343017
## iter  80 value 0.327263
## iter  90 value 0.311396
## iter 100 value 0.296024
## final  value 0.296024 
## stopped after 100 iterations
## # weights:  11
## initial  value 120.926669 
## iter  10 value 80.051213
## iter  20 value 49.871571
## iter  30 value 37.312293
## iter  40 value 8.554628
## iter  50 value 4.749590
## iter  60 value 4.203783
## iter  70 value 3.338322
## iter  80 value 2.396834
## iter  90 value 2.285253
## iter 100 value 2.266166
## final  value 2.266166 
## stopped after 100 iterations
## # weights:  27
## initial  value 134.314255 
## iter  10 value 18.375214
## iter  20 value 3.032836
## iter  30 value 0.088363
## iter  40 value 0.000269
## final  value 0.000068 
## converged
## # weights:  43
## initial  value 127.460669 
## iter  10 value 6.647251
## iter  20 value 0.860359
## iter  30 value 0.000183
## iter  30 value 0.000089
## iter  30 value 0.000088
## final  value 0.000088 
## converged
## # weights:  11
## initial  value 129.349972 
## iter  10 value 58.657455
## iter  20 value 46.190684
## iter  30 value 43.787672
## final  value 43.776742 
## converged
## # weights:  27
## initial  value 136.564527 
## iter  10 value 26.172828
## iter  20 value 21.449170
## iter  30 value 21.350292
## iter  40 value 21.347425
## final  value 21.347423 
## converged
## # weights:  43
## initial  value 134.553205 
## iter  10 value 26.863852
## iter  20 value 19.099568
## iter  30 value 18.385082
## iter  40 value 18.319448
## iter  50 value 18.315140
## iter  60 value 18.314237
## final  value 18.314138 
## converged
## # weights:  11
## initial  value 118.845224 
## iter  10 value 51.168472
## iter  20 value 49.976921
## iter  30 value 49.973436
## iter  40 value 49.963205
## iter  50 value 49.957210
## iter  60 value 49.917964
## iter  70 value 44.684196
## iter  80 value 14.830732
## iter  90 value 4.745959
## iter 100 value 4.005893
## final  value 4.005893 
## stopped after 100 iterations
## # weights:  27
## initial  value 116.024325 
## iter  10 value 39.736151
## iter  20 value 28.650940
## iter  30 value 21.536570
## iter  40 value 10.723406
## iter  50 value 4.960185
## iter  60 value 4.763786
## iter  70 value 4.501322
## iter  80 value 3.728015
## iter  90 value 1.977298
## iter 100 value 1.662670
## final  value 1.662670 
## stopped after 100 iterations
## # weights:  43
## initial  value 139.109512 
## iter  10 value 6.839446
## iter  20 value 1.801500
## iter  30 value 0.653309
## iter  40 value 0.629758
## iter  50 value 0.531276
## iter  60 value 0.495929
## iter  70 value 0.479711
## iter  80 value 0.474062
## iter  90 value 0.459572
## iter 100 value 0.439866
## final  value 0.439866 
## stopped after 100 iterations
## # weights:  11
## initial  value 130.134894 
## iter  10 value 21.120199
## iter  20 value 3.850425
## iter  30 value 2.619123
## iter  40 value 2.164615
## iter  50 value 2.113793
## iter  60 value 2.099946
## iter  70 value 1.982283
## iter  80 value 1.970304
## iter  90 value 1.855431
## iter 100 value 1.840065
## final  value 1.840065 
## stopped after 100 iterations
## # weights:  27
## initial  value 121.319823 
## iter  10 value 9.224554
## iter  20 value 1.619145
## iter  30 value 0.023451
## final  value 0.000052 
## converged
## # weights:  43
## initial  value 119.958783 
## iter  10 value 7.355517
## iter  20 value 0.451841
## iter  30 value 0.000974
## final  value 0.000055 
## converged
## # weights:  11
## initial  value 135.882848 
## iter  10 value 106.344554
## iter  20 value 49.663389
## iter  30 value 44.082267
## final  value 44.081824 
## converged
## # weights:  27
## initial  value 129.601052 
## iter  10 value 25.326332
## iter  20 value 20.841301
## iter  30 value 20.365360
## iter  40 value 19.981453
## iter  50 value 19.971411
## iter  60 value 19.970853
## final  value 19.970845 
## converged
## # weights:  43
## initial  value 133.009438 
## iter  10 value 28.591164
## iter  20 value 19.530477
## iter  30 value 19.340802
## iter  40 value 19.337827
## iter  50 value 19.336955
## iter  60 value 19.336942
## final  value 19.336937 
## converged
## # weights:  11
## initial  value 122.494186 
## iter  10 value 50.595369
## iter  20 value 49.972100
## final  value 49.965717 
## converged
## # weights:  27
## initial  value 126.005379 
## iter  10 value 20.133877
## iter  20 value 1.723439
## iter  30 value 0.850570
## iter  40 value 0.800848
## iter  50 value 0.703194
## iter  60 value 0.587465
## iter  70 value 0.532854
## iter  80 value 0.518100
## iter  90 value 0.494016
## iter 100 value 0.483044
## final  value 0.483044 
## stopped after 100 iterations
## # weights:  43
## initial  value 141.403683 
## iter  10 value 5.004731
## iter  20 value 1.695175
## iter  30 value 0.892849
## iter  40 value 0.736870
## iter  50 value 0.604144
## iter  60 value 0.525125
## iter  70 value 0.514531
## iter  80 value 0.503234
## iter  90 value 0.485164
## iter 100 value 0.477446
## final  value 0.477446 
## stopped after 100 iterations
## # weights:  11
## initial  value 134.899919 
## iter  10 value 37.356151
## iter  20 value 12.284395
## iter  30 value 4.202226
## iter  40 value 3.073761
## iter  50 value 2.532373
## iter  60 value 2.211445
## iter  70 value 2.166202
## iter  80 value 2.124348
## iter  90 value 2.055965
## iter 100 value 1.816387
## final  value 1.816387 
## stopped after 100 iterations
## # weights:  27
## initial  value 124.434786 
## iter  10 value 4.774821
## iter  20 value 0.002590
## iter  30 value 0.000113
## final  value 0.000099 
## converged
## # weights:  43
## initial  value 117.179052 
## iter  10 value 2.763293
## iter  20 value 0.003312
## final  value 0.000057 
## converged
## # weights:  11
## initial  value 144.222752 
## iter  10 value 60.355328
## iter  20 value 43.895422
## iter  30 value 42.994217
## iter  30 value 42.994216
## iter  30 value 42.994216
## final  value 42.994216 
## converged
## # weights:  27
## initial  value 149.958592 
## iter  10 value 29.106817
## iter  20 value 18.723106
## iter  30 value 18.602068
## iter  40 value 18.594731
## final  value 18.594730 
## converged
## # weights:  43
## initial  value 151.323466 
## iter  10 value 25.176469
## iter  20 value 17.911917
## iter  30 value 17.801742
## iter  40 value 17.567450
## iter  50 value 17.232979
## iter  60 value 17.040933
## iter  70 value 17.030898
## iter  80 value 17.030049
## final  value 17.029957 
## converged
## # weights:  11
## initial  value 125.330915 
## iter  10 value 50.092228
## iter  20 value 49.928518
## iter  30 value 49.225211
## iter  40 value 46.190986
## iter  50 value 39.912675
## iter  60 value 14.721394
## iter  70 value 5.240189
## iter  80 value 3.695497
## iter  90 value 3.515460
## iter 100 value 3.098835
## final  value 3.098835 
## stopped after 100 iterations
## # weights:  27
## initial  value 131.514021 
## iter  10 value 21.428450
## iter  20 value 1.857646
## iter  30 value 0.656298
## iter  40 value 0.573496
## iter  50 value 0.440421
## iter  60 value 0.411152
## iter  70 value 0.390519
## iter  80 value 0.385680
## iter  90 value 0.368784
## iter 100 value 0.348473
## final  value 0.348473 
## stopped after 100 iterations
## # weights:  43
## initial  value 131.396056 
## iter  10 value 4.227384
## iter  20 value 1.424260
## iter  30 value 0.484542
## iter  40 value 0.451634
## iter  50 value 0.434553
## iter  60 value 0.411818
## iter  70 value 0.380774
## iter  80 value 0.361957
## iter  90 value 0.355571
## iter 100 value 0.341448
## final  value 0.341448 
## stopped after 100 iterations
## # weights:  11
## initial  value 124.725766 
## iter  10 value 52.435359
## iter  20 value 49.907922
## iter  30 value 49.382401
## iter  40 value 35.220997
## iter  50 value 8.801913
## iter  60 value 4.262407
## iter  70 value 2.632280
## iter  80 value 2.291911
## iter  90 value 2.171173
## iter 100 value 2.064037
## final  value 2.064037 
## stopped after 100 iterations
## # weights:  27
## initial  value 121.213256 
## iter  10 value 4.715440
## iter  20 value 0.012867
## final  value 0.000060 
## converged
## # weights:  43
## initial  value 125.839411 
## iter  10 value 5.743691
## iter  20 value 0.160593
## final  value 0.000072 
## converged
## # weights:  11
## initial  value 119.242497 
## iter  10 value 44.746080
## iter  20 value 43.871570
## final  value 43.871556 
## converged
## # weights:  27
## initial  value 126.090732 
## iter  10 value 27.134842
## iter  20 value 22.347298
## iter  30 value 22.047386
## iter  40 value 21.022820
## iter  50 value 20.939006
## final  value 20.938933 
## converged
## # weights:  43
## initial  value 108.205013 
## iter  10 value 33.575657
## iter  20 value 19.798211
## iter  30 value 19.239341
## iter  40 value 18.985431
## iter  50 value 18.950630
## iter  60 value 18.943527
## iter  70 value 18.940566
## final  value 18.940477 
## converged
## # weights:  11
## initial  value 127.705849 
## iter  10 value 45.929987
## iter  20 value 36.821210
## iter  30 value 13.164203
## iter  40 value 4.588665
## iter  50 value 3.615648
## iter  60 value 3.539641
## iter  70 value 3.325096
## iter  80 value 3.304364
## iter  90 value 3.301093
## iter 100 value 3.300849
## final  value 3.300849 
## stopped after 100 iterations
## # weights:  27
## initial  value 145.426276 
## iter  10 value 10.662092
## iter  20 value 1.885068
## iter  30 value 0.785276
## iter  40 value 0.647444
## iter  50 value 0.560103
## iter  60 value 0.508256
## iter  70 value 0.473915
## iter  80 value 0.426546
## iter  90 value 0.371958
## iter 100 value 0.345431
## final  value 0.345431 
## stopped after 100 iterations
## # weights:  43
## initial  value 129.307234 
## iter  10 value 6.466680
## iter  20 value 0.465599
## iter  30 value 0.369290
## iter  40 value 0.356601
## iter  50 value 0.340119
## iter  60 value 0.332450
## iter  70 value 0.314144
## iter  80 value 0.302247
## iter  90 value 0.299302
## iter 100 value 0.291440
## final  value 0.291440 
## stopped after 100 iterations
## # weights:  11
## initial  value 118.395363 
## iter  10 value 50.040092
## iter  20 value 49.907040
## final  value 49.906755 
## converged
## # weights:  27
## initial  value 120.969076 
## iter  10 value 9.562802
## iter  20 value 0.612910
## iter  30 value 0.002474
## final  value 0.000067 
## converged
## # weights:  43
## initial  value 122.764102 
## iter  10 value 8.448140
## iter  20 value 0.780831
## iter  30 value 0.002363
## final  value 0.000084 
## converged
## # weights:  11
## initial  value 130.728649 
## iter  10 value 57.681475
## iter  20 value 44.398334
## iter  30 value 43.385700
## final  value 43.382021 
## converged
## # weights:  27
## initial  value 128.997817 
## iter  10 value 30.196862
## iter  20 value 19.901318
## iter  30 value 19.715836
## iter  40 value 19.653286
## iter  50 value 19.494844
## iter  60 value 19.484851
## iter  70 value 19.483818
## final  value 19.483817 
## converged
## # weights:  43
## initial  value 102.606354 
## iter  10 value 24.828885
## iter  20 value 18.910758
## iter  30 value 18.573516
## iter  40 value 18.463629
## iter  50 value 18.462101
## final  value 18.462087 
## converged
## # weights:  11
## initial  value 120.687211 
## iter  10 value 54.577891
## iter  20 value 26.112984
## iter  30 value 4.533226
## iter  40 value 3.795451
## iter  50 value 3.694006
## iter  60 value 3.664231
## iter  70 value 3.658786
## iter  80 value 3.647778
## iter  90 value 3.645322
## iter 100 value 3.645062
## final  value 3.645062 
## stopped after 100 iterations
## # weights:  27
## initial  value 126.392641 
## iter  10 value 28.610864
## iter  20 value 6.330440
## iter  30 value 1.064218
## iter  40 value 0.263611
## iter  50 value 0.252745
## iter  60 value 0.248951
## iter  70 value 0.224939
## iter  80 value 0.215765
## iter  90 value 0.214087
## iter 100 value 0.211165
## final  value 0.211165 
## stopped after 100 iterations
## # weights:  43
## initial  value 139.699176 
## iter  10 value 4.566035
## iter  20 value 0.709051
## iter  30 value 0.579667
## iter  40 value 0.527335
## iter  50 value 0.469353
## iter  60 value 0.440636
## iter  70 value 0.396077
## iter  80 value 0.370362
## iter  90 value 0.317181
## iter 100 value 0.308036
## final  value 0.308036 
## stopped after 100 iterations
## # weights:  27
## initial  value 176.738412 
## iter  10 value 28.632388
## iter  20 value 21.019063
## iter  30 value 20.367429
## iter  40 value 20.366183
## final  value 20.366062 
## converged
# Predictions of the fitted model `modelo5` on the training and test sets
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)

# Confusion matrix: training-set predictions vs. the true species
mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Species)
mcre5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         38         0
##   virginica       0          2        40
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9833         
##                  95% CI : (0.9411, 0.998)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.975          
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9500           1.0000
## Specificity                 1.0000            1.0000           0.9750
## Pos Pred Value              1.0000            1.0000           0.9524
## Neg Pred Value              1.0000            0.9756           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3167           0.3333
## Detection Prevalence        0.3333            0.3167           0.3500
## Balanced Accuracy           1.0000            0.9750           0.9875
# Confusion matrix: test-set predictions vs. the true species
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Species)
mcrp5
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

Modelo 6 Bosques aleatorios

# Packages
# install.packages(c("caret","randomForest"))
library(caret)
set.seed(123)

# Data and train/test split (p = 0.8 is passed to createDataPartition)
data(iris)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Resampling control (method "cv", number = 10) and tuning grid over mtry
ctrl <- trainControl(method = "cv", number = 10)
grid <- expand.grid(mtry = c(2, 3, 4))

# Training (author's note: preProcess does not affect "rf", but does no harm)
modelo6 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rf",
  preProcess = c("center","scale"),
  trControl = ctrl,
  tuneGrid = grid,
  ntree = 500
)

# Predictions and confusion matrices for the training and test sets
resultado_entrenamiento6 <- predict(modelo6, entrenamiento)
resultado_prueba6 <- predict(modelo6, prueba)

mcre6 <- confusionMatrix(resultado_entrenamiento6, entrenamiento$Species)
mcrp6 <- confusionMatrix(resultado_prueba6, prueba$Species)

Resultados

# Accuracy table: one column per model; within each column the first value
# comes from the training confusion matrix and the second from the test one.
# NOTE(review): the label "sumLinear" looks like a typo for "svmLinear".
resultados <- data.frame(
  "sumLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly"   = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart"     = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "nnet"      = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "rf"        = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)

# Label the two rows (training accuracy, test accuracy) and print the table
rownames(resultados) <- c("Precisión de entrenamiento", "Precisión de prueba")
resultados
##                            sumLinear svmRadial   svmPoly     rpart      nnet
## Precisión de entrenamiento 0.9916667 0.9916667 0.9916667 0.9666667 0.9833333
## Precisión de prueba        0.9666667 0.9333333 0.9666667 0.9666667 0.9666667
##                                   rf
## Precisión de entrenamiento 1.0000000
## Precisión de prueba        0.9333333

Conclusiones

De acuerdo con la tabla de resultados, ningún método presenta un sobreajuste marcado, aunque el bosque aleatorio muestra la mayor diferencia entre entrenamiento (1.00) y prueba (0.93). Podemos seleccionar el de redes neuronales por su desempeño.

---
title: "CARET"
author: "Maria Jose Flores"
date: "`r Sys.Date()`"
output: 
  html_document:
    toc: TRUE
    toc_float: TRUE
    code_download: TRUE
    theme: cosmo
---

<center>
![](https://www.google.com/url?sa=i&url=https%3A%2F%2Fes.pngtree.com%2Fso%2Fflor-iris&psig=AOvVaw13kFPfhfKrXiGuESm-BCoY&ust=1755984454272000&source=images&cd=vfe&opi=89978449&ved=0CBIQjRxqFwoTCLCl1f6tn48DFQAAAAAdAAAAABAE)
</center>

# <span style="color:blue;"> Teoría </span>
El paquete *CARET (Classification and regression training)* es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.

# <span style="color:blue;"> Instalar paquetes y llamar librerías </span>
```{r}
#install.packages("ggplot2") # Gráfica
library(ggplot2)
#install.packages("lattice") # Gráfica
library(lattice)
#install.packages("caret") # Algoritmos de aprendizaje automatico
library(caret)
#install.packages("datasets") # Usar bases de datos, en este caso iris
library(datasets)
# install.packages("DataExplorer") # Análisis Exploratorio
library(DataExplorer)
# install.packages("DataExplorer")
library(DataExplorer)
#install.packages("caret")
library(caret)
```

# <span style="color:blue;"> Crear la base de datos </span>
```{r}
# Working copy of the built-in iris dataset as a plain data frame
df <- data.frame(datasets::iris)
```

# <span style="color:blue;"> Entender la base de datos </span>
```{r}
# Summary statistics for every column of df
summary(df)
# Structure of df: column types and a preview of the values
str(df)
# Full exploratory report, left disabled by the author
# create_report(df)
# Exploratory plots: missing values, histograms and correlations
plot_missing(df)
plot_histogram(df)
plot_correlation(df)
```

**NOTA: La variable que queremos predecir debe tener formato de FACTOR**

# <span style="color:blue;"> Partir la base de datos </span>
```{r}
# Split df into training (80%) and test (20%) rows; the usual ratio is 80/20.
# The seed makes the partition reproducible.
set.seed(123)
renglones_entrenamiento <- createDataPartition(
  df$Species,
  p = 0.8,
  list = FALSE
)

# Rows chosen by the partition form the training set
entrenamiento <- df[renglones_entrenamiento, ]

# All remaining rows form the test set. The trailing comma is required:
# without it the negative index is not applied to rows and `prueba` is not
# a data frame of held-out observations.
prueba <- df[-renglones_entrenamiento, ]
```

# <span style="color:blue;"> Distintos tipos de métodos para modelar </span>
Los métodos más utilizados para modelar aprendizaje automático son:

* **SVM**: *Support Vector Machine* o Máquina de vectores de soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc.
* **Árbol de Decisión**: rpart
* **Redes Neuronales**: nnet
* **Bosques Aleatorios**: rf

# <span style="color:blue;"> Modelo 1 SVM Lineal  </span>
```{r}
library(caret)
set.seed(123)

# Load iris and split it: 80% of the rows for training, the rest for testing
data(iris)
trainIndex <- createDataPartition(
  y = iris$Species,
  p = 0.8,
  list = FALSE
)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 1: linear SVM on scaled/centred predictors, cost fixed at C = 1,
# resampled with 10-fold cross-validation
modelo1 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Predicted species for the training rows and for the held-out rows
resultado_entrenamiento1 <- predict(modelo1, newdata = entrenamiento)
resultado_prueba1 <- predict(modelo1, newdata = prueba)

# Confusion matrix on the training set (printed)
mcre1 <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento$Species
)
mcre1

# Confusion matrix on the test set (printed)
mcrp1 <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba$Species
)
mcrp1

```

# <span style="color:blue;"> Modelo 2 SVM Radial  </span> 

```{r}
library(caret)
set.seed(123)

# Load iris and split it: 80% of the rows for training, the rest for testing
data(iris)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 2: radial-kernel SVM on scaled/centred predictors with fixed
# sigma = 1 and C = 1, resampled with 10-fold cross-validation.
# Stored as `modelo2` so it no longer overwrites the linear SVM in `modelo1`.
modelo2 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1)
)

# Predicted species for the training rows and for the held-out rows
resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)

# Confusion matrix on the training set (printed)
mcre2 <- confusionMatrix(resultado_entrenamiento2, entrenamiento$Species)
mcre2

# Confusion matrix on the test set (printed)
mcrp2 <- confusionMatrix(resultado_prueba2, prueba$Species)
mcrp2

```

# <span style="color:blue;"> Modelo 3 SVM Polinómico  </span> 

```{r}
library(caret)
set.seed(123)

# Load iris and split it: 80% of the rows for training, the rest for testing
data(iris)
trainIndex <- createDataPartition(
  y = iris$Species,
  p = 0.8,
  list = FALSE
)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 3: polynomial-kernel SVM on scaled/centred predictors with fixed
# degree = 1, scale = 1 and C = 1, resampled with 10-fold cross-validation
modelo3 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Predicted species for the training rows and for the held-out rows
resultado_entrenamiento3 <- predict(modelo3, newdata = entrenamiento)
resultado_prueba3 <- predict(modelo3, newdata = prueba)

# Confusion matrix on the training set (printed)
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento$Species
)
mcre3

# Confusion matrix on the test set (printed)
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba$Species
)
mcrp3

```

# <span style="color:blue;"> Modelo 4 Árbol de decisión  </span> 
```{r}
library(caret)
set.seed(123)

# Load iris and split it: 80% of the rows for training, the rest for testing
data(iris)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 4: decision tree (rpart) on scaled/centred predictors, resampled
# with 10-fold cross-validation; tuneLength = 10 asks caret to evaluate
# 10 candidate values of the tuning parameter (adjust as needed)
modelo4 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Predicted species for the training rows and for the held-out rows
resultado_entrenamiento4 <- predict(modelo4, entrenamiento)
resultado_prueba4 <- predict(modelo4, prueba)

# Confusion matrix on the training set (printed)
mcre4 <- confusionMatrix(resultado_entrenamiento4, entrenamiento$Species)
mcre4

# Confusion matrix on the test set (printed). It must use this model's own
# predictions (resultado_prueba4); using resultado_prueba3 would report the
# polynomial SVM's test accuracy under the rpart label.
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$Species)
mcrp4

```


# <span style="color:blue;"> Modelo 5 Redes Neuronales  </span> 
```{r}
library(caret)
set.seed(123)

# Load iris and split it: 80% of the rows for training, the rest for testing
data(iris)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Model 5: neural network (nnet) on scaled/centred predictors, resampled
# with 10-fold cross-validation over caret's default tuning grid.
# trace = FALSE is forwarded to nnet and silences the per-iteration
# optimisation log, which otherwise fills the report with hundreds of lines.
modelo5 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)

# Predicted species for the training rows and for the held-out rows
resultado_entrenamiento5 <- predict(modelo5, entrenamiento)
resultado_prueba5 <- predict(modelo5, prueba)

# Confusion matrix on the training set (printed)
mcre5 <- confusionMatrix(resultado_entrenamiento5, entrenamiento$Species)
mcre5

# Confusion matrix on the test set (printed)
mcrp5 <- confusionMatrix(resultado_prueba5, prueba$Species)
mcrp5

```

# <span style="color:blue;"> Modelo 6 Bosques aleatorios  </span> 
```{r}
# Required packages (install once if missing)
# install.packages(c("caret","randomForest"))
library(caret)
set.seed(123)

# Load iris and split it: 80% of the rows for training, the rest for testing
data(iris)
trainIndex <- createDataPartition(
  y = iris$Species,
  p = 0.8,
  list = FALSE
)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# 10-fold cross-validation and the candidate mtry values to compare
ctrl <- trainControl(method = "cv", number = 10)
grid <- expand.grid(mtry = c(2, 3, 4))

# Model 6: random forest with 500 trees. Author's note: centring/scaling
# does not affect "rf", but it does no harm either.
modelo6 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rf",
  preProcess = c("center", "scale"),
  trControl = ctrl,
  tuneGrid = grid,
  ntree = 500
)

# Predicted species for the training rows and for the held-out rows
resultado_entrenamiento6 <- predict(modelo6, newdata = entrenamiento)
resultado_prueba6 <- predict(modelo6, newdata = prueba)

# Confusion matrices (stored, not printed) for the training and test sets
mcre6 <- confusionMatrix(
  data = resultado_entrenamiento6,
  reference = entrenamiento$Species
)
mcrp6 <- confusionMatrix(
  data = resultado_prueba6,
  reference = prueba$Species
)

```


# <span style="color:blue;"> Resultados  </span> 
```{r}
# Accuracy table: one column per model; within each column the first value
# comes from the training confusion matrix and the second from the test one.
# `[["Accuracy"]]` extracts the bare number, so no duplicated "Accuracy"
# names are carried into the data frame.
resultados <- data.frame(
  "svmLinear" = c(mcre1$overall[["Accuracy"]], mcrp1$overall[["Accuracy"]]),
  "svmRadial" = c(mcre2$overall[["Accuracy"]], mcrp2$overall[["Accuracy"]]),
  "svmPoly"   = c(mcre3$overall[["Accuracy"]], mcrp3$overall[["Accuracy"]]),
  "rpart"     = c(mcre4$overall[["Accuracy"]], mcrp4$overall[["Accuracy"]]),
  "nnet"      = c(mcre5$overall[["Accuracy"]], mcrp5$overall[["Accuracy"]]),
  "rf"        = c(mcre6$overall[["Accuracy"]], mcrp6$overall[["Accuracy"]])
)

# Label the two rows (training accuracy, test accuracy) and print the table
rownames(resultados) <- c("Precisión de entrenamiento", "Precisión de prueba")
resultados

```

# <span style="color:blue;"> Conclusiones  </span> 
De acuerdo con la tabla de resultados, observamos que ningún método presenta un sobreajuste marcado. Podemos seleccionar el de **redes neuronales** por su desempeño.










