El paquete CARET (Classification and regression training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.
#install.packages("ggplot2") # Gráfica
library(ggplot2)
#install.packages("lattice") # Gráfica
library(lattice)
#install.packages("caret") # Algoritmos de aprendizaje automatico
library(caret)
#install.packages("datasets") # Usar bases de datos, en este caso iris
library(datasets)
# install.packages("DataExplorer") # Análisis Exploratorio
library(DataExplorer)
# install.packages("DataExplorer")
library(DataExplorer)
#install.packages("caret")
library(caret)
# Copy the built-in iris data set into a working data frame.
df <- data.frame(iris)
# Print per-column summary statistics for df.
summary(df)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Display the structure of df (dimensions, column types, leading values).
str(df)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Exploratory plots from DataExplorer, all drawn from df.
# create_report(df)  # disabled: full DataExplorer report call, kept for reference
plot_missing(df)
plot_histogram(df)
plot_correlation(df)
NOTA: La variable que queremos predecir debe tener formato de FACTOR
# Train/test split; the usual proportion is 80% training and 20% testing.
set.seed(123)  # fixed seed so the partition is reproducible
# Row indices of the training set, drawn from the Species column (p = 0.8);
# list = FALSE returns them as a matrix instead of a list.
renglones_entrenamiento <- createDataPartition(df$Species, p = 0.8, list = FALSE)
entrenamiento <- df[renglones_entrenamiento, ]
# The trailing comma is required so the negative index drops ROWS. Without it
# the expression does not subset rows and `prueba` is not the held-out data.
prueba <- df[-renglones_entrenamiento, ]
Los métodos más utilizados para modelar aprendizaje automático son:
* **SVM**: Support Vector Machine o Máquina de Vectores de Soporte. Hay varios subtipos: Lineal (svmLinear), Radial (svmRadial), Polinómico (svmPoly), etc.
* **Árbol de Decisión**: rpart
* **Redes Neuronales**: nnet
* **Bosques Aleatorios**: rf
# ---- Model 1: SVM with a linear kernel (caret method "svmLinear") ----
library(caret)
set.seed(123)  # seed the RNG before partitioning and resampling

# Work from the built-in iris data set.
data(iris)

# 80/20 train/test partition of the rows, driven by the Species column.
trainIndex <- createDataPartition(y = iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Fit the classifier: predictors are scaled and centred, resampling is
# 10-fold cross-validation, and the tuning grid holds a single cost, C = 1.
modelo1 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmLinear",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1)
)

# Class predictions on the training rows and on the held-out rows.
resultado_entrenamiento1 <- predict(modelo1, newdata = entrenamiento)
resultado_prueba1 <- predict(modelo1, newdata = prueba)

# Confusion matrix on the training set (printed).
mcre1 <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento$Species
)
mcre1
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 0
## virginica 0 1 40
##
## Overall Statistics
##
## Accuracy : 0.9917
## 95% CI : (0.9544, 0.9998)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9875
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 1.0000
## Specificity 1.0000 1.0000 0.9875
## Pos Pred Value 1.0000 1.0000 0.9756
## Neg Pred Value 1.0000 0.9877 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3333
## Detection Prevalence 0.3333 0.3250 0.3417
## Balanced Accuracy 1.0000 0.9875 0.9938
# Confusion matrix of the model 1 predictions (resultado_prueba1) against the
# true Species labels of the test rows; printed on the next line.
mcrp1 <- confusionMatrix(resultado_prueba1, prueba$Species)
mcrp1
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 1
## virginica 0 0 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 2.963e-13
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
# ---- Model 2: SVM with a radial (RBF) kernel (caret method "svmRadial") ----
library(caret)
set.seed(123)  # same seed as the other models so the split is identical
# Work from the built-in iris data set.
data(iris)
# 80/20 train/test partition of the rows, driven by the Species column.
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]
# Train the radial-kernel SVM. It is stored as `modelo2` (not `modelo1`) so
# the linear model fitted earlier is not overwritten and the numbering matches
# resultado_*2 / mcre2 / mcrp2.
modelo2 <- train(Species ~ .,
  data = entrenamiento,
  method = "svmRadial",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  # Single tuning point: kernel width sigma = 1 and cost C = 1.
  tuneGrid = data.frame(sigma = 1, C = 1)
)
# Class predictions on the training rows and on the held-out rows.
resultado_entrenamiento2 <- predict(modelo2, entrenamiento)
resultado_prueba2 <- predict(modelo2, prueba)
# Confusion matrix on the training set (printed).
mcre2 <- confusionMatrix(resultado_entrenamiento2, entrenamiento$Species)
mcre2
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 0
## virginica 0 1 40
##
## Overall Statistics
##
## Accuracy : 0.9917
## 95% CI : (0.9544, 0.9998)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9875
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 1.0000
## Specificity 1.0000 1.0000 0.9875
## Pos Pred Value 1.0000 1.0000 0.9756
## Neg Pred Value 1.0000 0.9877 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3333
## Detection Prevalence 0.3333 0.3250 0.3417
## Balanced Accuracy 1.0000 0.9875 0.9938
# Confusion matrix of the radial-kernel SVM predictions (resultado_prueba2)
# against the true Species labels of the test rows; printed on the next line.
mcrp2 <- confusionMatrix(resultado_prueba2, prueba$Species)
mcrp2
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 2
## virginica 0 0 8
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8000
## Specificity 1.0000 0.9000 1.0000
## Pos Pred Value 1.0000 0.8333 1.0000
## Neg Pred Value 1.0000 1.0000 0.9091
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2667
## Detection Prevalence 0.3333 0.4000 0.2667
## Balanced Accuracy 1.0000 0.9500 0.9000
# ---- Model 3: SVM with a polynomial kernel (caret method "svmPoly") ----
library(caret)
set.seed(123)  # seed the RNG before partitioning and resampling

# Work from the built-in iris data set.
data(iris)

# 80/20 train/test partition of the rows, driven by the Species column.
trainIndex <- createDataPartition(y = iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Fit the classifier: predictors are scaled and centred, resampling is
# 10-fold cross-validation, and the tuning grid holds one combination
# (degree = 1, scale = 1, C = 1).
modelo3 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmPoly",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1)
)

# Class predictions on the training rows and on the held-out rows.
resultado_entrenamiento3 <- predict(modelo3, newdata = entrenamiento)
resultado_prueba3 <- predict(modelo3, newdata = prueba)

# Confusion matrix on the training set (printed).
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento$Species
)
mcre3
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 0
## virginica 0 1 40
##
## Overall Statistics
##
## Accuracy : 0.9917
## 95% CI : (0.9544, 0.9998)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9875
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 1.0000
## Specificity 1.0000 1.0000 0.9875
## Pos Pred Value 1.0000 1.0000 0.9756
## Neg Pred Value 1.0000 0.9877 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3333
## Detection Prevalence 0.3333 0.3250 0.3417
## Balanced Accuracy 1.0000 0.9875 0.9938
# Confusion matrix of the model 3 predictions (resultado_prueba3) against the
# true Species labels of the test rows; printed on the next line.
mcrp3 <- confusionMatrix(resultado_prueba3, prueba$Species)
mcrp3
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 1
## virginica 0 0 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 2.963e-13
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
# ---- Model 4: decision tree (caret method "rpart") ----
library(caret)
set.seed(123)  # seed the RNG before partitioning and resampling

# Work from the built-in iris data set.
data(iris)

# 80/20 train/test partition of the rows, driven by the Species column.
trainIndex <- createDataPartition(y = iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]

# Fit the tree: predictors are scaled and centred and resampling is 10-fold
# cross-validation. No explicit grid is given; tuneLength = 10 asks caret for
# ten candidate tuning values (adjust as needed).
modelo4 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rpart",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 10
)

# Class predictions on the training rows and on the held-out rows.
resultado_entrenamiento4 <- predict(modelo4, newdata = entrenamiento)
resultado_prueba4 <- predict(modelo4, newdata = prueba)

# Confusion matrix on the training set (printed).
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento$Species
)
mcre4
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 3
## virginica 0 1 37
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9169, 0.9908)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 0.9250
## Specificity 1.0000 0.9625 0.9875
## Pos Pred Value 1.0000 0.9286 0.9737
## Neg Pred Value 1.0000 0.9872 0.9634
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3083
## Detection Prevalence 0.3333 0.3500 0.3167
## Balanced Accuracy 1.0000 0.9688 0.9563
# Confusion matrix of the decision-tree (model 4) predictions against the true
# Species labels of the test rows. This must use resultado_prueba4: passing
# resultado_prueba3 would re-report the svmPoly test results under mcrp4.
mcrp4 <- confusionMatrix(resultado_prueba4, prueba$Species)
mcrp4
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 1
## virginica 0 0 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 2.963e-13
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
# ---- Model 5: neural network (caret method "nnet") ----
library(caret)
set.seed(123)  # same seed as the other models so the split is identical
# Work from the built-in iris data set.
data(iris)
# 80/20 train/test partition of the rows, driven by the Species column.
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]
# Train the neural network with scaled and centred predictors and 10-fold
# cross-validation; no tuning grid is supplied, so caret's defaults are used.
modelo5 <- train(Species ~ .,
  data = entrenamiento,
  method = "nnet",
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  # Forwarded to nnet(): silences the per-iteration optimiser log that is
  # otherwise printed for every resample and candidate network.
  trace = FALSE
)
## # weights: 11
## initial value 137.158154
## iter 10 value 83.679659
## iter 20 value 49.940615
## final value 49.908817
## converged
## # weights: 27
## initial value 116.394990
## iter 10 value 6.186241
## iter 20 value 0.260622
## iter 30 value 0.000744
## final value 0.000077
## converged
## # weights: 43
## initial value 147.341272
## iter 10 value 20.723909
## iter 20 value 1.711652
## iter 30 value 0.002668
## final value 0.000059
## converged
## # weights: 11
## initial value 121.560247
## iter 10 value 65.949471
## iter 20 value 55.881751
## iter 30 value 44.683010
## iter 40 value 44.159606
## iter 40 value 44.159606
## iter 40 value 44.159606
## final value 44.159606
## converged
## # weights: 27
## initial value 162.871678
## iter 10 value 31.973292
## iter 20 value 21.643843
## iter 30 value 21.176664
## iter 40 value 21.175187
## final value 21.175184
## converged
## # weights: 43
## initial value 124.403113
## iter 10 value 22.493781
## iter 20 value 18.970819
## iter 30 value 18.776926
## iter 40 value 18.751512
## iter 50 value 18.594955
## iter 60 value 18.433573
## iter 70 value 18.406314
## final value 18.406244
## converged
## # weights: 11
## initial value 120.330027
## iter 10 value 48.948789
## iter 20 value 19.465594
## iter 30 value 4.922147
## iter 40 value 3.955106
## iter 50 value 3.905223
## iter 60 value 3.894400
## iter 70 value 3.881836
## iter 80 value 3.874804
## iter 90 value 3.872370
## iter 100 value 3.872335
## final value 3.872335
## stopped after 100 iterations
## # weights: 27
## initial value 126.848219
## iter 10 value 26.579951
## iter 20 value 2.125995
## iter 30 value 0.503338
## iter 40 value 0.462699
## iter 50 value 0.426509
## iter 60 value 0.411345
## iter 70 value 0.394772
## iter 80 value 0.382673
## iter 90 value 0.374982
## iter 100 value 0.371874
## final value 0.371874
## stopped after 100 iterations
## # weights: 43
## initial value 139.437764
## iter 10 value 6.381330
## iter 20 value 0.512400
## iter 30 value 0.467261
## iter 40 value 0.445606
## iter 50 value 0.436766
## iter 60 value 0.416541
## iter 70 value 0.388106
## iter 80 value 0.382227
## iter 90 value 0.379333
## iter 100 value 0.371278
## final value 0.371278
## stopped after 100 iterations
## # weights: 11
## initial value 130.106043
## iter 10 value 27.186408
## iter 20 value 4.702041
## iter 30 value 3.010037
## iter 40 value 2.199278
## iter 50 value 1.884656
## iter 60 value 1.836945
## iter 70 value 1.752174
## iter 80 value 1.745749
## iter 90 value 1.507029
## iter 100 value 1.493314
## final value 1.493314
## stopped after 100 iterations
## # weights: 27
## initial value 114.723677
## iter 10 value 6.352999
## iter 20 value 0.555935
## final value 0.000059
## converged
## # weights: 43
## initial value 135.420682
## iter 10 value 7.555059
## iter 20 value 0.331280
## iter 30 value 0.000103
## iter 30 value 0.000051
## iter 30 value 0.000050
## final value 0.000050
## converged
## # weights: 11
## initial value 124.421768
## iter 10 value 60.982773
## iter 20 value 44.161198
## iter 30 value 43.464615
## iter 30 value 43.464614
## iter 30 value 43.464614
## final value 43.464614
## converged
## # weights: 27
## initial value 123.592678
## iter 10 value 29.678577
## iter 20 value 20.064880
## iter 30 value 19.879129
## iter 40 value 19.844432
## final value 19.844390
## converged
## # weights: 43
## initial value 140.131749
## iter 10 value 28.007751
## iter 20 value 18.816789
## iter 30 value 18.340611
## iter 40 value 18.308431
## iter 50 value 18.305748
## final value 18.305348
## converged
## # weights: 11
## initial value 123.494107
## iter 10 value 105.350381
## iter 20 value 99.435136
## iter 30 value 40.065228
## iter 40 value 8.675195
## iter 50 value 4.168312
## iter 60 value 4.104548
## iter 70 value 3.854797
## iter 80 value 3.850383
## iter 90 value 3.845358
## iter 100 value 3.835212
## final value 3.835212
## stopped after 100 iterations
## # weights: 27
## initial value 122.562474
## iter 10 value 8.450360
## iter 20 value 1.814922
## iter 30 value 0.716283
## iter 40 value 0.640788
## iter 50 value 0.621339
## iter 60 value 0.612589
## iter 70 value 0.584200
## iter 80 value 0.544549
## iter 90 value 0.528601
## iter 100 value 0.514779
## final value 0.514779
## stopped after 100 iterations
## # weights: 43
## initial value 123.657348
## iter 10 value 10.614579
## iter 20 value 2.048113
## iter 30 value 0.659304
## iter 40 value 0.544423
## iter 50 value 0.532691
## iter 60 value 0.510665
## iter 70 value 0.466030
## iter 80 value 0.432980
## iter 90 value 0.364387
## iter 100 value 0.341414
## final value 0.341414
## stopped after 100 iterations
## # weights: 11
## initial value 132.593715
## iter 10 value 48.478232
## iter 20 value 26.680423
## iter 30 value 5.247608
## iter 40 value 2.894683
## iter 50 value 2.713712
## iter 60 value 2.620707
## iter 70 value 2.540290
## iter 80 value 2.455773
## iter 90 value 2.267592
## iter 100 value 2.151796
## final value 2.151796
## stopped after 100 iterations
## # weights: 27
## initial value 120.567002
## iter 10 value 4.393773
## iter 20 value 0.043939
## iter 30 value 0.000566
## final value 0.000075
## converged
## # weights: 43
## initial value 119.486317
## iter 10 value 8.852497
## iter 20 value 1.503151
## iter 30 value 0.005087
## iter 40 value 0.000143
## final value 0.000045
## converged
## # weights: 11
## initial value 138.956697
## iter 10 value 64.167257
## iter 20 value 46.791809
## iter 30 value 44.090079
## final value 44.057609
## converged
## # weights: 27
## initial value 117.449640
## iter 10 value 25.035090
## iter 20 value 20.291424
## iter 30 value 20.270756
## final value 20.270746
## converged
## # weights: 43
## initial value 131.615496
## iter 10 value 28.251470
## iter 20 value 19.360745
## iter 30 value 19.289328
## iter 40 value 19.287649
## iter 50 value 19.287597
## final value 19.287594
## converged
## # weights: 11
## initial value 120.067169
## iter 10 value 52.352023
## iter 20 value 50.228278
## iter 30 value 50.176003
## iter 40 value 50.144595
## iter 50 value 50.095225
## iter 60 value 49.938343
## iter 70 value 49.520427
## iter 80 value 49.433208
## iter 90 value 49.391382
## iter 100 value 49.306617
## final value 49.306617
## stopped after 100 iterations
## # weights: 27
## initial value 121.958571
## iter 10 value 4.666620
## iter 20 value 0.776850
## iter 30 value 0.707439
## iter 40 value 0.640867
## iter 50 value 0.600758
## iter 60 value 0.593505
## iter 70 value 0.584147
## iter 80 value 0.557207
## iter 90 value 0.538098
## iter 100 value 0.514508
## final value 0.514508
## stopped after 100 iterations
## # weights: 43
## initial value 122.004578
## iter 10 value 5.297091
## iter 20 value 1.244824
## iter 30 value 0.495554
## iter 40 value 0.483174
## iter 50 value 0.468581
## iter 60 value 0.375798
## iter 70 value 0.362002
## iter 80 value 0.358173
## iter 90 value 0.350027
## iter 100 value 0.341343
## final value 0.341343
## stopped after 100 iterations
## # weights: 11
## initial value 127.875079
## iter 10 value 33.444226
## iter 20 value 6.390350
## iter 30 value 2.177976
## iter 40 value 1.888232
## iter 50 value 0.770754
## iter 60 value 0.617610
## iter 70 value 0.559419
## iter 80 value 0.543517
## iter 90 value 0.477915
## iter 100 value 0.457584
## final value 0.457584
## stopped after 100 iterations
## # weights: 27
## initial value 128.599893
## iter 10 value 44.079424
## iter 20 value 2.154037
## iter 30 value 0.029681
## final value 0.000069
## converged
## # weights: 43
## initial value 124.439568
## iter 10 value 4.443221
## iter 20 value 0.029295
## final value 0.000092
## converged
## # weights: 11
## initial value 140.781917
## iter 10 value 49.139091
## iter 20 value 42.676198
## final value 42.671367
## converged
## # weights: 27
## initial value 130.168472
## iter 10 value 25.483245
## iter 20 value 19.394183
## iter 30 value 18.266411
## iter 40 value 18.183916
## iter 50 value 18.183745
## final value 18.183745
## converged
## # weights: 43
## initial value 123.492335
## iter 10 value 25.604376
## iter 20 value 17.578453
## iter 30 value 17.302413
## iter 40 value 17.239993
## iter 50 value 17.235966
## iter 60 value 17.210698
## iter 70 value 16.990390
## iter 80 value 16.954457
## final value 16.954456
## converged
## # weights: 11
## initial value 141.016346
## iter 10 value 27.908285
## iter 20 value 4.184545
## iter 30 value 2.072889
## iter 40 value 1.990949
## iter 50 value 1.947868
## iter 60 value 1.913708
## iter 70 value 1.905145
## iter 80 value 1.901705
## iter 90 value 1.899713
## iter 100 value 1.897791
## final value 1.897791
## stopped after 100 iterations
## # weights: 27
## initial value 130.318008
## iter 10 value 2.534330
## iter 20 value 0.226734
## iter 30 value 0.212995
## iter 40 value 0.196194
## iter 50 value 0.192300
## iter 60 value 0.173589
## iter 70 value 0.165969
## iter 80 value 0.158151
## iter 90 value 0.151971
## iter 100 value 0.146246
## final value 0.146246
## stopped after 100 iterations
## # weights: 43
## initial value 132.276641
## iter 10 value 27.227544
## iter 20 value 2.208204
## iter 30 value 0.486149
## iter 40 value 0.384802
## iter 50 value 0.322610
## iter 60 value 0.296213
## iter 70 value 0.260001
## iter 80 value 0.186170
## iter 90 value 0.168783
## iter 100 value 0.156405
## final value 0.156405
## stopped after 100 iterations
## # weights: 11
## initial value 124.721231
## iter 10 value 24.907273
## iter 20 value 3.860256
## iter 30 value 2.694222
## iter 40 value 2.606534
## iter 50 value 2.549742
## iter 60 value 2.480770
## iter 70 value 2.432589
## iter 80 value 2.374387
## iter 90 value 2.304015
## iter 100 value 2.277255
## final value 2.277255
## stopped after 100 iterations
## # weights: 27
## initial value 135.883765
## iter 10 value 9.588177
## iter 20 value 2.164867
## iter 30 value 0.003083
## iter 40 value 0.000404
## final value 0.000079
## converged
## # weights: 43
## initial value 128.423859
## iter 10 value 8.362453
## iter 20 value 1.107101
## iter 30 value 0.002056
## final value 0.000080
## converged
## # weights: 11
## initial value 126.814860
## iter 10 value 45.330853
## iter 20 value 43.371713
## final value 43.371698
## converged
## # weights: 27
## initial value 134.971673
## iter 10 value 35.878441
## iter 20 value 23.544146
## iter 30 value 21.359427
## iter 40 value 19.924676
## iter 50 value 19.397158
## iter 60 value 19.396984
## final value 19.396975
## converged
## # weights: 43
## initial value 131.139565
## iter 10 value 26.253238
## iter 20 value 18.403900
## iter 30 value 17.965643
## iter 40 value 17.848649
## iter 50 value 17.842969
## iter 60 value 17.842727
## final value 17.842722
## converged
## # weights: 11
## initial value 121.942596
## iter 10 value 38.114589
## iter 20 value 5.888252
## iter 30 value 4.098676
## iter 40 value 3.775997
## iter 50 value 3.766825
## iter 60 value 3.726327
## iter 70 value 3.712347
## iter 80 value 3.712293
## iter 90 value 3.711980
## final value 3.711809
## converged
## # weights: 27
## initial value 124.424719
## iter 10 value 36.553008
## iter 20 value 9.253423
## iter 30 value 1.918811
## iter 40 value 0.813359
## iter 50 value 0.471166
## iter 60 value 0.434232
## iter 70 value 0.412618
## iter 80 value 0.405562
## iter 90 value 0.400759
## iter 100 value 0.390511
## final value 0.390511
## stopped after 100 iterations
## # weights: 43
## initial value 127.087485
## iter 10 value 3.861857
## iter 20 value 0.789964
## iter 30 value 0.603240
## iter 40 value 0.538274
## iter 50 value 0.482849
## iter 60 value 0.442551
## iter 70 value 0.343017
## iter 80 value 0.327263
## iter 90 value 0.311396
## iter 100 value 0.296024
## final value 0.296024
## stopped after 100 iterations
## # weights: 11
## initial value 120.926669
## iter 10 value 80.051213
## iter 20 value 49.871571
## iter 30 value 37.312293
## iter 40 value 8.554628
## iter 50 value 4.749590
## iter 60 value 4.203783
## iter 70 value 3.338322
## iter 80 value 2.396834
## iter 90 value 2.285253
## iter 100 value 2.266166
## final value 2.266166
## stopped after 100 iterations
## # weights: 27
## initial value 134.314255
## iter 10 value 18.375214
## iter 20 value 3.032836
## iter 30 value 0.088363
## iter 40 value 0.000269
## final value 0.000068
## converged
## # weights: 43
## initial value 127.460669
## iter 10 value 6.647251
## iter 20 value 0.860359
## iter 30 value 0.000183
## iter 30 value 0.000089
## iter 30 value 0.000088
## final value 0.000088
## converged
## # weights: 11
## initial value 129.349972
## iter 10 value 58.657455
## iter 20 value 46.190684
## iter 30 value 43.787672
## final value 43.776742
## converged
## # weights: 27
## initial value 136.564527
## iter 10 value 26.172828
## iter 20 value 21.449170
## iter 30 value 21.350292
## iter 40 value 21.347425
## final value 21.347423
## converged
## # weights: 43
## initial value 134.553205
## iter 10 value 26.863852
## iter 20 value 19.099568
## iter 30 value 18.385082
## iter 40 value 18.319448
## iter 50 value 18.315140
## iter 60 value 18.314237
## final value 18.314138
## converged
## # weights: 11
## initial value 118.845224
## iter 10 value 51.168472
## iter 20 value 49.976921
## iter 30 value 49.973436
## iter 40 value 49.963205
## iter 50 value 49.957210
## iter 60 value 49.917964
## iter 70 value 44.684196
## iter 80 value 14.830732
## iter 90 value 4.745959
## iter 100 value 4.005893
## final value 4.005893
## stopped after 100 iterations
## # weights: 27
## initial value 116.024325
## iter 10 value 39.736151
## iter 20 value 28.650940
## iter 30 value 21.536570
## iter 40 value 10.723406
## iter 50 value 4.960185
## iter 60 value 4.763786
## iter 70 value 4.501322
## iter 80 value 3.728015
## iter 90 value 1.977298
## iter 100 value 1.662670
## final value 1.662670
## stopped after 100 iterations
## # weights: 43
## initial value 139.109512
## iter 10 value 6.839446
## iter 20 value 1.801500
## iter 30 value 0.653309
## iter 40 value 0.629758
## iter 50 value 0.531276
## iter 60 value 0.495929
## iter 70 value 0.479711
## iter 80 value 0.474062
## iter 90 value 0.459572
## iter 100 value 0.439866
## final value 0.439866
## stopped after 100 iterations
## # weights: 11
## initial value 130.134894
## iter 10 value 21.120199
## iter 20 value 3.850425
## iter 30 value 2.619123
## iter 40 value 2.164615
## iter 50 value 2.113793
## iter 60 value 2.099946
## iter 70 value 1.982283
## iter 80 value 1.970304
## iter 90 value 1.855431
## iter 100 value 1.840065
## final value 1.840065
## stopped after 100 iterations
## # weights: 27
## initial value 121.319823
## iter 10 value 9.224554
## iter 20 value 1.619145
## iter 30 value 0.023451
## final value 0.000052
## converged
## # weights: 43
## initial value 119.958783
## iter 10 value 7.355517
## iter 20 value 0.451841
## iter 30 value 0.000974
## final value 0.000055
## converged
## # weights: 11
## initial value 135.882848
## iter 10 value 106.344554
## iter 20 value 49.663389
## iter 30 value 44.082267
## final value 44.081824
## converged
## # weights: 27
## initial value 129.601052
## iter 10 value 25.326332
## iter 20 value 20.841301
## iter 30 value 20.365360
## iter 40 value 19.981453
## iter 50 value 19.971411
## iter 60 value 19.970853
## final value 19.970845
## converged
## # weights: 43
## initial value 133.009438
## iter 10 value 28.591164
## iter 20 value 19.530477
## iter 30 value 19.340802
## iter 40 value 19.337827
## iter 50 value 19.336955
## iter 60 value 19.336942
## final value 19.336937
## converged
## # weights: 11
## initial value 122.494186
## iter 10 value 50.595369
## iter 20 value 49.972100
## final value 49.965717
## converged
## # weights: 27
## initial value 126.005379
## iter 10 value 20.133877
## iter 20 value 1.723439
## iter 30 value 0.850570
## iter 40 value 0.800848
## iter 50 value 0.703194
## iter 60 value 0.587465
## iter 70 value 0.532854
## iter 80 value 0.518100
## iter 90 value 0.494016
## iter 100 value 0.483044
## final value 0.483044
## stopped after 100 iterations
## # weights: 43
## initial value 141.403683
## iter 10 value 5.004731
## iter 20 value 1.695175
## iter 30 value 0.892849
## iter 40 value 0.736870
## iter 50 value 0.604144
## iter 60 value 0.525125
## iter 70 value 0.514531
## iter 80 value 0.503234
## iter 90 value 0.485164
## iter 100 value 0.477446
## final value 0.477446
## stopped after 100 iterations
## # weights: 11
## initial value 134.899919
## iter 10 value 37.356151
## iter 20 value 12.284395
## iter 30 value 4.202226
## iter 40 value 3.073761
## iter 50 value 2.532373
## iter 60 value 2.211445
## iter 70 value 2.166202
## iter 80 value 2.124348
## iter 90 value 2.055965
## iter 100 value 1.816387
## final value 1.816387
## stopped after 100 iterations
## # weights: 27
## initial value 124.434786
## iter 10 value 4.774821
## iter 20 value 0.002590
## iter 30 value 0.000113
## final value 0.000099
## converged
## # weights: 43
## initial value 117.179052
## iter 10 value 2.763293
## iter 20 value 0.003312
## final value 0.000057
## converged
## # weights: 11
## initial value 144.222752
## iter 10 value 60.355328
## iter 20 value 43.895422
## iter 30 value 42.994217
## iter 30 value 42.994216
## iter 30 value 42.994216
## final value 42.994216
## converged
## # weights: 27
## initial value 149.958592
## iter 10 value 29.106817
## iter 20 value 18.723106
## iter 30 value 18.602068
## iter 40 value 18.594731
## final value 18.594730
## converged
## # weights: 43
## initial value 151.323466
## iter 10 value 25.176469
## iter 20 value 17.911917
## iter 30 value 17.801742
## iter 40 value 17.567450
## iter 50 value 17.232979
## iter 60 value 17.040933
## iter 70 value 17.030898
## iter 80 value 17.030049
## final value 17.029957
## converged
## # weights: 11
## initial value 125.330915
## iter 10 value 50.092228
## iter 20 value 49.928518
## iter 30 value 49.225211
## iter 40 value 46.190986
## iter 50 value 39.912675
## iter 60 value 14.721394
## iter 70 value 5.240189
## iter 80 value 3.695497
## iter 90 value 3.515460
## iter 100 value 3.098835
## final value 3.098835
## stopped after 100 iterations
## # weights: 27
## initial value 131.514021
## iter 10 value 21.428450
## iter 20 value 1.857646
## iter 30 value 0.656298
## iter 40 value 0.573496
## iter 50 value 0.440421
## iter 60 value 0.411152
## iter 70 value 0.390519
## iter 80 value 0.385680
## iter 90 value 0.368784
## iter 100 value 0.348473
## final value 0.348473
## stopped after 100 iterations
## # weights: 43
## initial value 131.396056
## iter 10 value 4.227384
## iter 20 value 1.424260
## iter 30 value 0.484542
## iter 40 value 0.451634
## iter 50 value 0.434553
## iter 60 value 0.411818
## iter 70 value 0.380774
## iter 80 value 0.361957
## iter 90 value 0.355571
## iter 100 value 0.341448
## final value 0.341448
## stopped after 100 iterations
## # weights: 11
## initial value 124.725766
## iter 10 value 52.435359
## iter 20 value 49.907922
## iter 30 value 49.382401
## iter 40 value 35.220997
## iter 50 value 8.801913
## iter 60 value 4.262407
## iter 70 value 2.632280
## iter 80 value 2.291911
## iter 90 value 2.171173
## iter 100 value 2.064037
## final value 2.064037
## stopped after 100 iterations
## # weights: 27
## initial value 121.213256
## iter 10 value 4.715440
## iter 20 value 0.012867
## final value 0.000060
## converged
## # weights: 43
## initial value 125.839411
## iter 10 value 5.743691
## iter 20 value 0.160593
## final value 0.000072
## converged
## # weights: 11
## initial value 119.242497
## iter 10 value 44.746080
## iter 20 value 43.871570
## final value 43.871556
## converged
## # weights: 27
## initial value 126.090732
## iter 10 value 27.134842
## iter 20 value 22.347298
## iter 30 value 22.047386
## iter 40 value 21.022820
## iter 50 value 20.939006
## final value 20.938933
## converged
## # weights: 43
## initial value 108.205013
## iter 10 value 33.575657
## iter 20 value 19.798211
## iter 30 value 19.239341
## iter 40 value 18.985431
## iter 50 value 18.950630
## iter 60 value 18.943527
## iter 70 value 18.940566
## final value 18.940477
## converged
## # weights: 11
## initial value 127.705849
## iter 10 value 45.929987
## iter 20 value 36.821210
## iter 30 value 13.164203
## iter 40 value 4.588665
## iter 50 value 3.615648
## iter 60 value 3.539641
## iter 70 value 3.325096
## iter 80 value 3.304364
## iter 90 value 3.301093
## iter 100 value 3.300849
## final value 3.300849
## stopped after 100 iterations
## # weights: 27
## initial value 145.426276
## iter 10 value 10.662092
## iter 20 value 1.885068
## iter 30 value 0.785276
## iter 40 value 0.647444
## iter 50 value 0.560103
## iter 60 value 0.508256
## iter 70 value 0.473915
## iter 80 value 0.426546
## iter 90 value 0.371958
## iter 100 value 0.345431
## final value 0.345431
## stopped after 100 iterations
## # weights: 43
## initial value 129.307234
## iter 10 value 6.466680
## iter 20 value 0.465599
## iter 30 value 0.369290
## iter 40 value 0.356601
## iter 50 value 0.340119
## iter 60 value 0.332450
## iter 70 value 0.314144
## iter 80 value 0.302247
## iter 90 value 0.299302
## iter 100 value 0.291440
## final value 0.291440
## stopped after 100 iterations
## # weights: 11
## initial value 118.395363
## iter 10 value 50.040092
## iter 20 value 49.907040
## final value 49.906755
## converged
## # weights: 27
## initial value 120.969076
## iter 10 value 9.562802
## iter 20 value 0.612910
## iter 30 value 0.002474
## final value 0.000067
## converged
## # weights: 43
## initial value 122.764102
## iter 10 value 8.448140
## iter 20 value 0.780831
## iter 30 value 0.002363
## final value 0.000084
## converged
## # weights: 11
## initial value 130.728649
## iter 10 value 57.681475
## iter 20 value 44.398334
## iter 30 value 43.385700
## final value 43.382021
## converged
## # weights: 27
## initial value 128.997817
## iter 10 value 30.196862
## iter 20 value 19.901318
## iter 30 value 19.715836
## iter 40 value 19.653286
## iter 50 value 19.494844
## iter 60 value 19.484851
## iter 70 value 19.483818
## final value 19.483817
## converged
## # weights: 43
## initial value 102.606354
## iter 10 value 24.828885
## iter 20 value 18.910758
## iter 30 value 18.573516
## iter 40 value 18.463629
## iter 50 value 18.462101
## final value 18.462087
## converged
## # weights: 11
## initial value 120.687211
## iter 10 value 54.577891
## iter 20 value 26.112984
## iter 30 value 4.533226
## iter 40 value 3.795451
## iter 50 value 3.694006
## iter 60 value 3.664231
## iter 70 value 3.658786
## iter 80 value 3.647778
## iter 90 value 3.645322
## iter 100 value 3.645062
## final value 3.645062
## stopped after 100 iterations
## # weights: 27
## initial value 126.392641
## iter 10 value 28.610864
## iter 20 value 6.330440
## iter 30 value 1.064218
## iter 40 value 0.263611
## iter 50 value 0.252745
## iter 60 value 0.248951
## iter 70 value 0.224939
## iter 80 value 0.215765
## iter 90 value 0.214087
## iter 100 value 0.211165
## final value 0.211165
## stopped after 100 iterations
## # weights: 43
## initial value 139.699176
## iter 10 value 4.566035
## iter 20 value 0.709051
## iter 30 value 0.579667
## iter 40 value 0.527335
## iter 50 value 0.469353
## iter 60 value 0.440636
## iter 70 value 0.396077
## iter 80 value 0.370362
## iter 90 value 0.317181
## iter 100 value 0.308036
## final value 0.308036
## stopped after 100 iterations
## # weights: 27
## initial value 176.738412
## iter 10 value 28.632388
## iter 20 value 21.019063
## iter 30 value 20.367429
## iter 40 value 20.366183
## final value 20.366062
## converged
# Predict the class labels with modelo5 on both partitions
resultado_entrenamiento5 <- predict(modelo5, newdata = entrenamiento)
resultado_prueba5 <- predict(modelo5, newdata = prueba)
# Confusion matrix: training-set predictions against the observed species
mcre5 <- confusionMatrix(
  data = resultado_entrenamiento5,
  reference = entrenamiento$Species
)
mcre5
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 38 0
## virginica 0 2 40
##
## Overall Statistics
##
## Accuracy : 0.9833
## 95% CI : (0.9411, 0.998)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.975
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9500 1.0000
## Specificity 1.0000 1.0000 0.9750
## Pos Pred Value 1.0000 1.0000 0.9524
## Neg Pred Value 1.0000 0.9756 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3167 0.3333
## Detection Prevalence 0.3333 0.3167 0.3500
## Balanced Accuracy 1.0000 0.9750 0.9875
# Confusion matrix: test-set predictions against the observed species
mcrp5 <- confusionMatrix(
  data = resultado_prueba5,
  reference = prueba$Species
)
mcrp5
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 1
## virginica 0 0 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 2.963e-13
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
# Packages
# install.packages(c("caret", "randomForest"))
library(caret)

# Fix the RNG seed before any random step so the run is reproducible
set.seed(123)

# Load iris and split it 80/20 with createDataPartition on Species;
# the selected rows go to training and the remaining rows to test
data(iris)
trainIndex <- createDataPartition(y = iris$Species, p = 0.8, list = FALSE)
entrenamiento <- iris[trainIndex, ]
prueba <- iris[-trainIndex, ]
# Tuning grid (candidate mtry values) and resampling control (10-fold CV)
grid <- expand.grid(mtry = c(2, 3, 4))
ctrl <- trainControl(method = "cv", number = 10)

# Training. Note: centering/scaling is not expected to change a
# tree-based model such as "rf", but it does no harm either.
modelo6 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rf",
  ntree = 500,
  tuneGrid = grid,
  trControl = ctrl,
  preProcess = c("center", "scale")
)
# Training partition: predictions and confusion matrix for modelo6
resultado_entrenamiento6 <- predict(modelo6, newdata = entrenamiento)
mcre6 <- confusionMatrix(
  data = resultado_entrenamiento6,
  reference = entrenamiento$Species
)

# Test partition: predictions and confusion matrix for modelo6
resultado_prueba6 <- predict(modelo6, newdata = prueba)
mcrp6 <- confusionMatrix(
  data = resultado_prueba6,
  reference = prueba$Species
)
# <span style="color:blue;"> Modelo 6 Bosques aleatorios </span>
# Comparison table of the Accuracy reported by each confusion matrix.
# One column per caret method; within each column the first element comes
# from the training-set matrix (mcreN) and the second from the test-set
# matrix (mcrpN).
resultados <- data.frame(
  # Column label fixed: the method is "svmLinear" (was misspelled "sumLinear")
  "svmLinear" = c(mcre1$overall["Accuracy"], mcrp1$overall["Accuracy"]),
  "svmRadial" = c(mcre2$overall["Accuracy"], mcrp2$overall["Accuracy"]),
  "svmPoly" = c(mcre3$overall["Accuracy"], mcrp3$overall["Accuracy"]),
  "rpart" = c(mcre4$overall["Accuracy"], mcrp4$overall["Accuracy"]),
  "nnet" = c(mcre5$overall["Accuracy"], mcrp5$overall["Accuracy"]),
  "rf" = c(mcre6$overall["Accuracy"], mcrp6$overall["Accuracy"])
)
# Row labels are user-facing output: training accuracy, then test accuracy
rownames(resultados) <- c("Precisión de entrenamiento", "Precisión de prueba")
resultados
## svmLinear svmRadial svmPoly rpart nnet
## Precisión de entrenamiento 0.9916667 0.9916667 0.9916667 0.9666667 0.9833333
## Precisión de prueba 0.9666667 0.9333333 0.9666667 0.9666667 0.9666667
## rf
## Precisión de entrenamiento 1.0000000
## Precisión de prueba 0.9333333
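De acuerdo con la tabla de resultados, ningún método presenta un sobreajuste marcado: la mayor diferencia entre la precisión de entrenamiento y la de prueba es la de bosques aleatorios (1.000 frente a 0.933). Podemos seleccionar el modelo de redes neuronales por su desempeño (0.983 en entrenamiento y 0.967 en prueba).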