El paquete CARET (Classification And Regression Training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.
#install.packages("ggplot2") #Graficar
library(ggplot2)
#install.packages("lattice") #Crear gráficos
library(lattice)
#install.packages("caret") #Algoritmos de aprendizaje automático
library(caret)
#install.packages("datasets") #Usar bases de datos, iris en este caso
library(datasets)
#install.packages("DataExplorer") #Análisis exploratorio
library(DataExplorer)
# Copy the built-in iris data set into a working data frame and print
# summary statistics for every column.
df <- data.frame(datasets::iris)
summary(object = df)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Show the compact structure of the data frame (column types and first values).
utils::str(object = df)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Exploratory plots from DataExplorer, called on the full data frame.
# create_report(df)  # full report, left disabled
DataExplorer::plot_missing(data = df)
DataExplorer::plot_histogram(data = df)
DataExplorer::plot_correlation(data = df)
NOTA: La variable que queremos predecir debe tener formato de FACTOR.
# Hold-out split: 80% of the rows are used for training and the remaining
# 20% for testing (80/20 is the usual ratio). The fixed seed makes the
# partition reproducible.
set.seed(123)
renglones_entrenamiento <- createDataPartition(
  y = df[["Species"]],
  p = 0.8,
  list = FALSE
)
prueba <- df[-renglones_entrenamiento, ]
entrenamiento <- df[renglones_entrenamiento, ]
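Los métodos más utilizados para modelar (y los que se prueban a continuación) son: SVM lineal (svmLinear), SVM radial (svmRadial), SVM polinomial (svmPoly), árbol de decisión (rpart), red neuronal (nnet) y bosque aleatorio (rf).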
# Model 1: linear-kernel SVM evaluated with 10-fold cross-validation on
# centred and scaled predictors.
# caret only honours a fixed grid when it is passed as `tuneGrid` and its
# column names match the method's tuning parameters (`C` for svmLinear);
# any other spelling is forwarded to the underlying fitter and the default
# search is used instead.
modelo1 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmLinear", # change to try another method
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(C = 1) # change to try other cost values
)
# Predict on both partitions with model 1.
resultado_prueba1 <- predict(object = modelo1, newdata = prueba)
resultado_entrenamiento1 <- predict(object = modelo1, newdata = entrenamiento)
# Confusion matrix: an evaluation table that breaks down the performance of
# the classification model.
# Confusion matrix for the training predictions.
mcre1 <- confusionMatrix(
  data = resultado_entrenamiento1,
  reference = entrenamiento[["Species"]]
)
mcre1
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 0
## virginica 0 1 40
##
## Overall Statistics
##
## Accuracy : 0.9917
## 95% CI : (0.9544, 0.9998)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9875
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 1.0000
## Specificity 1.0000 1.0000 0.9875
## Pos Pred Value 1.0000 1.0000 0.9756
## Neg Pred Value 1.0000 0.9877 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3333
## Detection Prevalence 0.3333 0.3250 0.3417
## Balanced Accuracy 1.0000 0.9875 0.9938
# Confusion matrix for the test predictions of model 1.
mcrp1 <- confusionMatrix(
  data = resultado_prueba1,
  reference = prueba[["Species"]]
)
mcrp1
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 1
## virginica 0 0 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 2.963e-13
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
# Model 2: radial-kernel SVM evaluated with 10-fold cross-validation on
# centred and scaled predictors.
# caret only honours a fixed grid when it is passed as `tuneGrid` and its
# column names match the method's tuning parameters (`sigma` and `C` for
# svmRadial); any other spelling is forwarded to the underlying fitter and
# the default search is used instead.
modelo2 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmRadial", # change to try another method
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(sigma = 1, C = 1) # change to try other values
)
# Predict on both partitions with model 2.
resultado_prueba2 <- predict(object = modelo2, newdata = prueba)
resultado_entrenamiento2 <- predict(object = modelo2, newdata = entrenamiento)
# Confusion matrix: an evaluation table that breaks down the performance of
# the classification model.
# Confusion matrix for the training predictions.
mcre2 <- confusionMatrix(
  data = resultado_entrenamiento2,
  reference = entrenamiento[["Species"]]
)
mcre2
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 0
## virginica 0 1 40
##
## Overall Statistics
##
## Accuracy : 0.9917
## 95% CI : (0.9544, 0.9998)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9875
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 1.0000
## Specificity 1.0000 1.0000 0.9875
## Pos Pred Value 1.0000 1.0000 0.9756
## Neg Pred Value 1.0000 0.9877 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3333
## Detection Prevalence 0.3333 0.3250 0.3417
## Balanced Accuracy 1.0000 0.9875 0.9938
# Confusion matrix for the test predictions of model 2.
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba[["Species"]]
)
mcrp2
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 2
## virginica 0 0 8
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8000
## Specificity 1.0000 0.9000 1.0000
## Pos Pred Value 1.0000 0.8333 1.0000
## Neg Pred Value 1.0000 1.0000 0.9091
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2667
## Detection Prevalence 0.3333 0.4000 0.2667
## Balanced Accuracy 1.0000 0.9500 0.9000
# Model 3: polynomial-kernel SVM evaluated with 10-fold cross-validation on
# centred and scaled predictors.
# caret only honours a fixed grid when it is passed as `tuneGrid` and its
# column names match the method's tuning parameters (`degree`, `scale` and
# `C` for svmPoly); any other spelling is forwarded to the underlying fitter
# and the default search is used instead.
modelo3 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "svmPoly", # change to try another method
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = data.frame(degree = 1, scale = 1, C = 1) # change to try other values
)
# Predict on both partitions with model 3.
resultado_prueba3 <- predict(object = modelo3, newdata = prueba)
resultado_entrenamiento3 <- predict(object = modelo3, newdata = entrenamiento)
# Confusion matrix: an evaluation table that breaks down the performance of
# the classification model.
# Confusion matrix for the training predictions.
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento[["Species"]]
)
mcre3
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 40 4
## virginica 0 0 36
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9169, 0.9908)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
# Confusion matrix for the test predictions of model 3.
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba[["Species"]]
)
mcrp3
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 2
## virginica 0 0 8
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8000
## Specificity 1.0000 0.9000 1.0000
## Pos Pred Value 1.0000 0.8333 1.0000
## Neg Pred Value 1.0000 1.0000 0.9091
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2667
## Detection Prevalence 0.3333 0.4000 0.2667
## Balanced Accuracy 1.0000 0.9500 0.9000
# Model 4: decision tree (rpart) evaluated with 10-fold cross-validation on
# centred and scaled predictors; caret tries 10 candidate values of the
# tuning parameter.
modelo4 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rpart", # change to try another method
  tuneLength = 10, # change to widen or narrow the search
  trControl = trainControl(number = 10, method = "cv"),
  preProcess = c("scale", "center")
)
# Predict on both partitions with model 4.
resultado_prueba4 <- predict(object = modelo4, newdata = prueba)
resultado_entrenamiento4 <- predict(object = modelo4, newdata = entrenamiento)
# Confusion matrix: an evaluation table that breaks down the performance of
# the classification model.
# Confusion matrix for the training predictions.
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento[["Species"]]
)
mcre4
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 39 3
## virginica 0 1 37
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9169, 0.9908)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9750 0.9250
## Specificity 1.0000 0.9625 0.9875
## Pos Pred Value 1.0000 0.9286 0.9737
## Neg Pred Value 1.0000 0.9872 0.9634
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3250 0.3083
## Detection Prevalence 0.3333 0.3500 0.3167
## Balanced Accuracy 1.0000 0.9688 0.9563
# Confusion matrix for the test predictions of model 4.
mcrp4 <- confusionMatrix(
  data = resultado_prueba4,
  reference = prueba[["Species"]]
)
mcrp4
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 2
## virginica 0 0 8
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8000
## Specificity 1.0000 0.9000 1.0000
## Pos Pred Value 1.0000 0.8333 1.0000
## Neg Pred Value 1.0000 1.0000 0.9091
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2667
## Detection Prevalence 0.3333 0.4000 0.2667
## Balanced Accuracy 1.0000 0.9500 0.9000
# Model 5: single-hidden-layer neural network (nnet) evaluated with 10-fold
# cross-validation on centred and scaled predictors, using caret's default
# tuning grid.
# `trace = FALSE` is forwarded to nnet so the optimiser does not print its
# iteration log for every resample/candidate fit, which otherwise floods the
# report with hundreds of lines.
modelo5 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "nnet", # change to try another method
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  trace = FALSE
)
## # weights: 11
## initial value 117.009298
## iter 10 value 30.693062
## iter 20 value 4.480530
## iter 30 value 2.124646
## iter 40 value 1.724334
## iter 50 value 1.558463
## iter 60 value 1.384759
## iter 70 value 1.194959
## iter 80 value 1.157489
## iter 90 value 1.037588
## iter 100 value 1.021675
## final value 1.021675
## stopped after 100 iterations
## # weights: 27
## initial value 144.555680
## iter 10 value 7.295675
## iter 20 value 0.951236
## iter 30 value 0.019779
## iter 40 value 0.001778
## final value 0.000054
## converged
## # weights: 43
## initial value 159.941630
## iter 10 value 29.130468
## iter 20 value 2.433214
## iter 30 value 0.203630
## iter 40 value 0.008716
## final value 0.000085
## converged
## # weights: 11
## initial value 122.779692
## iter 10 value 57.486922
## iter 20 value 43.651900
## iter 30 value 43.557151
## final value 43.557011
## converged
## # weights: 27
## initial value 125.993185
## iter 10 value 26.004322
## iter 20 value 19.109050
## iter 30 value 18.927620
## iter 40 value 18.923505
## final value 18.923489
## converged
## # weights: 43
## initial value 162.888687
## iter 10 value 20.938843
## iter 20 value 18.031113
## iter 30 value 17.715120
## iter 40 value 17.596630
## iter 50 value 17.399696
## iter 60 value 17.368518
## iter 70 value 17.368513
## iter 70 value 17.368513
## iter 70 value 17.368513
## final value 17.368513
## converged
## # weights: 11
## initial value 118.505234
## iter 10 value 40.039271
## iter 20 value 5.135436
## iter 30 value 3.734191
## iter 40 value 3.277615
## iter 50 value 3.145069
## iter 60 value 2.954089
## iter 70 value 2.904871
## iter 80 value 2.904015
## iter 90 value 2.903149
## iter 100 value 2.902252
## final value 2.902252
## stopped after 100 iterations
## # weights: 27
## initial value 122.899975
## iter 10 value 7.401054
## iter 20 value 0.334446
## iter 30 value 0.315600
## iter 40 value 0.297078
## iter 50 value 0.290032
## iter 60 value 0.266801
## iter 70 value 0.252113
## iter 80 value 0.242813
## iter 90 value 0.232748
## iter 100 value 0.223962
## final value 0.223962
## stopped after 100 iterations
## # weights: 43
## initial value 130.368788
## iter 10 value 6.085146
## iter 20 value 0.541576
## iter 30 value 0.324721
## iter 40 value 0.310666
## iter 50 value 0.288764
## iter 60 value 0.261876
## iter 70 value 0.241344
## iter 80 value 0.232620
## iter 90 value 0.221434
## iter 100 value 0.211098
## final value 0.211098
## stopped after 100 iterations
## # weights: 11
## initial value 114.821771
## iter 10 value 44.055072
## iter 20 value 11.932704
## iter 30 value 3.206457
## iter 40 value 2.877159
## iter 50 value 2.634047
## iter 60 value 2.327700
## iter 70 value 2.138623
## iter 80 value 1.010075
## iter 90 value 0.852535
## iter 100 value 0.693471
## final value 0.693471
## stopped after 100 iterations
## # weights: 27
## initial value 121.119589
## iter 10 value 4.241406
## iter 20 value 0.011919
## final value 0.000059
## converged
## # weights: 43
## initial value 146.114510
## iter 10 value 5.225415
## iter 20 value 0.151541
## iter 30 value 0.000108
## iter 30 value 0.000056
## iter 30 value 0.000055
## final value 0.000055
## converged
## # weights: 11
## initial value 117.838276
## iter 10 value 43.963261
## iter 20 value 42.933568
## iter 20 value 42.933567
## iter 20 value 42.933567
## final value 42.933567
## converged
## # weights: 27
## initial value 150.828476
## iter 10 value 24.131661
## iter 20 value 19.605948
## iter 30 value 19.530553
## iter 40 value 19.527242
## final value 19.527057
## converged
## # weights: 43
## initial value 125.354310
## iter 10 value 21.660478
## iter 20 value 17.958628
## iter 30 value 17.736044
## iter 40 value 17.719752
## iter 50 value 17.719581
## iter 50 value 17.719581
## iter 50 value 17.719581
## final value 17.719581
## converged
## # weights: 11
## initial value 118.990031
## iter 10 value 46.484314
## iter 20 value 39.225686
## iter 30 value 9.650620
## iter 40 value 4.514044
## iter 50 value 3.865438
## iter 60 value 3.508071
## iter 70 value 3.041838
## iter 80 value 2.965815
## iter 90 value 2.953655
## iter 100 value 2.950243
## final value 2.950243
## stopped after 100 iterations
## # weights: 27
## initial value 122.010199
## iter 10 value 10.036298
## iter 20 value 0.341101
## iter 30 value 0.272783
## iter 40 value 0.249672
## iter 50 value 0.228956
## iter 60 value 0.219568
## iter 70 value 0.215171
## iter 80 value 0.210042
## iter 90 value 0.197683
## iter 100 value 0.188999
## final value 0.188999
## stopped after 100 iterations
## # weights: 43
## initial value 129.722314
## iter 10 value 4.282782
## iter 20 value 0.309622
## iter 30 value 0.272824
## iter 40 value 0.255595
## iter 50 value 0.245836
## iter 60 value 0.226164
## iter 70 value 0.216460
## iter 80 value 0.208423
## iter 90 value 0.204559
## iter 100 value 0.200969
## final value 0.200969
## stopped after 100 iterations
## # weights: 11
## initial value 129.077732
## iter 10 value 50.893757
## iter 20 value 49.341214
## iter 30 value 47.711365
## iter 40 value 45.883507
## iter 50 value 45.433657
## iter 60 value 44.762037
## iter 70 value 20.639842
## iter 80 value 5.307883
## iter 90 value 4.186131
## iter 100 value 3.928824
## final value 3.928824
## stopped after 100 iterations
## # weights: 27
## initial value 129.835217
## iter 10 value 9.829857
## iter 20 value 1.817620
## iter 30 value 0.001192
## final value 0.000064
## converged
## # weights: 43
## initial value 157.390266
## iter 10 value 8.338611
## iter 20 value 1.541313
## iter 30 value 0.005079
## final value 0.000075
## converged
## # weights: 11
## initial value 129.818872
## iter 10 value 75.525777
## iter 20 value 57.964228
## iter 30 value 43.875992
## final value 43.827214
## converged
## # weights: 27
## initial value 130.968358
## iter 10 value 45.968354
## iter 20 value 22.634991
## iter 30 value 21.409538
## iter 40 value 21.358119
## iter 50 value 21.351406
## iter 60 value 21.351186
## final value 21.351182
## converged
## # weights: 43
## initial value 213.775591
## iter 10 value 21.725412
## iter 20 value 18.822673
## iter 30 value 18.513150
## iter 40 value 18.456837
## iter 50 value 18.449317
## final value 18.449280
## converged
## # weights: 11
## initial value 118.962754
## iter 10 value 26.053147
## iter 20 value 6.333793
## iter 30 value 4.570285
## iter 40 value 4.297908
## iter 50 value 4.105559
## iter 60 value 3.903061
## iter 70 value 3.826761
## iter 80 value 3.823960
## iter 90 value 3.823294
## iter 100 value 3.822242
## final value 3.822242
## stopped after 100 iterations
## # weights: 27
## initial value 132.606102
## iter 10 value 22.897922
## iter 20 value 1.809082
## iter 30 value 0.796543
## iter 40 value 0.759424
## iter 50 value 0.614426
## iter 60 value 0.580862
## iter 70 value 0.550861
## iter 80 value 0.541313
## iter 90 value 0.484870
## iter 100 value 0.476444
## final value 0.476444
## stopped after 100 iterations
## # weights: 43
## initial value 108.487853
## iter 10 value 9.006639
## iter 20 value 1.563172
## iter 30 value 0.682425
## iter 40 value 0.633490
## iter 50 value 0.545956
## iter 60 value 0.496924
## iter 70 value 0.473405
## iter 80 value 0.439298
## iter 90 value 0.395353
## iter 100 value 0.365287
## final value 0.365287
## stopped after 100 iterations
## # weights: 11
## initial value 119.337907
## iter 10 value 53.721540
## iter 20 value 51.519354
## iter 30 value 50.670578
## iter 40 value 49.482013
## iter 50 value 40.619548
## iter 60 value 14.073622
## iter 70 value 5.816424
## iter 80 value 4.847922
## iter 90 value 3.451476
## iter 100 value 2.080745
## final value 2.080745
## stopped after 100 iterations
## # weights: 27
## initial value 140.648336
## iter 10 value 49.133514
## iter 20 value 34.297381
## iter 30 value 33.313181
## iter 40 value 17.096108
## iter 50 value 9.985887
## iter 60 value 1.717089
## iter 70 value 0.288081
## iter 80 value 0.001470
## final value 0.000094
## converged
## # weights: 43
## initial value 125.329208
## iter 10 value 4.485921
## iter 20 value 0.228080
## iter 30 value 0.006659
## final value 0.000088
## converged
## # weights: 11
## initial value 124.049774
## iter 10 value 57.031473
## iter 20 value 44.512638
## iter 30 value 44.385507
## final value 44.384992
## converged
## # weights: 27
## initial value 126.045603
## iter 10 value 39.049105
## iter 20 value 22.776253
## iter 30 value 20.135451
## iter 40 value 19.958468
## iter 50 value 19.949415
## iter 60 value 19.949154
## final value 19.949153
## converged
## # weights: 43
## initial value 131.415649
## iter 10 value 23.127808
## iter 20 value 19.282709
## iter 30 value 19.178237
## iter 40 value 19.174237
## iter 50 value 19.173870
## iter 60 value 19.173657
## final value 19.173655
## converged
## # weights: 11
## initial value 121.517667
## iter 10 value 70.104915
## iter 20 value 51.842884
## iter 30 value 37.560847
## iter 40 value 17.174634
## iter 50 value 7.825111
## iter 60 value 4.130372
## iter 70 value 4.047645
## iter 80 value 3.974757
## iter 90 value 3.892953
## iter 100 value 3.880627
## final value 3.880627
## stopped after 100 iterations
## # weights: 27
## initial value 156.724237
## iter 10 value 31.687326
## iter 20 value 13.871604
## iter 30 value 11.415025
## iter 40 value 4.734947
## iter 50 value 1.658477
## iter 60 value 1.395765
## iter 70 value 0.901869
## iter 80 value 0.612757
## iter 90 value 0.549314
## iter 100 value 0.538747
## final value 0.538747
## stopped after 100 iterations
## # weights: 43
## initial value 132.598227
## iter 10 value 13.217554
## iter 20 value 3.162245
## iter 30 value 0.557260
## iter 40 value 0.448381
## iter 50 value 0.415241
## iter 60 value 0.381674
## iter 70 value 0.363224
## iter 80 value 0.355472
## iter 90 value 0.350360
## iter 100 value 0.345746
## final value 0.345746
## stopped after 100 iterations
## # weights: 11
## initial value 132.139541
## iter 10 value 53.874624
## iter 20 value 49.262219
## iter 30 value 49.200325
## iter 40 value 48.489657
## iter 50 value 47.952170
## iter 60 value 47.819369
## iter 70 value 47.659295
## iter 80 value 47.630938
## iter 90 value 47.585539
## iter 100 value 47.552074
## final value 47.552074
## stopped after 100 iterations
## # weights: 27
## initial value 122.797825
## iter 10 value 11.536029
## iter 20 value 1.166215
## iter 30 value 0.002579
## final value 0.000066
## converged
## # weights: 43
## initial value 111.656108
## iter 10 value 4.672364
## iter 20 value 0.141012
## iter 30 value 0.001779
## iter 40 value 0.000649
## final value 0.000068
## converged
## # weights: 11
## initial value 123.343747
## iter 10 value 54.681912
## iter 20 value 43.801812
## iter 30 value 43.746268
## final value 43.745415
## converged
## # weights: 27
## initial value 123.511781
## iter 10 value 25.492386
## iter 20 value 20.202171
## iter 30 value 19.920891
## final value 19.919737
## converged
## # weights: 43
## initial value 109.464495
## iter 10 value 21.689217
## iter 20 value 18.433883
## iter 30 value 18.149227
## iter 40 value 18.115337
## iter 50 value 18.114134
## final value 18.114127
## converged
## # weights: 11
## initial value 128.961908
## iter 10 value 49.469897
## iter 20 value 44.367486
## iter 30 value 21.778378
## iter 40 value 6.844399
## iter 50 value 4.596111
## iter 60 value 4.420636
## iter 70 value 3.971717
## iter 80 value 3.937134
## iter 90 value 3.837716
## iter 100 value 3.831022
## final value 3.831022
## stopped after 100 iterations
## # weights: 27
## initial value 117.146045
## iter 10 value 7.488152
## iter 20 value 0.471752
## iter 30 value 0.421101
## iter 40 value 0.368511
## iter 50 value 0.342218
## iter 60 value 0.331822
## iter 70 value 0.317132
## iter 80 value 0.306914
## iter 90 value 0.296449
## iter 100 value 0.281767
## final value 0.281767
## stopped after 100 iterations
## # weights: 43
## initial value 140.529472
## iter 10 value 19.875220
## iter 20 value 2.954415
## iter 30 value 1.161097
## iter 40 value 1.019260
## iter 50 value 0.890732
## iter 60 value 0.728691
## iter 70 value 0.663176
## iter 80 value 0.613073
## iter 90 value 0.578710
## iter 100 value 0.540378
## final value 0.540378
## stopped after 100 iterations
## # weights: 11
## initial value 127.631173
## iter 10 value 21.873628
## iter 20 value 4.440109
## iter 30 value 0.177788
## iter 40 value 0.094798
## iter 50 value 0.024690
## iter 60 value 0.024059
## iter 70 value 0.023937
## iter 80 value 0.020992
## iter 90 value 0.020810
## iter 100 value 0.020705
## final value 0.020705
## stopped after 100 iterations
## # weights: 27
## initial value 127.012445
## iter 10 value 33.312366
## iter 20 value 1.204810
## iter 30 value 0.003554
## final value 0.000078
## converged
## # weights: 43
## initial value 121.024455
## iter 10 value 1.700603
## iter 20 value 0.003619
## final value 0.000088
## converged
## # weights: 11
## initial value 121.802701
## iter 10 value 67.660888
## iter 20 value 55.525621
## iter 30 value 42.687102
## final value 42.579623
## converged
## # weights: 27
## initial value 116.162681
## iter 10 value 27.378517
## iter 20 value 19.998507
## iter 30 value 19.985737
## iter 40 value 19.985715
## iter 40 value 19.985715
## iter 40 value 19.985715
## final value 19.985715
## converged
## # weights: 43
## initial value 118.143759
## iter 10 value 22.054136
## iter 20 value 17.989103
## iter 30 value 17.619595
## iter 40 value 17.317946
## iter 50 value 17.225372
## iter 60 value 17.222953
## iter 70 value 17.222487
## iter 80 value 17.222437
## iter 80 value 17.222437
## iter 80 value 17.222437
## final value 17.222437
## converged
## # weights: 11
## initial value 118.355062
## iter 10 value 49.818324
## iter 20 value 38.647325
## iter 30 value 15.844677
## iter 40 value 2.582960
## iter 50 value 2.136811
## iter 60 value 2.035498
## iter 70 value 2.033350
## iter 80 value 2.021573
## iter 90 value 1.995371
## iter 100 value 1.994256
## final value 1.994256
## stopped after 100 iterations
## # weights: 27
## initial value 130.821997
## iter 10 value 19.933529
## iter 20 value 0.975068
## iter 30 value 0.473534
## iter 40 value 0.432481
## iter 50 value 0.372884
## iter 60 value 0.295422
## iter 70 value 0.276597
## iter 80 value 0.263857
## iter 90 value 0.216302
## iter 100 value 0.209542
## final value 0.209542
## stopped after 100 iterations
## # weights: 43
## initial value 144.732840
## iter 10 value 10.040236
## iter 20 value 0.345443
## iter 30 value 0.298449
## iter 40 value 0.270041
## iter 50 value 0.225858
## iter 60 value 0.195855
## iter 70 value 0.176125
## iter 80 value 0.164022
## iter 90 value 0.152679
## iter 100 value 0.150088
## final value 0.150088
## stopped after 100 iterations
## # weights: 11
## initial value 129.869113
## iter 10 value 50.042465
## iter 20 value 49.908565
## iter 30 value 49.906746
## final value 49.906672
## converged
## # weights: 27
## initial value 136.216049
## iter 10 value 10.058478
## iter 20 value 0.927014
## iter 30 value 0.000386
## final value 0.000089
## converged
## # weights: 43
## initial value 116.240399
## iter 10 value 2.687573
## iter 20 value 0.910154
## iter 30 value 0.001158
## final value 0.000058
## converged
## # weights: 11
## initial value 122.548705
## iter 10 value 48.475518
## iter 20 value 43.263734
## final value 43.262103
## converged
## # weights: 27
## initial value 151.969882
## iter 10 value 29.866683
## iter 20 value 20.041938
## iter 30 value 19.181326
## iter 40 value 18.658426
## iter 50 value 18.646346
## iter 60 value 18.646228
## final value 18.646221
## converged
## # weights: 43
## initial value 123.857709
## iter 10 value 21.867276
## iter 20 value 18.327082
## iter 30 value 18.061485
## iter 40 value 18.034598
## iter 50 value 18.032834
## final value 18.032819
## converged
## # weights: 11
## initial value 133.691360
## iter 10 value 27.686299
## iter 20 value 3.688826
## iter 30 value 2.998116
## iter 40 value 2.982774
## iter 50 value 2.975593
## iter 60 value 2.973846
## iter 70 value 2.972998
## iter 80 value 2.972919
## final value 2.972915
## converged
## # weights: 27
## initial value 121.910769
## iter 10 value 4.186175
## iter 20 value 1.397080
## iter 30 value 0.429392
## iter 40 value 0.403901
## iter 50 value 0.371671
## iter 60 value 0.353374
## iter 70 value 0.346994
## iter 80 value 0.340640
## iter 90 value 0.326373
## iter 100 value 0.320888
## final value 0.320888
## stopped after 100 iterations
## # weights: 43
## initial value 150.467908
## iter 10 value 2.516940
## iter 20 value 0.515624
## iter 30 value 0.419086
## iter 40 value 0.400951
## iter 50 value 0.356741
## iter 60 value 0.321748
## iter 70 value 0.269242
## iter 80 value 0.257949
## iter 90 value 0.244026
## iter 100 value 0.230916
## final value 0.230916
## stopped after 100 iterations
## # weights: 11
## initial value 130.397834
## iter 10 value 50.573102
## iter 20 value 49.909353
## final value 49.906794
## converged
## # weights: 27
## initial value 142.795978
## iter 10 value 8.325156
## iter 20 value 1.733508
## iter 30 value 0.008474
## final value 0.000056
## converged
## # weights: 43
## initial value 147.890271
## iter 10 value 14.812090
## iter 20 value 0.484823
## iter 30 value 0.000580
## final value 0.000074
## converged
## # weights: 11
## initial value 122.928070
## iter 10 value 67.652212
## iter 20 value 46.324443
## iter 30 value 44.176746
## final value 44.154954
## converged
## # weights: 27
## initial value 118.635144
## iter 10 value 27.365477
## iter 20 value 20.236166
## iter 30 value 19.919856
## final value 19.919323
## converged
## # weights: 43
## initial value 123.713602
## iter 10 value 23.115917
## iter 20 value 19.181439
## iter 30 value 19.146092
## iter 40 value 19.135538
## iter 50 value 19.135363
## iter 60 value 19.135305
## final value 19.135297
## converged
## # weights: 11
## initial value 124.268451
## iter 10 value 50.415686
## iter 20 value 50.122205
## iter 30 value 50.099925
## iter 40 value 50.039219
## iter 50 value 48.795443
## iter 60 value 45.810850
## iter 70 value 44.976509
## iter 80 value 44.652199
## iter 90 value 43.655722
## iter 100 value 21.549290
## final value 21.549290
## stopped after 100 iterations
## # weights: 27
## initial value 121.168989
## iter 10 value 12.887627
## iter 20 value 1.421530
## iter 30 value 0.596446
## iter 40 value 0.525751
## iter 50 value 0.482633
## iter 60 value 0.455870
## iter 70 value 0.441781
## iter 80 value 0.405097
## iter 90 value 0.379758
## iter 100 value 0.359516
## final value 0.359516
## stopped after 100 iterations
## # weights: 43
## initial value 154.331260
## iter 10 value 9.135896
## iter 20 value 1.287634
## iter 30 value 0.536307
## iter 40 value 0.516772
## iter 50 value 0.444990
## iter 60 value 0.422222
## iter 70 value 0.414269
## iter 80 value 0.388936
## iter 90 value 0.382924
## iter 100 value 0.374877
## final value 0.374877
## stopped after 100 iterations
## # weights: 11
## initial value 119.161548
## iter 10 value 49.944558
## iter 20 value 48.322924
## iter 30 value 46.428533
## iter 40 value 46.268965
## iter 50 value 46.227049
## iter 60 value 46.155518
## iter 70 value 45.699558
## iter 80 value 37.848275
## iter 90 value 7.852629
## iter 100 value 4.666912
## final value 4.666912
## stopped after 100 iterations
## # weights: 27
## initial value 121.994742
## iter 10 value 4.269484
## iter 20 value 0.323645
## iter 30 value 0.000228
## final value 0.000073
## converged
## # weights: 43
## initial value 120.712721
## iter 10 value 11.633228
## iter 20 value 1.928801
## iter 30 value 0.089821
## iter 40 value 0.000470
## final value 0.000048
## converged
## # weights: 11
## initial value 125.023647
## iter 10 value 52.033224
## iter 20 value 43.475534
## iter 30 value 43.428235
## final value 43.428214
## converged
## # weights: 27
## initial value 150.243876
## iter 10 value 32.636911
## iter 20 value 21.467254
## iter 30 value 19.692563
## iter 40 value 19.331538
## iter 50 value 19.321372
## iter 60 value 19.321093
## final value 19.321088
## converged
## # weights: 43
## initial value 137.965234
## iter 10 value 22.585768
## iter 20 value 18.845985
## iter 30 value 18.359487
## iter 40 value 18.060579
## iter 50 value 17.805017
## iter 60 value 17.768557
## iter 70 value 17.768156
## iter 80 value 17.768097
## final value 17.768090
## converged
## # weights: 11
## initial value 127.936624
## iter 10 value 64.892982
## iter 20 value 52.457973
## iter 30 value 51.893780
## iter 40 value 46.822596
## iter 50 value 20.931506
## iter 60 value 5.855746
## iter 70 value 4.449348
## iter 80 value 3.934110
## iter 90 value 3.778873
## iter 100 value 3.726881
## final value 3.726881
## stopped after 100 iterations
## # weights: 27
## initial value 124.538161
## iter 10 value 19.935878
## iter 20 value 1.179326
## iter 30 value 0.515886
## iter 40 value 0.495212
## iter 50 value 0.459550
## iter 60 value 0.425014
## iter 70 value 0.408161
## iter 80 value 0.387730
## iter 90 value 0.360044
## iter 100 value 0.337881
## final value 0.337881
## stopped after 100 iterations
## # weights: 43
## initial value 133.979878
## iter 10 value 12.011468
## iter 20 value 0.941094
## iter 30 value 0.495870
## iter 40 value 0.477345
## iter 50 value 0.464609
## iter 60 value 0.425356
## iter 70 value 0.381548
## iter 80 value 0.333472
## iter 90 value 0.319381
## iter 100 value 0.304457
## final value 0.304457
## stopped after 100 iterations
## # weights: 11
## initial value 129.556405
## iter 10 value 49.342383
## iter 20 value 35.419965
## iter 30 value 5.074745
## iter 40 value 2.940088
## iter 50 value 2.543858
## iter 60 value 2.312929
## iter 70 value 2.281995
## iter 80 value 2.059116
## iter 90 value 2.012601
## iter 100 value 1.857343
## final value 1.857343
## stopped after 100 iterations
## # weights: 27
## initial value 155.931952
## iter 10 value 9.651371
## iter 20 value 1.510703
## iter 30 value 0.025766
## iter 40 value 0.003870
## iter 50 value 0.000229
## final value 0.000085
## converged
## # weights: 43
## initial value 139.919182
## iter 10 value 7.001515
## iter 20 value 2.265308
## iter 30 value 0.480211
## iter 40 value 0.046809
## iter 50 value 0.001016
## iter 60 value 0.000148
## final value 0.000085
## converged
## # weights: 11
## initial value 137.379619
## iter 10 value 58.587747
## iter 20 value 45.493964
## final value 43.369230
## converged
## # weights: 27
## initial value 122.183968
## iter 10 value 34.103176
## iter 20 value 19.778653
## iter 30 value 19.482816
## iter 40 value 19.412523
## iter 50 value 19.406639
## final value 19.406612
## converged
## # weights: 43
## initial value 127.464513
## iter 10 value 20.014903
## iter 20 value 18.283826
## iter 30 value 17.864550
## iter 40 value 17.854806
## iter 50 value 17.852930
## final value 17.852879
## converged
## # weights: 11
## initial value 126.686154
## iter 10 value 47.126881
## iter 20 value 13.760512
## iter 30 value 5.663815
## iter 40 value 4.107040
## iter 50 value 3.970174
## iter 60 value 3.913599
## iter 70 value 3.760966
## iter 80 value 3.757224
## iter 90 value 3.753338
## iter 100 value 3.748772
## final value 3.748772
## stopped after 100 iterations
## # weights: 27
## initial value 144.486253
## iter 10 value 4.284520
## iter 20 value 0.488472
## iter 30 value 0.440576
## iter 40 value 0.431107
## iter 50 value 0.424352
## iter 60 value 0.415746
## iter 70 value 0.413581
## iter 80 value 0.410783
## iter 90 value 0.409591
## iter 100 value 0.408124
## final value 0.408124
## stopped after 100 iterations
## # weights: 43
## initial value 127.608663
## iter 10 value 4.647335
## iter 20 value 1.221540
## iter 30 value 0.611939
## iter 40 value 0.560234
## iter 50 value 0.535709
## iter 60 value 0.477525
## iter 70 value 0.419169
## iter 80 value 0.399554
## iter 90 value 0.391996
## iter 100 value 0.375996
## final value 0.375996
## stopped after 100 iterations
## # weights: 11
## initial value 138.169000
## iter 10 value 58.741341
## iter 20 value 46.635586
## final value 46.598157
## converged
# Predict on both partitions with model 5.
resultado_prueba5 <- predict(object = modelo5, newdata = prueba)
resultado_entrenamiento5 <- predict(object = modelo5, newdata = entrenamiento)
# Confusion matrix: an evaluation table that breaks down the performance of
# the classification model.
# Confusion matrix for the training predictions (printing left disabled).
mcre5 <- confusionMatrix(
  data = resultado_entrenamiento5,
  reference = entrenamiento[["Species"]]
)
#mcre5
# Confusion matrix for the test predictions (printing left disabled).
mcrp5 <- confusionMatrix(
  data = resultado_prueba5,
  reference = prueba[["Species"]]
)
#mcrp5
# Random forest classifier for Species, tuned with 10-fold cross-validation
# on centred and scaled predictors.
#
# The data set has only 4 predictors, so mtry (the number of predictors
# sampled as split candidates) must lie in 1..4. The previous grid included
# mtry = 6, which randomForest clamped back into range while emitting
# "invalid mtry: reset to within valid range" on every affected fit; that
# candidate merely duplicated mtry = 4. The grid below only contains valid,
# distinct values, so those warnings no longer occur.
modelo6 <- train(
  Species ~ .,
  data = entrenamiento,
  method = "rf", # change this to try another caret method
  preProcess = c("scale", "center"),
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = expand.grid(mtry = c(2, 3, 4))
)
# Class predictions from the sixth model on the training and test partitions.
resultado_entrenamiento6 <- predict(modelo6, newdata = entrenamiento)
resultado_prueba6 <- predict(modelo6, newdata = prueba)

# Confusion matrix: a table that breaks down the performance of the
# classification model by predicted versus reference class.

# Confusion matrix for the training predictions (printed below).
mcre6 <- confusionMatrix(
  data = resultado_entrenamiento6,
  reference = entrenamiento$Species
)
mcre6
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 40 0 0
## versicolor 0 40 0
## virginica 0 0 40
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9697, 1)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3333
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 1.0000 1.0000
# Confusion matrix for the test predictions (printed below).
mcrp6 <- confusionMatrix(
  data = resultado_prueba6,
  reference = prueba$Species
)
mcrp6
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 2
## virginica 0 0 8
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8000
## Specificity 1.0000 0.9000 1.0000
## Pos Pred Value 1.0000 0.8333 1.0000
## Neg Pred Value 1.0000 1.0000 0.9091
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2667
## Detection Prevalence 0.3333 0.4000 0.2667
## Balanced Accuracy 1.0000 0.9500 0.9000
# Summary table: one column per model, first row holding the overall accuracy
# on the training partition and second row the accuracy on the test partition,
# both taken from the confusion matrices computed for each model.
resultados <- data.frame(
  svmLinear = c(mcre1$overall[["Accuracy"]], mcrp1$overall[["Accuracy"]]),
  svmRadial = c(mcre2$overall[["Accuracy"]], mcrp2$overall[["Accuracy"]]),
  svmPoly = c(mcre3$overall[["Accuracy"]], mcrp3$overall[["Accuracy"]]),
  rpart = c(mcre4$overall[["Accuracy"]], mcrp4$overall[["Accuracy"]]),
  nnet = c(mcre5$overall[["Accuracy"]], mcrp5$overall[["Accuracy"]]),
  rf = c(mcre6$overall[["Accuracy"]], mcrp6$overall[["Accuracy"]]),
  row.names = c("Precisión de entrenamiento", "Precisión de prueba")
)
resultados
## svmLinear svmRadial svmPoly rpart nnet
## Precisión de entrenamiento 0.9916667 0.9916667 0.9666667 0.9666667 0.9666667
## Precisión de prueba 0.9666667 0.9333333 0.9333333 0.9333333 0.9666667
## rf
## Precisión de entrenamiento 1.0000000
## Precisión de prueba 0.9333333
De acuerdo con la tabla de resultados, observamos que ningún método presenta sobreajuste. Podemos seleccionar el de redes neuronales por su desempeño (0.9667 tanto en entrenamiento como en prueba).