El paquete caret (Classification And Regression Training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.
# install.packages("ggplot2") #Graficas con mejor diseno
library(ggplot2)
# install.packages("lattice") #Crear Graficos
library(lattice)
# install.packages("caret") # Algoritmos de aprendizaje automatico
library(caret)
# install.packages("datasets") # Usar la base de datos "Iris"
library(datasets)
# install.packages("DataExplorer") # Exploracion de datos
library(DataExplorer)
# install.packages("kernlab") # Paquete con metodos de aprendizaje automatico
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
# install.packages("randomForest") # Paquete para este metodo de clasificacion
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ kernlab::alpha() masks ggplot2::alpha()
## ✖ dplyr::combine() masks randomForest::combine()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ randomForest::margin() masks ggplot2::margin()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the heart dataset (heart.csv).
# The CSV location can be overridden with the HEART_CSV environment variable
# so the script is not tied to a single machine; when the variable is unset it
# falls back to the original path. Run file.choose() interactively to locate
# the file if needed.
default_heart_csv <- "/Users/constantinomilletxacur/Desktop/Concentracion/Modulo 2/heart.csv"
heart_csv_path <- Sys.getenv("HEART_CSV", unset = default_heart_csv)
if (!file.exists(heart_csv_path)) {
  stop("Dataset file not found: ", heart_csv_path, call. = FALSE)
}
heart_data <- read.csv(heart_csv_path)

# Columns holding categorical codes; they are converted to factors so the
# models treat them as classes rather than as numeric quantities.
categorical_cols <- c(
  "sex", "cp", "fbs", "restecg", "exang", "slope", "ca", "thal", "target"
)

# Fail early with a readable message if the CSV lacks an expected column.
missing_cols <- setdiff(categorical_cols, names(heart_data))
if (length(missing_cols) > 0) {
  stop(
    "Missing expected columns: ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Convert all categorical columns in one pass (column order is unchanged).
heart_data[categorical_cols] <- lapply(heart_data[categorical_cols], as.factor)
Partir los Datos 80-20
# Fix the RNG seed so the split (and the resampling in the models fitted
# afterwards, when the script runs top to bottom) is reproducible.
set.seed(123)

# 80/20 split based on the outcome column; list = FALSE returns the selected
# row indices as a matrix instead of a list.
# NOTE(review): the reported test accuracy of 1.0 for rf may point to
# duplicated rows shared by the training and test sets -- confirm with
# anyDuplicated(heart_data) before trusting the test metrics.
trainIndex <- createDataPartition(
  y = heart_data$target,
  p = 0.8,
  list = FALSE
)

# Rows selected by the partition form the training set; the rest are held out.
heart_train <- heart_data[trainIndex, ]
heart_test <- heart_data[-trainIndex, ]
Modelos para Clasificación — SVM (Máquina de Vectores de Soporte): SVM con Kernel Lineal
# Linear-kernel SVM fitted on all predictors, using 5-fold cross-validation
# as the resampling scheme.
svm_linear_model <- train(
  target ~ .,
  data = heart_train,
  method = "svmLinear",
  trControl = trainControl(method = "cv", number = 5)
)

# Confusion matrix of the predictions on the training set.
svm_linear_train_predictions <- predict(
  svm_linear_model,
  newdata = heart_train
)
mcre1 <- confusionMatrix(
  data = svm_linear_train_predictions,
  reference = heart_train$target
)

# Confusion matrix of the predictions on the test set, shown in the report.
svm_linear_test_predictions <- predict(
  svm_linear_model,
  newdata = heart_test
)
mcrp1 <- confusionMatrix(
  data = svm_linear_test_predictions,
  reference = heart_test$target
)
mcrp1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 84 7
## 1 15 98
##
## Accuracy : 0.8922
## 95% CI : (0.8413, 0.9312)
## No Information Rate : 0.5147
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.7836
##
## Mcnemar's Test P-Value : 0.1356
##
## Sensitivity : 0.8485
## Specificity : 0.9333
## Pos Pred Value : 0.9231
## Neg Pred Value : 0.8673
## Prevalence : 0.4853
## Detection Rate : 0.4118
## Detection Prevalence : 0.4461
## Balanced Accuracy : 0.8909
##
## 'Positive' Class : 0
##
SVM con Kernel Radial
# Radial-kernel SVM fitted on all predictors, using 5-fold cross-validation
# as the resampling scheme.
svm_radial_model <- train(
  target ~ .,
  data = heart_train,
  method = "svmRadial",
  trControl = trainControl(method = "cv", number = 5)
)

# Confusion matrix of the predictions on the training set.
svm_radial_train_predictions <- predict(
  svm_radial_model,
  newdata = heart_train
)
mcre2 <- confusionMatrix(
  data = svm_radial_train_predictions,
  reference = heart_train$target
)

# Confusion matrix of the predictions on the test set, shown in the report.
svm_radial_test_predictions <- predict(
  svm_radial_model,
  newdata = heart_test
)
mcrp2 <- confusionMatrix(
  data = svm_radial_test_predictions,
  reference = heart_test$target
)
mcrp2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 93 6
## 1 6 99
##
## Accuracy : 0.9412
## 95% CI : (0.8995, 0.9692)
## No Information Rate : 0.5147
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8823
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9394
## Specificity : 0.9429
## Pos Pred Value : 0.9394
## Neg Pred Value : 0.9429
## Prevalence : 0.4853
## Detection Rate : 0.4559
## Detection Prevalence : 0.4853
## Balanced Accuracy : 0.9411
##
## 'Positive' Class : 0
##
SVM con Kernel Polinómico
# Polynomial-kernel SVM fitted on all predictors, using 5-fold
# cross-validation as the resampling scheme.
svm_poly_model <- train(
  target ~ .,
  data = heart_train,
  method = "svmPoly",
  trControl = trainControl(method = "cv", number = 5)
)

# Confusion matrix of the predictions on the training set.
svm_poly_train_predictions <- predict(
  svm_poly_model,
  newdata = heart_train
)
mcre3 <- confusionMatrix(
  data = svm_poly_train_predictions,
  reference = heart_train$target
)

# Confusion matrix of the predictions on the test set, shown in the report.
svm_poly_test_predictions <- predict(
  svm_poly_model,
  newdata = heart_test
)
mcrp3 <- confusionMatrix(
  data = svm_poly_test_predictions,
  reference = heart_test$target
)
mcrp3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 99 3
## 1 0 102
##
## Accuracy : 0.9853
## 95% CI : (0.9576, 0.997)
## No Information Rate : 0.5147
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9706
##
## Mcnemar's Test P-Value : 0.2482
##
## Sensitivity : 1.0000
## Specificity : 0.9714
## Pos Pred Value : 0.9706
## Neg Pred Value : 1.0000
## Prevalence : 0.4853
## Detection Rate : 0.4853
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.9857
##
## 'Positive' Class : 0
##
Árbol de Decisión
# Decision tree (rpart) fitted on all predictors, using 5-fold
# cross-validation as the resampling scheme.
rpart_model <- train(
  target ~ .,
  data = heart_train,
  method = "rpart",
  trControl = trainControl(method = "cv", number = 5)
)

# Confusion matrix of the predictions on the training set.
rpart_train_predictions <- predict(
  rpart_model,
  newdata = heart_train
)
mcre4 <- confusionMatrix(
  data = rpart_train_predictions,
  reference = heart_train$target
)

# Confusion matrix of the predictions on the test set, shown in the report.
rpart_test_predictions <- predict(
  rpart_model,
  newdata = heart_test
)
mcrp4 <- confusionMatrix(
  data = rpart_test_predictions,
  reference = heart_test$target
)
mcrp4
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 74 14
## 1 25 91
##
## Accuracy : 0.8088
## 95% CI : (0.7481, 0.8604)
## No Information Rate : 0.5147
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.6161
##
## Mcnemar's Test P-Value : 0.1093
##
## Sensitivity : 0.7475
## Specificity : 0.8667
## Pos Pred Value : 0.8409
## Neg Pred Value : 0.7845
## Prevalence : 0.4853
## Detection Rate : 0.3627
## Detection Prevalence : 0.4314
## Balanced Accuracy : 0.8071
##
## 'Positive' Class : 0
##
Redes Neuronales
# Neural network (nnet) fitted on all predictors, using 5-fold
# cross-validation as the resampling scheme. trace = FALSE is forwarded
# through train() to the underlying fitting routine.
nnet_model <- train(
  target ~ .,
  data = heart_train,
  method = "nnet",
  trControl = trainControl(method = "cv", number = 5),
  trace = FALSE
)

# Confusion matrix of the predictions on the training set.
nnet_train_predictions <- predict(
  nnet_model,
  newdata = heart_train
)
mcre5 <- confusionMatrix(
  data = nnet_train_predictions,
  reference = heart_train$target
)

# Confusion matrix of the predictions on the test set, shown in the report.
nnet_test_predictions <- predict(
  nnet_model,
  newdata = heart_test
)
mcrp5 <- confusionMatrix(
  data = nnet_test_predictions,
  reference = heart_test$target
)
mcrp5
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 92 7
## 1 7 98
##
## Accuracy : 0.9314
## 95% CI : (0.8875, 0.962)
## No Information Rate : 0.5147
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8626
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9293
## Specificity : 0.9333
## Pos Pred Value : 0.9293
## Neg Pred Value : 0.9333
## Prevalence : 0.4853
## Detection Rate : 0.4510
## Detection Prevalence : 0.4853
## Balanced Accuracy : 0.9313
##
## 'Positive' Class : 0
##
Random Forest
# Random forest fitted on all predictors, using 5-fold cross-validation
# as the resampling scheme.
rf_model <- train(
  target ~ .,
  data = heart_train,
  method = "rf",
  trControl = trainControl(method = "cv", number = 5)
)

# Confusion matrix of the predictions on the training set.
rf_train_predictions <- predict(
  rf_model,
  newdata = heart_train
)
mcre6 <- confusionMatrix(
  data = rf_train_predictions,
  reference = heart_train$target
)

# Confusion matrix of the predictions on the test set, shown in the report.
rf_test_predictions <- predict(
  rf_model,
  newdata = heart_test
)
mcrp6 <- confusionMatrix(
  data = rf_test_predictions,
  reference = heart_test$target
)
mcrp6
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 99 0
## 1 0 105
##
## Accuracy : 1
## 95% CI : (0.9821, 1)
## No Information Rate : 0.5147
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.4853
## Detection Rate : 0.4853
## Detection Prevalence : 0.4853
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : 0
##
# Summary table: one column per model, one row per evaluation set.
# Confusion matrices are grouped by model name, in report column order.
train_cms <- list(
  svmLinear = mcre1, svmRadial = mcre2, svmPoly = mcre3,
  rpart = mcre4, nnet = mcre5, rf = mcre6
)
test_cms <- list(
  svmLinear = mcrp1, svmRadial = mcrp2, svmPoly = mcrp3,
  rpart = mcrp4, nnet = mcrp5, rf = mcrp6
)

# Extract the overall accuracy from a single confusionMatrix result.
get_accuracy <- function(cm) cm$overall[["Accuracy"]]

# First row: training-set accuracy; second row: test-set accuracy.
resultados <- data.frame(rbind(
  vapply(train_cms, get_accuracy, numeric(1)),
  vapply(test_cms, get_accuracy, numeric(1))
))

# NOTE(review): the labels say "Precisión", but the values are the Accuracy
# statistic; labels are kept as-is to match the printed report.
rownames(resultados) <- c("Precisión de Entrenamiento", "Precisión de Prueba")
resultados
## svmLinear svmRadial svmPoly rpart nnet rf
## Precisión de Entrenamiento 0.8952497 0.9354446 1.0000000 0.7917174 0.9086480 1
## Precisión de Prueba 0.8921569 0.9411765 0.9852941 0.8088235 0.9313725 1
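Acorde al resumen de resultados, rf (1.00) y svmPoly (0.985) obtienen la mayor precisión de prueba, pero su precisión de entrenamiento de 1.00 podría indicar sobreajuste; descartando esos dos, el modelo mejor evaluado es el de svmRadial (0.941 en prueba).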