# Paso 2
folds <- createFolds(data$DEATH_EVENT,k=5)
entrenamiento <- data_norm[-folds[[5]],]
prueba <- data_norm[folds[[5]],]
# Etiquetas
entrenamiento_labels <- data$DEATH_EVENT[-folds[[5]]]
prueba_labels <- data$DEATH_EVENT[folds[[5]]]
# Paso 3: Aplicar método K-NN
train.kknn(entrenamiento_labels ~., data =entrenamiento, kmax = 50 )
##
## Call:
## train.kknn(formula = entrenamiento_labels ~ ., data = entrenamiento, kmax = 50)
##
## Type of response variable: nominal
## Minimal misclassification: 0.2301255
## Best kernel: optimal
## Best k: 20
prediccion <- knn(entrenamiento,prueba, cl = entrenamiento_labels, k = 47)
confusionMatrix(data = prediccion, reference = prueba_labels)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Vivo Muerto
## Vivo 40 19
## Muerto 0 1
##
## Accuracy : 0.6833
## 95% CI : (0.5504, 0.7974)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.4516
##
## Kappa : 0.0656
##
## Mcnemar's Test P-Value : 3.636e-05
##
## Sensitivity : 1.0000
## Specificity : 0.0500
## Pos Pred Value : 0.6780
## Neg Pred Value : 1.0000
## Prevalence : 0.6667
## Detection Rate : 0.6667
## Detection Prevalence : 0.9833
## Balanced Accuracy : 0.5250
##
## 'Positive' Class : Vivo
##
# Paso 4
exactitud <- numeric(length = 5)
for(i in 1:5){
# Definir conjuntos entrenamiento y prueba según el fold actual
prueba <- data_norm[folds[[i]],]
entrenamiento <- data_norm[-folds[[i]],]
# Etiquetas
entrenamiento_labels <- data$DEATH_EVENT[-folds[[i]]]
prueba_labels <- data$DEATH_EVENT[folds[[i]]]
pred_knn <- knn(entrenamiento,prueba, cl= entrenamiento_labels, k = 47)
# Evaluar exactitud del modelo en cada fold
cm <- confusionMatrix(pred_knn, prueba_labels)
exactitud[i] <- cm$overall["Accuracy"]
# Mostrar resultado del fold
cat("Fold", i, "- Exactitud:", exactitud[i], "\n")
}
## Fold 1 - Exactitud: 0.6779661
## Fold 2 - Exactitud: 0.6833333
## Fold 3 - Exactitud: 0.6833333
## Fold 4 - Exactitud: 0.6833333
## Fold 5 - Exactitud: 0.6833333
Exactitud_promedio <- round(mean(exactitud),4)*100
paste("Exactitud_promedio: ",Exactitud_promedio,"%",sep="")
## [1] "Exactitud_promedio: 68.23%"
# Validación cruzada
set.seed(2025)
train_control <- trainControl(method="cv",number=10,savePredictions = TRUE)
knn_cv <- train(DEATH_EVENT ~ ., data=cbind(prueba, DEATH_EVENT=prueba_labels),
method = "knn", trControl = train_control, tuneGrid = data.frame(k=47))
knn_cv
## k-Nearest Neighbors
##
## 60 samples
## 12 predictors
## 2 classes: 'Vivo', 'Muerto'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 54, 54, 54, 54, 54, 54, ...
## Resampling results:
##
## Accuracy Kappa
## 0.6666667 0
##
## Tuning parameter 'k' was held constant at a value of 47
Wine Quality
library(readr)
winequality_red <- read_csv("winequality-red.csv")
## Rows: 1599 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): fixed acidity;volatile acidity;citric acid;residual sugar;chlorides...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
winequality_white <- read_csv("winequality-white.csv")
## Rows: 4898 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): fixed acidity;volatile acidity;citric acid;residual sugar;chlorides...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Limpieza de datos
winequality_red <- separate(winequality_red, col = 1, into = c("fixed_acidity","volatile_acidity","citric_acid","residual_sugar",
"chlorides","free_sulfur_dioxide","total_sulfur_dioxide",
"density","pH","sulphates","alcohol","quality"), sep = ";")
winequality_white <- separate(winequality_white, col = 1, into = c("fixed_acidity","volatile_acidity","citric_acid","residual_sugar",
"chlorides","free_sulfur_dioxide","total_sulfur_dioxide",
"density","pH","sulphates","alcohol","quality"), sep = ";")
winequality_red$quality <- as.factor(winequality_red$quality)
winequality_white$quality <- as.factor(winequality_white$quality)
winequality_red <- na.omit(winequality_red)
winequality_white <- na.omit(winequality_white)
# Normalización Min-Max con rescale()
wine_red_norm <- winequality_red %>% select(-quality) %>%
mutate(across(where(is.numeric), rescale))
wine_white_norm <- winequality_white %>% select(-quality) %>%
mutate(across(where(is.numeric), rescale))
# Paso 2
folds_wine_red <- createFolds(winequality_red$quality,k=5)
folds_wine_white <- createFolds(winequality_white$quality, k=5)
entrenamiento_w_red <- wine_red_norm[-folds_wine_red[[5]],]
prueba_w_red <- wine_red_norm[folds_wine_red[[5]],]
entrenamiento_w_white <- wine_white_norm[-folds_wine_white[[5]],]
prueba_w_white <- wine_white_norm[folds_wine_white[[5]],]
# Etiquetas
entrenamiento_labels_w_red <- winequality_red$quality[-folds_wine_red[[5]]]
prueba_labels_w_red <- winequality_red$quality[folds_wine_red[[5]]]
entrenamiento_labels_w_white <- winequality_white$quality[-folds_wine_white[[5]]]
prueba_labels_w_white <- winequality_white$quality[folds_wine_white[[5]]]
# Entrenamiento
##Red wine
train.kknn(entrenamiento_labels_w_red ~., data =entrenamiento_w_red, kmax = 50 )
##
## Call:
## train.kknn(formula = entrenamiento_labels_w_red ~ ., data = entrenamiento_w_red, kmax = 50)
##
## Type of response variable: nominal
## Minimal misclassification: 0.4295775
## Best kernel: optimal
## Best k: 5
prediccion_red_wine <- knn(entrenamiento_w_red,prueba_w_red, cl = entrenamiento_labels_w_red, k = 5)
confusionMatrix(data = prediccion_red_wine, reference = prueba_labels_w_red)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 3 4 5 6 7 8
## 3 0 0 0 0 0 0
## 4 0 0 0 1 1 0
## 5 1 8 81 48 1 0
## 6 1 2 51 67 18 3
## 7 0 1 4 12 20 1
## 8 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.5234
## 95% CI : (0.4672, 0.5791)
## No Information Rate : 0.4237
## P-Value [Acc > NIR] : 0.0002034
##
## Kappa : 0.2376
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 3 Class: 4 Class: 5 Class: 6 Class: 7 Class: 8
## Sensitivity 0.000000 0.000000 0.5956 0.5234 0.50000 0.00000
## Specificity 1.000000 0.993548 0.6865 0.6114 0.93594 1.00000
## Pos Pred Value NaN 0.000000 0.5827 0.4718 0.52632 NaN
## Neg Pred Value 0.993769 0.965517 0.6978 0.6592 0.92933 0.98754
## Prevalence 0.006231 0.034268 0.4237 0.3988 0.12461 0.01246
## Detection Rate 0.000000 0.000000 0.2523 0.2087 0.06231 0.00000
## Detection Prevalence 0.000000 0.006231 0.4330 0.4424 0.11838 0.00000
## Balanced Accuracy 0.500000 0.496774 0.6410 0.5674 0.71797 0.50000
##White wine
train.kknn(entrenamiento_labels_w_white ~., data = entrenamiento_w_white, kmax=50)
##
## Call:
## train.kknn(formula = entrenamiento_labels_w_white ~ ., data = entrenamiento_w_white, kmax = 50)
##
## Type of response variable: nominal
## Minimal misclassification: 0.4139867
## Best kernel: optimal
## Best k: 1
prediccion_white_wine <- knn(entrenamiento_w_white,prueba_w_white, cl = entrenamiento_labels_w_white, k = 1)
confusionMatrix(data = prediccion_white_wine, reference = prueba_labels_w_white)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 3 4 5 6 7 8 9
## 3 1 0 2 0 1 0 0
## 4 0 7 4 9 2 0 0
## 5 1 13 170 76 18 0 1
## 6 1 10 90 284 65 13 0
## 7 1 3 22 61 83 10 0
## 8 0 0 4 9 6 12 0
## 9 0 0 0 0 1 0 0
##
## Overall Statistics
##
## Accuracy : 0.5684
## 95% CI : (0.5367, 0.5996)
## No Information Rate : 0.448
## P-Value [Acc > NIR] : 2.707e-14
##
## Kappa : 0.3545
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 3 Class: 4 Class: 5 Class: 6 Class: 7 Class: 8
## Sensitivity 0.250000 0.212121 0.5822 0.6469 0.47159 0.34286
## Specificity 0.996926 0.984161 0.8416 0.6691 0.87935 0.97989
## Pos Pred Value 0.250000 0.318182 0.6093 0.6134 0.46111 0.38710
## Neg Pred Value 0.996926 0.972860 0.8260 0.7002 0.88375 0.97576
## Prevalence 0.004082 0.033673 0.2980 0.4480 0.17959 0.03571
## Detection Rate 0.001020 0.007143 0.1735 0.2898 0.08469 0.01224
## Detection Prevalence 0.004082 0.022449 0.2847 0.4724 0.18367 0.03163
## Balanced Accuracy 0.623463 0.598141 0.7119 0.6580 0.67547 0.66138
## Class: 9
## Sensitivity 0.00000
## Specificity 0.99898
## Pos Pred Value 0.00000
## Neg Pred Value 0.99898
## Prevalence 0.00102
## Detection Rate 0.00000
## Detection Prevalence 0.00102
## Balanced Accuracy 0.49949
# Validación cruzada
##Red wine
set.seed(2025)
train_control_red <- trainControl(method="cv",number=10,savePredictions = TRUE)
knn_cv_red <- train(quality ~ ., data=cbind(prueba_w_red, quality=prueba_labels_w_red),
method = "knn", trControl = train_control, tuneGrid = data.frame(k=5))
# Resultados de validación cruzada
knn_cv_red
## k-Nearest Neighbors
##
## 321 samples
## 11 predictor
## 6 classes: '3', '4', '5', '6', '7', '8'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 289, 288, 288, 290, 288, 288, ...
## Resampling results:
##
## Accuracy Kappa
## 0.505022 0.1752814
##
## Tuning parameter 'k' was held constant at a value of 5
##White wine
set.seed(2025)
train_control_white <- trainControl(method="cv",number=10,savePredictions = TRUE)
knn_cv_white <- train(quality ~ ., data=cbind(prueba_w_white, quality=prueba_labels_w_white),
method = "knn", trControl = train_control, tuneGrid = data.frame(k=1))
# Resultados de validación cruzada
knn_cv_white
## k-Nearest Neighbors
##
## 980 samples
## 11 predictor
## 7 classes: '3', '4', '5', '6', '7', '8', '9'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 882, 884, 880, 884, 882, 882, ...
## Resampling results:
##
## Accuracy Kappa
## 0.4226225 0.1213336
##
## Tuning parameter 'k' was held constant at a value of 1