# Base de datos
records <- read.csv("heart_failure_clinical_records_dataset.csv")
summary(records)
## age anaemia creatinine_phosphokinase diabetes
## Min. :40.00 Min. :0.0000 Min. : 23.0 Min. :0.0000
## 1st Qu.:51.00 1st Qu.:0.0000 1st Qu.: 116.5 1st Qu.:0.0000
## Median :60.00 Median :0.0000 Median : 250.0 Median :0.0000
## Mean :60.83 Mean :0.4314 Mean : 581.8 Mean :0.4181
## 3rd Qu.:70.00 3rd Qu.:1.0000 3rd Qu.: 582.0 3rd Qu.:1.0000
## Max. :95.00 Max. :1.0000 Max. :7861.0 Max. :1.0000
## ejection_fraction high_blood_pressure platelets serum_creatinine
## Min. :14.00 Min. :0.0000 Min. : 25100 Min. :0.500
## 1st Qu.:30.00 1st Qu.:0.0000 1st Qu.:212500 1st Qu.:0.900
## Median :38.00 Median :0.0000 Median :262000 Median :1.100
## Mean :38.08 Mean :0.3512 Mean :263358 Mean :1.394
## 3rd Qu.:45.00 3rd Qu.:1.0000 3rd Qu.:303500 3rd Qu.:1.400
## Max. :80.00 Max. :1.0000 Max. :850000 Max. :9.400
## serum_sodium sex smoking time
## Min. :113.0 Min. :0.0000 Min. :0.0000 Min. : 4.0
## 1st Qu.:134.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 73.0
## Median :137.0 Median :1.0000 Median :0.0000 Median :115.0
## Mean :136.6 Mean :0.6488 Mean :0.3211 Mean :130.3
## 3rd Qu.:140.0 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:203.0
## Max. :148.0 Max. :1.0000 Max. :1.0000 Max. :285.0
## DEATH_EVENT
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.3211
## 3rd Qu.:1.0000
## Max. :1.0000
# Visión general de toda la base de datos
skimr::skim(records)
| Name | records |
| Number of rows | 299 |
| Number of columns | 13 |
| _______________________ | |
| Column type frequency: | |
| numeric | 13 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 60.83 | 11.89 | 40.0 | 51.0 | 60.0 | 70.0 | 95.0 | ▆▇▇▂▁ |
| anaemia | 0 | 1 | 0.43 | 0.50 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ▇▁▁▁▆ |
| creatinine_phosphokinase | 0 | 1 | 581.84 | 970.29 | 23.0 | 116.5 | 250.0 | 582.0 | 7861.0 | ▇▁▁▁▁ |
| diabetes | 0 | 1 | 0.42 | 0.49 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ▇▁▁▁▆ |
| ejection_fraction | 0 | 1 | 38.08 | 11.83 | 14.0 | 30.0 | 38.0 | 45.0 | 80.0 | ▃▇▂▂▁ |
| high_blood_pressure | 0 | 1 | 0.35 | 0.48 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ▇▁▁▁▅ |
| platelets | 0 | 1 | 263358.03 | 97804.24 | 25100.0 | 212500.0 | 262000.0 | 303500.0 | 850000.0 | ▂▇▂▁▁ |
| serum_creatinine | 0 | 1 | 1.39 | 1.03 | 0.5 | 0.9 | 1.1 | 1.4 | 9.4 | ▇▁▁▁▁ |
| serum_sodium | 0 | 1 | 136.63 | 4.41 | 113.0 | 134.0 | 137.0 | 140.0 | 148.0 | ▁▁▃▇▁ |
| sex | 0 | 1 | 0.65 | 0.48 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | ▅▁▁▁▇ |
| smoking | 0 | 1 | 0.32 | 0.47 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ▇▁▁▁▃ |
| time | 0 | 1 | 130.26 | 77.61 | 4.0 | 73.0 | 115.0 | 203.0 | 285.0 | ▆▇▃▆▃ |
| DEATH_EVENT | 0 | 1 | 0.32 | 0.47 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ▇▁▁▁▃ |
# Características de las variables de la base de datos
glimpse(records)
## Rows: 299
## Columns: 13
## $ age <dbl> 75, 55, 65, 50, 65, 90, 75, 60, 65, 80, 75, 6…
## $ anaemia <int> 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, …
## $ creatinine_phosphokinase <int> 582, 7861, 146, 111, 160, 47, 246, 315, 157, …
## $ diabetes <int> 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
## $ ejection_fraction <int> 20, 38, 20, 20, 20, 40, 15, 60, 65, 35, 38, 2…
## $ high_blood_pressure <int> 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, …
## $ platelets <dbl> 265000, 263358, 162000, 210000, 327000, 20400…
## $ serum_creatinine <dbl> 1.90, 1.10, 1.30, 1.90, 2.70, 2.10, 1.20, 1.1…
## $ serum_sodium <int> 130, 136, 129, 137, 116, 132, 137, 131, 138, …
## $ sex <int> 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, …
## $ smoking <int> 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, …
## $ time <int> 4, 6, 7, 7, 8, 8, 10, 10, 10, 10, 10, 10, 11,…
## $ DEATH_EVENT <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, …
# Información de las primeras filas de la base de datos
head(records)
## age anaemia creatinine_phosphokinase diabetes ejection_fraction
## 1 75 0 582 0 20
## 2 55 0 7861 0 38
## 3 65 0 146 0 20
## 4 50 1 111 0 20
## 5 65 1 160 1 20
## 6 90 1 47 0 40
## high_blood_pressure platelets serum_creatinine serum_sodium sex smoking time
## 1 1 265000 1.9 130 1 0 4
## 2 0 263358 1.1 136 1 0 6
## 3 0 162000 1.3 129 1 1 7
## 4 0 210000 1.9 137 1 0 7
## 5 0 327000 2.7 116 0 0 8
## 6 1 204000 2.1 132 1 1 8
## DEATH_EVENT
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# Porcentaje total de valores faltantes
pct_miss(records)
## [1] 0
# Normalización de variable
records_norm <- records %>%
select(-DEATH_EVENT) %>%
mutate(across(everything(), rescale))
library(caret)
## Loading required package: lattice
# Crear folds usando la variable respuesta del dataset original
folds <- createFolds(records$DEATH_EVENT, k = 5)
# Definir conjuntos de entrenamiento y prueba usando la data normalizada
entrenamiento <- records_norm[-folds[[5]], ]
prueba <- records_norm[folds[[5]], ]
# Definir etiquetas usando la data original
entrenamiento_labels <- records$DEATH_EVENT[-folds[[5]]]
prueba_labels <- records$DEATH_EVENT[folds[[5]]]
# Ver tamaño de cada conjunto
dim(entrenamiento)[1]
## [1] 239
dim(prueba)[1]
## [1] 60
library(kknn)
##
## Attaching package: 'kknn'
## The following object is masked from 'package:caret':
##
## contr.dummy
library(class)
library(caret)
# Convertir etiquetas a factor
entrenamiento_labels <- as.factor(entrenamiento_labels)
prueba_labels <- as.factor(prueba_labels)
# Buscar el mejor valor de k
modelo <- train.kknn(entrenamiento_labels ~ ., data = entrenamiento, kmax = 50)
modelo
##
## Call:
## train.kknn(formula = entrenamiento_labels ~ ., data = entrenamiento, kmax = 50)
##
## Type of response variable: nominal
## Minimal misclassification: 0.2677824
## Best kernel: optimal
## Best k: 15
# Predecir con el mejor k
pred <- knn(entrenamiento,
prueba,
cl = entrenamiento_labels,
k = modelo$best.parameters$k)
# Evaluar el modelo
confusionMatrix(data = pred, reference = prueba_labels)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 40 16
## 1 0 4
##
## Accuracy : 0.7333
## 95% CI : (0.6034, 0.8393)
## No Information Rate : 0.6667
## P-Value [Acc > NIR] : 0.1691967
##
## Kappa : 0.25
##
## Mcnemar's Test P-Value : 0.0001768
##
## Sensitivity : 1.0000
## Specificity : 0.2000
## Pos Pred Value : 0.7143
## Neg Pred Value : 1.0000
## Prevalence : 0.6667
## Detection Rate : 0.6667
## Detection Prevalence : 0.9333
## Balanced Accuracy : 0.6000
##
## 'Positive' Class : 0
##
# Guardar la exactitud de cada fold
exactitud <- numeric(length = 5)
for(i in 1:5){
# Definir conjuntos entrenamiento y prueba según el fold actual
prueba <- records_norm[folds[[i]], ]
entrenamiento <- records_norm[-folds[[i]], ]
# Etiquetas
entrenamiento_labels <- as.factor(records$DEATH_EVENT[-folds[[i]]])
prueba_labels <- as.factor(records$DEATH_EVENT[folds[[i]]])
# Aplicar K-NN con el mejor valor de k encontrado
pred_knn <- knn(entrenamiento,
prueba,
cl = entrenamiento_labels,
k = modelo$best.parameters$k)
# Evaluar exactitud del modelo en cada fold
cm <- confusionMatrix(pred_knn, prueba_labels)
exactitud[i] <- cm$overall["Accuracy"]
# Mostrar resultado del fold
cat("Fold", i, "- Exactitud:", exactitud[i], "\n")
}
## Fold 1 - Exactitud: 0.8
## Fold 2 - Exactitud: 0.7
## Fold 3 - Exactitud: 0.7288136
## Fold 4 - Exactitud: 0.6833333
## Fold 5 - Exactitud: 0.7333333
# Exactitud promedio de validación cruzada
Exactitud_promedio <- round(mean(exactitud), 4) * 100
paste("Exactitud_promedio: ", Exactitud_promedio, "%", sep = "")
## [1] "Exactitud_promedio: 72.91%"
# Recuperar la partición original del Paso 2
entrenamiento <- records_norm[-folds[[5]], ]
prueba <- records_norm[folds[[5]], ]
entrenamiento_labels <- as.factor(records$DEATH_EVENT[-folds[[5]]])
prueba_labels <- as.factor(records$DEATH_EVENT[folds[[5]]])
pred <- knn(entrenamiento,
prueba,
cl = entrenamiento_labels,
k = modelo$best.parameters$k)
# Matriz de confusión del modelo
tabla1 <- table(prueba_labels, pred)
tabla1
## pred
## prueba_labels 0 1
## 0 40 0
## 1 16 4
# Calcular exactitud
TA <- (sum(diag(tabla1))) / sum(tabla1)
round(TA, 4)
## [1] 0.7333
# Validación cruzada automática con caret usando toda la data normalizada
set.seed(2025)
train_control <- trainControl(method = "cv", number = 10, savePredictions = TRUE)
knn_cv <- train(DEATH_EVENT ~ .,
data = cbind(records_norm, DEATH_EVENT = as.factor(records$DEATH_EVENT)),
method = "knn",
trControl = train_control,
tuneGrid = data.frame(k = modelo$best.parameters$k))
# Resultados de validación cruzada
knn_cv
## k-Nearest Neighbors
##
## 299 samples
## 12 predictor
## 2 classes: '0', '1'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 269, 269, 268, 270, 270, 268, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7304598 0.2432598
##
## Tuning parameter 'k' was held constant at a value of 15
# Matriz de confusión usando predicciones guardadas por caret
confusionMatrix(knn_cv$pred$pred, knn_cv$pred$obs)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 195 73
## 1 8 23
##
## Accuracy : 0.7291
## 95% CI : (0.6749, 0.7787)
## No Information Rate : 0.6789
## P-Value [Acc > NIR] : 0.0348
##
## Kappa : 0.2437
##
## Mcnemar's Test P-Value : 1.151e-12
##
## Sensitivity : 0.9606
## Specificity : 0.2396
## Pos Pred Value : 0.7276
## Neg Pred Value : 0.7419
## Prevalence : 0.6789
## Detection Rate : 0.6522
## Detection Prevalence : 0.8963
## Balanced Accuracy : 0.6001
##
## 'Positive' Class : 0
##
#Leer base de datos / vino blanco
library(readr)
winequality_white <- read_delim("winequality-white.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE)
## Rows: 4898 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (12): fixed acidity, volatile acidity, citric acid, residual sugar, chlo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Leer base de datos / vino rojo
library(readr)
winequality_red <- read_delim("winequality-red.csv",
delim = ";", escape_double = FALSE, trim_ws = TRUE)
## Rows: 1599 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (12): fixed acidity, volatile acidity, citric acid, residual sugar, chlo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Unir bases de datos
library(dplyr)
# Añadir una variable para identificar el tipo de vino
winequality_red$tipo <- "rojo"
winequality_white$tipo <- "blanco"
# Unir las dos bases de datos
winequality <- bind_rows(winequality_red, winequality_white)
# Convertir la variable respuesta en factor
winequality$tipo <- as.factor(winequality$tipo)
#Preparar y limpiar datos
head(winequality)
## # A tibble: 6 × 13
## `fixed acidity` `volatile acidity` `citric acid` `residual sugar` chlorides
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7.4 0.7 0 1.9 0.076
## 2 7.8 0.88 0 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.7 0 1.9 0.076
## 6 7.4 0.66 0 1.8 0.075
## # ℹ 8 more variables: `free sulfur dioxide` <dbl>,
## # `total sulfur dioxide` <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## # alcohol <dbl>, quality <dbl>, tipo <fct>
str(winequality)
## spc_tbl_ [6,497 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ fixed acidity : num [1:6497] 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile acidity : num [1:6497] 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric acid : num [1:6497] 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual sugar : num [1:6497] 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num [1:6497] 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free sulfur dioxide : num [1:6497] 11 25 15 17 11 13 15 15 9 17 ...
## $ total sulfur dioxide: num [1:6497] 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num [1:6497] 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num [1:6497] 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num [1:6497] 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num [1:6497] 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
## $ quality : num [1:6497] 5 5 5 6 5 5 5 7 7 5 ...
## $ tipo : Factor w/ 2 levels "blanco","rojo": 2 2 2 2 2 2 2 2 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. `fixed acidity` = col_double(),
## .. `volatile acidity` = col_double(),
## .. `citric acid` = col_double(),
## .. `residual sugar` = col_double(),
## .. chlorides = col_double(),
## .. `free sulfur dioxide` = col_double(),
## .. `total sulfur dioxide` = col_double(),
## .. density = col_double(),
## .. pH = col_double(),
## .. sulphates = col_double(),
## .. alcohol = col_double(),
## .. quality = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(winequality)
## fixed acidity volatile acidity citric acid residual sugar
## Min. : 3.800 Min. :0.0800 Min. :0.0000 Min. : 0.600
## 1st Qu.: 6.400 1st Qu.:0.2300 1st Qu.:0.2500 1st Qu.: 1.800
## Median : 7.000 Median :0.2900 Median :0.3100 Median : 3.000
## Mean : 7.215 Mean :0.3397 Mean :0.3186 Mean : 5.443
## 3rd Qu.: 7.700 3rd Qu.:0.4000 3rd Qu.:0.3900 3rd Qu.: 8.100
## Max. :15.900 Max. :1.5800 Max. :1.6600 Max. :65.800
## chlorides free sulfur dioxide total sulfur dioxide density
## Min. :0.00900 Min. : 1.00 Min. : 6.0 Min. :0.9871
## 1st Qu.:0.03800 1st Qu.: 17.00 1st Qu.: 77.0 1st Qu.:0.9923
## Median :0.04700 Median : 29.00 Median :118.0 Median :0.9949
## Mean :0.05603 Mean : 30.53 Mean :115.7 Mean :0.9947
## 3rd Qu.:0.06500 3rd Qu.: 41.00 3rd Qu.:156.0 3rd Qu.:0.9970
## Max. :0.61100 Max. :289.00 Max. :440.0 Max. :1.0390
## pH sulphates alcohol quality tipo
## Min. :2.720 Min. :0.2200 Min. : 8.00 Min. :3.000 blanco:4898
## 1st Qu.:3.110 1st Qu.:0.4300 1st Qu.: 9.50 1st Qu.:5.000 rojo :1599
## Median :3.210 Median :0.5100 Median :10.30 Median :6.000
## Mean :3.219 Mean :0.5313 Mean :10.49 Mean :5.818
## 3rd Qu.:3.320 3rd Qu.:0.6000 3rd Qu.:11.30 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :9.000
# Verificar cantidad por tipo de vino
table(winequality$tipo)
##
## blanco rojo
## 4898 1599
round(prop.table(table(winequality$tipo)),2)
##
## blanco rojo
## 0.75 0.25
# Verificar valores faltantes
colSums(is.na(winequality))
## fixed acidity volatile acidity citric acid
## 0 0 0
## residual sugar chlorides free sulfur dioxide
## 0 0 0
## total sulfur dioxide density pH
## 0 0 0
## sulphates alcohol quality
## 0 0 0
## tipo
## 0
#Dividir los datos en conjunto de entrenamiento y prueba
winequality_norm <- as.data.frame(lapply(winequality[, -ncol(winequality)], rescale))
winequality_norm$tipo <- winequality$tipo
set.seed(2025)
folds <- createFolds(winequality_norm$tipo, k = 5)
entrenamiento <- winequality_norm[c(folds$Fold1, folds$Fold2, folds$Fold3, folds$Fold4), ]
prueba <- winequality_norm[folds$Fold5, ]
dim(entrenamiento)[1]
## [1] 5198
dim(prueba)[1]
## [1] 1299
#Aplicar el método K-NN
modelo <- train.kknn(tipo ~ ., data = entrenamiento, kmax = 20)
modelo
##
## Call:
## train.kknn(formula = tipo ~ ., data = entrenamiento, kmax = 20)
##
## Type of response variable: nominal
## Minimal misclassification: 0.005001924
## Best kernel: optimal
## Best k: 1
#Predicción sobre la prueba
Pred <- predict(modelo, prueba[, -ncol(prueba)])
tabla_confusion <- table(prueba[, ncol(prueba)], Pred)
tabla_confusion
## Pred
## blanco rojo
## blanco 975 5
## rojo 4 315
#Calcular la ecxactitud
TA <- sum(diag(tabla_confusion)) / sum(tabla_confusion)
round(TA, 4)
## [1] 0.9931
#Matriz de confusión con caret
confusionMatrix(data = Pred, reference = prueba$tipo)
## Confusion Matrix and Statistics
##
## Reference
## Prediction blanco rojo
## blanco 975 4
## rojo 5 315
##
## Accuracy : 0.9931
## 95% CI : (0.9869, 0.9968)
## No Information Rate : 0.7544
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9813
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9949
## Specificity : 0.9875
## Pos Pred Value : 0.9959
## Neg Pred Value : 0.9844
## Prevalence : 0.7544
## Detection Rate : 0.7506
## Detection Prevalence : 0.7537
## Balanced Accuracy : 0.9912
##
## 'Positive' Class : blanco
##
exactitud <- numeric(length = 5)
for(i in 1:5){
prueba_cv <- winequality_norm[folds[[i]], ]
entrenamiento_cv <- winequality_norm[-folds[[i]], ]
entrenamiento_labels <- winequality$tipo[-folds[[i]]]
prueba_labels <- winequality$tipo[folds[[i]]]
pred_knn <- knn(entrenamiento_cv[, -ncol(entrenamiento_cv)],
prueba_cv[, -ncol(prueba_cv)],
cl = entrenamiento_labels,
k = 1)
cm <- confusionMatrix(pred_knn, prueba_labels)
exactitud[i] <- cm$overall["Accuracy"]
cat("Fold", i, "- Exactitud:", exactitud[i], "\n")
}
## Fold 1 - Exactitud: 0.9923018
## Fold 2 - Exactitud: 0.9923077
## Fold 3 - Exactitud: 0.9930716
## Fold 4 - Exactitud: 0.9892308
## Fold 5 - Exactitud: 0.9899923
Exactitud_promedio <- round(mean(exactitud),4)*100
paste("Exactitud promedio: ", Exactitud_promedio, "%", sep="")
## [1] "Exactitud promedio: 99.14%"
# Paso #5: Interpretación de los resultados finales
# Matriz de confusión del modelo
tabla1 <- table(prueba$tipo, Pred)
tabla1
## Pred
## blanco rojo
## blanco 975 5
## rojo 4 315
# Calcular exactitud
TA <- (sum(diag(tabla1))) / sum(tabla1)
round(TA, 4)
## [1] 0.9931
# Validación cruzada automática con caret usando toda la data normalizada
set.seed(2025)
train_control <- trainControl(method = "cv", number = 10, savePredictions = TRUE)
knn_cv <- train(tipo ~ .,
data = winequality_norm,
method = "knn",
trControl = train_control,
tuneGrid = data.frame(k = 1))
# Resultados de validación cruzada
knn_cv
## k-Nearest Neighbors
##
## 6497 samples
## 12 predictor
## 2 classes: 'blanco', 'rojo'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 5847, 5847, 5847, 5847, 5847, 5847, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9916883 0.9775896
##
## Tuning parameter 'k' was held constant at a value of 1
# Matriz de confusión usando predicciones guardadas por caret
confusionMatrix(knn_cv$pred$pred, knn_cv$pred$obs)
## Confusion Matrix and Statistics
##
## Reference
## Prediction blanco rojo
## blanco 4873 29
## rojo 25 1570
##
## Accuracy : 0.9917
## 95% CI : (0.9892, 0.9938)
## No Information Rate : 0.7539
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.9776
##
## Mcnemar's Test P-Value : 0.6831
##
## Sensitivity : 0.9949
## Specificity : 0.9819
## Pos Pred Value : 0.9941
## Neg Pred Value : 0.9843
## Prevalence : 0.7539
## Detection Rate : 0.7500
## Detection Prevalence : 0.7545
## Balanced Accuracy : 0.9884
##
## 'Positive' Class : blanco
##