Carga de datos
# Read the wine dataset from "wine.csv" (path is relative to the working
# directory) into a data frame.
vinos <- read.csv(file = "wine.csv")
# Preview the first six rows to check that the columns were parsed as expected.
head(vinos, n = 6L)
## Cultivar Alcohol Malic.acid Ash Alcalinity.of.ash Magnesium Total.phenols
## 1 1 14.23 1.71 2.43 15.6 127 2.80
## 2 1 13.20 1.78 2.14 11.2 100 2.65
## 3 1 13.16 2.36 2.67 18.6 101 2.80
## 4 1 14.37 1.95 2.50 16.8 113 3.85
## 5 1 13.24 2.59 2.87 21.0 118 2.80
## 6 1 14.20 1.76 2.45 15.2 112 3.27
## Flavanoids Nonflavanoid.phenols Proanthocyanins Color.intensity Hue
## 1 3.06 0.28 2.29 5.64 1.04
## 2 2.76 0.26 1.28 4.38 1.05
## 3 3.24 0.30 2.81 5.68 1.03
## 4 3.49 0.24 2.18 7.80 0.86
## 5 2.69 0.39 1.82 4.32 1.04
## 6 3.39 0.34 1.97 6.75 1.05
## OD280.OD315.of.diluted.wines Proline
## 1 3.92 1065
## 2 3.40 1050
## 3 3.17 1185
## 4 3.45 1480
## 5 2.93 735
## 6 2.85 1450
Selección y escalado de variables numéricas
# Keep only the numeric columns. `select_if()` is a superseded scoped verb;
# `select(where(...))` is the current dplyr/tidyselect equivalent.
# NOTE(review): `Cultivar` is numeric, so it is kept here and ends up as a
# feature in the PCA, k-means and MDS steps. It looks like a class label rather
# than a measurement -- confirm whether it should be dropped before clustering.
vinos_num <- vinos %>%
  select(where(is.numeric))

# Standardize every column to mean 0 and standard deviation 1 so that variables
# measured on large scales do not dominate distance-based methods.
vinos_scaled <- scale(vinos_num)

# Preview the first rows of the standardized matrix.
head(vinos_scaled)
## Cultivar Alcohol Malic.acid Ash Alcalinity.of.ash Magnesium
## [1,] -1.210529 1.5143408 -0.56066822 0.2313998 -1.1663032 1.90852151
## [2,] -1.210529 0.2455968 -0.49800856 -0.8256672 -2.4838405 0.01809398
## [3,] -1.210529 0.1963252 0.02117152 1.1062139 -0.2679823 0.08810981
## [4,] -1.210529 1.6867914 -0.34583508 0.4865539 -0.8069748 0.92829983
## [5,] -1.210529 0.2948684 0.22705328 1.8352256 0.4506745 1.27837900
## [6,] -1.210529 1.4773871 -0.51591132 0.3043010 -1.2860793 0.85828399
## Total.phenols Flavanoids Nonflavanoid.phenols Proanthocyanins
## [1,] 0.8067217 1.0319081 -0.6577078 1.2214385
## [2,] 0.5670481 0.7315653 -0.8184106 -0.5431887
## [3,] 0.8067217 1.2121137 -0.4970050 2.1299594
## [4,] 2.4844372 1.4623994 -0.9791134 1.0292513
## [5,] 0.8067217 0.6614853 0.2261576 0.4002753
## [6,] 1.5576991 1.3622851 -0.1755994 0.6623487
## Color.intensity Hue OD280.OD315.of.diluted.wines Proline
## [1,] 0.2510088 0.3611585 1.8427215 1.01015939
## [2,] -0.2924962 0.4049085 1.1103172 0.96252635
## [3,] 0.2682629 0.3174085 0.7863692 1.39122370
## [4,] 1.1827317 -0.4263410 1.1807407 2.32800680
## [5,] -0.3183774 0.3611585 0.4483365 -0.03776747
## [6,] 0.7298108 0.4049085 0.3356589 2.23274072
Análisis de Componentes Principales (PCA)
# Principal component analysis of the standardized matrix.
# `vinos_scaled` is already the output of `scale()`, so `center`/`scale.` here
# re-apply standardization to columns that are already standardized.
res.pca <- prcomp(x = vinos_scaled, scale. = TRUE, center = TRUE)
Varianza explicada
# Scree plot of the PCA result with value labels on the bars; the y axis is
# fixed to the 0-60 range.
fviz_eig(res.pca, ylim = c(0, 60), addlabels = TRUE) +
  ggtitle("Varianza Explicada por Componentes Principales")

Determinación del número óptimo de clusters
# Elbow plot: total within-cluster sum of squares for each candidate number of
# clusters.
# k-means starts from random centers, so seed the RNG first; otherwise the
# curve changes between runs.
set.seed(123)
# `nstart = 25` is forwarded to `kmeans()` so every candidate k is fitted with
# the same multi-start setting as the final model, instead of a single random
# start.
fviz_nbclust(vinos_scaled, kmeans, method = "wss", nstart = 25) +
  # Dashed vertical line marking the chosen number of clusters (3).
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Determinación del número de grupos")

Aplicación de K-means
# Seed the RNG so the random initial centers, and therefore the cluster
# numbering, are reproducible.
set.seed(seed = 123)

# k-means with 3 clusters; 25 random starts, keeping the best solution.
km_res <- kmeans(x = vinos_scaled, centers = 3, nstart = 25)

# Attach the cluster assignment to the original data as a factor.
vinos[["cluster"]] <- as.factor(km_res[["cluster"]])

# Number of observations per cluster.
table(vinos[["cluster"]])
##
## 1 2 3
## 49 61 68
Escalamiento Multidimensional (MDS)
# Pairwise Euclidean distances between the standardized observations.
dist_matrix <- dist(x = vinos_scaled, method = "euclidean")

# Classical (metric) multidimensional scaling down to two coordinates.
mds <- cmdscale(d = dist_matrix, k = 2)

# Plotting frame: the two MDS coordinates plus the k-means cluster of each row.
mds_df <- data.frame(
  Dimension1 = mds[, 1L],
  Dimension2 = mds[, 2L],
  Cluster = vinos[["cluster"]]
)
Visualización MDS
# Scatter plot of the two MDS coordinates, one point per observation, colored
# by k-means cluster with the discrete viridis palette.
ggplot(
  data = mds_df,
  mapping = aes(x = Dimension1, y = Dimension2, color = Cluster)
) +
  # Slight transparency keeps overlapping points distinguishable.
  geom_point(alpha = 0.8, size = 3) +
  scale_color_viridis_d() +
  labs(
    x = "Dimensión 1",
    y = "Dimensión 2",
    title = "Visualización de Clusters de Vino mediante MDS"
  ) +
  theme_minimal()
