Carga de datos

# Read the wine data set from the working directory and preview the first
# six rows (the default for head()).
vinos <- read.csv(file = "wine.csv")
head(x = vinos, n = 6L)
##   Cultivar Alcohol Malic.acid  Ash Alcalinity.of.ash Magnesium Total.phenols
## 1        1   14.23       1.71 2.43              15.6       127          2.80
## 2        1   13.20       1.78 2.14              11.2       100          2.65
## 3        1   13.16       2.36 2.67              18.6       101          2.80
## 4        1   14.37       1.95 2.50              16.8       113          3.85
## 5        1   13.24       2.59 2.87              21.0       118          2.80
## 6        1   14.20       1.76 2.45              15.2       112          3.27
##   Flavanoids Nonflavanoid.phenols Proanthocyanins Color.intensity  Hue
## 1       3.06                 0.28            2.29            5.64 1.04
## 2       2.76                 0.26            1.28            4.38 1.05
## 3       3.24                 0.30            2.81            5.68 1.03
## 4       3.49                 0.24            2.18            7.80 0.86
## 5       2.69                 0.39            1.82            4.32 1.04
## 6       3.39                 0.34            1.97            6.75 1.05
##   OD280.OD315.of.diluted.wines Proline
## 1                         3.92    1065
## 2                         3.40    1050
## 3                         3.17    1185
## 4                         3.45    1480
## 5                         2.93     735
## 6                         2.85    1450

Selección y escalado de variables numéricas

# Keep only the numeric columns, then standardize each one with the scale()
# defaults (centering and scaling per column).
# select(where(is.numeric)) replaces the superseded scoped verb select_if().
# NOTE(review): `Cultivar` is numeric, so it is kept as a feature and flows
# into the PCA, k-means and MDS steps. It looks like a class label rather than
# a measurement; confirm whether it should be dropped before clustering.
vinos_num <- vinos %>%
  select(where(is.numeric))
vinos_scaled <- scale(vinos_num)

head(vinos_scaled)
##       Cultivar   Alcohol  Malic.acid        Ash Alcalinity.of.ash  Magnesium
## [1,] -1.210529 1.5143408 -0.56066822  0.2313998        -1.1663032 1.90852151
## [2,] -1.210529 0.2455968 -0.49800856 -0.8256672        -2.4838405 0.01809398
## [3,] -1.210529 0.1963252  0.02117152  1.1062139        -0.2679823 0.08810981
## [4,] -1.210529 1.6867914 -0.34583508  0.4865539        -0.8069748 0.92829983
## [5,] -1.210529 0.2948684  0.22705328  1.8352256         0.4506745 1.27837900
## [6,] -1.210529 1.4773871 -0.51591132  0.3043010        -1.2860793 0.85828399
##      Total.phenols Flavanoids Nonflavanoid.phenols Proanthocyanins
## [1,]     0.8067217  1.0319081           -0.6577078       1.2214385
## [2,]     0.5670481  0.7315653           -0.8184106      -0.5431887
## [3,]     0.8067217  1.2121137           -0.4970050       2.1299594
## [4,]     2.4844372  1.4623994           -0.9791134       1.0292513
## [5,]     0.8067217  0.6614853            0.2261576       0.4002753
## [6,]     1.5576991  1.3622851           -0.1755994       0.6623487
##      Color.intensity        Hue OD280.OD315.of.diluted.wines     Proline
## [1,]       0.2510088  0.3611585                    1.8427215  1.01015939
## [2,]      -0.2924962  0.4049085                    1.1103172  0.96252635
## [3,]       0.2682629  0.3174085                    0.7863692  1.39122370
## [4,]       1.1827317 -0.4263410                    1.1807407  2.32800680
## [5,]      -0.3183774  0.3611585                    0.4483365 -0.03776747
## [6,]       0.7298108  0.4049085                    0.3356589  2.23274072

Análisis de Componentes Principales (PCA)

# Principal component analysis on the standardized matrix. The input has
# already been through scale(); centering and scaling are requested again
# here and kept as in the original call.
res.pca <- prcomp(
  vinos_scaled,
  center = TRUE,
  scale. = TRUE
)

Varianza explicada

# Scree plot of the PCA result: bars labelled with their values, y axis
# limited to the 0-60 range.
fviz_eig(
  res.pca,
  addlabels = TRUE,
  ylim = c(0, 60)
) +
  labs(
    title = "Varianza Explicada por Componentes Principales"
  )

Determinación del número óptimo de clusters

# Elbow plot (within-cluster sum of squares) used to choose the number of
# clusters. kmeans() starts from random centers, so the seed is fixed here to
# make the curve reproducible; nstart = 25 is forwarded to kmeans() so the
# curve is computed with the same setting as the final k-means fit.
set.seed(123)
fviz_nbclust(vinos_scaled, kmeans, method = "wss", nstart = 25) +
  # Dashed reference line at the chosen number of clusters (k = 3).
  geom_vline(xintercept = 3, linetype = 2) +
  labs(subtitle = "Determinación del número de grupos")

Aplicación de K-means

# Fix the RNG seed so the k-means partition is reproducible, then fit three
# clusters using 25 random starts.
set.seed(123)
km_res <- kmeans(
  vinos_scaled,
  centers = 3,
  nstart = 25
)

# Store each row's cluster assignment as a factor column on the original
# data frame and tabulate the cluster sizes.
vinos[["cluster"]] <- as.factor(km_res[["cluster"]])
table(vinos[["cluster"]])
## 
##  1  2  3 
## 49 61 68

Escalamiento Multidimensional (MDS)

# Classical multidimensional scaling: Euclidean distances between the
# standardized rows, projected onto two dimensions (both are the defaults of
# dist() and cmdscale(), spelled out here).
dist_matrix <- dist(vinos_scaled, method = "euclidean")
mds <- cmdscale(dist_matrix, k = 2)

# Combine the two MDS coordinates with each row's cluster label for plotting.
mds_df <- data.frame(
  Dimension1 = mds[, 1],
  Dimension2 = mds[, 2],
  Cluster = vinos[["cluster"]]
)

Visualización MDS

# Scatter plot of the two MDS dimensions, with points coloured by cluster
# using the discrete viridis palette.
ggplot(
  data = mds_df,
  mapping = aes(x = Dimension1, y = Dimension2, color = Cluster)
) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_viridis_d() +
  labs(
    title = "Visualización de Clusters de Vino mediante MDS",
    x = "Dimensión 1",
    y = "Dimensión 2"
  ) +
  theme_minimal()