Contexto

Estos datos son el resultado de un análisis químico de vinos cultivados en la misma región de Italia, pero derivados de tres cultivares diferentes.

El análisis determinó las cantidades de 13 componentes que se encuentran en cada uno de los tres culti

Paso 1. Instalar paquetes y llamar librerías

#install.packages("cluster") # Análisis de Agrupamiento
library(cluster)
#install.packages("ggplot2") # Graficar
library(ggplot2)
#install.packages("data.table") # Manejo de muchos datos
library(data.table)
#install.packages("factoextra") # Grafica optimización de número de clusters
library(factoextra)
#install.packages("dplyr")
library(dplyr)

Paso 2. Obtener los datos

df3 <- data.frame(
  Entidad = c("Aguascalientes", "Baja California", "Baja California Sur", "Campeche", "Chiapas", 
              "Chihuahua", "Distrito Federal", "Coahuila de Zaragoza", "Colima", "Durango", 
              "Guanajuato", "Guerrero", "Hidalgo", "Jalisco", "Michoacán de Ocampo", "Morelos", 
              "México", "Nayarit", "Nuevo León", "Oaxaca", "Puebla", "Querétaro", "Quintana Roo", 
              "San Luis Potosí", "Sinaloa", "Sonora", "Tabasco", "Tamaulipas", 
              "Tlaxcala", "Veracruz de Ignacio de la Llave", "Yucatán", "Zacatecas"),
  Suel_Prom_Empleados = c(9432.454645, 14044.353582, 8123.243189, 17940.328515, 5633.688618, 
                          16804.911982, 18725.967825, 9880.586704, 6691.318058, 8559.506877, 
                          8659.015872, 6127.738942, 7136.083255, 9566.576551, 6564.490245, 
                          6761.228389, 12980.218774, 7679.770338, 13964.116103, 6054.543362, 
                          10170.617462, 11790.052681, 11033.895805, 8390.806228, 10935.056047, 
                          12690.473421, 17766.800911, 12204.903685, 5254.007124, 10137.518401, 
                          7200.283540, 6556.083345)
)

df4 <- data.frame(
  Suel_Prom_Empleados = c(
    9432.454645, 14044.353582, 8123.243189, 17940.328515, 
    5633.688618, 16804.911982, 18725.967825, 9880.586704, 
    6691.318058, 8559.506877, 8659.015872, 6127.738942, 
    7136.083255, 9566.576551, 6564.490245, 6761.228389, 
    12980.218774, 7679.770338, 13964.116103, 6054.543362, 
    10170.617462, 11790.052681, 11033.895805, 8390.806228, 
    10935.056047, 12690.473421, 17766.800911, 12204.903685, 
    5254.007124, 10137.518401, 7200.283540, 6556.083345
  )
)

Paso 3. Entender los datos

summary(df3) # min, promedio, max, NAs
##    Entidad          Suel_Prom_Empleados
##  Length:32          Min.   : 5254      
##  Class :character   1st Qu.: 7042      
##  Mode  :character   Median : 9500      
##                     Mean   :10171      
##                     3rd Qu.:12326      
##                     Max.   :18726
str(df3) # estructura: tipo de objeto, número de objetos
## 'data.frame':    32 obs. of  2 variables:
##  $ Entidad            : chr  "Aguascalientes" "Baja California" "Baja California Sur" "Campeche" ...
##  $ Suel_Prom_Empleados: num  9432 14044 8123 17940 5634 ...

Paso 4. Optimizar el número de grupos

set.seed(123)
optimizacion3 <- clusGap(df4, FUN=kmeans, nstart=1, K.max=10)
plot(optimizacion3, xlab="Número de clusters k", main="Optimización de Clusters") #el primer punto más alto se selecciona como óptimo

Paso 5. Determinar número de grupos

# Siempre es un valor inicial "cualquiera", luego se optimiza.
grupos3 <-  4 # óptimos

Paso 6. Generar los grupos

set.seed(123)
clusters3 <-  kmeans(df4, grupos3)
clusters3
## K-means clustering with 4 clusters of sizes 9, 11, 4, 8
## 
## Cluster means:
##   Suel_Prom_Empleados
## 1            9213.370
## 2            6514.476
## 3           17809.502
## 4           12455.384
## 
## Clustering vector:
##  [1] 1 4 1 3 2 3 3 1 2 1 1 2 2 1 2 2 4 2 4 2 1 4 4 1 4 4 3 4 2 1 2 2
## 
## Within cluster sum of squares by cluster:
## [1] 4988137 5036703 1868050 9969247
##  (between_SS / total_SS =  95.2 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Paso 8. Graficar los grupos

# Añadir clusters al df original con Entidad
df3$cluster <- clusters3$cluster

# Ordenar por sueldo y graficar
ggplot(df3, aes(x = reorder(Entidad, Suel_Prom_Empleados),
                y = Suel_Prom_Empleados,
                fill = factor(cluster))) +
  geom_col() +
  coord_flip() +
  labs(x = "Entidad", y = "Sueldo promedio empleados",
       fill = "Cluster",
       title = "Clusters por sueldo promedio de empleados (1 variable)") +
  theme_minimal()

Paso 9. Agregar Clusters al Dataframe

df3
##                            Entidad Suel_Prom_Empleados cluster
## 1                   Aguascalientes            9432.455       1
## 2                  Baja California           14044.354       4
## 3              Baja California Sur            8123.243       1
## 4                         Campeche           17940.329       3
## 5                          Chiapas            5633.689       2
## 6                        Chihuahua           16804.912       3
## 7                 Distrito Federal           18725.968       3
## 8             Coahuila de Zaragoza            9880.587       1
## 9                           Colima            6691.318       2
## 10                         Durango            8559.507       1
## 11                      Guanajuato            8659.016       1
## 12                        Guerrero            6127.739       2
## 13                         Hidalgo            7136.083       2
## 14                         Jalisco            9566.577       1
## 15             Michoacán de Ocampo            6564.490       2
## 16                         Morelos            6761.228       2
## 17                          México           12980.219       4
## 18                         Nayarit            7679.770       2
## 19                      Nuevo León           13964.116       4
## 20                          Oaxaca            6054.543       2
## 21                          Puebla           10170.617       1
## 22                       Querétaro           11790.053       4
## 23                    Quintana Roo           11033.896       4
## 24                 San Luis Potosí            8390.806       1
## 25                         Sinaloa           10935.056       4
## 26                          Sonora           12690.473       4
## 27                         Tabasco           17766.801       3
## 28                      Tamaulipas           12204.904       4
## 29                        Tlaxcala            5254.007       2
## 30 Veracruz de Ignacio de la Llave           10137.518       1
## 31                         Yucatán            7200.284       2
## 32                       Zacatecas            6556.083       2
LS0tCnRpdGxlOiAiU2FsYXJpb3MgTWVkaW9zIGVuIE3DqXhpY286IEFncnVwYW1pZW50byIKYXV0aG9yOiAiQW5uYSBEdXLDoW4gQTAxMjg1Njc0IgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKICAgICAgICBodG1sX2RvY3VtZW50OgogICAgICAgICAgICAgICAgdG9jOiBUUlVFCiAgICAgICAgICAgICAgICB0b2NfZmxvYXQ6IFRSVUUKICAgICAgICAgICAgICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgICAgICAgICAgICAgIHRoZW1lOiB5ZXRpCi0tLQohW10oaHR0cHM6Ly9taXJvLm1lZGl1bS5jb20vMSpidkJqVXpBU2cwQWlqdmVzYm1iUGdBLmdpZikKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiM1MjhCOEI7Ij48Yj5Db250ZXh0bzwvYj48L3NwYW4+CkVzdG9zIGRhdG9zIHNvbiBlbCByZXN1bHRhZG8gZGUgdW4gYW7DoWxpc2lzIHF1w61taWNvIGRlIHZpbm9zIGN1bHRpdmFkb3MgZW4gbGEgbWlzbWEgcmVnacOzbiBkZSBJdGFsaWEsIHBlcm8gZGVyaXZhZG9zIGRlIHRyZXMgY3VsdGl2YXJlcyBkaWZlcmVudGVzLiAgCgpFbCBhbsOhbGlzaXMgZGV0ZXJtaW7DsyBsYXMgY2FudGlkYWRlcyBkZSAxMyBjb21wb25lbnRlcyBxdWUgc2UgZW5jdWVudHJhbiBlbiBjYWRhIHVubyBkZSBsb3MgdHJlcyBjdWx0aQoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IzUyOEI4QjsiPjxiPlBhc28gMS4gSW5zdGFsYXIgcGFxdWV0ZXMgeSBsbGFtYXIgbGlicmVyw61hczwvYj48L3NwYW4+CmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CiNpbnN0YWxsLnBhY2thZ2VzKCJjbHVzdGVyIikgIyBBbsOhbGlzaXMgZGUgQWdydXBhbWllbnRvCmxpYnJhcnkoY2x1c3RlcikKI2luc3RhbGwucGFja2FnZXMoImdncGxvdDIiKSAjIEdyYWZpY2FyCmxpYnJhcnkoZ2dwbG90MikKI2luc3RhbGwucGFja2FnZXMoImRhdGEudGFibGUiKSAjIE1hbmVqbyBkZSBtdWNob3MgZGF0b3MKbGlicmFyeShkYXRhLnRhYmxlKQojaW5zdGFsbC5wYWNrYWdlcygiZmFjdG9leHRyYSIpICMgR3JhZmljYSBvcHRpbWl6YWNpw7NuIGRlIG7Dum1lcm8gZGUgY2x1c3RlcnMKbGlicmFyeShmYWN0b2V4dHJhKQojaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQpsaWJyYXJ5KGRwbHlyKQpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiM1MjhCOEI7Ij48Yj5QYXNvIDIuIE9idGVuZXIgbG9zIGRhdG9zIDwvYj48L3NwYW4+CmBgYHtyfQpkZjMgPC0gZGF0YS5mcmFtZSgKICBFbnRpZGFkID0gYygiQWd1YXNjYWxpZW50ZXMiLCAiQmFqYSBDYWxpZm9ybmlhIiwgIkJhamEgQ2FsaWZvcm5pYSBTdXIiLCAiQ2FtcGVjaGUiLCAiQ2hpYXBhcyIsIAogICAgICAgICAgICAgICJDaGlodWFodWEiLCAiRGlzdHJpdG8gRmVkZXJhbCIsICJDb2FodWlsYSBkZSBaYXJhZ296YSIsICJDb2xpbWEiLCAiRHVyYW5nbyIsIAogICAgICAgICAgICAgICJHdWFuYWp1YXRvIiwgIkd1ZXJyZXJvIiwgIkhpZGFsZ28iLCAiSmFsaXNjbyIsICJNaWNob2Fjw6FuIGRlIE9jYW1wbyIsICJNb3JlbG9zIiwgCiAgICAgICAgICAgICAgIk3DqXhpY28iLCAiTmF5YXJpdCIsICJOdWV2byBMZcOzbiIsICJPYXhhY2EiLCAiUHVlYmxhIiwgIlF1ZXLDqXRhcm8iLCAiUXVpbnRhbmEgUm9vIiwgCiAgICAgICAgICAgICAgIlNhbiBMdWlzIFBvdG9zw60iLCAiU2luYWxvYSIsICJTb25vcmEiLCAiVGFiYXNjbyIsICJUYW1hdWxpcGFzIiwgCiAgICAgICAgICAgICAgIlRsYXhjYWxhIiwgIlZlcmFjcnV6IGRlIElnbmFjaW8gZGUgbGEgTGxhdmUiLCAiWXVjYXTDoW4iLCAiWmFjYXRlY2FzIiksCiAgU3VlbF9Qcm9tX0VtcGxlYWRvcyA9IGMoOTQzMi40NTQ2NDUsIDE0MDQ0LjM1MzU4MiwgODEyMy4yNDMxODksIDE3OTQwLjMyODUxNSwgNTYzMy42ODg2MTgsIAogICAgICAgICAgICAgICAgICAgICAgICAgIDE2ODA0LjkxMTk4MiwgMTg3MjUuOTY3ODI1LCA5ODgwLjU4NjcwNCwgNjY5MS4zMTgwNTgsIDg1NTkuNTA2ODc3LCAKICAgICAgICAgICAgICAgICAgICAgICAgICA4NjU5LjAxNTg3MiwgNjEyNy43Mzg5NDIsIDcxMzYuMDgzMjU1LCA5NTY2LjU3NjU1MSwgNjU2NC40OTAyNDUsIAogICAgICAgICAgICAgICAgICAgICAgICAgIDY3NjEuMjI4Mzg5LCAxMjk4MC4yMTg3NzQsIDc2NzkuNzcwMzM4LCAxMzk2NC4xMTYxMDMsIDYwNTQuNTQzMzYyLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAxMDE3MC42MTc0NjIsIDExNzkwLjA1MjY4MSwgMTEwMzMuODk1ODA1LCA4MzkwLjgwNjIyOCwgMTA5MzUuMDU2MDQ3LCAKICAgICAgICAgICAgICAgICAgICAgICAgICAxMjY5MC40NzM0MjEsIDE3NzY2LjgwMDkxMSwgMTIyMDQuOTAzNjg1LCA1MjU0LjAwNzEyNCwgMTAxMzcuNTE4NDAxLCAKICAgICAgICAgICAgICAgICAgICAgICAgICA3MjAwLjI4MzU0MCwgNjU1Ni4wODMzNDUpCikKCmRmNCA8LSBkYXRhLmZyYW1lKAogIFN1ZWxfUHJvbV9FbXBsZWFkb3MgPSBjKAogICAgOTQzMi40NTQ2NDUsIDE0MDQ0LjM1MzU4MiwgODEyMy4yNDMxODksIDE3OTQwLjMyODUxNSwgCiAgICA1NjMzLjY4ODYxOCwgMTY4MDQuOTExOTgyLCAxODcyNS45Njc4MjUsIDk4ODAuNTg2NzA0LCAKICAgIDY2OTEuMzE4MDU4LCA4NTU5LjUwNjg3NywgODY1OS4wMTU4NzIsIDYxMjcuNzM4OTQyLCAKICAgIDcxMzYuMDgzMjU1LCA5NTY2LjU3NjU1MSwgNjU2NC40OTAyNDUsIDY3NjEuMjI4Mzg5LCAKICAgIDEyOTgwLjIxODc3NCwgNzY3OS43NzAzMzgsIDEzOTY0LjExNjEwMywgNjA1NC41NDMzNjIsIAogICAgMTAxNzAuNjE3NDYyLCAxMTc5MC4wNTI2ODEsIDExMDMzLjg5NTgwNSwgODM5MC44MDYyMjgsIAogICAgMTA5MzUuMDU2MDQ3LCAxMjY5MC40NzM0MjEsIDE3NzY2LjgwMDkxMSwgMTIyMDQuOTAzNjg1LCAKICAgIDUyNTQuMDA3MTI0LCAxMDEzNy41MTg0MDEsIDcyMDAuMjgzNTQwLCA2NTU2LjA4MzM0NQogICkKKQpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiM1MjhCOEI7Ij48Yj5QYXNvIDMuIEVudGVuZGVyIGxvcyBkYXRvcyA8L2I+PC9zcGFuPgpgYGB7cn0Kc3VtbWFyeShkZjMpICMgbWluLCBwcm9tZWRpbywgbWF4LCBOQXMKc3RyKGRmMykgIyBlc3RydWN0dXJhOiB0aXBvIGRlIG9iamV0bywgbsO6bWVybyBkZSBvYmpldG9zCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IzUyOEI4QjsiPjxiPlBhc28gNC4gT3B0aW1pemFyIGVsIG7Dum1lcm8gZGUgZ3J1cG9zICA8L2I+PC9zcGFuPgpgYGB7cn0Kc2V0LnNlZWQoMTIzKQpvcHRpbWl6YWNpb24zIDwtIGNsdXNHYXAoZGY0LCBGVU49a21lYW5zLCBuc3RhcnQ9MSwgSy5tYXg9MTApCnBsb3Qob3B0aW1pemFjaW9uMywgeGxhYj0iTsO6bWVybyBkZSBjbHVzdGVycyBrIiwgbWFpbj0iT3B0aW1pemFjacOzbiBkZSBDbHVzdGVycyIpICNlbCBwcmltZXIgcHVudG8gbcOhcyBhbHRvIHNlIHNlbGVjY2lvbmEgY29tbyDDs3B0aW1vCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IzUyOEI4QjsiPjxiPlBhc28gNS4gRGV0ZXJtaW5hciBuw7ptZXJvIGRlIGdydXBvcyA8L2I+PC9zcGFuPgpgYGB7cn0KIyBTaWVtcHJlIGVzIHVuIHZhbG9yIGluaWNpYWwgImN1YWxxdWllcmEiLCBsdWVnbyBzZSBvcHRpbWl6YS4KZ3J1cG9zMyA8LSAgNCAjIMOzcHRpbW9zCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IzUyOEI4QjsiPjxiPlBhc28gNi4gR2VuZXJhciBsb3MgZ3J1cG9zICA8L2I+PC9zcGFuPgpgYGB7cn0Kc2V0LnNlZWQoMTIzKQpjbHVzdGVyczMgPC0gIGttZWFucyhkZjQsIGdydXBvczMpCmNsdXN0ZXJzMwpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiM1MjhCOEI7Ij48Yj5QYXNvIDguIEdyYWZpY2FyIGxvcyBncnVwb3MgPC9iPjwvc3Bhbj4KYGBge3J9CiMgQcOxYWRpciBjbHVzdGVycyBhbCBkZiBvcmlnaW5hbCBjb24gRW50aWRhZApkZjMkY2x1c3RlciA8LSBjbHVzdGVyczMkY2x1c3RlcgoKIyBPcmRlbmFyIHBvciBzdWVsZG8geSBncmFmaWNhcgpnZ3Bsb3QoZGYzLCBhZXMoeCA9IHJlb3JkZXIoRW50aWRhZCwgU3VlbF9Qcm9tX0VtcGxlYWRvcyksCiAgICAgICAgICAgICAgICB5ID0gU3VlbF9Qcm9tX0VtcGxlYWRvcywKICAgICAgICAgICAgICAgIGZpbGwgPSBmYWN0b3IoY2x1c3RlcikpKSArCiAgZ2VvbV9jb2woKSArCiAgY29vcmRfZmxpcCgpICsKICBsYWJzKHggPSAiRW50aWRhZCIsIHkgPSAiU3VlbGRvIHByb21lZGlvIGVtcGxlYWRvcyIsCiAgICAgICBmaWxsID0gIkNsdXN0ZXIiLAogICAgICAgdGl0bGUgPSAiQ2x1c3RlcnMgcG9yIHN1ZWxkbyBwcm9tZWRpbyBkZSBlbXBsZWFkb3MgKDEgdmFyaWFibGUpIikgKwogIHRoZW1lX21pbmltYWwoKQoKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjojNTI4QjhCOyI+PGI+UGFzbyA5LiBBZ3JlZ2FyIENsdXN0ZXJzIGFsIERhdGFmcmFtZSA8L2I+PC9zcGFuPgpgYGB7cn0KZGYzCmBgYA==