Paso 1. Instalar paquetes y llamar

#install.packages("cluster")
library(cluster)
## Warning: package 'cluster' was built under R version 4.3.3
#install.packages("ggplot2")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
#install.packages("data.table")
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
#install.packages("factoextra")
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Paso 2. Obtener los datos

datos <- read.csv("C:\\Users\\HP\\OneDrive - FEMSA Comercio\\Escritorio\\Inteligencia de Negocios\\7mo Semestre\\M2\\Codigos\\wine.csv")

Paso 3. Entender la base de datos

summary(datos)
##     Alcohol        Malic_Acid         Ash         Ash_Alcanity  
##  Min.   :11.03   Min.   :0.740   Min.   :1.360   Min.   :10.60  
##  1st Qu.:12.36   1st Qu.:1.603   1st Qu.:2.210   1st Qu.:17.20  
##  Median :13.05   Median :1.865   Median :2.360   Median :19.50  
##  Mean   :13.00   Mean   :2.336   Mean   :2.367   Mean   :19.49  
##  3rd Qu.:13.68   3rd Qu.:3.083   3rd Qu.:2.558   3rd Qu.:21.50  
##  Max.   :14.83   Max.   :5.800   Max.   :3.230   Max.   :30.00  
##    Magnesium      Total_Phenols     Flavanoids    Nonflavanoid_Phenols
##  Min.   : 70.00   Min.   :0.980   Min.   :0.340   Min.   :0.1300      
##  1st Qu.: 88.00   1st Qu.:1.742   1st Qu.:1.205   1st Qu.:0.2700      
##  Median : 98.00   Median :2.355   Median :2.135   Median :0.3400      
##  Mean   : 99.74   Mean   :2.295   Mean   :2.029   Mean   :0.3619      
##  3rd Qu.:107.00   3rd Qu.:2.800   3rd Qu.:2.875   3rd Qu.:0.4375      
##  Max.   :162.00   Max.   :3.880   Max.   :5.080   Max.   :0.6600      
##  Proanthocyanins Color_Intensity       Hue             OD280      
##  Min.   :0.410   Min.   : 1.280   Min.   :0.4800   Min.   :1.270  
##  1st Qu.:1.250   1st Qu.: 3.220   1st Qu.:0.7825   1st Qu.:1.938  
##  Median :1.555   Median : 4.690   Median :0.9650   Median :2.780  
##  Mean   :1.591   Mean   : 5.058   Mean   :0.9574   Mean   :2.612  
##  3rd Qu.:1.950   3rd Qu.: 6.200   3rd Qu.:1.1200   3rd Qu.:3.170  
##  Max.   :3.580   Max.   :13.000   Max.   :1.7100   Max.   :4.000  
##     Proline      
##  Min.   : 278.0  
##  1st Qu.: 500.5  
##  Median : 673.5  
##  Mean   : 746.9  
##  3rd Qu.: 985.0  
##  Max.   :1680.0

Paso 4. Escalar la base de datos

df <- scale(datos)

Paso 5. Cantidad de grupos

grupos <- 3

Paso 6. Generar los segmentos

segmentos <- kmeans(df,grupos)
segmentos
## K-means clustering with 3 clusters of sizes 62, 65, 51
## 
## Cluster means:
##      Alcohol Malic_Acid        Ash Ash_Alcanity   Magnesium Total_Phenols
## 1  0.8328826 -0.3029551  0.3636801   -0.6084749  0.57596208    0.88274724
## 2 -0.9234669 -0.3929331 -0.4931257    0.1701220 -0.49032869   -0.07576891
## 3  0.1644436  0.8690954  0.1863726    0.5228924 -0.07526047   -0.97657548
##    Flavanoids Nonflavanoid_Phenols Proanthocyanins Color_Intensity        Hue
## 1  0.97506900          -0.56050853      0.57865427       0.1705823  0.4726504
## 2  0.02075402          -0.03343924      0.05810161      -0.8993770  0.4605046
## 3 -1.21182921           0.72402116     -0.77751312       0.9388902 -1.1615122
##        OD280    Proline
## 1  0.7770551  1.1220202
## 2  0.2700025 -0.7517257
## 3 -1.2887761 -0.4059428
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 1
##  [75] 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
## [112] 2 2 2 2 2 2 2 3 2 2 1 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [149] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## 
## Within cluster sum of squares by cluster:
## [1] 385.6983 558.6971 326.3537
##  (between_SS / total_SS =  44.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Paso 7. Asignar el grupo al que pertenece cada observación

asignacion <- cbind(datos, cluster = segmentos$cluster)

Paso 8. Graficar los clusters

fviz_cluster(segmentos, data=df)

Paso 9. Optimizar la cantidad de grupos

La cantidad óptima de grupos corresponde al punto más alto de la siguiente gráfica.

set.seed(123)
optimizacion <- clusGap(df, FUN=kmeans, nstart=1, K.max =10)
plot(optimizacion, xlab="Número de clusters k")

Paso 10. Comparar segmentos

promedio <- aggregate(asignacion, by=list(asignacion$cluster), FUN=mean)
promedio
##   Group.1  Alcohol Malic_Acid      Ash Ash_Alcanity Magnesium Total_Phenols
## 1       1 13.67677   1.997903 2.466290     17.46290 107.96774      2.847581
## 2       2 12.25092   1.897385 2.231231     20.06308  92.73846      2.247692
## 3       3 13.13412   3.307255 2.417647     21.24118  98.66667      1.683922
##   Flavanoids Nonflavanoid_Phenols Proanthocyanins Color_Intensity       Hue
## 1  3.0032258            0.2920968        1.922097        5.453548 1.0654839
## 2  2.0500000            0.3576923        1.624154        2.973077 1.0627077
## 3  0.8188235            0.4519608        1.145882        7.234706 0.6919608
##      OD280   Proline cluster
## 1 3.163387 1100.2258       1
## 2 2.803385  510.1692       2
## 3 1.696667  619.0588       3

Conclusión

La segmentación o clusters es un algoritmo útil para identificar el cultivar correspondiente a cada vino.

LS0tDQp0aXRsZTogIkNsdXN0ZXJzIEVqZW1wbG86IFZpbm9zIg0KYXV0aG9yOiAiWGltZW5hIE1lbMOpbmRleiAtIEEwMTcyMDU2OSINCmRhdGU6ICIyMDI0LTA4LTEyIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDogDQogICAgdG9jOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KICAgIHRoZW1lOiBjb3Ntbw0KLS0tDQohW10oQzpcXFVzZXJzXFxIUFxcT25lRHJpdmUgLSBGRU1TQSBDb21lcmNpb1xcRXNjcml0b3Jpb1xcSW50ZWxpZ2VuY2lhIGRlIE5lZ29jaW9zXFw3bW8gU2VtZXN0cmVcXE0yXFxGb3Rvc1xcZm90b3dpbmUuanBnKQ0KDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBtYWdlbnRhOyI+UGFzbyAxLiBJbnN0YWxhciBwYXF1ZXRlcyB5IGxsYW1hcg0KYGBge3J9DQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpDQpsaWJyYXJ5KGNsdXN0ZXIpDQojaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpDQpsaWJyYXJ5KGdncGxvdDIpDQojaW5zdGFsbC5wYWNrYWdlcygiZGF0YS50YWJsZSIpDQpsaWJyYXJ5KGRhdGEudGFibGUpDQojaW5zdGFsbC5wYWNrYWdlcygiZmFjdG9leHRyYSIpDQpsaWJyYXJ5KGZhY3RvZXh0cmEpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6IG1hZ2VudGE7Ij5QYXNvIDIuIE9idGVuZXIgbG9zIGRhdG9zPC9zcGFuPiANCmBgYHtyfQ0KZGF0b3MgPC0gcmVhZC5jc3YoIkM6XFxVc2Vyc1xcSFBcXE9uZURyaXZlIC0gRkVNU0EgQ29tZXJjaW9cXEVzY3JpdG9yaW9cXEludGVsaWdlbmNpYSBkZSBOZWdvY2lvc1xcN21vIFNlbWVzdHJlXFxNMlxcQ29kaWdvc1xcd2luZS5jc3YiKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBtYWdlbnRhOyI+UGFzbyAzLiBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zPC9zcGFuPiANCmBgYHtyfQ0Kc3VtbWFyeShkYXRvcykNCmBgYA0KDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBtYWdlbnRhOyI+UGFzbyA0LiBFc2NhbGFyIGxhIGJhc2UgZGUgZGF0b3M8L3NwYW4+IA0KYGBge3J9DQpkZiA8LSBzY2FsZShkYXRvcykNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjogbWFnZW50YTsiPlBhc28gNS4gQ2FudGlkYWQgZGUgZ3J1cG9zPC9zcGFuPiANCmBgYHtyfQ0KZ3J1cG9zIDwtIDMNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjogbWFnZW50YTsiPlBhc28gNi4gR2VuZXJhciBsb3Mgc2VnbWVudG9zPC9zcGFuPiANCmBgYHtyfQ0Kc2VnbWVudG9zIDwtIGttZWFucyhkZixncnVwb3MpDQpzZWdtZW50b3MNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjogbWFnZW50YTsiPlBhc28gNy4gQXNpZ25hciBlbCBncnVwbyBhbCBxdWUgcGVydGVuZWNlIGNhZGEgb2JzZXJ2YWNpw7NuPC9zcGFuPiANCmBgYHtyfQ0KYXNpZ25hY2lvbiA8LSBjYmluZChkYXRvcywgY2x1c3RlciA9IHNlZ21lbnRvcyRjbHVzdGVyKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBtYWdlbnRhOyI+UGFzbyA4LiBHcmFmaWNhciBsb3MgY2x1c3RlcnM8L3NwYW4+IA0KYGBge3J9DQpmdml6X2NsdXN0ZXIoc2VnbWVudG9zLCBkYXRhPWRmKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBtYWdlbnRhOyI+UGFzbyA5LiBPcHRpbWl6YXIgbGEgY2FudGlkYWQgZGUgZ3J1cG9zPC9zcGFuPiANCkxhIGNhbnRpZGFkIMOzcHRpbWEgZGUgZ3J1cG9zIGNvcnJlc3BvbmRlIGFsIHB1bnRvIG3DoXMgYWx0byBkZSBsYSBzaWd1aWVudGUgZ3LDoWZpY2EuDQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCm9wdGltaXphY2lvbiA8LSBjbHVzR2FwKGRmLCBGVU49a21lYW5zLCBuc3RhcnQ9MSwgSy5tYXggPTEwKQ0KcGxvdChvcHRpbWl6YWNpb24sIHhsYWI9Ik7Dum1lcm8gZGUgY2x1c3RlcnMgayIpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6IG1hZ2VudGE7Ij5QYXNvIDEwLiBDb21wYXJhciBzZWdtZW50b3MgPC9zcGFuPiANCmBgYHtyfQ0KcHJvbWVkaW8gPC0gYWdncmVnYXRlKGFzaWduYWNpb24sIGJ5PWxpc3QoYXNpZ25hY2lvbiRjbHVzdGVyKSwgRlVOPW1lYW4pDQpwcm9tZWRpbw0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBtYWdlbnRhOyI+Q29uY2x1c2nDs248L3NwYW4+ICANCkxhIHNlZ21lbnRhY2nDs24gbyBjbHVzdGVycyBlcyB1biBhbGdvcml0bW8gw7p0aWwgcGFyYSBpZGVudGlmaWNhciBlbCBjdWx0aXZhciBjb3JyZXNwb25kaWVudGUgYSBjYWRhIHZpbm8u