
Paso 1. Instalar paquetes y llamar librerías
#install.packages("cluster") # Análisis de Agrupamiento
library(cluster)
#install.packages("ggplot2") # Paquete de gráficas
library(ggplot2)
#install.packages("data.table") # Manejo de muchos datos
library(data.table)
#install.packages("factoextra") # Gráfica optimización de número de clusters
library(factoextra)
Paso 2. Obtener los datos
# Load the cleaned social-network dataset.
# NOTE(review): this is an absolute path into one user's home directory, so it
# only resolves on that machine -- confirm or adjust before running elsewhere.
dfrs <- read.csv(
  file = "/Users/pablosancho/Desktop/Concentración/Modulo 2 Concentracion (R)/redsocial1_clean.csv"
)
# Keep only the two columns that are fed to the clustering steps.
# NOTE(review): IDEstado looks like a state identifier rather than a
# measurement -- confirm it is intended as a clustering feature.
dfrs_numeric <- dfrs[c("Usuarios", "IDEstado")]
Paso 3. Entender los datos
# Show per-column summary statistics for the full dataset.
summary(object = dfrs)
## Estado Usuarios IDEstado
## Length:32 Min. : 620788 Min. : 1.00
## Class :character 1st Qu.: 1435961 1st Qu.: 8.75
## Mode :character Median : 2203361 Median :16.50
## Mean : 2866902 Mean :16.50
## 3rd Qu.: 3329249 3rd Qu.:24.25
## Max. :12313382 Max. :32.00
Paso 3.5. Escalar los datos
# Standardize every column (center on its mean, divide by its standard
# deviation) so both variables are on a comparable scale for clustering.
datos_escalados <- scale(dfrs_numeric, center = TRUE, scale = TRUE)
Paso 4. Determinar número de grupos
# Scatter plot of the standardized data for a visual inspection.
plot(x = datos_escalados)

# Number of clusters requested from k-means in the next step.
grupos <- 10
Paso 5. Generar los grupos
# Partition the standardized data into `grupos` clusters with k-means.
# k-means starts from randomly chosen centers, so the seed is fixed to make the
# partition reproducible, and 25 random starts are tried (the best one is kept)
# instead of relying on a single initialization.
set.seed(123)
clusterrs <- kmeans(datos_escalados, centers = grupos, nstart = 25)
# Print the fitted clustering (sizes, centers, assignments, sums of squares).
clusterrs
## K-means clustering with 10 clusters of sizes 3, 2, 5, 4, 4, 1, 1, 5, 4, 3
##
## Cluster means:
## Usuarios IDEstado
## 1 -0.68557148 1.51017174
## 2 1.69028017 -0.53300179
## 3 -0.51076481 0.37310125
## 4 0.58713300 0.02665009
## 5 0.02613631 -1.17260394
## 6 3.92329317 -0.15990054
## 7 0.98061616 1.43910484
## 8 -0.27886229 1.01270340
## 9 -0.87929095 -1.38580466
## 10 -0.40517771 -0.51523506
##
## Clustering vector:
## [1] 9 5 9 9 5 9 5 5 2 10 4 10 10 2 6 4 3 3 4 3 4 3 3 8 8
## [26] 8 8 8 1 7 1 1
##
## Within cluster sum of squares by cluster:
## [1] 0.09634001 0.19014606 0.51650053 0.86717455 0.34462944 0.00000000
## [7] 0.00000000 0.21655506 0.16503669 0.15413381
## (between_SS / total_SS = 95.9 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
Paso 6. Optimizar el número de grupos
# Compute the gap statistic for 1 to 10 clusters to judge how many groups the
# data supports. The seed is fixed so the result is reproducible.
set.seed(123)
optimizacionrs <- clusGap(
  datos_escalados,
  FUNcluster = kmeans, # spelled out in full; `FUN` only worked by partial matching
  nstart = 25,         # forwarded to kmeans
  K.max = 10
)
# Plot the gap statistic against the number of clusters.
plot(optimizacionrs, xlab = "Numero de clusters k")

Paso 7. Graficar los grupos
# Draw the k-means partition over the standardized observations.
fviz_cluster(object = clusterrs, data = datos_escalados)

LS0tCnRpdGxlOiAiTWV4aWNvIgphdXRob3I6ICJQYWJsbyBTYW5jaG8gQTAxNzIyMjM2IgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6IAogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiBqb3VybmFsCmRhdGU6ICIyMDI1LTA4LTE5IgotLS0KCiFbXShodHRwczovL3VwbG9hZC53aWtpbWVkaWEub3JnL3dpa2lwZWRpYS9jb21tb25zLzUvNWMvQW5pbWF0ZWQtRmxhZy1NZXhpY28uZ2lmKQoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Z3JleTsiPiBQYXNvIDEuIEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMgPC9zcGFuPgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpICMgQW7DoWxpc2lzIGRlIEFncnVwYW1pZW50bwpsaWJyYXJ5KGNsdXN0ZXIpCiNpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikgIyBQYXF1ZXRlIGRlIGdyw6FmaWNhcwpsaWJyYXJ5KGdncGxvdDIpCiNpbnN0YWxsLnBhY2thZ2VzKCJkYXRhLnRhYmxlIikgIyBNYW5lam8gZGUgbXVjaG9zIGRhdG9zCmxpYnJhcnkoZGF0YS50YWJsZSkKI2luc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKSAjIEdyw6FmaWNhIG9wdGltaXphY2nDs24gZGUgbsO6bWVybyBkZSBjbHVzdGVycwpsaWJyYXJ5KGZhY3RvZXh0cmEpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Z3JleTsiPiBQYXNvIDIuIE9idGVuZXIgbG9zIGRhdG9zIDwvc3Bhbj4KYGBge3J9CmRmcnMgPC0gcmVhZC5jc3YoIi9Vc2Vycy9wYWJsb3NhbmNoby9EZXNrdG9wL0NvbmNlbnRyYWNpb8yBbi9Nb2R1bG8gMiBDb25jZW50cmFjaW9uIChSKS9yZWRzb2NpYWwxX2NsZWFuLmNzdiIpCgpkZnJzX251bWVyaWMgPC0gZGZyc1ssYygiVXN1YXJpb3MiLCJJREVzdGFkbyIpXQpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmdyZXk7Ij4gUGFzbyAzLiBFbnRlbmRlciBsb3MgZGF0b3MgPC9zcGFuPgpgYGB7cn0Kc3VtbWFyeShkZnJzKQpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gUGFzbyAzLjUuIEVzY2FsYXIgbG9zIGRhdG9zIDwvc3Bhbj4KYGBge3J9CmRhdG9zX2VzY2FsYWRvcyA8LSBzY2FsZShkZnJzX251bWVyaWMpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDQuIERldGVybWluYXIgbnVtZXJvIGRlIGdydXBvcyA8L3NwYW4+CmBgYHtyfQpwbG90KGRhdG9zX2VzY2FsYWRvcykKZ3J1cG9zIDwtIDEwCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDUuIEdlbmVyYXIgbG9zIGdydXBvcyA8L3NwYW4+CmBgYHtyfQpjbHVzdGVycnMgPC0ga21lYW5zKGRhdG9zX2VzY2FsYWRvcywgZ3J1cG9zKQpjbHVzdGVycnMKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gNi4gT3B0aW1pemFyIGVsIG51bWVybyBkZSBncnVwb3MgPC9zcGFuPgpgYGB7cn0Kc2V0LnNlZWQoMTIzKQpvcHRp
bWl6YWNpb25ycyA8LSBjbHVzR2FwKGRhdG9zX2VzY2FsYWRvcywgRlVOID0ga21lYW5zLCBuc3RhcnQgPSAyNSwgSy5tYXggPSAxMCkKcGxvdCAob3B0aW1pemFjaW9ucnMsIHhsYWI9Ik51bWVybyBkZSBjbHVzdGVycyBrIikKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gNy4gR3JhZmljYXIgbG9zIGdydXBvcyA8L3NwYW4+CmBgYHtyfQpmdml6X2NsdXN0ZXIoY2x1c3RlcnJzLCBkYXRhPWRhdG9zX2VzY2FsYWRvcykKYGBgCgo=