Paso 1. Instalar paquetes y llamar librerías

#install.packages("cluster") # Análisis de Agrupamiento
library(cluster)
#install.packages("ggplot2") # Paquete de gráficas
library(ggplot2)
#install.packages("data.table") # Manejo de muchos datos
library(data.table)
#install.packages("factoextra") # Gráfica optimización de número de clusters
library(factoextra)

Paso 2. Obtener los datos

# Read the CSV file located at the absolute path below into a data frame.
dfrs <- read.csv(
  file = "/Users/pablosancho/Desktop/Concentración/Modulo 2 Concentracion (R)/redsocial1_clean.csv"
)

# Keep only the two columns that are used as clustering features.
dfrs_numeric <- dfrs[c("Usuarios", "IDEstado")]

Paso 3. Entender los datos

# Print per-column summary statistics of the full data frame.
print(summary(dfrs))
##     Estado             Usuarios           IDEstado    
##  Length:32          Min.   :  620788   Min.   : 1.00  
##  Class :character   1st Qu.: 1435961   1st Qu.: 8.75  
##  Mode  :character   Median : 2203361   Median :16.50  
##                     Mean   : 2866902   Mean   :16.50  
##                     3rd Qu.: 3329249   3rd Qu.:24.25  
##                     Max.   :12313382   Max.   :32.00

Paso 3.5. Escalar los datos

# Standardise each feature column (subtract its mean, divide by its standard
# deviation); centering and scaling are the defaults, spelled out here.
datos_escalados <- scale(dfrs_numeric, center = TRUE, scale = TRUE)

Paso 4. Determinar número de grupos

# Scatter plot of the standardised observations for a visual inspection.
plot(x = datos_escalados)

# Number of clusters that is passed to kmeans().
grupos <- 10

Paso 5. Generar los grupos

# Run k-means on the standardised data with `grupos` clusters.
# kmeans() chooses its initial centres at random, so the seed is fixed to make
# the resulting partition reproducible across runs. nstart = 25 tries several
# random starts and keeps the best one, matching the nstart value used for the
# gap statistic further down.
set.seed(123)
clusterrs <- kmeans(datos_escalados, centers = grupos, nstart = 25)

# Print the fitted object (cluster sizes, centres, assignments, sums of squares).
clusterrs
## K-means clustering with 10 clusters of sizes 3, 2, 5, 4, 4, 1, 1, 5, 4, 3
## 
## Cluster means:
##       Usuarios    IDEstado
## 1  -0.68557148  1.51017174
## 2   1.69028017 -0.53300179
## 3  -0.51076481  0.37310125
## 4   0.58713300  0.02665009
## 5   0.02613631 -1.17260394
## 6   3.92329317 -0.15990054
## 7   0.98061616  1.43910484
## 8  -0.27886229  1.01270340
## 9  -0.87929095 -1.38580466
## 10 -0.40517771 -0.51523506
## 
## Clustering vector:
##  [1]  9  5  9  9  5  9  5  5  2 10  4 10 10  2  6  4  3  3  4  3  4  3  3  8  8
## [26]  8  8  8  1  7  1  1
## 
## Within cluster sum of squares by cluster:
##  [1] 0.09634001 0.19014606 0.51650053 0.86717455 0.34462944 0.00000000
##  [7] 0.00000000 0.21655506 0.16503669 0.15413381
##  (between_SS / total_SS =  95.9 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Paso 6. Optimizar el número de grupos

# Fix the RNG seed so the gap-statistic computation is reproducible.
set.seed(123)

# Gap statistic for 1 to 10 clusters; nstart is forwarded to kmeans().
optimizacionrs <- clusGap(
  x = datos_escalados,
  FUNcluster = kmeans,
  K.max = 10,
  nstart = 25
)

# Plot the gap statistic against the number of clusters.
plot(optimizacionrs, xlab = "Numero de clusters k")

Paso 7. Graficar los grupos

# Draw the k-means partition over the standardised observations.
fviz_cluster(object = clusterrs, data = datos_escalados)

LS0tCnRpdGxlOiAiTWV4aWNvIgphdXRob3I6ICJQYWJsbyBTYW5jaG8gQTAxNzIyMjM2IgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6IAogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiBqb3VybmFsCmRhdGU6ICIyMDI1LTA4LTE5IgotLS0KCiFbXShodHRwczovL3VwbG9hZC53aWtpbWVkaWEub3JnL3dpa2lwZWRpYS9jb21tb25zLzUvNWMvQW5pbWF0ZWQtRmxhZy1NZXhpY28uZ2lmKQoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Z3JleTsiPiBQYXNvIDEuIEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMgPC9zcGFuPgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpICMgQW7DoWxpc2lzIGRlIEFncnVwYW1pZW50bwpsaWJyYXJ5KGNsdXN0ZXIpCiNpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikgIyBQYXF1ZXRlIGRlIGdyw6FmaWNhcwpsaWJyYXJ5KGdncGxvdDIpCiNpbnN0YWxsLnBhY2thZ2VzKCJkYXRhLnRhYmxlIikgIyBNYW5lam8gZGUgbXVjaG9zIGRhdG9zCmxpYnJhcnkoZGF0YS50YWJsZSkKI2luc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKSAjIEdyw6FmaWNhIG9wdGltaXphY2nDs24gZGUgbsO6bWVybyBkZSBjbHVzdGVycwpsaWJyYXJ5KGZhY3RvZXh0cmEpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Z3JleTsiPiBQYXNvIDIuIE9idGVuZXIgbG9zIGRhdG9zIDwvc3Bhbj4KYGBge3J9CmRmcnMgPC0gcmVhZC5jc3YoIi9Vc2Vycy9wYWJsb3NhbmNoby9EZXNrdG9wL0NvbmNlbnRyYWNpb8yBbi9Nb2R1bG8gMiBDb25jZW50cmFjaW9uIChSKS9yZWRzb2NpYWwxX2NsZWFuLmNzdiIpCgpkZnJzX251bWVyaWMgPC0gZGZyc1ssYygiVXN1YXJpb3MiLCJJREVzdGFkbyIpXQpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmdyZXk7Ij4gUGFzbyAzLiBFbnRlbmRlciBsb3MgZGF0b3MgPC9zcGFuPgpgYGB7cn0Kc3VtbWFyeShkZnJzKQpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gUGFzbyAzLjUuIEVzY2FsYXIgbG9zIGRhdG9zIDwvc3Bhbj4KYGBge3J9CmRhdG9zX2VzY2FsYWRvcyA8LSBzY2FsZShkZnJzX251bWVyaWMpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDQuIERldGVybWluYXIgbnVtZXJvIGRlIGdydXBvcyA8L3NwYW4+CmBgYHtyfQpwbG90KGRhdG9zX2VzY2FsYWRvcykKZ3J1cG9zIDwtIDEwCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDUuIEdlbmVyYXIgbG9zIGdydXBvcyA8L3NwYW4+CmBgYHtyfQpjbHVzdGVycnMgPC0ga21lYW5zKGRhdG9zX2VzY2FsYWRvcywgZ3J1cG9zKQpjbHVzdGVycnMKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gNi4gT3B0aW1pemFyIGVsIG51bWVybyBkZSBncnVwb3MgPC9zcGFuPgpgYGB7cn0Kc2V0LnNlZWQoMTIzKQpvcHRp
bWl6YWNpb25ycyA8LSBjbHVzR2FwKGRhdG9zX2VzY2FsYWRvcywgRlVOID0ga21lYW5zLCBuc3RhcnQgPSAyNSwgSy5tYXggPSAxMCkKcGxvdCAob3B0aW1pemFjaW9ucnMsIHhsYWI9Ik51bWVybyBkZSBjbHVzdGVycyBrIikKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gNy4gR3JhZmljYXIgbG9zIGdydXBvcyA8L3NwYW4+CmBgYHtyfQpmdml6X2NsdXN0ZXIoY2x1c3RlcnJzLCBkYXRhPWRhdG9zX2VzY2FsYWRvcykKYGBgCgo=