# Teoría Agrupamiento o clustering es una técnica de aprendizaje automático no supervisado que agrupa datos en función de su similitud. Algunos usos típicos de esta técnica son: * Segmentación de clientes * Detección de anormalidades * Categorización de documentos

Instalar paquetes y llamar librerías

#install.packages("cluster") # Análisis de Agrupamiento
library(cluster)
#install.packages("ggplot2") # Graficar
library(ggplot2)
#install.packages("data.table") # Manejo de muchos datos
library(data.table)
# install.packages("factoextra") # Gráfica optimización de número de clusters
library(factoextra)
library(readxl)
library(dplyr)

Obtener los datos

super <- read_excel("C:\\Users\\Emili\\OneDrive\\Desktop\\TEC\\Tec 6to Semestre Concentracion\\Modulo 2\\Archivos CSV\\supermarket.xlsx")
## Warning: Expecting numeric in A522063 / R522063C1: got 'A563185'
## Warning: Expecting numeric in A522064 / R522064C1: got 'A563186'
## Warning: Expecting numeric in A522065 / R522065C1: got 'A563187'
super <- na.omit(super)

Entender los datos

summary(super)
##      BillNo         Itemname            Quantity       
##  Min.   :536365   Length:388023      Min.   :    1.00  
##  1st Qu.:549225   Class :character   1st Qu.:    2.00  
##  Median :561888   Mode  :character   Median :    5.00  
##  Mean   :560611                      Mean   :   12.89  
##  3rd Qu.:572131                      3rd Qu.:   12.00  
##  Max.   :581587                      Max.   :80995.00  
##       Date                          Time                         Price         
##  Min.   :2010-12-01 00:00:00   Min.   :1899-12-31 06:20:00   Min.   :   0.000  
##  1st Qu.:2011-04-07 00:00:00   1st Qu.:1899-12-31 11:44:00   1st Qu.:   1.250  
##  Median :2011-07-31 00:00:00   Median :1899-12-31 13:11:00   Median :   1.950  
##  Mean   :2011-07-10 08:10:29   Mean   :1899-12-31 13:15:23   Mean   :   3.079  
##  3rd Qu.:2011-10-21 00:00:00   3rd Qu.:1899-12-31 14:50:00   3rd Qu.:   3.750  
##  Max.   :2011-12-09 00:00:00   Max.   :1899-12-31 20:18:00   Max.   :8142.750  
##    CustomerID      Country         
##  Min.   :12346   Length:388023     
##  1st Qu.:13950   Class :character  
##  Median :15265   Mode  :character  
##  Mean   :15317                     
##  3rd Qu.:16837                     
##  Max.   :18287
str(super)
## tibble [388,023 × 8] (S3: tbl_df/tbl/data.frame)
##  $ BillNo    : num [1:388023] 536365 536365 536365 536365 536365 ...
##  $ Itemname  : chr [1:388023] "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
##  $ Quantity  : num [1:388023] 6 6 8 6 6 2 6 6 6 32 ...
##  $ Date      : POSIXct[1:388023], format: "2010-12-01" "2010-12-01" ...
##  $ Time      : POSIXct[1:388023], format: "1899-12-31 08:26:00" "1899-12-31 08:26:00" ...
##  $ Price     : num [1:388023] 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
##  $ CustomerID: num [1:388023] 17850 17850 17850 17850 17850 ...
##  $ Country   : chr [1:388023] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
##  - attr(*, "na.action")= 'omit' Named int [1:134041] 614 1411 1412 1413 1414 1415 1416 1417 1418 1419 ...
##   ..- attr(*, "names")= chr [1:134041] "614" "1411" "1412" "1413" ...

Crear columnas adicionales

super <- super %>%
  mutate(Subtotal = Quantity * Price)
totales_factura <- super %>%
  group_by(CustomerID, BillNo) %>%
  summarise(TotalFactura = sum(Subtotal, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'CustomerID'. You can override using the
## `.groups` argument.
ticket_cliente <- totales_factura %>%
  group_by(CustomerID) %>%
  summarise(
    Frecuencia = n(),  # número de facturas
    TicketPromedio = mean(TotalFactura)
  ) %>%
  ungroup()

Escalar los datos

# Sólo si los datos no están en la misma escala.
ticket_cliente <- ticket_cliente %>%
  mutate(
    Frecuencia_scaled = as.numeric(scale(Frecuencia)),
    TicketPromedio_scaled = as.numeric(scale(TicketPromedio))
  )
df1 <- ticket_cliente %>%
  select(Frecuencia_scaled, TicketPromedio_scaled) %>%
  na.omit()

Numero optimo de grupos

set.seed(123)
optimizacion1 <- clusGap(df1, FUN=kmeans, nstart=1, K.max=7)
# El K.max normalmente es 10, en este ejercicio al ser 8 datos se dejó en
7.
## [1] 7
plot(optimizacion1, xlab="Número de clusters k", main="Optimización de
Clusters")

# Se selecciona como óptimo el primer punto más alto.
wss <- numeric(10)

for (k in 1:10) {
  kmeans_model <- kmeans(df1, centers = k, nstart = 25)
  wss[k] <- kmeans_model$tot.withinss
}
plot(1:10, wss,
     type = "b",
     pch = 19,
     xlab = "Número de clusters (k)",
     ylab = "Within-Cluster Sum of Squares",
     main = "Método del Codo")

Determinar número de grupos

# Siempre es un valor inicial "cualquiera", luego se optimiza.
plot(ticket_cliente$Frecuencia_scaled,ticket_cliente$TicketPromedio_scaled)

grupos1 <- 4

Generar los grupos

set.seed(123)
clusters1 <- kmeans(df1,grupos1)
clusters1
## K-means clustering with 4 clusters of sizes 19, 2, 436, 3840
## 
## Cluster means:
##   Frecuencia_scaled TicketPromedio_scaled
## 1         9.8400548            0.42762303
## 2        -0.3845733           44.52401880
## 3         1.5348491            0.01814010
## 4        -0.2227568           -0.02736509
## 
## Clustering vector:
##    [1] 2 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4
##   [38] 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
##   [75] 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 3 4 3 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 3 4 4
##  [112] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
##  [149] 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 3 4
##  [186] 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4
##  [223] 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 3 3 3 4 4 4 4 4
##  [260] 4 4 4 4 4 4 4 4 4 4 3 4 3 3 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3 4 4 4
##  [297] 4 4 4 4 4 4 4 3 1 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
##  [334] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4
##  [371] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4
##  [408] 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4
##  [445] 4 4 4 4 3 4 4 4 4 4 4 4 4 4 1 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4
##  [482] 3 4 4 4 4 4 3 3 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 3 4 4 4
##  [519] 4 4 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 3 3 4 4 1 3 4 4 4 3 4 3 3 4 4 3 4 4 4 4
##  [556] 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4
##  [593] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 3 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
##  [630] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
##  [667] 4 4 3 4 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4
##  [704] 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
##  [741] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 4 4
##  [778] 4 4 3 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 3
##  [815] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
##  [852] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4
##  [889] 4 4 4 4 3 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
##  [926] 4 4 4 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4
##  [963] 4 4 4 4 4 4 4 4 4 4 4 4 1 3 4 4 3 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1000] 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 3 3 4 4 4
## [1037] 3 4 4 4 4 3 4 4 4 4 4 1 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1074] 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
## [1111] 3 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1148] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4
## [1185] 3 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4
## [1222] 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 3 3 4 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3
## [1259] 4 3 4 4 3 4 3 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4
## [1296] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 3 3 4
## [1333] 3 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3
## [1370] 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 3 3 4
## [1407] 4 4 3 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1444] 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4 3 4 3 4 4
## [1481] 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1518] 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1555] 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 1 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3
## [1592] 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
## [1629] 4 4 4 4 4 1 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 4 4
## [1666] 4 4 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3
## [1703] 3 4 3 4 4 3 4 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4 4 4
## [1740] 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 3 4 4 4
## [1777] 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4
## [1814] 4 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 4 4 4
## [1851] 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
## [1888] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 3 4 4 4 4 4 4 4 3
## [1925] 4 4 4 4 1 4 4 4 3 4 3 4 4 4 4 4 4 4 4 4 3 3 4 1 4 4 4 4 4 4 4 4 4 4 4 4 4
## [1962] 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4
## [1999] 4 3 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 3 3 4 4 4 4 4 3 4 4 4 4 4 4 4 4
## [2036] 4 4 4 4 4 4 4 4 4 4 3 3 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4
## [2073] 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [2110] 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 4 3 3 4 4 4 3 4 4 3 4 4 4 4 4 1 4 4 4 4 4 4
## [2147] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4
## [2184] 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [2221] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4
## [2258] 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4
## [2295] 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3
## [2332] 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 3 4 4 4 4 3 4 3 4 4 4
## [2369] 4 3 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
## [2406] 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3
## [2443] 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [2480] 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4 4 4 4 4
## [2517] 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4
## [2554] 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [2591] 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4
## [2628] 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 1 4 4 4 4 4 3 4 4 4 4 4 4 4
## [2665] 1 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4
## [2702] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [2739] 4 3 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4
## [2776] 4 4 4 4 4 4 4 3 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4
## [2813] 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4
## [2850] 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 3 4 4
## [2887] 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
## [2924] 4 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 1 4 4 4 3 4 4 4
## [2961] 4 4 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [2998] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4 4
## [3035] 4 4 4 4 4 4 4 3 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
## [3072] 3 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 3 4 4
## [3109] 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3 4 3 4 4 3 4 4 4 4 3 4 4 4 4 4 4
## [3146] 4 4 4 4 3 3 3 4 4 4 3 4 3 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 4 4 4 4 4 4
## [3183] 4 4 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 3 4 4 4 3 4 4 3
## [3220] 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 3 3 4 4 4 4 4 4 4 4 4 4
## [3257] 4 4 4 3 4 3 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [3294] 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 3 4 4 4 4 4 4 3 4 3 4 4 4 3 4 4 3 4 4 4
## [3331] 4 4 4 3 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4
## [3368] 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [3405] 4 4 4 4 4 3 4 4 4 4 4 4 4 4 3 4 4 4 3 3 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3
## [3442] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
## [3479] 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [3516] 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4 4 4
## [3553] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4
## [3590] 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4
## [3627] 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4
## [3664] 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 4 4 4 4 4 4 4 4 4 4
## [3701] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 3 3 4 4 4 4
## [3738] 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [3775] 4 4 4 4 4 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 3 4 4 4 4 4
## [3812] 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 3 3 4 4 4 4 4 3 4 4 4 4 4 4
## [3849] 4 4 4 4 4 4 3 4 3 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 3 3 4 4 4 4 4
## [3886] 4 4 3 4 3 4 4 4 4 3 4 4 3 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3 4 4 3 4 4 4 4 4 4
## [3923] 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 3 3 4 4 4 3 3 4 4 4 4 4 4
## [3960] 4 4 4 4 4 4 4 4 4 4 4 1 4 4 3 4 3 4 4 4 4 3 3 4 4 4 4 4 4 3 4 4 4 3 4 4 4
## [3997] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4
## [4034] 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 1 4 4 3 4 4 4 4 4 3 4 4 3 4 4 4 4 4
## [4071] 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4
## [4108] 4 4 4 4 4 4 4 4 4 3 4 4 3 4 4 4 4 3 4 4 4 3 4 4 3 4 4 4 4 4 4 3 4 3 3 4 4
## [4145] 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 1 4 4 4 4 3 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4
## [4182] 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4
## [4219] 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 3 4 3 3 4
## [4256] 4 3 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
## [4293] 4 4 4 3 4
## 
## Within cluster sum of squares by cluster:
## [1] 573.026031   7.656972 433.142874 549.246270
##  (between_SS / total_SS =  81.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Graficar los grupos

fviz_cluster(clusters1, data=df1)

Agregar Clusters a la Base de Datos

df1_clusters <- cbind(df1, cluster = clusters1$cluster)
head(df1_clusters)
##   Frecuencia_scaled TicketPromedio_scaled cluster
## 1        -0.4550880           42.56863812       2
## 2         0.3910881            0.11101871       4
## 3        -0.4550880            0.74417746       4
## 4        -0.4550880           -0.04497273       4
## 5         0.5321175           -0.05669783       4
## 6        -0.4550880           -0.18104936       4

Conclusiones

Con base a nuestro analisis, se decidio que el numero optimo de clusteres sea 4. La segmentación nos permite clasificar a los clientes según su frecuencia de compra y su ticket promedio. Además, se decidió mantener los valores de clientes con compras consideradas como outliers ya que son casos relevantes.

Los clústeres son los siguientes:

Cluster 1: Van muchas veces y gastan poco Comportamiento: Clientes que compran con alta frecuencia pero con bajo ticket promedio. Realizan muchas visitas, pero su gasto por compra es poco. Recomendación: Implementar estrategias de incremento de ticket como promociones por volumen, paquetes combinados, ventas cruzadas y programas de lealtad que incentiven gastar más en cada visita.

Cluster 2: Pocas visitas y compras masivas. Comportamiento: Clientes con baja frecuencia de compra pero ticket promedio alto. Realizan compras grandes, aunque no visitan con regularidad. Recomendación: Desarrollar estrategias de retención y recompra, como recordatorios personalizados, membresías o beneficios exclusivos que incentiven que regresen más seguido sin afectar su nivel de gasto.

Cluster 3: Van pocas veces y gastan poco. Comportamiento: Clientes con baja frecuencia y bajo ticket promedio. Representan el segmento de menor valor. Recomendación: Aplicar campañas de activación como descuentos iniciales, promociones dirigidas o estrategias digitales para incentivar mayor recurrencia y aumentar el valor promedio de compra.

Cluster 4: Clientes balanceados, frecuencia y ticket promedio moderados. Comportamiento: Clientes con comportamiento intermedio y equilibrado, tanto en frecuencia como en ticket promedio. Representan un segmento estable. Recomendación: Fidelizarlos mediante programas de recompensas, beneficios personalizados y segmentación avanzada para convertirlos en clientes de alto valor, incrementando gradualmente su frecuencia y gasto promedio.

LS0tDQp0aXRsZTogIkNsdXN0ZXJpbmcgLSBTdXBlcm1hcmtldCINCmF1dGhvcjogIkVtaWxpbyBDaXVmZmFyZGkgQTAxNjEyNTY5Ig0KZGF0ZTogIjIwMjYtMDItMjMiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgY29kZV9kb3dubG9hZDogVFJVRQ0KICAgIHRoZW1lOiB5ZXRpDQotLS0NCg0KIVtdKGh0dHBzOi8vc3RhdGljLndpa2lhLm5vY29va2llLm5ldC90aGVhbWF6aW5nd29ybGRvZmd1bWJhbGwvaW1hZ2VzL2EvYTAvRWxtb3JlU2hvcHBpbmcucG5nL3JldmlzaW9uL2xhdGVzdD9jYj0yMDEzMDkyMTEwMzUxNw0KKQ0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBUZW9yw61hIDwvc3Bhbj4NCioqQWdydXBhbWllbnRvKiogbyAqY2x1c3RlcmluZyogZXMgdW5hIHTDqWNuaWNhIGRlIGFwcmVuZGl6YWplIGF1dG9tw6F0aWNvDQpubyBzdXBlcnZpc2FkbyBxdWUgYWdydXBhIGRhdG9zIGVuIGZ1bmNpw7NuIGRlIHN1IHNpbWlsaXR1ZC4NCkFsZ3Vub3MgdXNvcyB0w61waWNvcyBkZSBlc3RhIHTDqWNuaWNhIHNvbjoNCiogU2VnbWVudGFjacOzbiBkZSBjbGllbnRlcw0KKiBEZXRlY2Npw7NuIGRlIGFub3JtYWxpZGFkZXMNCiogQ2F0ZWdvcml6YWNpw7NuIGRlIGRvY3VtZW50b3MNCg0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMNCjwvc3Bhbj4NCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpICMgQW7DoWxpc2lzIGRlIEFncnVwYW1pZW50bw0KbGlicmFyeShjbHVzdGVyKQ0KI2luc3RhbGwucGFja2FnZXMoImdncGxvdDIiKSAjIEdyYWZpY2FyDQpsaWJyYXJ5KGdncGxvdDIpDQojaW5zdGFsbC5wYWNrYWdlcygiZGF0YS50YWJsZSIpICMgTWFuZWpvIGRlIG11Y2hvcyBkYXRvcw0KbGlicmFyeShkYXRhLnRhYmxlKQ0KIyBpbnN0YWxsLnBhY2thZ2VzKCJmYWN0b2V4dHJhIikgIyBHcsOhZmljYSBvcHRpbWl6YWNpw7NuIGRlIG7Dum1lcm8gZGUgY2x1c3RlcnMNCmxpYnJhcnkoZmFjdG9leHRyYSkNCmxpYnJhcnkocmVhZHhsKQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IE9idGVuZXIgbG9zIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0Kc3VwZXIgPC0gcmVhZF9leGNlbCgiQzpcXFVzZXJzXFxFbWlsaVxcT25lRHJpdmVcXERlc2t0b3BcXFRFQ1xcVGVjIDZ0byBTZW1lc3RyZSBDb25jZW50cmFjaW9uXFxNb2R1bG8gMlxcQXJjaGl2b3MgQ1NWXFxzdXBlcm1hcmtldC54bHN4IikNCnN1cGVyIDwtIG5hLm9taXQoc3VwZXIpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiAgRW50ZW5kZXIgbG9zIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0Kc3VtbWFyeShzdXBlcikNCnN0cihzdXBlcikNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+ICBDcmVhciBjb2x1bW5hcyBhZGljaW9uYWxlcyA8L3NwYW4+DQpgYGB7cn0NCnN1cGVyIDwtIHN1cGVyICU+JQ0KICBtdXRhdGUoU3VidG90YWwgPSBRdWFudGl0eSAqIFByaWNlKQ0KdG90YWxlc19mYWN0dXJhIDwtIHN1cGVyICU+JQ0KICBncm91cF9ieShDdXN0b21lcklELCBCaWxsTm8pICU+JQ0KICBzdW1tYXJpc2UoVG90YWxGYWN0dXJhID0gc3VtKFN1YnRvdGFsLCBuYS5ybSA9IFRSVUUpKSAlPiUNCiAgdW5ncm91cCgpDQp0aWNrZXRfY2xpZW50ZSA8LSB0b3RhbGVzX2ZhY3R1cmEgJT4lDQogIGdyb3VwX2J5KEN1c3RvbWVySUQpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgRnJlY3VlbmNpYSA9IG4oKSwgICMgbsO6bWVybyBkZSBmYWN0dXJhcw0KICAgIFRpY2tldFByb21lZGlvID0gbWVhbihUb3RhbEZhY3R1cmEpDQogICkgJT4lDQogIHVuZ3JvdXAoKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gIEVzY2FsYXIgbG9zIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0KIyBTw7NsbyBzaSBsb3MgZGF0b3Mgbm8gZXN0w6FuIGVuIGxhIG1pc21hIGVzY2FsYS4NCnRpY2tldF9jbGllbnRlIDwtIHRpY2tldF9jbGllbnRlICU+JQ0KICBtdXRhdGUoDQogICAgRnJlY3VlbmNpYV9zY2FsZWQgPSBhcy5udW1lcmljKHNjYWxlKEZyZWN1ZW5jaWEpKSwNCiAgICBUaWNrZXRQcm9tZWRpb19zY2FsZWQgPSBhcy5udW1lcmljKHNjYWxlKFRpY2tldFByb21lZGlvKSkNCiAgKQ0KYGBgDQpgYGB7cn0NCmRmMSA8LSB0aWNrZXRfY2xpZW50ZSAlPiUNCiAgc2VsZWN0KEZyZWN1ZW5jaWFfc2NhbGVkLCBUaWNrZXRQcm9tZWRpb19zY2FsZWQpICU+JQ0KICBuYS5vbWl0KCkNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+ICBOdW1lcm8gb3B0aW1vIGRlIGdydXBvcyA8L3NwYW4+DQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCm9wdGltaXphY2lvbjEgPC0gY2x1c0dhcChkZjEsIEZVTj1rbWVhbnMsIG5zdGFydD0xLCBLLm1heD03KQ0KIyBFbCBLLm1heCBub3JtYWxtZW50ZSBlcyAxMCwgZW4gZXN0ZSBlamVyY2ljaW8gYWwgc2VyIDggZGF0b3Mgc2UgZGVqw7MgZW4NCjcuDQpwbG90KG9wdGltaXphY2lvbjEsIHhsYWI9Ik7Dum1lcm8gZGUgY2x1c3RlcnMgayIsIG1haW49Ik9wdGltaXphY2nDs24gZGUNCkNsdXN0ZXJzIikNCiMgU2Ugc2VsZWNjaW9uYSBjb21vIMOzcHRpbW8gZWwgcHJpbWVyIHB1bnRvIG3DoXMgYWx0by4NCmBgYA0KYGBge3J9DQp3c3MgPC0gbnVtZXJpYygxMCkNCg0KZm9yIChrIGluIDE6MTApIHsNCiAga21lYW5zX21vZGVsIDwtIGttZWFucyhkZjEsIGNlbnRlcnMgPSBrLCBuc3RhcnQgPSAyNSkNCiAgd3NzW2tdIDwtIGttZWFuc19tb2RlbCR0b3Qud2l0aGluc3MNCn0NCnBsb3QoMToxMCwgd3NzLA0KICAgICB0eXBlID0gImIiLA0KICAgICBwY2ggPSAxOSwNCiAgICAgeGxhYiA9ICJOw7ptZXJvIGRlIGNsdXN0ZXJzIChrKSIsDQogICAgIHlsYWIgPSAiV2l0aGluLUNsdXN0ZXIgU3VtIG9mIFNxdWFyZXMiLA0KICAgICBtYWluID0gIk3DqXRvZG8gZGVsIENvZG8iKQ0KYGBgDQoNCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBEZXRlcm1pbmFyIG7Dum1lcm8gZGUgZ3J1cG9zIDwvc3Bhbj4NCmBgYHtyfQ0KIyBTaWVtcHJlIGVzIHVuIHZhbG9yIGluaWNpYWwgImN1YWxxdWllcmEiLCBsdWVnbyBzZSBvcHRpbWl6YS4NCnBsb3QodGlja2V0X2NsaWVudGUkRnJlY3VlbmNpYV9zY2FsZWQsdGlja2V0X2NsaWVudGUkVGlja2V0UHJvbWVkaW9fc2NhbGVkKQ0KZ3J1cG9zMSA8LSA0DQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBHZW5lcmFyIGxvcyBncnVwb3MgPC9zcGFuPg0KYGBge3J9DQpzZXQuc2VlZCgxMjMpDQpjbHVzdGVyczEgPC0ga21lYW5zKGRmMSxncnVwb3MxKQ0KY2x1c3RlcnMxDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBHcmFmaWNhciBsb3MgZ3J1cG9zIDwvc3Bhbj4NCmBgYHtyfQ0KZnZpel9jbHVzdGVyKGNsdXN0ZXJzMSwgZGF0YT1kZjEpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBBZ3JlZ2FyIENsdXN0ZXJzIGEgbGEgQmFzZSBkZSBEYXRvcw0KPC9zcGFuPg0KYGBge3J9DQpkZjFfY2x1c3RlcnMgPC0gY2JpbmQoZGYxLCBjbHVzdGVyID0gY2x1c3RlcnMxJGNsdXN0ZXIpDQpoZWFkKGRmMV9jbHVzdGVycykNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IENvbmNsdXNpb25lcyA8L3NwYW4+DQpDb24gYmFzZSBhIG51ZXN0cm8gYW5hbGlzaXMsIHNlIGRlY2lkaW8gcXVlIGVsIG51bWVybyBvcHRpbW8gZGUgY2x1c3RlcmVzIHNlYSA0LiBMYSBzZWdtZW50YWNpw7NuIG5vcyBwZXJtaXRlIGNsYXNpZmljYXIgYSBsb3MgY2xpZW50ZXMgc2Vnw7puIHN1IGZyZWN1ZW5jaWEgZGUgY29tcHJhIHkgc3UgdGlja2V0IHByb21lZGlvLiBBZGVtw6FzLCBzZSBkZWNpZGnDsyBtYW50ZW5lciBsb3MgdmFsb3JlcyBkZSBjbGllbnRlcyBjb24gY29tcHJhcyBjb25zaWRlcmFkYXMgY29tbyBvdXRsaWVycyB5YSBxdWUgc29uIGNhc29zIHJlbGV2YW50ZXMuICANCg0KTG9zIGNsw7pzdGVyZXMgc29uIGxvcyBzaWd1aWVudGVzOiAgDQoNCkNsdXN0ZXIgMTogVmFuIG11Y2hhcyB2ZWNlcyB5IGdhc3RhbiBwb2NvIENvbXBvcnRhbWllbnRvOiBDbGllbnRlcyBxdWUgY29tcHJhbiBjb24gYWx0YSBmcmVjdWVuY2lhIHBlcm8gY29uIGJham8gdGlja2V0IHByb21lZGlvLiBSZWFsaXphbiBtdWNoYXMgdmlzaXRhcywgcGVybyBzdSBnYXN0byBwb3IgY29tcHJhIGVzIHBvY28uIFJlY29tZW5kYWNpw7NuOiBJbXBsZW1lbnRhciBlc3RyYXRlZ2lhcyBkZSBpbmNyZW1lbnRvIGRlIHRpY2tldCBjb21vIHByb21vY2lvbmVzIHBvciB2b2x1bWVuLCBwYXF1ZXRlcyBjb21iaW5hZG9zLCB2ZW50YXMgY3J1emFkYXMgeSBwcm9ncmFtYXMgZGUgbGVhbHRhZCBxdWUgaW5jZW50aXZlbiBnYXN0YXIgbcOhcyBlbiBjYWRhIHZpc2l0YS4gIA0KDQpDbHVzdGVyIDI6IFBvY2FzIHZpc2l0YXMgeSBjb21wcmFzIG1hc2l2YXMuIENvbXBvcnRhbWllbnRvOiBDbGllbnRlcyBjb24gYmFqYSBmcmVjdWVuY2lhIGRlIGNvbXByYSBwZXJvIHRpY2tldCBwcm9tZWRpbyBhbHRvLiBSZWFsaXphbiBjb21wcmFzIGdyYW5kZXMsIGF1bnF1ZSBubyB2aXNpdGFuIGNvbiByZWd1bGFyaWRhZC4gUmVjb21lbmRhY2nDs246IERlc2Fycm9sbGFyIGVzdHJhdGVnaWFzIGRlIHJldGVuY2nDs24geSByZWNvbXByYSwgY29tbyByZWNvcmRhdG9yaW9zIHBlcnNvbmFsaXphZG9zLCBtZW1icmVzw61hcyBvIGJlbmVmaWNpb3MgZXhjbHVzaXZvcyBxdWUgaW5jZW50aXZlbiBxdWUgcmVncmVzZW4gbcOhcyBzZWd1aWRvIHNpbiBhZmVjdGFyIHN1IG5pdmVsIGRlIGdhc3RvLiAgDQoNCkNsdXN0ZXIgMzogVmFuIHBvY2FzIHZlY2VzIHkgZ2FzdGFuIHBvY28uIENvbXBvcnRhbWllbnRvOiBDbGllbnRlcyBjb24gYmFqYSBmcmVjdWVuY2lhIHkgYmFqbyB0aWNrZXQgcHJvbWVkaW8uIFJlcHJlc2VudGFuIGVsIHNlZ21lbnRvIGRlIG1lbm9yIHZhbG9yLiBSZWNvbWVuZGFjacOzbjogQXBsaWNhciBjYW1wYcOxYXMgZGUgYWN0aXZhY2nDs24gY29tbyBkZXNjdWVudG9zIGluaWNpYWxlcywgcHJvbW9jaW9uZXMgZGlyaWdpZGFzIG8gZXN0cmF0ZWdpYXMgZGlnaXRhbGVzIHBhcmEgaW5jZW50aXZhciBtYXlvciByZWN1cnJlbmNpYSB5IGF1bWVudGFyIGVsIHZhbG9yIHByb21lZGlvIGRlIGNvbXByYS4gIA0KDQpDbHVzdGVyIDQ6IENsaWVudGVzIGJhbGFuY2VhZG9zLCBmcmVjdWVuY2lhIHkgdGlja2V0IHByb21lZGlvIG1vZGVyYWRvcy4gQ29tcG9ydGFtaWVudG86IENsaWVudGVzIGNvbiBjb21wb3J0YW1pZW50byBpbnRlcm1lZGlvIHkgZXF1aWxpYnJhZG8sIHRhbnRvIGVuIGZyZWN1ZW5jaWEgY29tbyBlbiB0aWNrZXQgcHJvbWVkaW8uIFJlcHJlc2VudGFuIHVuIHNlZ21lbnRvIGVzdGFibGUuIFJlY29tZW5kYWNpw7NuOiBGaWRlbGl6YXJsb3MgbWVkaWFudGUgcHJvZ3JhbWFzIGRlIHJlY29tcGVuc2FzLCBiZW5lZmljaW9zIHBlcnNvbmFsaXphZG9zIHkgc2VnbWVudGFjacOzbiBhdmFuemFkYSBwYXJhIGNvbnZlcnRpcmxvcyBlbiBjbGllbnRlcyBkZSBhbHRvIHZhbG9yLCBpbmNyZW1lbnRhbmRvIGdyYWR1YWxtZW50ZSBzdSBmcmVjdWVuY2lhIHkgZ2FzdG8gcHJvbWVkaW8uICANCg==