Paso 1. Instalar paquetes y llamar librerías

#install.packages("cluster") # Análisis de Agrupamiento
library(cluster)
#install.packages("ggplot2") # Graficar
library(ggplot2)
#install.packages("data.table") # Manejo de muchos datos
library(data.table)
#install.packages("factoextra") # Gráfica optimización de número de clusters
library(factoextra)
#install.packages("readxl")
library(readxl)
#install.packages("tidyverse")
library(tidyverse)

Paso 2. Obtener los datos

df1 <- read_excel("C:\\Users\\karla\\Desktop\\CONCENTRACION\\Modulo_progra\\supermarket.xlsx")
## Warning: Expecting numeric in A522063 / R522063C1: got 'A563185'
## Warning: Expecting numeric in A522064 / R522064C1: got 'A563186'
## Warning: Expecting numeric in A522065 / R522065C1: got 'A563187'

Paso 3. Explorar base de datos

str(df1)
## tibble [522,064 × 8] (S3: tbl_df/tbl/data.frame)
##  $ BillNo    : num [1:522064] 536365 536365 536365 536365 536365 ...
##  $ Itemname  : chr [1:522064] "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
##  $ Quantity  : num [1:522064] 6 6 8 6 6 2 6 6 6 32 ...
##  $ Date      : POSIXct[1:522064], format: "2010-12-01" "2010-12-01" ...
##  $ Time      : POSIXct[1:522064], format: "1899-12-31 08:26:00" "1899-12-31 08:26:00" ...
##  $ Price     : num [1:522064] 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
##  $ CustomerID: num [1:522064] 17850 17850 17850 17850 17850 ...
##  $ Country   : chr [1:522064] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
summary(df1)
##      BillNo         Itemname            Quantity       
##  Min.   :536365   Length:522064      Min.   :-9600.00  
##  1st Qu.:547892   Class :character   1st Qu.:    1.00  
##  Median :560603   Mode  :character   Median :    3.00  
##  Mean   :559951                      Mean   :   10.09  
##  3rd Qu.:571892                      3rd Qu.:   10.00  
##  Max.   :581587                      Max.   :80995.00  
##  NA's   :3                                             
##       Date                          Time                    
##  Min.   :2010-12-01 00:00:00   Min.   :1899-12-31 06:20:00  
##  1st Qu.:2011-03-28 00:00:00   1st Qu.:1899-12-31 11:48:00  
##  Median :2011-07-20 00:00:00   Median :1899-12-31 13:37:00  
##  Mean   :2011-07-03 23:15:13   Mean   :1899-12-31 13:36:07  
##  3rd Qu.:2011-10-19 00:00:00   3rd Qu.:1899-12-31 15:30:00  
##  Max.   :2011-12-09 00:00:00   Max.   :1899-12-31 20:18:00  
##                                                             
##      Price              CustomerID       Country         
##  Min.   :-11062.060   Min.   :12346    Length:522064     
##  1st Qu.:     1.250   1st Qu.:13950    Class :character  
##  Median :     2.080   Median :15265    Mode  :character  
##  Mean   :     3.827   Mean   :15317                      
##  3rd Qu.:     4.130   3rd Qu.:16837                      
##  Max.   : 13541.330   Max.   :18287                      
##                       NA's   :134041

Paso 4. Limpieza de datos

sum(is.na(df1$CustomerID))
## [1] 134041
sum(df1$Quantity < 0)
## [1] 1336
sum(df1$Price < 0)
## [1] 2
df_clean <- df1 %>%
  filter(
    !is.na(CustomerID),
    Quantity > 0,
    Price > 0
  )

Paso 5. Creacion de las medidas

clientes <- df_clean %>%
  group_by(CustomerID) %>%
  summarise(
    Frecuencia = n_distinct(BillNo),
    TicketPromedio = mean(Quantity * Price)
  )
datos_cluster <- clientes %>%
  select(
    X = Frecuencia,
    Y = TicketPromedio
  )
datos_scaled <- scale(datos_cluster)
plot(datos_cluster$X, datos_cluster$Y,
     xlab = "Frecuencia de Compra",
     ylab = "Ticket Promedio",
     main = "Segmentación de Clientes")

# Paso 6. Creacion de las medidas

par(mfrow = c(1,2))

boxplot(datos_cluster$X,
        main = "Frecuencia de Compra")

boxplot(datos_cluster$Y,
        main = "Ticket Promedio")

# Detectar outliers
out_x <- boxplot.stats(datos_cluster$X)$out
out_y <- boxplot.stats(datos_cluster$Y)$out
# Filtrar dataset
datos_sin_outliers <- datos_cluster %>%
  filter(!(X %in% out_x | Y %in% out_y))
plot(datos_sin_outliers$X, datos_sin_outliers$Y,
     xlab = "Frecuencia de Compra",
     ylab = "Ticket Promedio",
     main = "Segmentación de Clientes")

grupos1 <- 3

Paso 7. Generar los grupos y escalar

datos_scaled <- scale(datos_sin_outliers)
set.seed(123)

kmodel <- kmeans(datos_scaled, grupos1)
kmodel
## K-means clustering with 3 clusters of sizes 1820, 1137, 633
## 
## Cluster means:
##            X           Y
## 1 -0.4247508 -0.63046866
## 2 -0.3183498  0.98604174
## 3  1.7930651  0.04158532
## 
## Clustering vector:
##    [1] 3 2 2 3 2 2 2 2 2 3 2 1 2 2 2 2 2 1 2 2 3 3 2 2 3 1 2 1 2 2 2 1 2 3 2 2 2
##   [38] 2 3 2 2 2 3 2 2 2 2 2 3 1 2 2 1 1 2 2 1 1 2 2 2 2 2 1 1 2 2 2 2 2 1 3 1 2
##   [75] 3 2 2 3 2 1 2 3 3 3 2 2 3 1 1 2 3 1 1 2 1 1 2 2 1 1 2 2 1 3 2 3 1 1 3 3 2
##  [112] 2 3 2 1 2 1 2 1 2 2 2 2 1 2 2 1 1 2 3 1 2 2 2 1 3 2 1 2 3 1 1 1 2 3 1 2 1
##  [149] 1 1 3 2 1 2 1 2 2 3 2 2 3 3 3 3 1 1 1 2 3 1 2 3 1 2 2 1 1 3 1 1 1 1 2 3 3
##  [186] 2 2 1 3 1 2 2 2 2 1 2 2 1 2 1 1 2 3 2 2 2 1 2 3 2 2 2 2 2 1 2 3 2 1 3 3 1
##  [223] 2 1 2 2 2 2 2 2 2 1 1 1 3 1 3 1 2 3 1 1 2 1 1 3 2 3 2 1 3 3 2 1 2 1 2 1 1
##  [260] 2 1 1 3 3 1 2 2 1 2 2 2 3 2 1 3 3 3 3 3 1 2 2 2 1 1 2 2 2 2 1 1 2 1 1 2 2
##  [297] 2 1 1 2 1 3 1 3 2 2 1 1 1 1 1 1 2 2 3 1 1 1 2 1 1 3 1 1 3 2 1 2 3 3 2 1 2
##  [334] 1 2 1 1 1 1 1 1 1 1 2 3 1 2 3 1 1 1 2 1 2 1 1 1 3 1 1 1 1 1 2 2 3 3 3 3 1
##  [371] 2 3 1 3 1 3 1 1 2 1 1 2 1 2 1 2 1 2 1 2 1 1 1 2 1 1 1 2 2 2 1 1 3 3 3 1 3
##  [408] 1 2 1 1 1 1 1 2 2 2 1 2 1 1 1 3 1 2 1 2 2 2 2 2 1 1 1 1 2 2 2 1 2 1 3 1 2
##  [445] 1 3 1 3 1 2 2 2 1 3 2 1 3 3 2 1 1 1 3 1 2 2 2 1 2 3 3 3 3 1 3 1 2 1 2 1 2
##  [482] 1 2 1 1 2 1 2 2 1 1 1 1 1 1 1 1 1 3 1 3 3 2 1 1 1 1 2 1 3 2 1 2 3 3 1 2 1
##  [519] 3 1 2 1 2 2 1 2 1 3 1 1 2 2 1 1 2 1 1 2 3 2 1 2 2 2 2 1 1 3 3 1 2 1 2 2 1
##  [556] 1 1 2 1 1 1 1 2 1 2 1 2 2 2 1 1 3 1 2 3 1 2 3 1 3 2 1 1 2 3 2 1 1 1 2 1 1
##  [593] 2 2 2 2 1 1 2 2 1 1 2 1 1 1 2 2 1 1 1 2 2 2 1 2 2 2 1 3 1 3 1 2 1 2 3 1 2
##  [630] 2 1 1 2 2 2 2 1 2 2 1 2 1 2 2 3 2 1 1 1 2 3 1 3 1 2 2 3 1 3 1 2 2 3 1 1 1
##  [667] 1 1 1 1 2 1 1 2 3 2 3 1 1 2 2 1 1 2 3 2 1 2 2 2 2 1 2 1 1 1 2 1 1 1 3 1 1
##  [704] 2 2 1 2 1 1 1 2 1 2 3 2 3 2 2 1 1 3 1 1 1 1 3 2 2 1 1 1 1 2 1 2 1 1 1 1 1
##  [741] 2 1 3 2 2 1 1 1 3 1 3 2 1 3 1 1 1 1 1 2 1 1 1 3 1 1 1 1 1 1 1 1 3 3 3 1 1
##  [778] 3 1 1 2 1 2 2 2 1 3 1 1 3 1 1 2 2 1 1 1 1 1 2 1 2 1 2 1 2 1 2 1 1 2 2 1 2
##  [815] 3 3 2 1 1 1 2 2 2 3 1 3 2 3 2 2 2 2 3 2 1 3 1 1 1 1 1 1 1 2 3 1 1 2 2 3 2
##  [852] 2 3 3 2 1 1 1 3 2 1 1 2 1 1 3 2 1 1 1 1 1 1 1 3 2 1 3 2 1 2 2 2 2 1 2 1 1
##  [889] 2 2 2 1 1 3 1 1 1 1 1 3 1 2 1 1 1 2 2 2 1 3 3 1 1 2 1 1 3 1 2 3 1 2 2 3 3
##  [926] 3 1 1 1 1 2 3 2 2 2 2 1 1 2 1 3 1 2 3 2 1 2 2 1 3 1 2 1 1 2 2 2 2 2 1 1 1
##  [963] 1 1 1 2 3 2 1 2 1 1 1 1 2 1 2 2 3 1 2 1 3 2 1 1 1 1 2 3 2 2 3 1 3 3 3 2 1
## [1000] 3 2 2 1 2 1 1 2 1 2 1 2 2 1 2 3 3 1 3 1 1 1 2 1 1 1 2 1 1 1 1 1 2 3 2 1 1
## [1037] 3 1 2 1 3 3 2 1 1 1 3 1 1 1 1 3 1 3 1 1 3 1 1 3 2 3 2 1 1 1 1 3 1 3 2 2 1
## [1074] 1 2 1 1 3 1 2 1 2 2 1 1 3 1 1 1 1 1 1 1 2 2 1 3 1 2 2 2 2 3 1 2 1 1 3 3 3
## [1111] 3 3 2 1 1 2 3 1 1 1 2 3 2 2 3 2 1 2 2 1 1 3 2 2 1 3 1 2 1 2 1 2 2 2 2 2 3
## [1148] 2 1 2 1 1 1 2 1 2 1 2 1 2 2 3 3 3 1 2 2 3 1 2 1 3 3 1 3 1 3 2 1 3 1 2 2 3
## [1185] 1 2 2 3 1 2 2 2 2 3 2 3 1 1 2 1 1 2 1 1 1 2 1 1 2 1 2 2 2 1 1 1 1 2 1 2 1
## [1222] 3 2 3 1 3 1 2 3 2 3 3 2 3 1 1 3 2 1 3 1 2 3 3 2 1 2 3 1 1 1 2 2 1 1 3 2 1
## [1259] 1 1 1 1 3 2 2 1 3 1 3 1 1 3 2 2 2 1 1 2 2 3 1 2 1 3 2 1 2 3 2 2 2 1 3 1 2
## [1296] 3 3 1 3 1 3 1 3 1 2 1 3 2 1 2 3 3 2 1 3 1 3 1 1 2 2 1 1 3 2 1 1 3 1 1 2 1
## [1333] 1 1 1 1 2 2 1 3 3 2 1 1 1 2 1 1 1 1 3 1 1 1 1 1 3 1 1 1 2 2 2 1 3 1 1 1 1
## [1370] 2 2 1 2 1 1 2 2 3 3 1 2 2 2 1 1 1 1 3 2 1 1 1 3 1 2 1 1 3 1 1 1 1 2 1 2 1
## [1407] 2 1 1 2 1 2 3 1 1 1 2 3 1 3 2 1 3 2 3 1 1 1 1 1 1 3 1 2 1 3 1 3 3 1 3 2 3
## [1444] 1 3 2 1 3 2 2 2 1 1 2 1 1 3 2 3 1 1 1 3 2 1 1 2 2 2 2 2 1 1 1 3 2 1 1 1 2
## [1481] 2 1 1 3 3 1 1 1 1 2 1 2 1 2 1 1 3 1 1 1 3 2 2 2 1 1 3 3 2 2 2 2 1 2 1 2 1
## [1518] 2 3 1 2 1 2 2 3 1 1 3 2 1 1 1 1 1 2 1 2 3 3 1 1 1 2 3 2 1 2 1 2 1 2 1 1 1
## [1555] 2 2 2 2 3 2 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 2 1 2 3 1 1 1 1 1 1 2 1 1 1 1
## [1592] 1 1 1 1 1 1 1 1 1 1 3 1 3 3 1 1 2 1 1 3 3 1 1 3 1 2 1 1 2 2 1 1 1 1 3 3 1
## [1629] 1 2 2 2 1 1 2 2 1 2 1 2 3 1 2 2 3 1 1 1 2 2 2 1 1 2 2 1 3 3 3 1 2 2 2 3 1
## [1666] 2 3 2 1 1 2 2 1 2 1 3 1 3 2 3 1 2 1 3 2 2 2 1 2 1 2 3 1 1 1 1 2 1 1 1 1 2
## [1703] 3 1 1 1 2 1 2 2 1 1 2 2 2 2 2 3 3 2 1 3 1 1 2 1 3 2 3 2 3 2 2 1 2 3 2 2 2
## [1740] 1 2 1 2 1 3 3 1 2 2 1 1 3 1 2 1 2 3 1 1 1 1 2 2 1 1 1 1 2 1 1 3 2 2 1 1 3
## [1777] 1 1 2 3 2 1 1 2 1 1 1 1 1 2 1 1 1 2 2 1 1 1 1 1 2 2 1 2 1 1 2 3 1 2 3 2 2
## [1814] 2 3 1 2 3 1 3 3 1 2 1 2 2 2 1 2 1 2 1 1 1 1 1 3 1 2 2 1 2 1 1 2 1 2 2 1 1
## [1851] 1 1 1 1 3 1 1 1 1 1 3 1 2 1 1 1 1 1 3 1 1 1 2 1 2 1 1 1 1 2 2 1 1 1 2 2 1
## [1888] 2 2 2 3 2 1 1 1 1 1 2 1 1 3 1 1 3 1 1 3 2 3 1 2 1 2 3 1 3 1 1 1 1 1 1 1 1
## [1925] 1 1 3 1 1 1 1 1 1 1 1 2 2 1 2 2 1 1 3 1 1 2 1 1 1 1 1 1 1 1 2 3 3 1 1 1 3
## [1962] 2 1 3 3 1 2 2 2 2 1 1 3 1 1 2 1 3 2 3 1 2 1 1 1 2 1 1 1 2 2 1 1 1 2 3 2 3
## [1999] 1 2 2 2 2 1 3 2 3 1 1 2 2 1 1 2 2 3 2 1 1 2 3 1 1 1 2 2 2 2 1 3 2 3 2 1 1
## [2036] 2 1 1 2 1 2 1 3 1 1 2 1 3 1 1 1 1 1 1 1 3 3 1 3 1 1 2 2 1 1 3 2 2 1 1 1 1
## [2073] 1 1 2 2 1 1 1 2 1 3 1 1 1 2 2 2 3 1 3 2 2 2 1 1 1 2 1 2 2 1 1 1 1 1 2 1 1
## [2110] 1 1 2 1 1 3 1 1 1 1 1 2 1 3 3 1 1 2 1 1 1 1 3 1 1 3 2 1 2 1 1 1 1 1 1 1 3
## [2147] 1 1 1 1 3 1 2 3 1 3 1 1 1 1 1 2 1 1 2 3 1 2 2 3 2 2 1 2 2 1 1 3 1 1 1 1 1
## [2184] 2 2 3 1 1 1 2 3 1 3 2 1 3 1 1 1 3 1 3 1 2 1 1 1 1 1 1 1 1 1 3 1 1 3 2 1 1
## [2221] 1 1 1 1 1 2 1 2 2 2 3 2 1 2 1 1 1 1 1 1 1 3 3 2 1 1 3 2 1 2 3 2 3 2 1 1 2
## [2258] 1 1 1 1 2 2 3 2 1 2 3 1 2 1 1 2 1 1 1 1 3 1 1 2 1 2 2 2 2 1 3 1 2 2 2 2 2
## [2295] 1 3 3 2 1 1 3 3 3 2 2 2 2 2 3 2 3 3 2 2 2 2 2 2 3 1 1 2 2 1 2 1 1 2 1 1 1
## [2332] 1 2 1 2 1 2 2 1 2 2 3 1 1 1 1 2 2 1 2 2 3 2 2 2 3 1 1 1 1 2 2 1 2 3 2 2 1
## [2369] 3 3 1 2 3 1 1 1 2 1 1 1 1 1 1 3 1 1 2 2 2 1 3 1 1 1 1 3 2 1 2 2 1 1 2 3 1
## [2406] 3 1 2 1 1 2 2 1 1 1 3 1 2 2 1 1 1 2 2 3 3 1 1 2 1 3 1 1 1 1 1 1 1 1 1 2 1
## [2443] 1 1 1 2 2 1 3 1 2 1 1 1 1 2 2 1 3 1 1 1 1 1 1 1 1 3 2 2 1 3 2 1 2 1 3 1 3
## [2480] 1 1 1 1 1 1 1 2 2 1 3 3 2 1 1 1 1 3 2 1 1 1 1 3 2 3 2 1 1 1 1 1 1 1 2 2 1
## [2517] 1 1 3 1 2 2 1 1 2 2 2 1 2 3 1 1 1 1 1 2 1 1 1 3 1 2 1 2 2 2 3 1 2 2 1 1 3
## [2554] 2 1 1 2 2 2 3 1 1 1 1 1 1 2 1 1 2 1 1 1 2 1 1 3 1 1 1 3 1 1 2 2 2 1 2 2 1
## [2591] 2 2 2 1 1 1 3 1 3 2 1 3 2 2 2 1 1 1 1 2 1 2 3 1 1 2 1 1 1 3 1 3 1 2 2 1 2
## [2628] 2 1 3 2 2 2 1 1 1 2 2 3 3 1 1 3 1 3 1 1 1 3 1 1 1 1 2 1 3 1 2 3 3 2 3 1 2
## [2665] 1 1 1 1 1 2 2 1 2 2 2 3 1 1 1 1 1 1 3 1 1 1 1 1 2 1 1 3 3 1 1 3 1 1 1 2 1
## [2702] 1 1 1 1 1 1 2 2 2 3 1 3 3 2 1 1 2 1 1 1 1 1 2 1 1 2 3 2 3 2 2 1 2 1 1 1 1
## [2739] 1 1 2 1 2 2 2 1 3 3 3 3 1 3 2 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 3 1 1 2 3 3 1
## [2776] 1 3 1 3 3 1 1 1 3 1 1 1 1 1 1 1 3 2 2 2 3 2 1 1 2 3 2 2 2 1 1 1 1 1 1 1 1
## [2813] 1 1 3 1 1 3 1 3 1 3 2 2 3 2 2 1 3 1 1 1 1 1 1 2 2 2 3 1 1 2 2 1 1 1 2 1 3
## [2850] 1 1 2 1 2 3 3 1 1 2 3 2 1 1 3 1 3 1 3 1 1 2 1 2 1 2 2 1 1 3 1 3 3 1 2 1 2
## [2887] 3 1 2 2 1 3 3 1 1 3 2 1 1 1 1 2 2 1 1 1 1 3 1 3 1 2 2 2 2 2 1 1 1 1 2 2 1
## [2924] 3 3 3 1 1 1 1 2 1 2 2 1 1 1 2 3 1 2 3 1 2 1 1 3 2 2 2 3 3 3 1 3 1 1 1 1 2
## [2961] 3 3 1 3 1 1 1 2 3 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 3 1 3
## [2998] 3 1 1 3 1 1 1 1 1 3 1 1 2 3 1 1 3 2 1 1 1 2 1 3 2 3 1 1 1 1 1 1 2 2 1 1 1
## [3035] 1 1 1 1 3 3 1 3 1 1 3 1 1 1 1 1 2 1 2 1 1 1 1 1 3 1 1 2 1 3 2 3 2 3 1 2 3
## [3072] 2 1 3 1 2 1 1 1 3 2 2 2 3 1 1 3 3 2 1 1 1 1 1 3 1 2 1 2 2 2 3 3 1 1 1 1 2
## [3109] 1 1 3 3 2 2 2 1 1 1 2 1 3 2 2 1 1 3 1 3 1 1 1 1 3 1 1 3 2 2 2 3 3 2 2 1 2
## [3146] 1 1 2 1 1 2 2 1 1 2 3 1 2 1 2 1 3 1 1 3 1 1 2 1 2 2 1 1 1 2 1 1 1 3 3 2 1
## [3183] 3 2 1 1 3 2 1 1 1 3 1 2 1 1 2 1 1 1 1 3 2 1 2 3 1 1 1 1 1 3 2 2 2 1 2 3 2
## [3220] 1 3 3 2 1 3 1 2 3 2 2 2 2 3 2 3 2 1 3 1 3 3 1 1 1 2 2 3 1 2 2 1 2 1 2 3 2
## [3257] 1 2 1 1 1 3 2 2 2 2 3 1 1 2 1 2 1 1 1 2 2 3 1 2 3 3 3 1 1 2 1 1 1 1 2 3 2
## [3294] 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 3 3 1
## [3331] 1 3 2 1 2 1 1 1 3 2 3 1 2 3 3 1 2 3 2 1 2 1 1 1 1 1 1 1 3 1 1 1 1 1 3 1 1
## [3368] 1 1 2 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 1 1 3 1 1 1 2
## [3405] 1 2 1 1 1 2 1 2 1 2 2 1 3 2 2 1 2 3 1 1 1 1 2 1 1 1 1 1 1 3 1 1 1 2 1 2 1
## [3442] 1 1 1 1 3 2 2 1 3 1 2 1 1 3 2 1 2 2 1 1 1 1 2 3 3 1 2 2 1 2 1 1 2 2 1 3 1
## [3479] 1 2 1 1 1 1 1 1 3 1 1 1 2 3 1 1 1 1 1 1 2 2 2 1 1 1 2 2 1 1 2 1 1 1 2 1 3
## [3516] 1 2 2 3 2 2 3 1 3 1 3 3 1 1 1 2 1 2 2 2 1 1 1 2 1 1 1 1 3 3 1 1 2 2 2 2 3
## [3553] 3 2 1 3 2 2 3 3 1 1 3 3 3 2 3 1 1 1 1 1 3 3 1 1 2 1 2 2 2 3 1 2 1 2 1 1 1
## [3590] 2
## 
## Within cluster sum of squares by cluster:
## [1] 1056.8476  961.8176  850.5962
##  (between_SS / total_SS =  60.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Paso 8. Optimizar el número de grupos

set.seed(123)
optimizacion1 <- clusGap(datos_scaled, FUN=kmeans, nstart=1, K.max=10)
# El K.max normalmente es 10, en este ejercicio al ser 8 datos se dejó en 7.
plot(optimizacion1, xlab="Número de clusters k", main="Optimización de Clusters")

# Se selecciona como óptimo el primer punto más alto.

Paso 9. Graficar los grupos

fviz_cluster(kmodel, data=datos_scaled)

# Paso 10. Agregar Clusters a la Base de Datos

df1_clusters <- cbind(datos_scaled, cluster = kmodel$cluster)
head(df1_clusters)
##              X         Y cluster
## [1,]  1.697833 0.8301372       3
## [2,] -0.831479 0.8775053       2
## [3,] -0.831479 0.3488188       2
## [4,]  2.119385 1.5263624       3
## [5,] -0.831479 0.6583680       2
## [6,] -0.831479 0.2215814       2
tail(df1_clusters)
##                   X          Y cluster
## [3585,] -0.83147901 -0.3559969       1
## [3586,] -0.83147901  0.3070122       2
## [3587,] -0.83147901  0.1555358       1
## [3588,] -0.83147901 -0.6262285       1
## [3589,] -0.40992702 -0.2311889       1
## [3590,]  0.01162497  1.1380216       2

Paso 11. Renombrar

datos_sin_outliers$cluster <- kmodel$cluster

library(dplyr)

resumen_clusters <- datos_sin_outliers %>%
  group_by(cluster) %>%
  summarise(
    Frecuencia_prom = mean(X),
    Ticket_prom = mean(Y),
    Clientes = n()
  )

resumen_clusters
## # A tibble: 3 × 4
##   cluster Frecuencia_prom Ticket_prom Clientes
##     <int>           <dbl>       <dbl>    <int>
## 1       1            1.96        11.5     1820
## 2       2            2.22        25.0     1137
## 3       3            7.23        17.1      633
datos_sin_outliers$segmento <- case_when(
  datos_sin_outliers$cluster == 1 ~ "Clientes Ocasionales",
  datos_sin_outliers$cluster == 2 ~ "Clientes Premium",
  datos_sin_outliers$cluster == 3 ~ "Clientes Frecuentes"
)
table(datos_sin_outliers$segmento)
## 
##  Clientes Frecuentes Clientes Ocasionales     Clientes Premium 
##                  633                 1820                 1137

Paso 10. Caracteristicas y recomendaciones

  1. Clientes Ocasionales Característica: Baja frecuencia de compra y bajo ticket promedio; compran esporádicamente y generan bajo valor individual. Recomendación: Implementar promociones y descuentos personalizados para incentivar una segunda y tercera compra y aumentar su frecuencia.

  2. Clientes Premium Característica: Frecuencia moderada pero el ticket promedio más alto; generan alto valor por transacción. Recomendación: Desarrollar estrategias de fidelización y beneficios exclusivos para incrementar su frecuencia sin reducir su ticket.

  3. Clientes Frecuentes Característica: Alta frecuencia de compra con ticket promedio medio; representan clientes leales y recurrentes. Recomendación: Aplicar estrategias de upselling y programas de recompensas para elevar su ticket promedio y maximizar su valor.

LS0tDQp0aXRsZTogIkNsdXN0ZXIgMiAtIFN1cGVybWVyY2Fkb3MiDQphdXRob3I6ICJFcXVpcG8gMyINCmRhdGU6ICIyMDI2LTAyLTIzIg0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDogDQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgY29kZV9kb3dubG9hZDogVFJVRQ0KICAgIHRoZW1lOiB5ZXRpDQotLS0NCiFbXShodHRwczovL3ZveHkuY29tL3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDIxLzA0L0Fkb2JlU3RvY2tfMzA2MzU3NjYxLTItc2NhbGVkLmpwZWcpDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gUGFzbyAxLiBJbnN0YWxhciBwYXF1ZXRlcyB5IGxsYW1hciBsaWJyZXLDrWFzIDwvc3Bhbj4NCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQojaW5zdGFsbC5wYWNrYWdlcygiY2x1c3RlciIpICMgQW7DoWxpc2lzIGRlIEFncnVwYW1pZW50bw0KbGlicmFyeShjbHVzdGVyKQ0KI2luc3RhbGwucGFja2FnZXMoImdncGxvdDIiKSAjIEdyYWZpY2FyDQpsaWJyYXJ5KGdncGxvdDIpDQojaW5zdGFsbC5wYWNrYWdlcygiZGF0YS50YWJsZSIpICMgTWFuZWpvIGRlIG11Y2hvcyBkYXRvcw0KbGlicmFyeShkYXRhLnRhYmxlKQ0KI2luc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKSAjIEdyw6FmaWNhIG9wdGltaXphY2nDs24gZGUgbsO6bWVybyBkZSBjbHVzdGVycw0KbGlicmFyeShmYWN0b2V4dHJhKQ0KI2luc3RhbGwucGFja2FnZXMoInJlYWR4bCIpDQpsaWJyYXJ5KHJlYWR4bCkNCiNpbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDIuIE9idGVuZXIgbG9zIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0KZGYxIDwtIHJlYWRfZXhjZWwoIkM6XFxVc2Vyc1xca2FybGFcXERlc2t0b3BcXENPTkNFTlRSQUNJT05cXE1vZHVsb19wcm9ncmFcXHN1cGVybWFya2V0Lnhsc3giKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gUGFzbyAzLiBFeHBsb3JhciBiYXNlIGRlIGRhdG9zIDwvc3Bhbj4NCmBgYHtyfQ0Kc3RyKGRmMSkNCnN1bW1hcnkoZGYxKQ0KYGBgDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gNC4gTGltcGllemEgZGUgZGF0b3MgPC9zcGFuPg0KDQpgYGB7cn0NCnN1bShpcy5uYShkZjEkQ3VzdG9tZXJJRCkpDQpgYGANCmBgYHtyfQ0Kc3VtKGRmMSRRdWFudGl0eSA8IDApDQpgYGANCmBgYHtyfQ0Kc3VtKGRmMSRQcmljZSA8IDApDQpgYGANCg0KYGBge3J9DQpkZl9jbGVhbiA8LSBkZjEgJT4lDQogIGZpbHRlcigNCiAgICAhaXMubmEoQ3VzdG9tZXJJRCksDQogICAgUXVhbnRpdHkgPiAwLA0KICAgIFByaWNlID4gMA0KICApDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDUuIENyZWFjaW9uIGRlIGxhcyBtZWRpZGFzIDwvc3Bhbj4NCg0KYGBge3J9DQpjbGllbnRlcyA8LSBkZl9jbGVhbiAlPiUNCiAgZ3JvdXBfYnkoQ3VzdG9tZXJJRCkgJT4lDQogIHN1bW1hcmlzZSgNCiAgICBGcmVjdWVuY2lhID0gbl9kaXN0aW5jdChCaWxsTm8pLA0KICAgIFRpY2tldFByb21lZGlvID0gbWVhbihRdWFudGl0eSAqIFByaWNlKQ0KICApDQpgYGANCg0KYGBge3J9DQpkYXRvc19jbHVzdGVyIDwtIGNsaWVudGVzICU+JQ0KICBzZWxlY3QoDQogICAgWCA9IEZyZWN1ZW5jaWEsDQogICAgWSA9IFRpY2tldFByb21lZGlvDQogICkNCmBgYA0KDQpgYGB7cn0NCmRhdG9zX3NjYWxlZCA8LSBzY2FsZShkYXRvc19jbHVzdGVyKQ0KYGBgDQoNCmBgYHtyfQ0KcGxvdChkYXRvc19jbHVzdGVyJFgsIGRhdG9zX2NsdXN0ZXIkWSwNCiAgICAgeGxhYiA9ICJGcmVjdWVuY2lhIGRlIENvbXByYSIsDQogICAgIHlsYWIgPSAiVGlja2V0IFByb21lZGlvIiwNCiAgICAgbWFpbiA9ICJTZWdtZW50YWNpw7NuIGRlIENsaWVudGVzIikNCmBgYA0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDYuIENyZWFjaW9uIGRlIGxhcyBtZWRpZGFzIDwvc3Bhbj4NCmBgYHtyfQ0KcGFyKG1mcm93ID0gYygxLDIpKQ0KDQpib3hwbG90KGRhdG9zX2NsdXN0ZXIkWCwNCiAgICAgICAgbWFpbiA9ICJGcmVjdWVuY2lhIGRlIENvbXByYSIpDQoNCmJveHBsb3QoZGF0b3NfY2x1c3RlciRZLA0KICAgICAgICBtYWluID0gIlRpY2tldCBQcm9tZWRpbyIpDQpgYGANCmBgYHtyfQ0KIyBEZXRlY3RhciBvdXRsaWVycw0Kb3V0X3ggPC0gYm94cGxvdC5zdGF0cyhkYXRvc19jbHVzdGVyJFgpJG91dA0Kb3V0X3kgPC0gYm94cGxvdC5zdGF0cyhkYXRvc19jbHVzdGVyJFkpJG91dA0KYGBgDQoNCmBgYHtyfQ0KIyBGaWx0cmFyIGRhdGFzZXQNCmRhdG9zX3Npbl9vdXRsaWVycyA8LSBkYXRvc19jbHVzdGVyICU+JQ0KICBmaWx0ZXIoIShYICVpbiUgb3V0X3ggfCBZICVpbiUgb3V0X3kpKQ0KYGBgDQoNCmBgYHtyfQ0KcGxvdChkYXRvc19zaW5fb3V0bGllcnMkWCwgZGF0b3Nfc2luX291dGxpZXJzJFksDQogICAgIHhsYWIgPSAiRnJlY3VlbmNpYSBkZSBDb21wcmEiLA0KICAgICB5bGFiID0gIlRpY2tldCBQcm9tZWRpbyIsDQogICAgIG1haW4gPSAiU2VnbWVudGFjacOzbiBkZSBDbGllbnRlcyIpDQpncnVwb3MxIDwtIDMNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gNy4gR2VuZXJhciBsb3MgZ3J1cG9zIHkgZXNjYWxhciA8L3NwYW4+DQoNCmBgYHtyfQ0KZGF0b3Nfc2NhbGVkIDwtIHNjYWxlKGRhdG9zX3Npbl9vdXRsaWVycykNCmBgYA0KDQpgYGB7cn0NCnNldC5zZWVkKDEyMykNCg0Ka21vZGVsIDwtIGttZWFucyhkYXRvc19zY2FsZWQsIGdydXBvczEpDQprbW9kZWwNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlOyI+IFBhc28gOC4gT3B0aW1pemFyIGVsIG7Dum1lcm8gZGUgZ3J1cG9zIDwvc3Bhbj4NCmBgYHtyfQ0Kc2V0LnNlZWQoMTIzKQ0Kb3B0aW1pemFjaW9uMSA8LSBjbHVzR2FwKGRhdG9zX3NjYWxlZCwgRlVOPWttZWFucywgbnN0YXJ0PTEsIEsubWF4PTEwKQ0KIyBFbCBLLm1heCBub3JtYWxtZW50ZSBlcyAxMCwgZW4gZXN0ZSBlamVyY2ljaW8gYWwgc2VyIDggZGF0b3Mgc2UgZGVqw7MgZW4gNy4NCnBsb3Qob3B0aW1pemFjaW9uMSwgeGxhYj0iTsO6bWVybyBkZSBjbHVzdGVycyBrIiwgbWFpbj0iT3B0aW1pemFjacOzbiBkZSBDbHVzdGVycyIpDQojIFNlIHNlbGVjY2lvbmEgY29tbyDDs3B0aW1vIGVsIHByaW1lciBwdW50byBtw6FzIGFsdG8uDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDkuIEdyYWZpY2FyIGxvcyBncnVwb3MgPC9zcGFuPg0KYGBge3J9DQpmdml6X2NsdXN0ZXIoa21vZGVsLCBkYXRhPWRhdG9zX3NjYWxlZCkNCmBgYA0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDEwLiBBZ3JlZ2FyIENsdXN0ZXJzIGEgbGEgQmFzZSBkZSBEYXRvcyA8L3NwYW4+DQpgYGB7cn0NCmRmMV9jbHVzdGVycyA8LSBjYmluZChkYXRvc19zY2FsZWQsIGNsdXN0ZXIgPSBrbW9kZWwkY2x1c3RlcikNCmhlYWQoZGYxX2NsdXN0ZXJzKQ0KdGFpbChkZjFfY2x1c3RlcnMpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZTsiPiBQYXNvIDExLiBSZW5vbWJyYXIgPC9zcGFuPg0KYGBge3J9DQpkYXRvc19zaW5fb3V0bGllcnMkY2x1c3RlciA8LSBrbW9kZWwkY2x1c3Rlcg0KDQpsaWJyYXJ5KGRwbHlyKQ0KDQpyZXN1bWVuX2NsdXN0ZXJzIDwtIGRhdG9zX3Npbl9vdXRsaWVycyAlPiUNCiAgZ3JvdXBfYnkoY2x1c3RlcikgJT4lDQogIHN1bW1hcmlzZSgNCiAgICBGcmVjdWVuY2lhX3Byb20gPSBtZWFuKFgpLA0KICAgIFRpY2tldF9wcm9tID0gbWVhbihZKSwNCiAgICBDbGllbnRlcyA9IG4oKQ0KICApDQoNCnJlc3VtZW5fY2x1c3RlcnMNCmBgYA0KDQpgYGB7cn0NCmRhdG9zX3Npbl9vdXRsaWVycyRzZWdtZW50byA8LSBjYXNlX3doZW4oDQogIGRhdG9zX3Npbl9vdXRsaWVycyRjbHVzdGVyID09IDEgfiAiQ2xpZW50ZXMgT2Nhc2lvbmFsZXMiLA0KICBkYXRvc19zaW5fb3V0bGllcnMkY2x1c3RlciA9PSAyIH4gIkNsaWVudGVzIFByZW1pdW0iLA0KICBkYXRvc19zaW5fb3V0bGllcnMkY2x1c3RlciA9PSAzIH4gIkNsaWVudGVzIEZyZWN1ZW50ZXMiDQopDQpgYGANCg0KYGBge3J9DQp0YWJsZShkYXRvc19zaW5fb3V0bGllcnMkc2VnbWVudG8pDQpgYGANCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWU7Ij4gUGFzbyAxMC4gQ2FyYWN0ZXJpc3RpY2FzIHkgcmVjb21lbmRhY2lvbmVzIDwvc3Bhbj4NCg0KMS4gQ2xpZW50ZXMgT2Nhc2lvbmFsZXMNCkNhcmFjdGVyw61zdGljYTogQmFqYSBmcmVjdWVuY2lhIGRlIGNvbXByYSB5IGJham8gdGlja2V0IHByb21lZGlvOyBjb21wcmFuIGVzcG9yw6FkaWNhbWVudGUgeSBnZW5lcmFuIGJham8gdmFsb3IgaW5kaXZpZHVhbC4NClJlY29tZW5kYWNpw7NuOiBJbXBsZW1lbnRhciBwcm9tb2Npb25lcyB5IGRlc2N1ZW50b3MgcGVyc29uYWxpemFkb3MgcGFyYSBpbmNlbnRpdmFyIHVuYSBzZWd1bmRhIHkgdGVyY2VyYSBjb21wcmEgeSBhdW1lbnRhciBzdSBmcmVjdWVuY2lhLg0KDQoyLiBDbGllbnRlcyBQcmVtaXVtDQpDYXJhY3RlcsOtc3RpY2E6IEZyZWN1ZW5jaWEgbW9kZXJhZGEgcGVybyBlbCB0aWNrZXQgcHJvbWVkaW8gbcOhcyBhbHRvOyBnZW5lcmFuIGFsdG8gdmFsb3IgcG9yIHRyYW5zYWNjacOzbi4NClJlY29tZW5kYWNpw7NuOiBEZXNhcnJvbGxhciBlc3RyYXRlZ2lhcyBkZSBmaWRlbGl6YWNpw7NuIHkgYmVuZWZpY2lvcyBleGNsdXNpdm9zIHBhcmEgaW5jcmVtZW50YXIgc3UgZnJlY3VlbmNpYSBzaW4gcmVkdWNpciBzdSB0aWNrZXQuDQoNCjMuIENsaWVudGVzIEZyZWN1ZW50ZXMNCkNhcmFjdGVyw61zdGljYTogQWx0YSBmcmVjdWVuY2lhIGRlIGNvbXByYSBjb24gdGlja2V0IHByb21lZGlvIG1lZGlvOyByZXByZXNlbnRhbiBjbGllbnRlcyBsZWFsZXMgeSByZWN1cnJlbnRlcy4NClJlY29tZW5kYWNpw7NuOiBBcGxpY2FyIGVzdHJhdGVnaWFzIGRlIHVwc2VsbGluZyB5IHByb2dyYW1hcyBkZSByZWNvbXBlbnNhcyBwYXJhIGVsZXZhciBzdSB0aWNrZXQgcHJvbWVkaW8geSBtYXhpbWl6YXIgc3UgdmFsb3IuDQoNCg==