#install.packages("cluster") # Análisis de Agrupamiento
library(cluster)
#install.packages("ggplot2") # Graficar
library(ggplot2)
#install.packages("data.table") # Manejo de muchos datos
library(data.table)
#install.packages("factoextra") # Gráfica optimización de número de clusters
library(factoextra)
#install.packages("readxl")
library(readxl)
#install.packages("tidyverse")
library(tidyverse)
# Load the raw transaction workbook into a tibble.
# NOTE(review): absolute, user-specific Windows path; the script only runs
# unchanged on a machine that has this exact file location.
df1 <- read_excel(
  path = "C:\\Users\\karla\\Desktop\\CONCENTRACION\\Modulo_progra\\supermarket.xlsx"
)
## Warning: Expecting numeric in A522063 / R522063C1: got 'A563185'
## Warning: Expecting numeric in A522064 / R522064C1: got 'A563186'
## Warning: Expecting numeric in A522065 / R522065C1: got 'A563187'
# Inspect the column types and the first values of every column.
str(df1)
## tibble [522,064 × 8] (S3: tbl_df/tbl/data.frame)
## $ BillNo : num [1:522064] 536365 536365 536365 536365 536365 ...
## $ Itemname : chr [1:522064] "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : num [1:522064] 6 6 8 6 6 2 6 6 6 32 ...
## $ Date : POSIXct[1:522064], format: "2010-12-01" "2010-12-01" ...
## $ Time : POSIXct[1:522064], format: "1899-12-31 08:26:00" "1899-12-31 08:26:00" ...
## $ Price : num [1:522064] 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID: num [1:522064] 17850 17850 17850 17850 17850 ...
## $ Country : chr [1:522064] "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
# Per-column summary statistics, including the number of missing values.
summary(df1)
## BillNo Itemname Quantity
## Min. :536365 Length:522064 Min. :-9600.00
## 1st Qu.:547892 Class :character 1st Qu.: 1.00
## Median :560603 Mode :character Median : 3.00
## Mean :559951 Mean : 10.09
## 3rd Qu.:571892 3rd Qu.: 10.00
## Max. :581587 Max. :80995.00
## NA's :3
## Date Time
## Min. :2010-12-01 00:00:00 Min. :1899-12-31 06:20:00
## 1st Qu.:2011-03-28 00:00:00 1st Qu.:1899-12-31 11:48:00
## Median :2011-07-20 00:00:00 Median :1899-12-31 13:37:00
## Mean :2011-07-03 23:15:13 Mean :1899-12-31 13:36:07
## 3rd Qu.:2011-10-19 00:00:00 3rd Qu.:1899-12-31 15:30:00
## Max. :2011-12-09 00:00:00 Max. :1899-12-31 20:18:00
##
## Price CustomerID Country
## Min. :-11062.060 Min. :12346 Length:522064
## 1st Qu.: 1.250 1st Qu.:13950 Class :character
## Median : 2.080 Median :15265 Mode :character
## Mean : 3.827 Mean :15317
## 3rd Qu.: 4.130 3rd Qu.:16837
## Max. : 13541.330 Max. :18287
## NA's :134041
# Data-quality counts taken before cleaning.
# NOTE(review): the cleaning filter below keeps only Quantity > 0 and
# Price > 0, so rows equal to zero are dropped as well but not counted here.

# Rows without a customer id
sum(is.na(df1[["CustomerID"]]))
## [1] 134041
# Rows with a negative quantity
sum(df1[["Quantity"]] < 0)
## [1] 1336
# Rows with a negative price
sum(df1[["Price"]] < 0)
## [1] 2
# Keep only rows that identify a customer and have a strictly positive
# quantity and price. Rows where the combined condition is NA are dropped
# too, exactly as with separate filter() conditions.
df_clean <- df1 %>%
  filter(!is.na(CustomerID) & Quantity > 0 & Price > 0)
# One row per customer with the two clustering measures:
#   Frecuencia     - number of distinct bills (BillNo) of the customer
#   TicketPromedio - mean of Quantity * Price over the customer's rows
# NOTE(review): TicketPromedio averages individual line items, not per-bill
# totals; confirm this is the intended definition of "average ticket".
clientes <- df_clean %>%
  mutate(importe_linea = Quantity * Price) %>%
  group_by(CustomerID) %>%
  summarise(
    Frecuencia = n_distinct(BillNo),
    TicketPromedio = mean(importe_linea)
  )
# Keep only the two clustering measures under short names:
# X = purchase frequency, Y = average ticket.
datos_cluster <- clientes %>%
  select(Frecuencia, TicketPromedio) %>%
  rename(X = Frecuencia, Y = TicketPromedio)

# Standardize both columns.
# NOTE(review): datos_scaled is reassigned from the outlier-free data before
# it is first read, so this assignment appears to be unused.
datos_scaled <- scale(x = datos_cluster)

# Scatter plot of the raw per-customer measures (outliers still included).
plot(
  x = datos_cluster[["X"]],
  y = datos_cluster[["Y"]],
  xlab = "Frecuencia de Compra",
  ylab = "Ticket Promedio",
  main = "Segmentación de Clientes"
)
# Step 6. Inspect the distribution of each measure with side-by-side box plots.
# par() returns the settings it replaces; they are restored after the two
# box plots so that later plots are not drawn into one cell of the 1 x 2 grid.
par_previo <- par(mfrow = c(1, 2))
boxplot(datos_cluster$X,
        main = "Frecuencia de Compra")
boxplot(datos_cluster$Y,
        main = "Ticket Promedio")
par(par_previo)
# Collect the values that boxplot.stats() reports as outliers in each measure.
out_x <- boxplot.stats(datos_cluster[["X"]])[["out"]]
out_y <- boxplot.stats(datos_cluster[["Y"]])[["out"]]

# Keep only customers that are not an outlier in either measure.
datos_sin_outliers <- datos_cluster %>%
  filter(!(X %in% out_x), !(Y %in% out_y))

# Scatter plot of the measures once the outliers are removed.
plot(
  x = datos_sin_outliers[["X"]],
  y = datos_sin_outliers[["Y"]],
  xlab = "Frecuencia de Compra",
  ylab = "Ticket Promedio",
  main = "Segmentación de Clientes"
)
# Standardize the outlier-free measures and fit k-means on them.
datos_scaled <- scale(x = datos_sin_outliers)

# Number of clusters to fit.
grupos1 <- 3

# Fix the seed immediately before kmeans() so the random initial centers,
# and therefore the recorded clustering, are reproducible.
set.seed(123)
kmodel <- kmeans(x = datos_scaled, centers = grupos1)
kmodel
## K-means clustering with 3 clusters of sizes 1820, 1137, 633
##
## Cluster means:
## X Y
## 1 -0.4247508 -0.63046866
## 2 -0.3183498 0.98604174
## 3 1.7930651 0.04158532
##
## Clustering vector:
## [1] 3 2 2 3 2 2 2 2 2 3 2 1 2 2 2 2 2 1 2 2 3 3 2 2 3 1 2 1 2 2 2 1 2 3 2 2 2
## [38] 2 3 2 2 2 3 2 2 2 2 2 3 1 2 2 1 1 2 2 1 1 2 2 2 2 2 1 1 2 2 2 2 2 1 3 1 2
## [75] 3 2 2 3 2 1 2 3 3 3 2 2 3 1 1 2 3 1 1 2 1 1 2 2 1 1 2 2 1 3 2 3 1 1 3 3 2
## [112] 2 3 2 1 2 1 2 1 2 2 2 2 1 2 2 1 1 2 3 1 2 2 2 1 3 2 1 2 3 1 1 1 2 3 1 2 1
## [149] 1 1 3 2 1 2 1 2 2 3 2 2 3 3 3 3 1 1 1 2 3 1 2 3 1 2 2 1 1 3 1 1 1 1 2 3 3
## [186] 2 2 1 3 1 2 2 2 2 1 2 2 1 2 1 1 2 3 2 2 2 1 2 3 2 2 2 2 2 1 2 3 2 1 3 3 1
## [223] 2 1 2 2 2 2 2 2 2 1 1 1 3 1 3 1 2 3 1 1 2 1 1 3 2 3 2 1 3 3 2 1 2 1 2 1 1
## [260] 2 1 1 3 3 1 2 2 1 2 2 2 3 2 1 3 3 3 3 3 1 2 2 2 1 1 2 2 2 2 1 1 2 1 1 2 2
## [297] 2 1 1 2 1 3 1 3 2 2 1 1 1 1 1 1 2 2 3 1 1 1 2 1 1 3 1 1 3 2 1 2 3 3 2 1 2
## [334] 1 2 1 1 1 1 1 1 1 1 2 3 1 2 3 1 1 1 2 1 2 1 1 1 3 1 1 1 1 1 2 2 3 3 3 3 1
## [371] 2 3 1 3 1 3 1 1 2 1 1 2 1 2 1 2 1 2 1 2 1 1 1 2 1 1 1 2 2 2 1 1 3 3 3 1 3
## [408] 1 2 1 1 1 1 1 2 2 2 1 2 1 1 1 3 1 2 1 2 2 2 2 2 1 1 1 1 2 2 2 1 2 1 3 1 2
## [445] 1 3 1 3 1 2 2 2 1 3 2 1 3 3 2 1 1 1 3 1 2 2 2 1 2 3 3 3 3 1 3 1 2 1 2 1 2
## [482] 1 2 1 1 2 1 2 2 1 1 1 1 1 1 1 1 1 3 1 3 3 2 1 1 1 1 2 1 3 2 1 2 3 3 1 2 1
## [519] 3 1 2 1 2 2 1 2 1 3 1 1 2 2 1 1 2 1 1 2 3 2 1 2 2 2 2 1 1 3 3 1 2 1 2 2 1
## [556] 1 1 2 1 1 1 1 2 1 2 1 2 2 2 1 1 3 1 2 3 1 2 3 1 3 2 1 1 2 3 2 1 1 1 2 1 1
## [593] 2 2 2 2 1 1 2 2 1 1 2 1 1 1 2 2 1 1 1 2 2 2 1 2 2 2 1 3 1 3 1 2 1 2 3 1 2
## [630] 2 1 1 2 2 2 2 1 2 2 1 2 1 2 2 3 2 1 1 1 2 3 1 3 1 2 2 3 1 3 1 2 2 3 1 1 1
## [667] 1 1 1 1 2 1 1 2 3 2 3 1 1 2 2 1 1 2 3 2 1 2 2 2 2 1 2 1 1 1 2 1 1 1 3 1 1
## [704] 2 2 1 2 1 1 1 2 1 2 3 2 3 2 2 1 1 3 1 1 1 1 3 2 2 1 1 1 1 2 1 2 1 1 1 1 1
## [741] 2 1 3 2 2 1 1 1 3 1 3 2 1 3 1 1 1 1 1 2 1 1 1 3 1 1 1 1 1 1 1 1 3 3 3 1 1
## [778] 3 1 1 2 1 2 2 2 1 3 1 1 3 1 1 2 2 1 1 1 1 1 2 1 2 1 2 1 2 1 2 1 1 2 2 1 2
## [815] 3 3 2 1 1 1 2 2 2 3 1 3 2 3 2 2 2 2 3 2 1 3 1 1 1 1 1 1 1 2 3 1 1 2 2 3 2
## [852] 2 3 3 2 1 1 1 3 2 1 1 2 1 1 3 2 1 1 1 1 1 1 1 3 2 1 3 2 1 2 2 2 2 1 2 1 1
## [889] 2 2 2 1 1 3 1 1 1 1 1 3 1 2 1 1 1 2 2 2 1 3 3 1 1 2 1 1 3 1 2 3 1 2 2 3 3
## [926] 3 1 1 1 1 2 3 2 2 2 2 1 1 2 1 3 1 2 3 2 1 2 2 1 3 1 2 1 1 2 2 2 2 2 1 1 1
## [963] 1 1 1 2 3 2 1 2 1 1 1 1 2 1 2 2 3 1 2 1 3 2 1 1 1 1 2 3 2 2 3 1 3 3 3 2 1
## [1000] 3 2 2 1 2 1 1 2 1 2 1 2 2 1 2 3 3 1 3 1 1 1 2 1 1 1 2 1 1 1 1 1 2 3 2 1 1
## [1037] 3 1 2 1 3 3 2 1 1 1 3 1 1 1 1 3 1 3 1 1 3 1 1 3 2 3 2 1 1 1 1 3 1 3 2 2 1
## [1074] 1 2 1 1 3 1 2 1 2 2 1 1 3 1 1 1 1 1 1 1 2 2 1 3 1 2 2 2 2 3 1 2 1 1 3 3 3
## [1111] 3 3 2 1 1 2 3 1 1 1 2 3 2 2 3 2 1 2 2 1 1 3 2 2 1 3 1 2 1 2 1 2 2 2 2 2 3
## [1148] 2 1 2 1 1 1 2 1 2 1 2 1 2 2 3 3 3 1 2 2 3 1 2 1 3 3 1 3 1 3 2 1 3 1 2 2 3
## [1185] 1 2 2 3 1 2 2 2 2 3 2 3 1 1 2 1 1 2 1 1 1 2 1 1 2 1 2 2 2 1 1 1 1 2 1 2 1
## [1222] 3 2 3 1 3 1 2 3 2 3 3 2 3 1 1 3 2 1 3 1 2 3 3 2 1 2 3 1 1 1 2 2 1 1 3 2 1
## [1259] 1 1 1 1 3 2 2 1 3 1 3 1 1 3 2 2 2 1 1 2 2 3 1 2 1 3 2 1 2 3 2 2 2 1 3 1 2
## [1296] 3 3 1 3 1 3 1 3 1 2 1 3 2 1 2 3 3 2 1 3 1 3 1 1 2 2 1 1 3 2 1 1 3 1 1 2 1
## [1333] 1 1 1 1 2 2 1 3 3 2 1 1 1 2 1 1 1 1 3 1 1 1 1 1 3 1 1 1 2 2 2 1 3 1 1 1 1
## [1370] 2 2 1 2 1 1 2 2 3 3 1 2 2 2 1 1 1 1 3 2 1 1 1 3 1 2 1 1 3 1 1 1 1 2 1 2 1
## [1407] 2 1 1 2 1 2 3 1 1 1 2 3 1 3 2 1 3 2 3 1 1 1 1 1 1 3 1 2 1 3 1 3 3 1 3 2 3
## [1444] 1 3 2 1 3 2 2 2 1 1 2 1 1 3 2 3 1 1 1 3 2 1 1 2 2 2 2 2 1 1 1 3 2 1 1 1 2
## [1481] 2 1 1 3 3 1 1 1 1 2 1 2 1 2 1 1 3 1 1 1 3 2 2 2 1 1 3 3 2 2 2 2 1 2 1 2 1
## [1518] 2 3 1 2 1 2 2 3 1 1 3 2 1 1 1 1 1 2 1 2 3 3 1 1 1 2 3 2 1 2 1 2 1 2 1 1 1
## [1555] 2 2 2 2 3 2 1 1 1 1 1 1 1 3 2 1 1 1 1 1 1 1 2 1 2 3 1 1 1 1 1 1 2 1 1 1 1
## [1592] 1 1 1 1 1 1 1 1 1 1 3 1 3 3 1 1 2 1 1 3 3 1 1 3 1 2 1 1 2 2 1 1 1 1 3 3 1
## [1629] 1 2 2 2 1 1 2 2 1 2 1 2 3 1 2 2 3 1 1 1 2 2 2 1 1 2 2 1 3 3 3 1 2 2 2 3 1
## [1666] 2 3 2 1 1 2 2 1 2 1 3 1 3 2 3 1 2 1 3 2 2 2 1 2 1 2 3 1 1 1 1 2 1 1 1 1 2
## [1703] 3 1 1 1 2 1 2 2 1 1 2 2 2 2 2 3 3 2 1 3 1 1 2 1 3 2 3 2 3 2 2 1 2 3 2 2 2
## [1740] 1 2 1 2 1 3 3 1 2 2 1 1 3 1 2 1 2 3 1 1 1 1 2 2 1 1 1 1 2 1 1 3 2 2 1 1 3
## [1777] 1 1 2 3 2 1 1 2 1 1 1 1 1 2 1 1 1 2 2 1 1 1 1 1 2 2 1 2 1 1 2 3 1 2 3 2 2
## [1814] 2 3 1 2 3 1 3 3 1 2 1 2 2 2 1 2 1 2 1 1 1 1 1 3 1 2 2 1 2 1 1 2 1 2 2 1 1
## [1851] 1 1 1 1 3 1 1 1 1 1 3 1 2 1 1 1 1 1 3 1 1 1 2 1 2 1 1 1 1 2 2 1 1 1 2 2 1
## [1888] 2 2 2 3 2 1 1 1 1 1 2 1 1 3 1 1 3 1 1 3 2 3 1 2 1 2 3 1 3 1 1 1 1 1 1 1 1
## [1925] 1 1 3 1 1 1 1 1 1 1 1 2 2 1 2 2 1 1 3 1 1 2 1 1 1 1 1 1 1 1 2 3 3 1 1 1 3
## [1962] 2 1 3 3 1 2 2 2 2 1 1 3 1 1 2 1 3 2 3 1 2 1 1 1 2 1 1 1 2 2 1 1 1 2 3 2 3
## [1999] 1 2 2 2 2 1 3 2 3 1 1 2 2 1 1 2 2 3 2 1 1 2 3 1 1 1 2 2 2 2 1 3 2 3 2 1 1
## [2036] 2 1 1 2 1 2 1 3 1 1 2 1 3 1 1 1 1 1 1 1 3 3 1 3 1 1 2 2 1 1 3 2 2 1 1 1 1
## [2073] 1 1 2 2 1 1 1 2 1 3 1 1 1 2 2 2 3 1 3 2 2 2 1 1 1 2 1 2 2 1 1 1 1 1 2 1 1
## [2110] 1 1 2 1 1 3 1 1 1 1 1 2 1 3 3 1 1 2 1 1 1 1 3 1 1 3 2 1 2 1 1 1 1 1 1 1 3
## [2147] 1 1 1 1 3 1 2 3 1 3 1 1 1 1 1 2 1 1 2 3 1 2 2 3 2 2 1 2 2 1 1 3 1 1 1 1 1
## [2184] 2 2 3 1 1 1 2 3 1 3 2 1 3 1 1 1 3 1 3 1 2 1 1 1 1 1 1 1 1 1 3 1 1 3 2 1 1
## [2221] 1 1 1 1 1 2 1 2 2 2 3 2 1 2 1 1 1 1 1 1 1 3 3 2 1 1 3 2 1 2 3 2 3 2 1 1 2
## [2258] 1 1 1 1 2 2 3 2 1 2 3 1 2 1 1 2 1 1 1 1 3 1 1 2 1 2 2 2 2 1 3 1 2 2 2 2 2
## [2295] 1 3 3 2 1 1 3 3 3 2 2 2 2 2 3 2 3 3 2 2 2 2 2 2 3 1 1 2 2 1 2 1 1 2 1 1 1
## [2332] 1 2 1 2 1 2 2 1 2 2 3 1 1 1 1 2 2 1 2 2 3 2 2 2 3 1 1 1 1 2 2 1 2 3 2 2 1
## [2369] 3 3 1 2 3 1 1 1 2 1 1 1 1 1 1 3 1 1 2 2 2 1 3 1 1 1 1 3 2 1 2 2 1 1 2 3 1
## [2406] 3 1 2 1 1 2 2 1 1 1 3 1 2 2 1 1 1 2 2 3 3 1 1 2 1 3 1 1 1 1 1 1 1 1 1 2 1
## [2443] 1 1 1 2 2 1 3 1 2 1 1 1 1 2 2 1 3 1 1 1 1 1 1 1 1 3 2 2 1 3 2 1 2 1 3 1 3
## [2480] 1 1 1 1 1 1 1 2 2 1 3 3 2 1 1 1 1 3 2 1 1 1 1 3 2 3 2 1 1 1 1 1 1 1 2 2 1
## [2517] 1 1 3 1 2 2 1 1 2 2 2 1 2 3 1 1 1 1 1 2 1 1 1 3 1 2 1 2 2 2 3 1 2 2 1 1 3
## [2554] 2 1 1 2 2 2 3 1 1 1 1 1 1 2 1 1 2 1 1 1 2 1 1 3 1 1 1 3 1 1 2 2 2 1 2 2 1
## [2591] 2 2 2 1 1 1 3 1 3 2 1 3 2 2 2 1 1 1 1 2 1 2 3 1 1 2 1 1 1 3 1 3 1 2 2 1 2
## [2628] 2 1 3 2 2 2 1 1 1 2 2 3 3 1 1 3 1 3 1 1 1 3 1 1 1 1 2 1 3 1 2 3 3 2 3 1 2
## [2665] 1 1 1 1 1 2 2 1 2 2 2 3 1 1 1 1 1 1 3 1 1 1 1 1 2 1 1 3 3 1 1 3 1 1 1 2 1
## [2702] 1 1 1 1 1 1 2 2 2 3 1 3 3 2 1 1 2 1 1 1 1 1 2 1 1 2 3 2 3 2 2 1 2 1 1 1 1
## [2739] 1 1 2 1 2 2 2 1 3 3 3 3 1 3 2 1 1 1 1 1 1 1 1 1 1 1 3 1 2 1 3 1 1 2 3 3 1
## [2776] 1 3 1 3 3 1 1 1 3 1 1 1 1 1 1 1 3 2 2 2 3 2 1 1 2 3 2 2 2 1 1 1 1 1 1 1 1
## [2813] 1 1 3 1 1 3 1 3 1 3 2 2 3 2 2 1 3 1 1 1 1 1 1 2 2 2 3 1 1 2 2 1 1 1 2 1 3
## [2850] 1 1 2 1 2 3 3 1 1 2 3 2 1 1 3 1 3 1 3 1 1 2 1 2 1 2 2 1 1 3 1 3 3 1 2 1 2
## [2887] 3 1 2 2 1 3 3 1 1 3 2 1 1 1 1 2 2 1 1 1 1 3 1 3 1 2 2 2 2 2 1 1 1 1 2 2 1
## [2924] 3 3 3 1 1 1 1 2 1 2 2 1 1 1 2 3 1 2 3 1 2 1 1 3 2 2 2 3 3 3 1 3 1 1 1 1 2
## [2961] 3 3 1 3 1 1 1 2 3 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 2 2 3 1 3
## [2998] 3 1 1 3 1 1 1 1 1 3 1 1 2 3 1 1 3 2 1 1 1 2 1 3 2 3 1 1 1 1 1 1 2 2 1 1 1
## [3035] 1 1 1 1 3 3 1 3 1 1 3 1 1 1 1 1 2 1 2 1 1 1 1 1 3 1 1 2 1 3 2 3 2 3 1 2 3
## [3072] 2 1 3 1 2 1 1 1 3 2 2 2 3 1 1 3 3 2 1 1 1 1 1 3 1 2 1 2 2 2 3 3 1 1 1 1 2
## [3109] 1 1 3 3 2 2 2 1 1 1 2 1 3 2 2 1 1 3 1 3 1 1 1 1 3 1 1 3 2 2 2 3 3 2 2 1 2
## [3146] 1 1 2 1 1 2 2 1 1 2 3 1 2 1 2 1 3 1 1 3 1 1 2 1 2 2 1 1 1 2 1 1 1 3 3 2 1
## [3183] 3 2 1 1 3 2 1 1 1 3 1 2 1 1 2 1 1 1 1 3 2 1 2 3 1 1 1 1 1 3 2 2 2 1 2 3 2
## [3220] 1 3 3 2 1 3 1 2 3 2 2 2 2 3 2 3 2 1 3 1 3 3 1 1 1 2 2 3 1 2 2 1 2 1 2 3 2
## [3257] 1 2 1 1 1 3 2 2 2 2 3 1 1 2 1 2 1 1 1 2 2 3 1 2 3 3 3 1 1 2 1 1 1 1 2 3 2
## [3294] 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 3 3 1
## [3331] 1 3 2 1 2 1 1 1 3 2 3 1 2 3 3 1 2 3 2 1 2 1 1 1 1 1 1 1 3 1 1 1 1 1 3 1 1
## [3368] 1 1 2 1 1 2 1 1 1 1 1 1 1 1 2 1 1 1 2 1 1 1 2 1 2 1 2 1 1 1 1 1 3 1 1 1 2
## [3405] 1 2 1 1 1 2 1 2 1 2 2 1 3 2 2 1 2 3 1 1 1 1 2 1 1 1 1 1 1 3 1 1 1 2 1 2 1
## [3442] 1 1 1 1 3 2 2 1 3 1 2 1 1 3 2 1 2 2 1 1 1 1 2 3 3 1 2 2 1 2 1 1 2 2 1 3 1
## [3479] 1 2 1 1 1 1 1 1 3 1 1 1 2 3 1 1 1 1 1 1 2 2 2 1 1 1 2 2 1 1 2 1 1 1 2 1 3
## [3516] 1 2 2 3 2 2 3 1 3 1 3 3 1 1 1 2 1 2 2 2 1 1 1 2 1 1 1 1 3 3 1 1 2 2 2 2 3
## [3553] 3 2 1 3 2 2 3 3 1 1 3 3 3 2 3 1 1 1 1 1 3 3 1 1 2 1 2 2 2 3 1 2 1 2 1 1 1
## [3590] 2
##
## Within cluster sum of squares by cluster:
## [1] 1056.8476 961.8176 850.5962
## (between_SS / total_SS = 60.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Estimate a suitable number of clusters with the gap statistic.
# The clustering function is passed under its full argument name,
# FUNcluster, instead of relying on partial matching of `FUN`.
set.seed(123)
optimizacion1 <- clusGap(
  datos_scaled,
  FUNcluster = kmeans,
  nstart = 1,
  K.max = 10
)
# K.max is the largest number of clusters evaluated (here 10).
plot(
  optimizacion1,
  xlab = "Número de clusters k",
  main = "Optimización de Clusters"
)
# The first peak of the curve is taken as the optimal number of clusters.
fviz_cluster(kmodel, data = datos_scaled)
# Step 10. Attach the cluster labels to the data set.
# The heading is kept on a single comment line: when it wrapped, the bare
# word left on its own line was evaluated as an (undefined) object.
df1_clusters <- cbind(datos_scaled, cluster = kmodel$cluster)
head(df1_clusters)
## X Y cluster
## [1,] 1.697833 0.8301372 3
## [2,] -0.831479 0.8775053 2
## [3,] -0.831479 0.3488188 2
## [4,] 2.119385 1.5263624 3
## [5,] -0.831479 0.6583680 2
## [6,] -0.831479 0.2215814 2
# Show the last rows of the scaled measures with their cluster label.
tail(df1_clusters)
## X Y cluster
## [3585,] -0.83147901 -0.3559969 1
## [3586,] -0.83147901 0.3070122 2
## [3587,] -0.83147901 0.1555358 1
## [3588,] -0.83147901 -0.6262285 1
## [3589,] -0.40992702 -0.2311889 1
## [3590,] 0.01162497 1.1380216 2
# Store each customer's cluster label next to the unscaled measures.
datos_sin_outliers[["cluster"]] <- kmodel$cluster
library(dplyr)

# Profile every cluster on the original scale: mean purchase frequency,
# mean ticket and number of customers.
resumen_clusters <- summarise(
  group_by(datos_sin_outliers, cluster),
  Frecuencia_prom = mean(X),
  Ticket_prom = mean(Y),
  Clientes = n()
)
resumen_clusters
## # A tibble: 3 × 4
## cluster Frecuencia_prom Ticket_prom Clientes
## <int> <dbl> <dbl> <int>
## 1 1 1.96 11.5 1820
## 2 2 2.22 25.0 1137
## 3 3 7.23 17.1 633
# Name each cluster from its profile instead of its k-means number.
# Cluster numbers depend on the random initial centers, so a fixed
# number-to-name mapping can silently mislabel segments on another run.
#   - highest mean frequency (X)                -> "Clientes Frecuentes"
#   - highest mean ticket (Y) among the others  -> "Clientes Premium"
#   - remaining cluster                         -> "Clientes Ocasionales"
# local() keeps the helper variables out of the global environment.
datos_sin_outliers$segmento <- local({
  ids <- as.character(datos_sin_outliers$cluster)
  frec_media <- tapply(datos_sin_outliers$X, ids, mean)
  ticket_medio <- tapply(datos_sin_outliers$Y, ids, mean)

  # The three segment names only make sense for a three-cluster solution.
  if (length(frec_media) != 3) {
    stop("Segment naming expects exactly 3 clusters.", call. = FALSE)
  }

  id_frecuentes <- names(which.max(frec_media))
  resto <- setdiff(names(frec_media), id_frecuentes)
  id_premium <- resto[which.max(ticket_medio[resto])]

  case_when(
    ids == id_frecuentes ~ "Clientes Frecuentes",
    ids == id_premium ~ "Clientes Premium",
    TRUE ~ "Clientes Ocasionales"
  )
})
table(datos_sin_outliers$segmento)
##
## Clientes Frecuentes Clientes Ocasionales Clientes Premium
## 633 1820 1137
Clientes Ocasionales. Característica: baja frecuencia de compra y bajo ticket promedio; compran esporádicamente y generan poco valor individual. Recomendación: implementar promociones y descuentos personalizados que incentiven una segunda y una tercera compra para aumentar su frecuencia.
Clientes Premium. Característica: frecuencia moderada, pero con el ticket promedio más alto de los tres segmentos; generan un alto valor por transacción. Recomendación: desarrollar estrategias de fidelización y beneficios exclusivos para incrementar su frecuencia sin reducir su ticket.
Clientes Frecuentes. Característica: alta frecuencia de compra con un ticket promedio medio; son clientes leales y recurrentes. Recomendación: aplicar estrategias de upselling y programas de recompensas para elevar su ticket promedio y maximizar su valor.