Instalar paquetes y llamar librerías

# install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# install.packages("cluster")
library(cluster)
# install.packages("factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# install.packages("lubridate")
library(lubridate)
library(readxl)

Cargar datos

df <- read_excel("/Users/paolasalas/Desktop/supermarket.xlsx")
## Warning: Expecting numeric in A522063 / R522063C1: got 'A563185'
## Warning: Expecting numeric in A522064 / R522064C1: got 'A563186'
## Warning: Expecting numeric in A522065 / R522065C1: got 'A563187'
head(df)
## # A tibble: 6 × 8
##   BillNo Itemname         Quantity Date                Time                Price
##    <dbl> <chr>               <dbl> <dttm>              <dttm>              <dbl>
## 1 536365 WHITE HANGING H…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  2.55
## 2 536365 WHITE METAL LAN…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  3.39
## 3 536365 CREAM CUPID HEA…        8 2010-12-01 00:00:00 1899-12-31 08:26:00  2.75
## 4 536365 KNITTED UNION F…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  3.39
## 5 536365 RED WOOLLY HOTT…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  3.39
## 6 536365 SET 7 BABUSHKA …        2 2010-12-01 00:00:00 1899-12-31 08:26:00  7.65
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>
df$Total <- df$Quantity * df$Price

Crear variables para clustering

clientes <- df %>%
  group_by(CustomerID) %>%
  summarise(
    Frecuencia = n_distinct(BillNo),
    TicketPromedio = mean(Total)
  )

head(clientes)
## # A tibble: 6 × 3
##   CustomerID Frecuencia TicketPromedio
##        <dbl>      <int>          <dbl>
## 1      12346          1        77184. 
## 2      12347          7           23.7
## 3      12349          1           24.1
## 4      12350          1           19.7
## 5      12352          8           29.5
## 6      12353          1           22.2

Estadística descriptiva

summary(clientes)
##    CustomerID      Frecuencia      TicketPromedio    
##  Min.   :12346   Min.   :   1.00   Min.   :    0.00  
##  1st Qu.:13831   1st Qu.:   1.00   1st Qu.:   12.24  
##  Median :15321   Median :   2.00   Median :   17.67  
##  Mean   :15316   Mean   :   5.04   Mean   :   68.38  
##  3rd Qu.:16790   3rd Qu.:   5.00   3rd Qu.:   24.69  
##  Max.   :18287   Max.   :3498.00   Max.   :77183.60  
##  NA's   :1
sd(clientes$Frecuencia)
## [1] 53.76148
sd(clientes$TicketPromedio)
## [1] 1474.699

Estandarizar datos

clientes_scaled <- scale(clientes[,2:3])

Optimizar número de clusters

fviz_nbclust(clientes_scaled, kmeans, method = "wss") +
  labs(title="Método del Codo") +
  theme_minimal()

Crear 4 clusters

set.seed(123)

k4 <- kmeans(clientes_scaled, centers = 4, nstart = 25)

clientes$Cluster <- as.factor(k4$cluster)

Visualización

ggplot(clientes, aes(x=Frecuencia, y=TicketPromedio, color=Cluster)) +
  geom_point(size=3, alpha=0.7) +
  theme_minimal() +
  labs(
    title="Segmentación de Clientes",
    x="Frecuencia de Compra",
    y="Ticket Promedio"
  )

Características por Cluster

clientes <- clientes %>%
  mutate(
    NombreCluster = case_when(
      Cluster == 1 ~ "Clientes Premium",
      Cluster == 2 ~ "Clientes Frecuentes",
      Cluster == 3 ~ "Clientes Ocasionales",
      Cluster == 4 ~ "Clientes Bajo Valor"
    )
  )

clientes %>%
  group_by(NombreCluster) %>%
  summarise(
    Frecuencia_Promedio = mean(Frecuencia),
    Ticket_Promedio = mean(TicketPromedio)
  )
## # A tibble: 4 × 3
##   NombreCluster        Frecuencia_Promedio Ticket_Promedio
##   <chr>                              <dbl>           <dbl>
## 1 Clientes Bajo Valor                 4.02            29.9
## 2 Clientes Frecuentes                 1.5          66671. 
## 3 Clientes Ocasionales               56.3           1911. 
## 4 Clientes Premium                 3498               12.8

Recomendaciones

Clientes Premium → Programas VIP y recompensas exclusivas

Clientes Frecuentes → Promociones por volumen

Clientes Ocasionales → Descuentos personalizados

Clientes Bajo Valor → Campañas de reactivación

LS0tCnRpdGxlOiAic3VwZXJtYXJrZXQgLSBBMDEyODYzMDAiCmF1dGhvcjogIlBhb2xhIFNhbGFzIgpkYXRlOiAiMjAyNi0wMi0yMyIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgdGhlbWU6IHlldGkKLS0tCgohW10oaHR0cHM6Ly9tZWRpYTIuZ2lwaHkuY29tL21lZGlhL3YxLlkybGtQVGM1TUdJM05qRXhkakp6YzJsMmVXY3pPVGhtYldFNU1HeHZkV2h0Y3pKc1pXTmlaVGc0YlRRMWFHNXVZV041WVNabGNEMTJNVjlwYm5SbGNtNWhiRjluYVdaZllubGZhV1FtWTNROVp3L1owMUpYZ3ZJekxOS3cvZ2lwaHkuZ2lmKQoKIyBbSW5zdGFsYXIgcGFxdWV0ZXMgeSBsbGFtYXIgbGlicmVyw61hc117c3R5bGU9ImNvbG9yOnB1cnBsZSJ9CgpgYGB7cn0KIyBpbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKIyBpbnN0YWxsLnBhY2thZ2VzKCJjbHVzdGVyIikKbGlicmFyeShjbHVzdGVyKQojIGluc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKQpsaWJyYXJ5KGZhY3RvZXh0cmEpCiMgaW5zdGFsbC5wYWNrYWdlcygibHVicmlkYXRlIikKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkocmVhZHhsKQpgYGAKCiMgW0NhcmdhciBkYXRvc117c3R5bGU9ImNvbG9yOnB1cnBsZSJ9CgpgYGB7cn0KZGYgPC0gcmVhZF9leGNlbCgiL1VzZXJzL3Bhb2xhc2FsYXMvRGVza3RvcC9zdXBlcm1hcmtldC54bHN4IikKaGVhZChkZikKZGYkVG90YWwgPC0gZGYkUXVhbnRpdHkgKiBkZiRQcmljZQpgYGAKCiMgW0NyZWFyIHZhcmlhYmxlcyBwYXJhIGNsdXN0ZXJpbmdde3N0eWxlPSJjb2xvcjpwdXJwbGUifQoKYGBge3J9CmNsaWVudGVzIDwtIGRmICU+JQogIGdyb3VwX2J5KEN1c3RvbWVySUQpICU+JQogIHN1bW1hcmlzZSgKICAgIEZyZWN1ZW5jaWEgPSBuX2Rpc3RpbmN0KEJpbGxObyksCiAgICBUaWNrZXRQcm9tZWRpbyA9IG1lYW4oVG90YWwpCiAgKQoKaGVhZChjbGllbnRlcykKYGBgCgojIFtFc3RhZMOtc3RpY2EgZGVzY3JpcHRpdmFde3N0eWxlPSJjb2xvcjpwdXJwbGUifQoKYGBge3J9CnN1bW1hcnkoY2xpZW50ZXMpCnNkKGNsaWVudGVzJEZyZWN1ZW5jaWEpCnNkKGNsaWVudGVzJFRpY2tldFByb21lZGlvKQpgYGAKCiMgW0VzdGFuZGFyaXphciBkYXRvc117c3R5bGU9ImNvbG9yOnB1cnBsZSJ9CgpgYGB7cn0KY2xpZW50ZXNfc2NhbGVkIDwtIHNjYWxlKGNsaWVudGVzWywyOjNdKQpgYGAKCiMgW09wdGltaXphciBuw7ptZXJvIGRlIGNsdXN0ZXJzXXtzdHlsZT0iY29sb3I6cHVycGxlIn0KCmBgYHtyfQpmdml6X25iY2x1c3QoY2xpZW50ZXNfc2NhbGVkLCBrbWVhbnMsIG1ldGhvZCA9ICJ3c3MiKSArCiAgbGFicyh0aXRsZT0iTcOpdG9kbyBkZWwgQ29kbyIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgojIFtDcmVhciA0IGNsdXN0ZXJzXXtzdHlsZT0iY29sb3I6cHVycGxlIn0KCmBgYHtyfQpzZXQuc2VlZCgxMjMpCgprNCA8LSBrbWVhbnMoY2xpZW50ZXNfc2NhbGVkLCBjZW50ZXJzID0gNCwgbnN0YXJ0ID0gMjUpCgpjbGllbnRlcyRDbHVzdGVyIDwtIGFzLmZhY3RvcihrNCRjbHVzdGVyKQpgYGAKCiMgW1Zpc3VhbGl6YWNpw7NuXXtzdHlsZT0iY29sb3I6cHVycGxlIn0KCmBgYHtyfQpnZ3Bsb3QoY2xpZW50ZXMsIGFlcyh4PUZyZWN1ZW5jaWEsIHk9VGlja2V0UHJvbWVkaW8sIGNvbG9yPUNsdXN0ZXIpKSArCiAgZ2VvbV9wb2ludChzaXplPTMsIGFscGhhPTAuNykgKwogIHRoZW1lX21pbmltYWwoKSArCiAgbGFicygKICAgIHRpdGxlPSJTZWdtZW50YWNpw7NuIGRlIENsaWVudGVzIiwKICAgIHg9IkZyZWN1ZW5jaWEgZGUgQ29tcHJhIiwKICAgIHk9IlRpY2tldCBQcm9tZWRpbyIKICApCmBgYAoKIyBbQ2FyYWN0ZXLDrXN0aWNhcyBwb3IgQ2x1c3Rlcl17c3R5bGU9ImNvbG9yOnB1cnBsZSJ9CgpgYGB7cn0KY2xpZW50ZXMgPC0gY2xpZW50ZXMgJT4lCiAgbXV0YXRlKAogICAgTm9tYnJlQ2x1c3RlciA9IGNhc2Vfd2hlbigKICAgICAgQ2x1c3RlciA9PSAxIH4gIkNsaWVudGVzIFByZW1pdW0iLAogICAgICBDbHVzdGVyID09IDIgfiAiQ2xpZW50ZXMgRnJlY3VlbnRlcyIsCiAgICAgIENsdXN0ZXIgPT0gMyB+ICJDbGllbnRlcyBPY2FzaW9uYWxlcyIsCiAgICAgIENsdXN0ZXIgPT0gNCB+ICJDbGllbnRlcyBCYWpvIFZhbG9yIgogICAgKQogICkKCmNsaWVudGVzICU+JQogIGdyb3VwX2J5KE5vbWJyZUNsdXN0ZXIpICU+JQogIHN1bW1hcmlzZSgKICAgIEZyZWN1ZW5jaWFfUHJvbWVkaW8gPSBtZWFuKEZyZWN1ZW5jaWEpLAogICAgVGlja2V0X1Byb21lZGlvID0gbWVhbihUaWNrZXRQcm9tZWRpbykKICApCmBgYAoKIyBbUmVjb21lbmRhY2lvbmVzXXtzdHlsZT0iY29sb3I6cHVycGxlIn0KCkNsaWVudGVzIFByZW1pdW0g4oaSIFByb2dyYW1hcyBWSVAgeSByZWNvbXBlbnNhcyBleGNsdXNpdmFzCgpDbGllbnRlcyBGcmVjdWVudGVzIOKGkiBQcm9tb2Npb25lcyBwb3Igdm9sdW1lbgoKQ2xpZW50ZXMgT2Nhc2lvbmFsZXMg4oaSIERlc2N1ZW50b3MgcGVyc29uYWxpemFkb3MKCkNsaWVudGVzIEJham8gVmFsb3Ig4oaSIENhbXBhw7FhcyBkZSByZWFjdGl2YWNpw7NuCg==