Teoría

Clustering es una técnica de aprendizaje no supervisado que agrupa observaciones según su similitud.

En este caso lo usaremos para segmentar clientes según:

- Frecuencia de compra
- Ticket promedio

Esto nos ayudará a generar estrategias de marketing personalizadas.

Paso 1. Librerías

# install.packages("tidyverse")
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# install.packages("cluster")
library(cluster)

# install.packages("factoextra")
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# install.packages("lubridate")
library(lubridate)
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3

Paso 2. Cargar datos

# Load the supermarket transactions workbook into `df`.
#
# BillNo (column 1) is read as text on purpose. With guessed column types,
# readxl typed this column as numeric and then emitted
# "Expecting numeric in A522063 / R522063C1: got 'A563185'" warnings for the
# later rows whose invoice numbers start with a letter, so those invoice
# numbers could not be kept as numbers. The remaining seven columns are still
# guessed, as before.
#
# NOTE(review): the absolute, user-specific path below only works on the
# original author's machine; consider a project-relative path.
df <- read_excel(
  "C:/Users/robie/Downloads/supermarket.xlsx",
  col_types = c("text", rep("guess", 7))
)

# Preview the first rows to confirm the columns were read as expected.
head(df)
## NOTE: the preview below was recorded before BillNo was forced to text;
## re-knit to refresh it (BillNo is expected to print as <chr> afterwards).
## # A tibble: 6 × 8
##   BillNo Itemname         Quantity Date                Time                Price
##    <dbl> <chr>               <dbl> <dttm>              <dttm>              <dbl>
## 1 536365 WHITE HANGING H…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  2.55
## 2 536365 WHITE METAL LAN…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  3.39
## 3 536365 CREAM CUPID HEA…        8 2010-12-01 00:00:00 1899-12-31 08:26:00  2.75
## 4 536365 KNITTED UNION F…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  3.39
## 5 536365 RED WOOLLY HOTT…        6 2010-12-01 00:00:00 1899-12-31 08:26:00  3.39
## 6 536365 SET 7 BABUSHKA …        2 2010-12-01 00:00:00 1899-12-31 08:26:00  7.65
## # ℹ 2 more variables: CustomerID <dbl>, Country <chr>

# Add a per-row Total column: Quantity multiplied by Price (vectorized).
df$Total <- df$Quantity * df$Price
LS0tDQp0aXRsZTogIkFiYXJyb3RlcyBDbHVzdHJpbmciDQphdXRob3I6ICJSb2JpZSINCmRhdGU6ICIyMDI2LTAyLTI0Ig0Kb3V0cHV0OiANCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IFRSVUUNCiAgICB0b2NfZmxvYXQ6IFRSVUUNCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFDQogICAgdGhlbWU6IHNwYWNlbGFiDQotLS0NCg0KIVtdKGh0dHBzOi8vd3d3LmRnaWNvbW11bmljYXRpb25zLmNvbS93cC1jb250ZW50L3VwbG9hZHMvMjAyMi8wMS9ncm9jZXJ5LXN0b3JlLWRlc2lnbi0uanBnKQ0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjojMEIzQzVEOyI+VGVvcsOtYTwvc3Bhbj4NCg0KQ2x1c3RlcmluZyBlcyB1bmEgdMOpY25pY2EgZGUgYXByZW5kaXphamUgbm8gc3VwZXJ2aXNhZG8gcXVlIGFncnVwYSBvYnNlcnZhY2lvbmVzIHNlZ8O6biBzdSBzaW1pbGl0dWQuDQoNCkVuIGVzdGUgY2FzbyBsbyB1c2FyZW1vcyBwYXJhIHNlZ21lbnRhciBjbGllbnRlcyBzZWfDum46DQoNCkZyZWN1ZW5jaWEgZGUgY29tcHJhDQoNClRpY2tldCBwcm9tZWRpbw0KDQpFc3RvIG5vcyBheXVkYXLDoSBhIGdlbmVyYXIgZXN0cmF0ZWdpYXMgZGUgbWFya2V0aW5nIHBlcnNvbmFsaXphZGFzLg0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjojMUQ0RTg5OyI+UGFzbyAxLiBMaWJyZXLDrWFzPC9zcGFuPg0KYGBge3J9DQojIGluc3RhbGwucGFja2FnZXMoInRpZHl2ZXJzZSIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KIyBpbnN0YWxsLnBhY2thZ2VzKCJjbHVzdGVyIikNCmxpYnJhcnkoY2x1c3RlcikNCg0KIyBpbnN0YWxsLnBhY2thZ2VzKCJmYWN0b2V4dHJhIikNCmxpYnJhcnkoZmFjdG9leHRyYSkNCg0KIyBpbnN0YWxsLnBhY2thZ2VzKCJsdWJyaWRhdGUiKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpsaWJyYXJ5KHJlYWR4bCkNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjojMUQ0RTg5OyI+UGFzbyAyLiBDYXJnYXIgZGF0b3M8L3NwYW4+DQpgYGB7cn0NCmRmIDwtIHJlYWRfZXhjZWwoIkM6L1VzZXJzL3JvYmllL0Rvd25sb2Fkcy9zdXBlcm1hcmtldC54bHN4IikNCg0KDQoNCmhlYWQoZGYpDQpkZiRUb3RhbCA8LSBkZiRRdWFudGl0eSAqIGRmJFByaWNlDQpgYGA=