1. Librerías
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(BTYD)
## Loading required package: hypergeo
## Loading required package: optimx
library(ggplot2)
2. Datos
set.seed(123)
transactions <- data.frame(
cliente_id = sample(1:500, 3000, replace = TRUE),
fecha = sample(seq(as.Date("2022-01-01"), as.Date("2025-01-01"), by="day"), 3000, replace = TRUE),
monto = runif(3000, 10, 500)
)
transactions <- transactions %>% arrange(cliente_id, fecha)
3. RFM
end_date <- as.Date("2025-01-01")
rfm <- transactions %>%
group_by(cliente_id) %>%
summarise(
frequency = n() - 1,
recency = as.numeric(max(fecha) - min(fecha)),
T = as.numeric(end_date - min(fecha)),
monetary = mean(monto)
)
4. Preparación BTYD (manteniendo cliente_id)
rfm_btyd <- rfm %>%
select(cliente_id, frequency, recency, T) %>%
rename(
x = frequency,
t.x = recency,
T.cal = T
) %>%
filter(
x > 0,
t.x > 0,
T.cal > 0,
t.x < T.cal
) %>%
mutate(
t.x = t.x / 365,
T.cal = T.cal / 365
) %>%
drop_na() %>%
as.data.frame()
5. Modelo BG/NBD
bg_model <- bgnbd.EstimateParameters(
rfm_btyd[, c("x","t.x","T.cal")]
)
6. Predicción de transacciones
rfm_btyd$pred_transacciones <- bgnbd.ConditionalExpectedTransactions(
bg_model,
t = 12,
rfm_btyd$x,
rfm_btyd$t.x,
rfm_btyd$T.cal
)
7. Modelo monetario (regresión)
rfm_gg <- rfm %>%
filter(frequency > 0, monetary > 0)
model_monetary <- lm(monetary ~ frequency, data = rfm_gg)
rfm_gg$pred_monetary <- predict(model_monetary, rfm_gg)
8. Integración modelos (JOIN)
rfm_final <- rfm_gg %>%
left_join(
rfm_btyd %>% select(cliente_id, pred_transacciones),
by = "cliente_id"
)
9. CLV
rfm_final <- rfm_final %>%
mutate(
CLV = pred_transacciones * pred_monetary
)
10. Distribución CLV
ggplot(rfm_final, aes(CLV)) +
geom_histogram(bins = 40, fill = "steelblue", color = "black") +
theme_minimal()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

11. Segmentación
q90 <- quantile(rfm_final$CLV, 0.90, na.rm = TRUE)
q70 <- quantile(rfm_final$CLV, 0.70, na.rm = TRUE)
q40 <- quantile(rfm_final$CLV, 0.40, na.rm = TRUE)
rfm_final <- rfm_final %>%
mutate(
segmento = case_when(
CLV > q90 ~ "Top 10%",
CLV > q70 ~ "High",
CLV > q40 ~ "Medium",
TRUE ~ "Low"
)
)
12. Top clientes
rfm_final %>%
arrange(desc(CLV)) %>%
slice_head(n = 10)
## # A tibble: 10 × 9
## cliente_id frequency recency T monetary pred_monetary pred_transacciones
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 115 12 1043 1069 204. 248. 24.3
## 2 318 11 860 865 291. 249. 22.3
## 3 271 11 1027 1072 314. 249. 22.3
## 4 229 11 946 1037 192. 249. 22.3
## 5 471 11 1045 1094 239. 249. 22.3
## 6 424 11 988 1084 249. 249. 22.3
## 7 77 10 764 820 304. 250. 20.2
## 8 477 10 813 889 226. 250. 20.2
## 9 374 10 808 917 242. 250. 20.2
## 10 251 10 995 1018 201. 250. 20.2
## # ℹ 2 more variables: CLV <dbl>, segmento <chr>
13. Boxplot
ggplot(rfm_final, aes(x = segmento, y = CLV, fill = segmento)) +
geom_boxplot() +
scale_y_log10() +
theme_minimal()
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

14. Insights
top_value <- round(sum(rfm_final$CLV[rfm_final$segmento == "Top 10%"]) / sum(rfm_final$CLV) * 100, 2)
cat(paste0(
"El segmento Top 10% concentra aproximadamente el ", top_value, "% del CLV total. ",
"Se recomienda enfocar estrategias de retención y personalización."
))
## El segmento Top 10% concentra aproximadamente el NA% del CLV total. Se recomienda enfocar estrategias de retención y personalización.