# Predictive Modelling & Segmentation Case Study
# Packages used by this script. Each library() call must be its own statement;
# several calls on one line without a separator do not parse.
library(tidyverse)
library(cluster)
library(factoextra)
library(tidymodels)

# Fix the RNG seed so the random draws and the random k-means starts in this
# script are reproducible from run to run.
set.seed(42)
# Create synthetic data
# Simulate `n` customers as a mixture of two groups: about 70% drawn with low
# recency, high frequency and high monetary value, and the remainder drawn
# with high recency, low frequency and low monetary value.
n <- 300

# Whole-number group sizes that always sum to n. A product such as n * 0.7 is
# not guaranteed to be an exact integer, which could leave the metric columns
# a different length than customer_id.
n_major <- round(n * 0.7)
n_minor <- n - n_major

customer_data <- tibble(
  customer_id = seq_len(n),
  recency = c(rnorm(n_major, 20, 10), rnorm(n_minor, 100, 20)),
  frequency = c(rnorm(n_major, 50, 15), rnorm(n_minor, 5, 5)),
  monetary = c(rnorm(n_major, 500, 100), rnorm(n_minor, 50, 20))
) %>%
  # Normal draws can be negative; take absolute values so no column holds a
  # negative number.
  mutate(across(everything(), abs))
# Scale data
# Standardise the three behavioural columns (customer_id is left out) so that
# no single variable dominates the distance calculations used for clustering.
# scale() returns a numeric matrix.
scaled_data <- scale(
  select(customer_data, recency, frequency, monetary)
)
# -----------------------------
# Elbow Method Plot
# -----------------------------
# Plot the total within-cluster sum of squares ("wss") for a range of k values
# so the number of clusters can be read off the bend in the curve.
# String arguments must use plain ASCII double quotes; typographic quotes are
# not valid string delimiters in R.
print(
  fviz_nbclust(scaled_data, kmeans, method = "wss") +
    labs(title = "Elbow Method for Optimal K")
)
# -----------------------------
# K-Means Clustering
# -----------------------------
# Fit k-means with 3 clusters on the standardised features. nstart = 25 tries
# 25 random initialisations and keeps the best-scoring solution.
km_res <- kmeans(scaled_data, centers = 3, nstart = 25)

# Attach each customer's cluster label to the original data as a factor so it
# can be used as a discrete grouping/colour variable.
customer_data$segment <- as.factor(km_res$cluster)
# -----------------------------
# Cluster Visualization
# -----------------------------
# Draw the fitted clusters as points with a convex hull around each cluster.
# String arguments use plain ASCII double quotes so the call parses.
print(
  fviz_cluster(
    km_res,
    data = scaled_data,
    geom = "point",
    ellipse.type = "convex",
    main = "Customer Segments"
  )
)
# -----------------------------
# Propensity Score
# -----------------------------
# Add a hand-weighted linear score: frequency contributes positively (weight
# 0.6) and recency negatively (weight 0.4).
customer_data <- mutate(
  customer_data,
  propensity_score = (frequency * 0.6) - (recency * 0.4)
)
# -----------------------------
# Final Combined Plot
# -----------------------------
# Scatter plot of monetary value against the propensity score, coloured by
# k-means segment, to compare the score with the cluster assignment.
# String arguments use plain ASCII double quotes so the call parses.
print(
  ggplot(
    customer_data,
    aes(x = propensity_score, y = monetary, color = segment)
  ) +
    geom_point(size = 3, alpha = 0.7) +
    theme_minimal() +
    labs(
      title = "Unsupervised Segments within Supervised Propensity",
      subtitle = "Clusters overlap on score but remain behaviorally distinct",
      x = "Supervised Propensity Score",
      y = "Monetary Value"
    )
)