# Predictive Modelling & Segmentation Case Study ----

# Setup: keep only libraries and data prep here. NO PLOTTING CODE.
library(tidyverse)
library(cluster)
library(factoextra)
library(tidymodels)

# Fix the RNG seed so the random draws made later in the script are
# reproducible from run to run.
set.seed(42)
# Create synthetic data ----
# Builds a tibble of `n` synthetic customers from two concatenated groups:
# the first ~70% of rows are drawn around recency 20 / frequency 50 /
# monetary 500, the remaining ~30% around recency 100 / frequency 5 /
# monetary 50.
n <- 300

# Derive the two group sizes as whole numbers that always sum to `n`, rather
# than passing fractional products such as `n * 0.7` straight to rnorm().
n_major <- round(n * 0.7)
n_minor <- n - n_major

customer_data <- tibble(
  customer_id = seq_len(n),
  recency = c(rnorm(n_major, 20, 10), rnorm(n_minor, 100, 20)),
  frequency = c(rnorm(n_major, 50, 15), rnorm(n_minor, 5, 5)),
  monetary = c(rnorm(n_major, 500, 100), rnorm(n_minor, 50, 20))
) %>%
  # Normal draws can be negative; take absolute values so no column holds a
  # negative number.
  mutate(across(everything(), ~ abs(.)))
# Scale data ----
# scale() centres and scales each selected column and returns a numeric
# matrix, which is the input used for the clustering steps below.
scaled_data <- customer_data %>%
  select(recency, frequency, monetary) %>%
  scale()

# This chunk WILL show because it is not labeled 'setup'
# Elbow plot of within-cluster sum of squares ("wss") across candidate k.
fviz_nbclust(scaled_data, kmeans, method = "wss") +
  labs(title = "Elbow Method for Optimal K")

# Perform clustering
# k-means with 3 centres; nstart = 25 runs 25 random initialisations.
km_res <- kmeans(scaled_data, centers = 3, nstart = 25)

# Attach each customer's cluster label as a factor column for later plots.
customer_data$segment <- as.factor(km_res$cluster)
# Visualize Clusters ----
# Draws the k-means result over the scaled data as points, with a convex hull
# around each cluster.
fviz_cluster(
  km_res,
  data = scaled_data,
  geom = "point",
  ellipse.type = "convex",
  main = "Customer Segments"
)

# NOTE(review): the two lines below look like knitr/Quarto chunk options that
# originally headed the following chunk; in a plain R script they are inert
# comments. Confirm against the source document before relying on them.
#| warning: false
#| message: false
# Calculate Score ----
# Adds `propensity_score`, a fixed-weight linear combination of two columns:
# frequency contributes positively (weight 0.6) and recency negatively
# (weight 0.4). The weights are hard-coded, not fitted from data.
customer_data <- customer_data %>%
  mutate(propensity_score = (frequency * 0.6) - (recency * 0.4))
# Plot ----
# Scatter of propensity score (x) against monetary value (y), coloured by the
# `segment` factor stored on customer_data.
ggplot(
  customer_data,
  aes(x = propensity_score, y = monetary, color = segment)
) +
  geom_point(size = 3, alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Unsupervised Segments within Supervised Propensity",
    subtitle = "Notice how clusters overlap on the score but are distinct in behavior",
    x = "Supervised Propensity Score",
    y = "Monetary Value"
  )