RFM helps to identify customers who are more likely to respond to promotions by segmenting them into various categories. We will use the rfm package. We will read the csv file RFM_data.csv and calculate RFM scores for each customer. The data include 39999 observations of: 1. Customer I.D 2. Recencency Days 3. Number of Orders 4. Total Revenue 5. Most Recent Visit Will not be used Four bins will be used for recency, frequency and monetary. In addition, we will present some graphs to see the recency, frequency and monetary scores for the customers. In the end, we will divide our customers into 11 categories of customers.
In the following steps we used the following reference.
df<-read.csv("RFM_data.csv",header = T)
head(df)
## customer_id recency_days number_of_orders revenue most_recent_visit
## 1 22086 232 9 777 14/05/2020
## 2 2290 115 16 1555 08/09/2020
## 3 26377 43 5 336 19/11/2020
## 4 24650 64 12 1189 29/10/2020
## 5 12883 23 12 1229 09/12/2020
## 6 2119 72 11 929 21/10/2020
nrow(df)
## [1] 39999
df$most_recent_visit<-as.Date(df$most_recent_visit, format="%d/%m/%y")
head(df)
## customer_id recency_days number_of_orders revenue most_recent_visit
## 1 22086 232 9 777 2020-05-14
## 2 2290 115 16 1555 2020-09-08
## 3 26377 43 5 336 2020-11-19
## 4 24650 64 12 1189 2020-10-29
## 5 12883 23 12 1229 2020-12-09
## 6 2119 72 11 929 2020-10-21
analysis_date <- lubridate::as_date("2020-03-21", tz = "UTC")
rfm_score<-rfm_table_customer(data = df,customer_id = customer_id,n_transactions =number_of_orders,recency_days = recency_days,total_revenue = revenue,analysis_date = analysis_date,recency_bins = 4,frequency_bins =4, monetary_bins = 4)
rfm_score[1:10,]
## customer_id recency_days transaction_count amount recency_score
## 1 22086 232 9 777 2
## 2 2290 115 16 1555 3
## 3 26377 43 5 336 4
## 4 24650 64 12 1189 4
## 5 12883 23 12 1229 4
## 6 2119 72 11 929 4
## 7 31283 112 17 1569 3
## 8 33815 142 11 778 3
## 9 15972 43 9 641 4
## 10 27650 131 10 970 3
## frequency_score monetary_score rfm_score
## 1 2 2 222
## 2 4 4 344
## 3 1 1 411
## 4 3 4 434
## 5 3 4 434
## 6 3 3 433
## 7 4 4 344
## 8 3 2 332
## 9 2 1 421
## 10 2 3 323
rfm_heatmap(rfm_score) #combination of recency vs frequency colored by monetary
rfm_bar_chart(rfm_score) #same concept but in bar graph
rfm_order_dist(rfm_score) # Number of customers per orders
##### Continuous graphs
rfm_fm_plot(rfm_score) #Frequency vs monetary
rfm_rm_plot(rfm_score) #Recency ve monetary
rfm_rf_plot(rfm_score) #Recency vs frequency
In the following section we are following this [reference] (https://medium.com/@triimamwicaksono_47213/customer-segmentation-and-strategy-using-rfm-analysis-in-rstudio-be79118c8235) in order to classify our customers into the below segment defenitions:
1. Champions 2. Loyal Customers 3. Potential Loyalist 4. Recent Customers 5. Promising 6. Customers Needing Attention 7. About to Sleep 8. At Risk 9. Can’t Lose Them 10. Hibernating 11.Lost
champions<- c(444)
loyal_customers <- c(334, 342, 343, 344, 433, 434, 443)
potential_loyalist <-c(332,333,341,412,413,414,431,432,441,442,421,422,423,424)
recent_customers <- c(411)
promising <- c(311, 312, 313, 331)
needing_attention <- c(212,213,214,231,232,233,241,314,321,322,323,324)
about_to_sleep <- c(211)
at_risk <- c(112,113,114,131,132,133,142,124,123,122,121,224,223,222,221)
cant_lose <- c(134,143,144,234,242,243,244)
hibernating <- c(141)
lost <- c(111)
rfm_scores<-as.vector(rfm_score$rfm$rfm_score)
rfm_scores[which(rfm_score$rfm$rfm_score %in% champions)]="Champions"
rfm_scores[which(rfm_scores %in% potential_loyalist)] = "Potential Loyalist"
rfm_scores[which(rfm_scores %in% loyal_customers)] = "Loyal Customers"
rfm_scores[which(rfm_scores %in% recent_customers)] = "Recent Customers"
rfm_scores[which(rfm_scores %in% promising)] = "Promising"
rfm_scores[which(rfm_scores %in% needing_attention)] = "Customer Needing Attention"
rfm_scores[which(rfm_scores %in% about_to_sleep)] = "About to Sleep"
rfm_scores[which(rfm_scores %in% at_risk)] = "At Risk"
rfm_scores[which(rfm_scores %in% cant_lose)] = "Can't Lose Them"
rfm_scores[which(rfm_scores %in% hibernating)] = "Hibernating"
rfm_scores[which(rfm_scores %in% lost)] = "Lost"
customer_sement<-data.frame(cus_seg=rfm_scores)
customer_sement%>%count(cus_seg)%>%arrange(desc(n))%>%rename(cus_seg = cus_seg, Count = n)
## # A tibble: 11 x 2
## cus_seg Count
## <fct> <int>
## 1 At Risk 7718
## 2 Loyal Customers 5643
## 3 Potential Loyalist 5440
## 4 Customer Needing Attention 5331
## 5 Can't Lose Them 3886
## 6 Lost 3558
## 7 Promising 2751
## 8 Champions 2140
## 9 About to Sleep 2096
## 10 Recent Customers 1435
## 11 Hibernating 1
ggplot(data = customer_sement) + aes(x = cus_seg, fill = cus_seg)+ geom_bar() + labs(title = "Customer Segmentation", x = "Segment", y = "Total Customer") + coord_flip()+ theme_minimal()
Finally I think it will be best to add the classification into the df data frame we start from:
df<-df%>%mutate(Segment_name=customer_sement$cus_seg)
head(df)
## customer_id recency_days number_of_orders revenue most_recent_visit
## 1 22086 232 9 777 2020-05-14
## 2 2290 115 16 1555 2020-09-08
## 3 26377 43 5 336 2020-11-19
## 4 24650 64 12 1189 2020-10-29
## 5 12883 23 12 1229 2020-12-09
## 6 2119 72 11 929 2020-10-21
## Segment_name
## 1 At Risk
## 2 Loyal Customers
## 3 Recent Customers
## 4 Loyal Customers
## 5 Loyal Customers
## 6 Loyal Customers