The basic premise of RFM is that customers who have purchased more recently, more frequently and have spent more with your company are your best prospects for future direct marketing campaigns
explore the RFM segments in relation to other customer attributes It is based on three simple customer attributes:R ecency of purchase,F requency of purchase, andM onetary value of purchase.
library(readxl)
library(rfm)
## Warning: package 'rfm' was built under R version 4.2.1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(zoo)
## Warning: package 'zoo' was built under R version 4.2.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
data <- read_excel("702A22-XLS-ENG.xls",sheet = 2)
data1 <- data %>% select(CustID,Num_Games,Avg_Seats,Tot_Sales,LastTransYear,LastTransMonth) %>% mutate(Day = 01)
data1$LastTransdate <- as.Date(paste(data1$LastTransYear, data1$LastTransMonth,data1$Day), "%Y %m %d")
str(data1)
## tibble [3,000 × 8] (S3: tbl_df/tbl/data.frame)
## $ CustID : num [1:3000] 1 2 3 4 5 6 7 8 9 10 ...
## $ Num_Games : num [1:3000] 3 1 1 1 3 2 1 7 1 5 ...
## $ Avg_Seats : num [1:3000] 3 2 2 3 4 2 6 2 6 3 ...
## $ Tot_Sales : num [1:3000] 630 140 50 75 660 220 330 560 240 825 ...
## $ LastTransYear : num [1:3000] 2001 2000 2001 2001 1998 ...
## $ LastTransMonth: num [1:3000] 2 9 10 9 10 4 2 1 11 4 ...
## $ Day : num [1:3000] 1 1 1 1 1 1 1 1 1 1 ...
## $ LastTransdate : Date[1:3000], format: "2001-02-01" "2000-09-01" ...
colSums(is.na(data1))
## CustID Num_Games Avg_Seats Tot_Sales LastTransYear
## 0 0 0 0 0
## LastTransMonth Day LastTransdate
## 0 0 0
summary(data1)
## CustID Num_Games Avg_Seats Tot_Sales
## Min. : 1.0 Min. : 1.000 Min. :1.000 Min. : 25.0
## 1st Qu.: 750.8 1st Qu.: 1.000 1st Qu.:2.000 1st Qu.: 193.8
## Median :1500.5 Median : 3.000 Median :3.000 Median : 350.0
## Mean :1500.5 Mean : 3.301 Mean :3.418 Mean : 476.5
## 3rd Qu.:2250.2 3rd Qu.: 5.000 3rd Qu.:5.000 3rd Qu.: 660.0
## Max. :3000.0 Max. :11.000 Max. :9.000 Max. :4410.0
## LastTransYear LastTransMonth Day LastTransdate
## Min. :1998 Min. : 1.000 Min. :1 Min. :1998-01-01
## 1st Qu.:1999 1st Qu.: 3.000 1st Qu.:1 1st Qu.:1999-09-01
## Median :2000 Median : 6.000 Median :1 Median :2000-09-01
## Mean :2000 Mean : 6.431 Mean :1 Mean :2000-05-29
## 3rd Qu.:2001 3rd Qu.: 9.000 3rd Qu.:1 3rd Qu.:2001-05-01
## Max. :2001 Max. :12.000 Max. :1 Max. :2001-12-01
analysis_date <- as_date("2001-12-10")
data1 <- data1 %>% mutate(recency_days=analysis_date-LastTransdate)
rfm_result <- rfm_table_customer(data1, customer_id = CustID, n_transactions = Num_Games, recency_days = recency_days,total_revenue = Tot_Sales,analysis_date = analysis_date)
rfm_result1 <- as.data.frame(rfm_result$rfm)
write.csv(rfm_result,"LondonJetsRFM.csv")
rfm_bar_chart(rfm_result)
rfm_histograms(rfm_result)
## Warning: attributes are not identical across measure variables;
## they will be dropped
rfm_order_dist(rfm_result)
rfm_rm_plot(rfm_result) # recency and monetary value
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
rfm_fm_plot(rfm_result) # frequency and monetary value
rfm_rf_plot(rfm_result)# recency and frequency
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
Best Customer : Bought Most recently and most often and spend most loyal Customer : Buys most frequently Likely to be Loyal : Recency is moderate low frequency and spent less Big spenders : Spent Most New Customer Almost lost : Havent purchased for some time but purchased frequently and spent most Lost Customers : Havent purchased for some time but purchased frequently and spent moderately Lost Cheap Customers : last purchased long ago purchased few and spent less
segment_titles <- c("Best Customer","loyal Customer","Likely to be Loyal","Big spenders","New Customer" ,"Almost lost","Lost Customer","Lost Cheap Customers")
rlow <- c(4,1,3,1,2,1,1,1)
rhigh <- c(5,5,5,5,4,3,2,2)
flow <- c(4,4,1,1,1,3,2,1)
fhigh <- c(5,5,3,5,3,5,4,2)
mlow <- c(4,1,1,4,1,3,1,1)
mhigh <- c(5,5,3,5,3,5,3,2)
divisions <- rfm_segment(rfm_result,segment_titles,rlow,rhigh,flow,fhigh,mlow,mhigh)
CustSegment <- divisions %>% count(segment) %>% arrange(desc(n)) %>% rename(SEGMENT = segment,FREQUENCY = n) %>%
mutate(PERCENTAGE = FREQUENCY/sum(FREQUENCY)*100)
View(CustSegment)
barplot(CustSegment$PERCENTAGE~CustSegment$SEGMENT,main = "Bar Plot Of CustSegment",xlab = "SEGMENT",ylab = "%Customer",axisnames = TRUE,border = "dark blue")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
data$Type <- as.factor(data$Type)
count(data$Type)
## x freq
## 1 Business 528
## 2 Personal 2472
count(data$Sex)
## x freq
## 1 Business 528
## 2 Female 842
## 3 Male 1630
count(data$LastTransYear)
## x freq
## 1 1998 455
## 2 1999 429
## 3 2000 905
## 4 2001 1211