# Read data
# Load the retail data set from CSV. With stringsAsFactors = TRUE every
# character column is read as a factor.
# NOTE(review): presumably InvoiceDate is stored as text in the CSV and so
# arrives as a factor; it likely needs converting to a date class before it
# is used as an order date -- confirm against the file.
retail <- read.csv(
  "Online Retail1.csv",
  header = TRUE,
  stringsAsFactors = TRUE,
  sep = ","
)
# Loading necessary libraries
library(dplyr)
library(rfm)
library(DataExplorer)
# Pre-processing the data
# Keep the six selected columns, drop rows with a negative quantity or unit
# price, add a per-row Revenue column (Quantity * UnitPrice) and store
# CustomerID as a factor.
retail1 <- retail %>%
  select(
    Description, Quantity, InvoiceDate,
    UnitPrice, CustomerID, Country
  ) %>%
  filter(Quantity >= 0, UnitPrice >= 0) %>%
  mutate(
    Revenue = Quantity * UnitPrice,
    CustomerID = as.factor(CustomerID)
  )
# Removing NA values
# Drop rows whose CustomerID is missing. A logical mask is used instead of
# `-which(...)`: when no CustomerID is NA, `which()` returns integer(0) and
# negative indexing with an empty vector selects zero rows, which would
# silently empty the data set.
retail2 <- retail1[!is.na(retail1$CustomerID), ]
# Fixing the 'Analysis Date'
# Reference date passed to the RFM table as `analysis_date`.
Analysis_date <- as.Date("2011-12-11", format = "%Y-%m-%d")
# Creating RFM values
# Build the RFM table from the cleaned order data, using 10 bins each for
# the recency, frequency and monetary scores.
# NOTE(review): InvoiceDate presumably has to be a date class at this point
# -- confirm it is converted upstream.
rfm.values <- rfm_table_order(
  data = retail2,
  customer_id = CustomerID,
  order_date = InvoiceDate,
  revenue = Revenue,
  analysis_date = Analysis_date,
  recency_bins = 10,
  frequency_bins = 10,
  monetary_bins = 10
)

# Write the `rfm` element of the result to CSV.
write.csv(x = rfm.values$rfm, file = "online retail rfmvalues.csv")
# Creating customer categories
# Segment labels. Position i here pairs with position i of the lower/upper
# score-bound vectors passed to rfm_segment() later in the script.
rfm_categories <- c(
  "First Grade",
  "Loyal",
  "Likely to be Loyal",
  "New Ones",
  "Could be Promising",
  "Require Assistance",
  "Getting Less Frequent",
  "Almost Out",
  "Can't Lose Them",
  "Don't Show Up at All"
)
# Assigning score ranges to the RFM categories
# Inclusive score bounds per segment; each vector has one entry per segment,
# in the same order as `rfm_categories`.
# The misspelled name `frequncy_lower` is kept as-is because the
# segmentation call refers to it by that name.
recency_lower    <- c(8,  7,  5, 6,  3, 1, 1, 1, 2, 1)
recency_higher   <- c(10, 10, 9, 10, 7, 6, 4, 4, 6, 3)

frequncy_lower   <- c(8,  7,  5, 1, 4, 2, 1, 1, 3, 1)
frequency_higher <- c(10, 10, 9, 6, 8, 6, 6, 5, 7, 3)

monetary_lower   <- c(8,  7,  5, 1, 4, 3, 2, 1, 6, 1)
monetary_higher  <- c(10, 10, 9, 8, 8, 6, 7, 6, 9, 3)
# Segmenting customers
# Assign each customer in the RFM table to a named segment using the
# per-segment lower/upper score bounds, then write the result to CSV.
rfm.segments <- rfm_segment(
  data = rfm.values,
  segment_names = rfm_categories,
  recency_lower = recency_lower,
  recency_upper = recency_higher,
  frequency_lower = frequncy_lower,
  frequency_upper = frequency_higher,
  monetary_lower = monetary_lower,
  monetary_upper = monetary_higher
)

write.csv(x = rfm.segments, file = "online retail rfm categories.csv")
# Final segment summary: count and percentage share of each segment
# Count customers per segment, order segments from largest to smallest and
# add each segment's share of the total as a percentage.
rfm.segment_final <- rfm.segments %>%
  count(segment, name = "Count") %>%
  arrange(desc(Count)) %>%
  mutate(Percentage = Count / sum(Count) * 100)
# Adding the necessary plots
# Charts drawn from the segmented customer table (`rfm.segments`).
# NOTE(review): the function names suggest median recency, frequency and
# monetary value per segment -- confirm against the rfm package docs.
rfm_plot_median_recency(rfm.segments)

rfm_plot_median_frequency(rfm.segments)

rfm_plot_median_monetary(rfm.segments)

# Charts drawn from the RFM table (`rfm.values`) rather than the segments.
rfm_histograms(rfm.values)

rfm_order_dist(rfm.values)

rfm_bar_chart(rfm.values)
