Increase revenue by next season or lose the franchise
Previous campaigns have raised revenues slightly , but that amount has been used in ad campaigns
Need to find a way to increase RoI
We need to know the customers of London Jets. Insights we have from the study is
setwd(dir = "C:\\Users\\pahar\\OneDrive\\IIM-K\\Classes\\2022-10-15\\Data")
library(readxl)
lj_data<-read_excel("702A22-XLS-ENG.xls",sheet = "Customer")
library(janitor)
lj_data<-clean_names(lj_data)
str(lj_data)
## tibble [3,000 × 20] (S3: tbl_df/tbl/data.frame)
## $ cust_id : num [1:3000] 1 2 3 4 5 6 7 8 9 10 ...
## $ name_first : chr [1:3000] "C" "A" "D" "R" ...
## $ name_last : chr [1:3000] "Dale" "Gill" "Rodriguez" "Haider" ...
## $ address : chr [1:3000] "92 Concord Cres" "11 Kingspark Cres" "300 Dundas St" NA ...
## $ city : chr [1:3000] "London" "London" "London" "London" ...
## $ postal : chr [1:3000] "N6G1X8" "N6A5B9" "N6J4Y5" "N6J4J1" ...
## $ phone : chr [1:3000] "519471-1712" "519858-4812" "519685-3997" "0" ...
## $ type : chr [1:3000] "Personal" "Personal" "Personal" "Personal" ...
## $ num_games : num [1:3000] 3 1 1 1 3 2 1 7 1 5 ...
## $ avg_seats : num [1:3000] 3 2 2 3 4 2 6 2 6 3 ...
## $ tot_sales : num [1:3000] 630 140 50 75 660 220 330 560 240 825 ...
## $ last_trans_year : num [1:3000] 2001 2000 2001 2001 1998 ...
## $ last_trans_month: num [1:3000] 2 9 10 9 10 4 2 1 11 4 ...
## $ sex : chr [1:3000] "Male" "Male" "Male" "Female" ...
## $ income : num [1:3000] 57819 71938 23891 37231 NA ...
## $ marital_status : chr [1:3000] "Single" "Single" "Married" "Single" ...
## $ age : num [1:3000] 40 44 24 37 NA 37 35 39 30 28 ...
## $ fan_club_member : num [1:3000] 0 0 0 0 NA 0 0 1 0 0 ...
## $ automobile : num [1:3000] 1 1 1 0 NA 0 1 0 1 0 ...
## $ subscription : chr [1:3000] "Free Press" NA NA NA ...
sum(is.na(lj_data))
## [1] 6484
lj_data_customer <- lj_data[,-c(2,3,4,5,6,7,8,10,14,15,16,17,18,19,20)]
lj_data_customer$last_trans_day <- 1
lj_data_customer$last_trans_date<-paste(lj_data_customer$last_trans_day,lj_data_customer$last_trans_month,lj_data_customer$last_trans_year,sep ="-")
library(lubridate)
attach(lj_data_customer)
lj_data_customer$last_trans_date <- dmy(lj_data_customer$last_trans_date)
summary(lj_data$last_trans_date)
## Length Class Mode
## 0 NULL NULL
str(lj_data)
## tibble [3,000 × 20] (S3: tbl_df/tbl/data.frame)
## $ cust_id : num [1:3000] 1 2 3 4 5 6 7 8 9 10 ...
## $ name_first : chr [1:3000] "C" "A" "D" "R" ...
## $ name_last : chr [1:3000] "Dale" "Gill" "Rodriguez" "Haider" ...
## $ address : chr [1:3000] "92 Concord Cres" "11 Kingspark Cres" "300 Dundas St" NA ...
## $ city : chr [1:3000] "London" "London" "London" "London" ...
## $ postal : chr [1:3000] "N6G1X8" "N6A5B9" "N6J4Y5" "N6J4J1" ...
## $ phone : chr [1:3000] "519471-1712" "519858-4812" "519685-3997" "0" ...
## $ type : chr [1:3000] "Personal" "Personal" "Personal" "Personal" ...
## $ num_games : num [1:3000] 3 1 1 1 3 2 1 7 1 5 ...
## $ avg_seats : num [1:3000] 3 2 2 3 4 2 6 2 6 3 ...
## $ tot_sales : num [1:3000] 630 140 50 75 660 220 330 560 240 825 ...
## $ last_trans_year : num [1:3000] 2001 2000 2001 2001 1998 ...
## $ last_trans_month: num [1:3000] 2 9 10 9 10 4 2 1 11 4 ...
## $ sex : chr [1:3000] "Male" "Male" "Male" "Female" ...
## $ income : num [1:3000] 57819 71938 23891 37231 NA ...
## $ marital_status : chr [1:3000] "Single" "Single" "Married" "Single" ...
## $ age : num [1:3000] 40 44 24 37 NA 37 35 39 30 28 ...
## $ fan_club_member : num [1:3000] 0 0 0 0 NA 0 0 1 0 0 ...
## $ automobile : num [1:3000] 1 1 1 0 NA 0 1 0 1 0 ...
## $ subscription : chr [1:3000] "Free Press" NA NA NA ...
lj_data_customer<-lj_data_customer[,-c(4,5,6)]
max(lj_data$last_trans_date)
## [1] -Inf
analysis_date<-as_date("2001-12-02")
lj_data_customer$diff_dates = difftime(analysis_date,lj_data_customer$last_trans_date, units = "days")
library(rfm)
rfm_analysis <- rfm_table_customer(lj_data_customer,customer_id = cust_id, n_transactions = num_games,recency_days =diff_dates,total_revenue=tot_sales, analysis_date)
rfm_analysis
## # A tibble: 3,000 × 8
## customer_id recency_days transaction_count amount recency_score
## <dbl> <drtn> <dbl> <dbl> <int>
## 1 1 304 days 3 630 4
## 2 2 457 days 1 140 3
## 3 3 62 days 1 50 5
## 4 4 92 days 1 75 5
## 5 5 1158 days 3 660 1
## 6 6 610 days 2 220 2
## 7 7 1400 days 1 330 1
## 8 8 1431 days 7 560 1
## 9 9 396 days 1 240 3
## 10 10 1341 days 5 825 1
## # … with 2,990 more rows, and 3 more variables: frequency_score <int>,
## # monetary_score <int>, rfm_score <dbl>
segment_titles <- c("First Grade", "Loyal", "Likely to be Loyal",
"New Ones", "Could be Promising", "Require Assistance", "Getting Less Frequent",
"Almost Out", "Can't Lose Them", "Don't Show Up at All")
r_low <- c(4, 2, 3, 4, 3, 2, 2, 1, 1, 1)# minimum value of recency
r_high <- c(5, 5, 5, 5, 4, 3, 3, 2, 1, 2)#maximum value of recency
f_low <- c(4, 3, 1, 1, 1, 2, 1, 2, 4, 1)
f_high <- c(5, 5, 3, 1, 1, 3, 2, 5, 5, 2)
m_low <- c(4, 3, 1, 1, 1, 2, 1, 2, 4, 1)
m_high <- c(5, 5, 3, 1, 1, 3, 2, 5, 5, 2)
rfm_segments<-rfm_segment(rfm_analysis, segment_titles, r_low, r_high, f_low, f_high, m_low, m_high)
library(plotly)
library(treemap)
library(rfm)
library(dplyr)
rfm_plot_median_recency(rfm_segments,print_plot = TRUE)
rfm_plot_median_frequency(rfm_segments,print_plot = TRUE)
rfm_plot_median_monetary(rfm_segments,print_plot = TRUE)
rfm_histograms(rfm_analysis)
rfm_order_dist(rfm_analysis)
rfm_bar_chart(rfm_analysis)
rfm_segments_overall <- rfm_segments %>% count(segment) %>% arrange(desc(n)) %>% rename(Count = n) %>% mutate(Percentage = (Count/ sum(Count))*100)
rfm_segments_overall
## # A tibble: 8 × 3
## segment Count Percentage
## <chr> <int> <dbl>
## 1 Likely to be Loyal 827 27.6
## 2 Loyal 777 25.9
## 3 Almost Out 417 13.9
## 4 First Grade 333 11.1
## 5 Others 258 8.6
## 6 Getting Less Frequent 166 5.53
## 7 Don't Show Up at All 155 5.17
## 8 Require Assistance 67 2.23
df<-data.frame(rfm_segments_overall)
treemap(df,
index="segment",
vSize = "Count",
type="index",
palette = "RdYlBu",
title="Overall RFM Categorization for London Jets",
fontsize.title = 20
)
Seems like so far London Jets is surviving only on First Grade Customers which are not sufficient for its survival. They are merely 11%
Franchise must not lose the Loyal and Likely to be Loyal customers which form major chunk of its customer base (approximately 54%)
After that it can concentrate on Other and Getting Less Frequent ones to make another 15% which will boost its revenue significantly and city will retain its major sports franchise.
Further, it can launch a campaign to know the reason about customers who do not show up at all and which are already out. But those are secondary steps.