Machine Learning in Marketing
Institute: IIM Lucknow, Guide: Dr. Sameer Mathur
Project: Customer propensity to respond to the brand’s campaign through BTL activity (Direct mails campaigns)
Team Number: 6
Team Name: Hammer and Tongs
Variables: Description
Campaign_Date: Date on which the campaign was run
Customer_ID: Customer’s unique identifier
Offer_Type: Campaign Offer Name
Min_Purchase_Needed: Minimum amount of the bill needed to apply offer
Vintage: Age of customer’s relationship with the brand (in days)
Recency: Days since last visit
Average_Bill_Value: Average of the bill values (in INR)
Transaction_Count: Number of bills made by the customer
Latency: Average duration between two subsequent visits (in days)
Gender: Male/Female/Unknown
Age: Customer’s age in yrs
Number_of_times_previously_purchased: Number of times customer previously purchased
Redemption_Flag: Yes (1) / No(0)
Exploratory Data Analysis
LOADING Data INTO R ENVIRONMENT
library(data.table)
# Read the campaign CSV from the working directory; fread() returns a data.table
Campaign.dt <- fread("Campaign_Data.csv")
# Put the columns on the search path so later code can refer to them by bare
# name (e.g. table(Redemption_Flag)).
# NOTE(review): attach() presumably exposes a copy of the columns as they are
# at this point, so columns replaced later with := would not be reflected
# until attach() is called again - confirm.
attach(Campaign.dt)
Number of rows & columns in the dataframe
# Display the data dimensions: number of rows, then number of columns
dim(Campaign.dt)
## [1] 275620 13
Column names of the dataframe
# Display the column names in their file order
colnames(Campaign.dt)
## [1] "Campaign_Date"
## [2] "Customer_ID"
## [3] "Offer_Type"
## [4] "Min_Purchase_Needed"
## [5] "Vintage"
## [6] "Recency"
## [7] "Average_Bill_Value"
## [8] "Transaction_Count"
## [9] "Latency"
## [10] "Gender"
## [11] "Age"
## [12] "Number_of_times_previously_purchased"
## [13] "Redemption_Flag"
Descriptive Statistics of the dataframe
# loading the package
library(psych)
# Summary statistics for every numeric column of the table.
# The character columns (Campaign_Date, Offer_Type, Gender) are left out:
# describe() can only coerce them to NA, which produced the
# "NAs introduced by coercion" / "no non-missing arguments" warnings and
# rows filled with NaN / Inf.
num_cols <- names(Campaign.dt)[vapply(Campaign.dt, is.numeric, logical(1))]
# Keep the same statistics as before: vars, n, mean, sd, median, min, max
psych::describe(Campaign.dt[, num_cols, with = FALSE])[, c(1:5, 8, 9)]
## Warning in describe(Campaign.dt): NAs introduced by coercion
## Warning in describe(Campaign.dt): NAs introduced by coercion
## Warning in describe(Campaign.dt): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## vars n mean sd
## Campaign_Date* 1 275620 NaN NA
## Customer_ID 2 275620 13886359.04 848256.45
## Offer_Type* 3 275620 NaN NA
## Min_Purchase_Needed 4 275620 385.18 66.61
## Vintage 5 275620 1716.90 5132.24
## Recency 6 275620 98.86 122.20
## Average_Bill_Value 7 275620 444.91 341.12
## Transaction_Count 8 275620 7.23 10.94
## Latency 9 275620 136.95 145.46
## Gender* 10 275620 NaN NA
## Age 11 275620 28.93 8.35
## Number_of_times_previously_purchased 12 275620 1.01 2.56
## Redemption_Flag 13 275620 0.26 0.44
## median min max
## Campaign_Date* NA Inf -Inf
## Customer_ID 14005378.00 10654283.00 15065779
## Offer_Type* NA Inf -Inf
## Min_Purchase_Needed 360.00 320.00 1000
## Vintage 1002.00 0.00 42643
## Recency 58.00 0.00 1005
## Average_Bill_Value 369.69 0.00 30430
## Transaction_Count 3.00 1.00 321
## Latency 54.00 0.33 360
## Gender* NA Inf -Inf
## Age 27.00 12.00 90
## Number_of_times_previously_purchased 0.00 0.00 140
## Redemption_Flag 0.00 0.00 1
# Structure of the data as loaded: column classes and the first few values
str(Campaign.dt)
## Classes 'data.table' and 'data.frame': 275620 obs. of 13 variables:
## $ Campaign_Date : chr "07-09-2016" "07-09-2016" "07-10-2016" "07-09-2016" ...
## $ Customer_ID : int 15004535 13568799 13568799 15022868 15021899 15020294 15047021 15010691 15024896 15020294 ...
## $ Offer_Type : chr "20 pc Chicken" "6 pc Chicken" "1 chocolate shake" "4 pc Chicken" ...
## $ Min_Purchase_Needed : int 1000 360 360 360 360 500 360 500 360 460 ...
## $ Vintage : int 23 1398 1430 6 7 10 16 17 36 42 ...
## $ Recency : int 11 192 229 0 1 3 15 11 35 40 ...
## $ Average_Bill_Value : num 2529 244 244 305 272 ...
## $ Transaction_Count : int 19 4 4 3 3 3 3 3 3 3 ...
## $ Latency : num 0.333 0.333 0.333 0.5 0.5 ...
## $ Gender : chr "Female" "Male" "Male" "Male" ...
## $ Age : int 25 30 30 17 18 23 20 19 15 23 ...
## $ Number_of_times_previously_purchased: int 0 0 0 0 3 0 0 0 0 0 ...
## $ Redemption_Flag : int 0 0 1 0 1 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
Converting Data Type Structure
# Recode the response flag and the two categorical predictors as factors,
# updating all three columns by reference in one call.
Campaign.dt[, `:=`(
  Redemption_Flag = as.factor(Redemption_Flag),
  Offer_Type = as.factor(Offer_Type),
  Gender = as.factor(Gender)
)]
Structure of the data
# Structure of the data again, to check the columns converted above are factors
str(Campaign.dt)
## Classes 'data.table' and 'data.frame': 275620 obs. of 13 variables:
## $ Campaign_Date : chr "07-09-2016" "07-09-2016" "07-10-2016" "07-09-2016" ...
## $ Customer_ID : int 15004535 13568799 13568799 15022868 15021899 15020294 15047021 15010691 15024896 15020294 ...
## $ Offer_Type : Factor w/ 8 levels "1 chocolate shake",..: 3 5 1 4 4 8 1 8 1 6 ...
## $ Min_Purchase_Needed : int 1000 360 360 360 360 500 360 500 360 460 ...
## $ Vintage : int 23 1398 1430 6 7 10 16 17 36 42 ...
## $ Recency : int 11 192 229 0 1 3 15 11 35 40 ...
## $ Average_Bill_Value : num 2529 244 244 305 272 ...
## $ Transaction_Count : int 19 4 4 3 3 3 3 3 3 3 ...
## $ Latency : num 0.333 0.333 0.333 0.5 0.5 ...
## $ Gender : Factor w/ 3 levels "Female","Male",..: 1 2 2 2 1 2 2 2 2 2 ...
## $ Age : int 25 30 30 17 18 23 20 19 15 23 ...
## $ Number_of_times_previously_purchased: int 0 0 0 0 3 0 0 0 0 0 ...
## $ Redemption_Flag : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 1 1 1 ...
## - attr(*, ".internal.selfref")=<externalptr>
DISCRETE DATA DISTRIBUTION Percentage of the candidates (Converted/ Not Converted)
library(data.table)
# Number of customers per redemption outcome (0 = No, 1 = Yes)
tab1 <- table(Redemption_Flag)
# The same counts expressed as proportions of all customers
tab2 <- prop.table(tab1)
# Print the proportions as percentages, rounded to two decimals
round(100 * tab2, 2)
## Redemption_Flag
## 0 1
## 73.95 26.05
# Percentage of customers in each redemption class, rounded to two decimals
tab1 <- round(100 * prop.table(table(Redemption_Flag)), 2)
# Bar chart of the two percentages; `bp` keeps the bar midpoints for labelling
bp <- barplot(
  tab1,
  main = "Percentage of Converted",
  xlab = "Redemption (Not Converted / Converted)",
  ylab = "Percent (%)",
  ylim = c(0, 90),
  col = c("lightblue", "red"),
  beside = TRUE,
  legend.text = rownames(tab1)
)
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(tab1, 1), cex = 1, pos = 3)
Analysis of Redemption Flag by variable Offer Type
# Cross-tabulate offer type against redemption and convert to row proportions:
# within each offer type, the share that did not (0) / did (1) redeem.
t3 <- prop.table(table(Offer_Type, Redemption_Flag), margin = 1)
# Print as percentages rounded to two decimals
round(100 * t3, 2)
## Redemption_Flag
## Offer_Type 0 1
## 1 chocolate shake 62.84 37.16
## 2 chocolate shake with dessert 82.16 17.84
## 20 pc Chicken 92.57 7.43
## 4 pc Chicken 83.76 16.24
## 6 pc Chicken 87.65 12.35
## 6 pc Chicken with LTO 66.59 33.41
## 6 pc Fish 95.95 4.05
## 9 pc Chicken 87.01 12.99
# Row percentages: within each offer type, the share of customers who did not
# (column "0") / did (column "1") redeem.
tab4 <- round(prop.table(table(Offer_Type, Redemption_Flag), 1) * 100, 2)
# The chart is titled "Did Not Convert", so plot and label the "0" column
# (the original plotted the converted column under this title).
not_converted_pct <- tab4[, "0"]
# ylim goes to 100 because the not-converted share exceeds 90% for some offers
bp <- barplot(not_converted_pct,
  main = "Bar Chart for % of Candidates Who Did Not Convert, Split by Offer Type",
  col = c("lightblue"),
  xlab = "Offer Type",
  ylab = "Percent (%)",
  ylim = c(0, 100))
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(not_converted_pct, 1), cex = 1, pos = 3)
# Column percentages: of all customers with a given redemption outcome, the
# share that received each offer type.
# NOTE(review): this uses margin 2, unlike the row-percentage table printed
# earlier for Offer_Type - confirm which split is intended.
tab4 <- round(prop.table(table(Offer_Type, Redemption_Flag), 2) * 100, 2)
# Bars and labels both use the converted ("1") column; the original labelled
# the converted bars with the not-converted values.
converted_pct <- tab4[, "1"]
bp <- barplot(converted_pct,
  main = "Bar Chart for % of Candidates Who Did Convert, Split by Offer Type",
  col = c("lightblue"),
  xlab = "Offer Type",
  ylab = "Percent (%)",
  ylim = c(0, 90))
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(converted_pct, 1), cex = 1, pos = 3)
Analysis of Redemption Flag by variable Minimum Purchase Needed
# Cross-tabulate minimum purchase level against redemption and convert to row
# proportions: within each level, the share that did not (0) / did (1) redeem.
t1 <- prop.table(table(Min_Purchase_Needed, Redemption_Flag), margin = 1)
# Print as percentages rounded to two decimals
round(100 * t1, 2)
## Redemption_Flag
## Min_Purchase_Needed 0 1
## 320 84.02 15.98
## 360 67.33 32.67
## 460 66.59 33.41
## 500 86.27 13.73
## 1000 92.57 7.43
# Column percentages: of all customers with a given redemption outcome, the
# share at each minimum-purchase level.
# NOTE(review): this uses margin 2, unlike the row-percentage table printed
# earlier for Min_Purchase_Needed - confirm which split is intended.
tab4 <- round(prop.table(table(Min_Purchase_Needed, Redemption_Flag), 2) * 100, 2)
# The chart is titled "Did Not Convert", so bars and labels both use the "0"
# column (the original drew the converted column and labelled it with the
# not-converted values).
not_converted_pct <- tab4[, "0"]
# ylim goes to 100 so that no bar can exceed the axis
bp <- barplot(not_converted_pct,
  main = "Bar Chart for % of Candidates Who Did Not Convert, Split by Minimum Purchase Needed",
  col = c("lightblue"),
  xlab = "Minimum Purchase Needed",
  ylab = "Percent (%)",
  ylim = c(0, 100))
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(not_converted_pct, 1), cex = 1, pos = 3)
# Row percentages: within each minimum-purchase level, the share of customers
# who did not (first column) / did (second column) redeem.
tab4 <- round(
  100 * prop.table(table(Min_Purchase_Needed, Redemption_Flag), margin = 1),
  2
)
# Plot the redeemed share (second column) for every level
bp <- barplot(
  tab4[, 2],
  main = "Bar Chart for % of Candidates Who Did Convert, Split by Minimum Purchase Needed",
  xlab = "Minimum Purchase Needed",
  ylab = "Percent (%)",
  ylim = c(0, 90),
  col = c("lightblue"),
  beside = TRUE,
  args.legend = list(title = "Converted", x = "topright", cex = .7)
)
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(tab4[, 2], 1), cex = 1, pos = 3)
Analysis for Conversion based on Gender
# Cross-tabulate gender against redemption and convert to row proportions:
# within each gender, the share that did not (0) / did (1) redeem.
t2 <- prop.table(table(Gender, Redemption_Flag), margin = 1)
# Print as percentages rounded to two decimals
round(100 * t2, 2)
## Redemption_Flag
## Gender 0 1
## Female 74.50 25.50
## Male 73.44 26.56
## Unknown 72.62 27.38
# Column percentages: of all customers with a given redemption outcome, the
# share belonging to each gender.
# NOTE(review): this uses margin 2, unlike the row-percentage table printed
# earlier for Gender - confirm which split is intended.
tab4 <- round(prop.table(table(Gender, Redemption_Flag), 2) * 100, 2)
# The chart is titled "Did Not Convert", so bars and labels both use the "0"
# column (the original drew the converted column and labelled it with the
# not-converted values).
not_converted_pct <- tab4[, "0"]
# ylim goes to 100 so that no bar can exceed the axis
bp <- barplot(not_converted_pct,
  main = "Bar Chart for % of Candidates Who Did Not Convert, Split by Gender",
  col = c("lightblue"),
  xlab = "Gender",
  ylab = "Percent (%)",
  ylim = c(0, 100))
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(not_converted_pct, 1), cex = 1, pos = 3)
# Column percentages: of all customers with a given redemption outcome, the
# share belonging to each gender.
tab4 <- round(
  100 * prop.table(table(Gender, Redemption_Flag), margin = 2),
  2
)
# Plot the second (redeemed) column for every gender
bp <- barplot(
  tab4[, 2],
  main = "Bar Chart for % of Candidates Who Did Convert, Split by Gender",
  xlab = "Gender",
  ylab = "Percent (%)",
  ylim = c(0, 90),
  col = c("lightblue"),
  beside = TRUE,
  args.legend = list(title = "Converted", x = "topright", cex = .7)
)
# Write each percentage (one decimal) just above the baseline of its bar
text(bp, 0, round(tab4[, 2], 1), cex = 1, pos = 3)
Average Vintage Value of the Candidates, Split by Redemption Flag (Converted / Not Converted)
# Mean Vintage per redemption outcome, rounded to two decimals
op1 <- Campaign.dt[
  ,
  list(AverageVintageValue = round(mean(Vintage), 2)),
  by = "Redemption_Flag"
]
# Sort so the rows follow the order of the Redemption_Flag levels, then print
op1 <- op1[order(Redemption_Flag)]
op1
## Redemption_Flag AverageVintageValue
## 1: 0 1607.76
## 2: 1 2026.63
Mean Plot for Vintage Value of the candidates, Split by Redemption Flag (Converted / Not Converted)
#loading the package
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Mean Vintage for each redemption outcome, with the means printed on the plot.
# `frame = FALSE` was removed: it is not an argument of plotmeans() and is
# passed on to low-level graphics calls, which ignore it and emit
# '"frame" is not a graphical parameter' warnings.
plotmeans(Vintage ~ Redemption_Flag,
  data = Campaign.dt,
  mean.labels = TRUE,
  col = "RED",
  main = "Mean Plot for the Vintage Value, Split by Redemption Flag")
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
BoxPlot for Vintage, Split by Redemption Flag (Converted / Not Converted)
# Box plot of Vintage for each redemption outcome; the y axis is capped at
# 5000 so that values above it fall outside the plotted range.
boxplot(
  Vintage ~ Redemption_Flag,
  col = (c("lightblue", "red")),
  main = "Boxplot for Vintage split by Redemption Flag",
  ylab = "Vintage in days",
  xlim = c(0.5, 3.5),
  ylim = c(0, 5000)
)
Average Bill Value of the Candidates, Split by Redemption Flag (Converted / Not Converted)
# Mean Average_Bill_Value per redemption outcome, rounded to two decimals
op2 <- Campaign.dt[
  ,
  list(AverageBillValue = round(mean(Average_Bill_Value), 2)),
  by = "Redemption_Flag"
]
# Sort so the rows follow the order of the Redemption_Flag levels, then print
op2 <- op2[order(Redemption_Flag)]
op2
## Redemption_Flag AverageBillValue
## 1: 0 460.20
## 2: 1 401.54
Mean Plot for Average Bill Value of the candidates, Split by Redemption Flag (Converted / Not Converted)
#loading the package
library(gplots)
# Mean Average_Bill_Value for each redemption outcome, with the means printed
# on the plot.
# `frame = FALSE` was removed: it is not an argument of plotmeans() and is
# passed on to low-level graphics calls, which ignore it and emit
# '"frame" is not a graphical parameter' warnings.
plotmeans(Average_Bill_Value ~ Redemption_Flag,
  data = Campaign.dt,
  mean.labels = TRUE,
  col = "RED",
  main = "Mean Plot for the Average Bill Value, Split by Redemption Flag")
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
BoxPlot for Average Bill Value, Split by Redemption Flag (Converted / Not Converted)
# Box plot of Average_Bill_Value for each redemption outcome; the y axis is
# capped at 1500.
# The y-axis label now gives the unit as INR (the original said "in days",
# copied from the Vintage plot). Columns are read from Campaign.dt explicitly
# instead of relying on an attach()ed copy.
boxplot(Average_Bill_Value ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot for Average Bill Value split by Redemption Flag",
  ylab = "Average Bill Value (INR)",
  col = c("lightblue", "red"),
  xlim = c(0.5, 3.5),
  ylim = c(0, 1500))
CONTINUOUS DATA DISTRIBUTION #Average Age of the customers split by redemption status
# Mean Age per redemption outcome, rounded to two decimals
# (this reuses and overwrites the name `op1`)
op1 <- Campaign.dt[
  ,
  list(Age = round(mean(Age), 2)),
  by = "Redemption_Flag"
]
# Sort so the rows follow the order of the Redemption_Flag levels, then print
op1 <- op1[order(Redemption_Flag)]
op1
## Redemption_Flag Age
## 1: 0 29.06
## 2: 1 28.56
Mean Plot for the Age, Split by redemption status
library(gplots)
# Mean Age for each redemption outcome, with the means printed on the plot
plotmeans(
  Age ~ Redemption_Flag,
  data = Campaign.dt,
  col = "RED",
  mean.labels = TRUE,
  main = "Mean Plot for the Age, Split by redemption status"
)
# BoxPlot for Age, split by redemption status.
# Campaign.dt is attached again here, on top of the copy attached when the
# data was loaded (hence the "objects are masked" message that follows).
# NOTE(review): later code that uses bare column names presumably depends on
# this re-attach to see the factor versions of the columns - confirm before
# removing it.
attach(Campaign.dt)
## The following objects are masked from Campaign.dt (pos = 5):
##
## Age, Average_Bill_Value, Campaign_Date, Customer_ID, Gender,
## Latency, Min_Purchase_Needed,
## Number_of_times_previously_purchased, Offer_Type, Recency,
## Redemption_Flag, Transaction_Count, Vintage
# Box plot of Age for each redemption outcome
boxplot(
  Age ~ Redemption_Flag,
  col = (c("lightblue", "red")),
  ylab = "age",
  main = "Boxplot"
)
Average Latency of the customers by redemption
# Mean Latency per redemption outcome, rounded to two decimals
avgLat <- Campaign.dt[
  ,
  list(Latency = round(mean(Latency), 2)),
  by = "Redemption_Flag"
]
# Sort so the rows follow the order of the Redemption_Flag levels, then print
avgLat <- avgLat[order(Redemption_Flag)]
avgLat
## Redemption_Flag Latency
## 1: 0 164.29
## 2: 1 59.34
Mean Plot for Latency, Split by Redemption
library(gplots)
# Mean Latency for each redemption outcome, with the means printed on the plot.
# `frame = FALSE` was removed: it is not an argument of plotmeans() and is
# passed on to low-level graphics calls, which ignore it and emit
# '"frame" is not a graphical parameter' warnings.
plotmeans(Latency ~ Redemption_Flag,
  data = Campaign.dt,
  mean.labels = TRUE,
  col = "RED",
  main = "Mean Plot for Latency, Split by Redemption")
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
Boxplot for Latency, Split by redemption
# Attach Campaign.dt once more; earlier attached copies are still on the
# search path, which is why the "objects are masked" messages follow.
# NOTE(review): the repeated attach() calls look redundant, but code that
# uses bare column names resolves to the most recently attached copy -
# confirm nothing depends on this before removing it.
attach(Campaign.dt)
## The following objects are masked from Campaign.dt (pos = 3):
##
## Age, Average_Bill_Value, Campaign_Date, Customer_ID, Gender,
## Latency, Min_Purchase_Needed,
## Number_of_times_previously_purchased, Offer_Type, Recency,
## Redemption_Flag, Transaction_Count, Vintage
## The following objects are masked from Campaign.dt (pos = 6):
##
## Age, Average_Bill_Value, Campaign_Date, Customer_ID, Gender,
## Latency, Min_Purchase_Needed,
## Number_of_times_previously_purchased, Offer_Type, Recency,
## Redemption_Flag, Transaction_Count, Vintage
# Box plot of Latency for each redemption outcome
boxplot(
  Latency ~ Redemption_Flag,
  col = (c("lightblue", "red")),
  main = "Boxplot for Latency grouped by Redemption"
)
Average Recency of the customers by redemption
# Mean Recency per redemption outcome, rounded to two decimals
avgRec <- Campaign.dt[
  ,
  list(Recency = round(mean(Recency), 2)),
  by = "Redemption_Flag"
]
# Sort so the rows follow the order of the Redemption_Flag levels, then print
avgRec <- avgRec[order(Redemption_Flag)]
avgRec
## Redemption_Flag Recency
## 1: 0 118.94
## 2: 1 41.85
Mean Plot for the above
library(gplots)
# Mean Recency for each redemption outcome, with the means printed on the plot.
# `frame = FALSE` was removed: it is not an argument of plotmeans() and is
# passed on to low-level graphics calls, which ignore it and emit
# '"frame" is not a graphical parameter' warnings.
plotmeans(Recency ~ Redemption_Flag,
  data = Campaign.dt,
  mean.labels = TRUE,
  col = "RED",
  ylab = "Recency : Days since last visit")
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
Boxplot of Recency of the customers with redemption
# Box plot of Recency for each redemption outcome; the y axis is capped at 400.
# The title no longer ends in a stray ")". Columns are read from Campaign.dt
# explicitly instead of relying on an attach()ed copy.
boxplot(Recency ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot of recency of visits by customers",
  ylab = "Recency",
  col = c("lightblue", "red"),
  xlim = c(0.5, 3.5),
  ylim = c(0, 400))
Average of Transaction count (number of bills made by the customer)
# Mean Transaction_Count per redemption outcome, rounded to two decimals
TransCount <- Campaign.dt[
  ,
  list(Transaction_Count = round(mean(Transaction_Count), 2)),
  by = "Redemption_Flag"
]
# Sort so the rows follow the order of the Redemption_Flag levels, then print
TransCount <- TransCount[order(Redemption_Flag)]
TransCount
## Redemption_Flag Transaction_Count
## 1: 0 4.63
## 2: 1 14.61
Mean Plot for transaction count split by redemption status
library(gplots)
# Mean Transaction_Count for each redemption outcome, with the means printed
# on the plot.
# `frame = FALSE` was removed: it is not an argument of plotmeans() and is
# passed on to low-level graphics calls, which ignore it and emit
# '"frame" is not a graphical parameter' warnings.
plotmeans(Transaction_Count ~ Redemption_Flag,
  data = Campaign.dt,
  mean.labels = TRUE,
  col = "RED",
  ylab = "Transaction Count")
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
Average Age, Latency, Average Bill Value and Recency of customers, split by redemption status and Gender (Female / Male / Unknown)
# Mean Age, Latency, Average_Bill_Value and Recency (two decimals each) for
# every combination of redemption outcome and gender.
# (this reuses and overwrites the name `tab1`)
tab1 <- Campaign.dt[
  ,
  list(
    Age = round(mean(Age), 2),
    Latency = round(mean(Latency), 2),
    Average_Bill_Value = round(mean(Average_Bill_Value), 2),
    Recency = round(mean(Recency), 2)
  ),
  by = c("Redemption_Flag", "Gender")
]
# Sort by redemption outcome only, then print
tab1 <- tab1[order(Redemption_Flag)]
tab1
## Redemption_Flag Gender Age Latency Average_Bill_Value Recency
## 1: 0 Female 29.32 168.11 470.33 118.12
## 2: 0 Male 28.75 162.19 450.74 120.26
## 3: 0 Unknown 29.08 140.05 436.25 115.20
## 4: 1 Male 27.99 56.05 387.53 40.23
## 5: 1 Female 29.01 62.68 415.00 43.41
## 6: 1 Unknown 29.36 55.63 393.68 40.81
Correlation: correlation matrix for all continuous variables
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
##
## describe
## The following objects are masked from 'package:base':
##
## format.pval, units
# Numeric variables to correlate, selected by name rather than by position
# (the original used c(5,6,7,8,9,11,12)) so the selection survives any change
# in column order.
cont_cols <- c(
  "Vintage", "Recency", "Average_Bill_Value", "Transaction_Count",
  "Latency", "Age", "Number_of_times_previously_purchased"
)
mydata <- Campaign.dt[, cont_cols, with = FALSE]
#head(mydata, 6)
# rcorr() needs a matrix; the printed result shows the correlation matrix,
# the number of observations and a matrix of p-values
res <- rcorr(as.matrix(mydata))
res
## Vintage Recency Average_Bill_Value
## Vintage 1.00 -0.01 0.00
## Recency -0.01 1.00 0.20
## Average_Bill_Value 0.00 0.20 1.00
## Transaction_Count 0.04 -0.29 -0.10
## Latency -0.06 0.40 0.18
## Age 0.07 0.07 0.10
## Number_of_times_previously_purchased 0.03 -0.15 -0.03
## Transaction_Count Latency Age
## Vintage 0.04 -0.06 0.07
## Recency -0.29 0.40 0.07
## Average_Bill_Value -0.10 0.18 0.10
## Transaction_Count 1.00 -0.44 -0.03
## Latency -0.44 1.00 0.06
## Age -0.03 0.06 1.00
## Number_of_times_previously_purchased 0.52 -0.26 -0.02
## Number_of_times_previously_purchased
## Vintage 0.03
## Recency -0.15
## Average_Bill_Value -0.03
## Transaction_Count 0.52
## Latency -0.26
## Age -0.02
## Number_of_times_previously_purchased 1.00
##
## n= 275620
##
##
## P
## Vintage Recency Average_Bill_Value
## Vintage 0.0000 0.0326
## Recency 0.0000 0.0000
## Average_Bill_Value 0.0326 0.0000
## Transaction_Count 0.0000 0.0000 0.0000
## Latency 0.0000 0.0000 0.0000
## Age 0.0000 0.0000 0.0000
## Number_of_times_previously_purchased 0.0000 0.0000 0.0000
## Transaction_Count Latency Age
## Vintage 0.0000 0.0000 0.0000
## Recency 0.0000 0.0000 0.0000
## Average_Bill_Value 0.0000 0.0000 0.0000
## Transaction_Count 0.0000 0.0000
## Latency 0.0000 0.0000
## Age 0.0000 0.0000
## Number_of_times_previously_purchased 0.0000 0.0000 0.0000
## Number_of_times_previously_purchased
## Vintage 0.0000
## Recency 0.0000
## Average_Bill_Value 0.0000
## Transaction_Count 0.0000
## Latency 0.0000
## Age 0.0000
## Number_of_times_previously_purchased
Plotting correlation matrix
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 3.6.1
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.6.1
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
##
## Attaching package: 'xts'
## The following objects are masked from 'package:data.table':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:gplots':
##
## textplot
## The following object is masked from 'package:graphics':
##
## legend
# Plot the correlation chart (PerformanceAnalytics) for the columns in `mydata`
chart.Correlation(mydata)
SCATTER PLOTS Scatter Plot of Vintage and Transaction Count by Redemption (Converted / Not Converted)
# Scatter of Transaction_Count against Vintage, coloured by redemption outcome.
# Columns are read from Campaign.dt via with() instead of from whichever
# attach()ed copy is first on the search path. The flag is coerced to a factor
# so the colour lookup always indexes by level code (1, 2); indexing with raw
# 0/1 integers would silently drop the 0s.
with(Campaign.dt, {
  flag <- as.factor(Redemption_Flag)
  plot(Vintage, Transaction_Count,
    col = c("red", "blue")[flag],
    main = "Scatter Plot of Vintage and Transaction Count by Redemption",
    ylim = c(0, 150), xlim = c(0, 3500))
  # Legend entries follow the factor levels, in the same order as the colours
  legend(x = "topright", legend = levels(flag), col = c("red", "blue"), pch = 1)
})
Scatter Plot of Vintage and Latency by Redemption (Converted / Not Converted)
# Scatter of Latency against Vintage, coloured by redemption outcome.
# Columns are read from Campaign.dt via with() instead of from whichever
# attach()ed copy is first on the search path. The flag is coerced to a factor
# so the colour lookup always indexes by level code (1, 2); indexing with raw
# 0/1 integers would silently drop the 0s.
with(Campaign.dt, {
  flag <- as.factor(Redemption_Flag)
  plot(Vintage, Latency,
    col = c("red", "blue")[flag],
    main = "Scatter Plot of Vintage and Latency by Redemption",
    xlim = c(0, 3500))
  # Legend entries follow the factor levels, in the same order as the colours
  legend(x = "topright", legend = levels(flag), col = c("red", "blue"), pch = 1)
})
Scatter Plot of Vintage and Recency by Redemption (Converted / Not Converted)
# Scatter of Recency against Vintage, coloured by redemption outcome.
# Columns are read from Campaign.dt via with() instead of from whichever
# attach()ed copy is first on the search path. The flag is coerced to a factor
# so the colour lookup always indexes by level code (1, 2); indexing with raw
# 0/1 integers would silently drop the 0s.
with(Campaign.dt, {
  flag <- as.factor(Redemption_Flag)
  plot(Vintage, Recency,
    col = c("red", "blue")[flag],
    main = "Scatter Plot of Vintage and Recency by Redemption",
    xlim = c(0, 3500))
  # Legend entries follow the factor levels, in the same order as the colours
  legend(x = "topright", legend = levels(flag), col = c("red", "blue"), pch = 1)
})