Machine Learning in Marketing

Institute: IIM Lucknow, Guide: Dr. Sameer Mathur

Project: Customer propensity to respond to the brand’s campaign through BTL activity (Direct mails campaigns)

Team Number: 6

Team Name: Hammer and Tongs

Variables: Description

Campaign_Date: Date on which the campaign was run

Customer_ID: Customer’s unique identifier

Offer_Type: Campaign Offer Name

Min_Purchase_Needed: Minimum amount of the bill needed to apply offer

Vintage: Age of customer’s relationship with the brand (in days)

Recency: Days since last visit

Average_Bill_Value: Average of the bill values (in INR)

Transaction_Count: Number of bills made by the customer

Latency: Average duration between two subsequent visits (in days)

Gender: Male/Female/Unknown

Age: Customer’s age in yrs

Number_of_times_previously_purchased: Number of times customer previously purchased

Redemption_Flag: Offer redeemed — Yes (1) / No (0)

Exploratory Data Analysis

LOADING Data INTO R ENVIRONMENT

library(data.table)
# Read the campaign CSV into a data.table
Campaign.dt <- fread("Campaign_Data.csv")
# Put the columns on the search path so that later chunks can refer to them
# by bare name (e.g. Redemption_Flag).
# NOTE(review): attach() works on a copy taken at this point, so the factor
# conversions applied later with `:=` are presumably not visible through the
# attached names until the table is attached again -- confirm.
attach(Campaign.dt)

Number of rows & columns in the dataframe

# Display the data dimensions: number of rows first, then number of columns
dim(Campaign.dt)
## [1] 275620     13

Column names of the dataframe

# List the names of all columns in the campaign table
names(Campaign.dt)
##  [1] "Campaign_Date"                       
##  [2] "Customer_ID"                         
##  [3] "Offer_Type"                          
##  [4] "Min_Purchase_Needed"                 
##  [5] "Vintage"                             
##  [6] "Recency"                             
##  [7] "Average_Bill_Value"                  
##  [8] "Transaction_Count"                   
##  [9] "Latency"                             
## [10] "Gender"                              
## [11] "Age"                                 
## [12] "Number_of_times_previously_purchased"
## [13] "Redemption_Flag"

Descriptive Statistics of the dataframe

# psych supplies describe()
library(psych)
# Summary statistics for every column of the table, keeping only the count,
# centre, spread and range columns of the describe() result.
# The character columns (Campaign_Date, Offer_Type, Gender) have no numeric
# summary, which is what produces the coercion / min / max warnings.
describe(Campaign.dt)[, c("vars", "n", "mean", "sd", "median", "min", "max")]
## Warning in describe(Campaign.dt): NAs introduced by coercion

## Warning in describe(Campaign.dt): NAs introduced by coercion

## Warning in describe(Campaign.dt): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
##                                      vars      n        mean        sd
## Campaign_Date*                          1 275620         NaN        NA
## Customer_ID                             2 275620 13886359.04 848256.45
## Offer_Type*                             3 275620         NaN        NA
## Min_Purchase_Needed                     4 275620      385.18     66.61
## Vintage                                 5 275620     1716.90   5132.24
## Recency                                 6 275620       98.86    122.20
## Average_Bill_Value                      7 275620      444.91    341.12
## Transaction_Count                       8 275620        7.23     10.94
## Latency                                 9 275620      136.95    145.46
## Gender*                                10 275620         NaN        NA
## Age                                    11 275620       28.93      8.35
## Number_of_times_previously_purchased   12 275620        1.01      2.56
## Redemption_Flag                        13 275620        0.26      0.44
##                                           median         min      max
## Campaign_Date*                                NA         Inf     -Inf
## Customer_ID                          14005378.00 10654283.00 15065779
## Offer_Type*                                   NA         Inf     -Inf
## Min_Purchase_Needed                       360.00      320.00     1000
## Vintage                                  1002.00        0.00    42643
## Recency                                    58.00        0.00     1005
## Average_Bill_Value                        369.69        0.00    30430
## Transaction_Count                           3.00        1.00      321
## Latency                                    54.00        0.33      360
## Gender*                                       NA         Inf     -Inf
## Age                                        27.00       12.00       90
## Number_of_times_previously_purchased        0.00        0.00      140
## Redemption_Flag                             0.00        0.00        1
# Structure of the data as loaded: class, size, and the type and first
# values of each column
str(Campaign.dt)
## Classes 'data.table' and 'data.frame':   275620 obs. of  13 variables:
##  $ Campaign_Date                       : chr  "07-09-2016" "07-09-2016" "07-10-2016" "07-09-2016" ...
##  $ Customer_ID                         : int  15004535 13568799 13568799 15022868 15021899 15020294 15047021 15010691 15024896 15020294 ...
##  $ Offer_Type                          : chr  "20 pc Chicken" "6 pc Chicken" "1 chocolate shake" "4 pc Chicken" ...
##  $ Min_Purchase_Needed                 : int  1000 360 360 360 360 500 360 500 360 460 ...
##  $ Vintage                             : int  23 1398 1430 6 7 10 16 17 36 42 ...
##  $ Recency                             : int  11 192 229 0 1 3 15 11 35 40 ...
##  $ Average_Bill_Value                  : num  2529 244 244 305 272 ...
##  $ Transaction_Count                   : int  19 4 4 3 3 3 3 3 3 3 ...
##  $ Latency                             : num  0.333 0.333 0.333 0.5 0.5 ...
##  $ Gender                              : chr  "Female" "Male" "Male" "Male" ...
##  $ Age                                 : int  25 30 30 17 18 23 20 19 15 23 ...
##  $ Number_of_times_previously_purchased: int  0 0 0 0 3 0 0 0 0 0 ...
##  $ Redemption_Flag                     : int  0 0 1 0 1 0 0 0 0 0 ...
##  - attr(*, ".internal.selfref")=<externalptr>

Converting Data Type Structure

# Turn the response and the two categorical predictors into factors,
# updating the data.table in place in a single `:=` call.
Campaign.dt[, `:=`(
  Redemption_Flag = as.factor(Redemption_Flag),
  Offer_Type = as.factor(Offer_Type),
  Gender = as.factor(Gender)
)]

Structure of the data

# Structure of the data after the conversion: Offer_Type, Gender and
# Redemption_Flag should now be reported as factors
str(Campaign.dt)
## Classes 'data.table' and 'data.frame':   275620 obs. of  13 variables:
##  $ Campaign_Date                       : chr  "07-09-2016" "07-09-2016" "07-10-2016" "07-09-2016" ...
##  $ Customer_ID                         : int  15004535 13568799 13568799 15022868 15021899 15020294 15047021 15010691 15024896 15020294 ...
##  $ Offer_Type                          : Factor w/ 8 levels "1 chocolate shake",..: 3 5 1 4 4 8 1 8 1 6 ...
##  $ Min_Purchase_Needed                 : int  1000 360 360 360 360 500 360 500 360 460 ...
##  $ Vintage                             : int  23 1398 1430 6 7 10 16 17 36 42 ...
##  $ Recency                             : int  11 192 229 0 1 3 15 11 35 40 ...
##  $ Average_Bill_Value                  : num  2529 244 244 305 272 ...
##  $ Transaction_Count                   : int  19 4 4 3 3 3 3 3 3 3 ...
##  $ Latency                             : num  0.333 0.333 0.333 0.5 0.5 ...
##  $ Gender                              : Factor w/ 3 levels "Female","Male",..: 1 2 2 2 1 2 2 2 2 2 ...
##  $ Age                                 : int  25 30 30 17 18 23 20 19 15 23 ...
##  $ Number_of_times_previously_purchased: int  0 0 0 0 3 0 0 0 0 0 ...
##  $ Redemption_Flag                     : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 1 1 1 ...
##  - attr(*, ".internal.selfref")=<externalptr>

DISCRETE DATA DISTRIBUTION: Percentage of the candidates (Converted / Not Converted)

library(data.table)

# Counts of customers by redemption status. The column is read directly from
# Campaign.dt rather than through the attach()ed copy, so the result does not
# depend on what is currently on the search path.
tab1 <- with(Campaign.dt, table(Redemption_Flag))
# Proportion of candidates (Converted / Not Converted)
tab2 <- prop.table(tab1)

# Percentages of candidates (Converted / Not Converted), rounded to 2 decimals
round(tab2 * 100, 2)
## Redemption_Flag
##     0     1 
## 73.95 26.05
# Percentage of customers per redemption status ("0" = not converted,
# "1" = converted), read directly from Campaign.dt.
tab1 <- with(Campaign.dt, round(prop.table(table(Redemption_Flag)) * 100, 2))
# bar-plot
# `legend.text` is spelled out in full (the original relied on partial
# matching of `legend`), and the no-op `beside` argument is dropped because
# the heights are a plain vector.
bp <- barplot(
  tab1,
  xlab = "Redemption (Not Converted / Converted)",
  ylab = "Percent (%)",
  main = "Percentage of Converted",
  col = c("lightblue", "red"),
  legend.text = names(tab1),
  ylim = c(0, 90)
)
# Write each percentage just above the baseline of its bar
text(bp, 0, round(tab1, 1), cex = 1, pos = 3)

Analysis of Redemption Flag by variable Offer Type

# Offer type x redemption status as row proportions: within each offer type
# the two redemption columns sum to 1. Columns are read directly from
# Campaign.dt instead of the attach()ed copy.
t3 <- with(
  Campaign.dt,
  prop.table(table(Offer_Type, Redemption_Flag), margin = 1)
)

# Show the table as percentages
round(t3 * 100, 2)
##                                 Redemption_Flag
## Offer_Type                           0     1
##   1 chocolate shake              62.84 37.16
##   2 chocolate shake with dessert 82.16 17.84
##   20 pc Chicken                  92.57  7.43
##   4 pc Chicken                   83.76 16.24
##   6 pc Chicken                   87.65 12.35
##   6 pc Chicken with LTO          66.59 33.41
##   6 pc Fish                      95.95  4.05
##   9 pc Chicken                   87.01 12.99
# Chart 1 -- row percentages: within each offer type, the share of candidates
# who did NOT redeem (column "0"). The original plotted column "1" under this
# "Did Not Convert" title. ylim goes to 100 because these shares exceed 90.
# The unused `beside` / `args.legend` arguments are dropped (no legend is drawn).
tab4 <- with(
  Campaign.dt,
  round(prop.table(table(Offer_Type, Redemption_Flag), margin = 1) * 100, 2)
)
bp <- barplot(
  tab4[, "0"],
  main = "Bar Chart for % of Candidates Who Did Not Convert, Split by Offer Type",
  col = "lightblue",
  xlab = "Offer Type",
  ylab = "Percent (%)",
  ylim = c(0, 100)
)
text(bp, 0, round(tab4[, "0"], 1), cex = 1, pos = 3)

# Chart 2 -- column percentages: how the candidates who DID redeem
# (column "1") are distributed over the offer types. The labels now come from
# the same column as the bars (the original labelled with column "0").
# NOTE(review): chart 1 uses row percentages and chart 2 column percentages,
# as in the original -- confirm that this mix is intended.
tab4 <- with(
  Campaign.dt,
  round(prop.table(table(Offer_Type, Redemption_Flag), margin = 2) * 100, 2)
)
bp <- barplot(
  tab4[, "1"],
  main = "Bar Chart for % of Candidates Who Did Convert, Split by Offer Type",
  col = "lightblue",
  xlab = "Offer Type",
  ylab = "Percent (%)",
  ylim = c(0, 90)
)
text(bp, 0, round(tab4[, "1"], 1), cex = 1, pos = 3)

Analysis of Redemption Flag by variable Minimum Purchase Needed

# Minimum purchase threshold x redemption status as row proportions: within
# each threshold the two redemption columns sum to 1. Columns are read
# directly from Campaign.dt instead of the attach()ed copy.
t1 <- with(
  Campaign.dt,
  prop.table(table(Min_Purchase_Needed, Redemption_Flag), margin = 1)
)

# Show the table as percentages
round(t1 * 100, 2)
##                    Redemption_Flag
## Min_Purchase_Needed     0     1
##                320  84.02 15.98
##                360  67.33 32.67
##                460  66.59 33.41
##                500  86.27 13.73
##                1000 92.57  7.43
# Chart 1 -- column percentages: how the candidates who did NOT redeem
# (column "0") are distributed over the minimum-purchase thresholds. The
# original drew column "1" while titling and labelling the chart with the
# non-converted figures. ylim goes to 100 so no bar can be clipped.
# The unused `beside` / `args.legend` arguments are dropped (no legend is drawn).
tab4 <- with(
  Campaign.dt,
  round(prop.table(table(Min_Purchase_Needed, Redemption_Flag), margin = 2) * 100, 2)
)
bp <- barplot(
  tab4[, "0"],
  main = "Bar Chart for % of Candidates Who Did Not Convert, Split by Minimum Purchase Needed",
  col = "lightblue",
  xlab = "Minimum Purchase Needed",
  ylab = "Percent (%)",
  ylim = c(0, 100)
)
text(bp, 0, round(tab4[, "0"], 1), cex = 1, pos = 3)

# Chart 2 -- row percentages: within each threshold, the share of candidates
# who DID redeem (column "1").
# NOTE(review): chart 1 uses column percentages and chart 2 row percentages,
# as in the original -- confirm that this mix is intended.
tab4 <- with(
  Campaign.dt,
  round(prop.table(table(Min_Purchase_Needed, Redemption_Flag), margin = 1) * 100, 2)
)
bp <- barplot(
  tab4[, "1"],
  main = "Bar Chart for % of Candidates Who Did Convert, Split by Minimum Purchase Needed",
  col = "lightblue",
  xlab = "Minimum Purchase Needed",
  ylab = "Percent (%)",
  ylim = c(0, 90)
)
text(bp, 0, round(tab4[, "1"], 1), cex = 1, pos = 3)

Analysis for Conversion based on Gender

# Gender x redemption status as row proportions: within each gender the two
# redemption columns sum to 1. Columns are read directly from Campaign.dt
# instead of the attach()ed copy.
t2 <- with(
  Campaign.dt,
  prop.table(table(Gender, Redemption_Flag), margin = 1)
)

# Show the table as percentages
round(t2 * 100, 2)
##          Redemption_Flag
## Gender        0     1
##   Female  74.50 25.50
##   Male    73.44 26.56
##   Unknown 72.62 27.38
# Column percentages: how each redemption group is distributed over the
# genders. Both charts use the same table.
tab4 <- with(
  Campaign.dt,
  round(prop.table(table(Gender, Redemption_Flag), margin = 2) * 100, 2)
)

# Chart 1 -- candidates who did NOT redeem (column "0"). The original drew
# column "1" while titling and labelling the chart with the non-converted
# figures. ylim goes to 100 so no bar can be clipped.
# The unused `beside` / `args.legend` arguments are dropped (no legend is drawn).
bp <- barplot(
  tab4[, "0"],
  main = "Bar Chart for % of Candidates Who Did Not Convert, Split by Gender",
  col = "lightblue",
  xlab = "Gender",
  ylab = "Percent (%)",
  ylim = c(0, 100)
)
text(bp, 0, round(tab4[, "0"], 1), cex = 1, pos = 3)

# Chart 2 -- candidates who DID redeem (column "1").
bp <- barplot(
  tab4[, "1"],
  main = "Bar Chart for % of Candidates Who Did Convert, Split by Gender",
  col = "lightblue",
  xlab = "Gender",
  ylab = "Percent (%)",
  ylim = c(0, 90)
)
text(bp, 0, round(tab4[, "1"], 1), cex = 1, pos = 3)

Average Vintage of the Candidates, Split by Redemption Flag (Converted / Not Converted)

# Mean Vintage (rounded to 2 decimals) per redemption status, sorted by status
op1 <- Campaign.dt[
  ,
  list(AverageVintageValue = round(mean(Vintage), digits = 2)),
  by = "Redemption_Flag"
][order(Redemption_Flag)]
op1
##    Redemption_Flag AverageVintageValue
## 1:               0             1607.76
## 2:               1             2026.63

Mean Plot for Vintage Value of the candidates, Split by Redemption Flag (Converted / Not Converted)

#loading the package
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Group means of Vintage for each Redemption_Flag level, with the mean values
# printed on the plot.
# NOTE(review): `frame` is forwarded to lower-level plotting calls that do not
# accept it; the rendered output shows '"frame" is not a graphical parameter'
# warnings.
plotmeans(
  Vintage ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Mean Plot for the Vintage  Value, Split by Redemption Flag",
  col = "RED",
  mean.labels = TRUE,
  frame = FALSE
)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

BoxPlot for Vintage, Split by Redemption Flag (Converted / Not Converted)

# Distribution of Vintage per redemption status. `data =` makes the formula
# read its variables from Campaign.dt instead of the attach()ed copy.
# The y axis is cut at 5000 days, so larger values are not shown.
boxplot(
  Vintage ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot for Vintage split by Redemption Flag",
  ylab = "Vintage in days",
  col = c("lightblue", "red"),
  xlim = c(0.5, 3.5),
  ylim = c(0, 5000)
)

Average Bill Value of the Candidates, Split by Redemption Flag (Converted / Not Converted)

# Mean Average_Bill_Value (rounded to 2 decimals) per redemption status,
# sorted by status
op2 <- Campaign.dt[
  ,
  list(AverageBillValue = round(mean(Average_Bill_Value), digits = 2)),
  by = "Redemption_Flag"
][order(Redemption_Flag)]
op2
##    Redemption_Flag AverageBillValue
## 1:               0           460.20
## 2:               1           401.54

Mean Plot for Average Bill Value of the candidates, Split by Redemption Flag (Converted / Not Converted)

#loading the package
library(gplots)
# Group means of Average_Bill_Value for each Redemption_Flag level, with the
# mean values printed on the plot.
# NOTE(review): `frame` is forwarded to lower-level plotting calls that do not
# accept it; the rendered output shows '"frame" is not a graphical parameter'
# warnings.
plotmeans(
  Average_Bill_Value ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Mean Plot for the Average Bill Value, Split by Redemption Flag",
  col = "RED",
  mean.labels = TRUE,
  frame = FALSE
)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

BoxPlot for Average Bill Value, Split by Redemption Flag (Converted / Not Converted)

# Distribution of Average_Bill_Value per redemption status.
# Fixes the y-axis label: bill values are amounts in INR, not days.
# `data =` makes the formula read its variables from Campaign.dt instead of
# the attach()ed copy. The y axis is cut at 1500, so larger bills are not shown.
boxplot(
  Average_Bill_Value ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot for Average Bill Value split by Redemption Flag",
  ylab = "Average Bill Value (INR)",
  col = c("lightblue", "red"),
  xlim = c(0.5, 3.5),
  ylim = c(0, 1500)
)

CONTINUOUS DATA DISTRIBUTION: Average Age of the customers, split by redemption status

# Mean Age (rounded to 2 decimals) per redemption status, sorted by status
op1 <- Campaign.dt[
  ,
  list(Age = round(mean(Age), digits = 2)),
  by = "Redemption_Flag"
][order(Redemption_Flag)]
op1
##    Redemption_Flag   Age
## 1:               0 29.06
## 2:               1 28.56

Mean Plot for the Age, Split by redemption status

library(gplots)
# Group means of Age for each Redemption_Flag level, with the mean values
# printed on the plot
plotmeans(
  Age ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Mean Plot for the Age, Split by redemption status",
  col = "RED",
  mean.labels = TRUE
)

BoxPlot for Age, Split by Redemption status

# Attach the table again so bare column names resolve to its current columns.
# NOTE(review): the table was already attached when it was loaded; every extra
# attach() adds another entry to the search path and triggers the masking
# messages shown below.
attach(Campaign.dt)
## The following objects are masked from Campaign.dt (pos = 5):
## 
##     Age, Average_Bill_Value, Campaign_Date, Customer_ID, Gender,
##     Latency, Min_Purchase_Needed,
##     Number_of_times_previously_purchased, Offer_Type, Recency,
##     Redemption_Flag, Transaction_Count, Vintage
# Distribution of Age per redemption status. `data =` makes the formula read
# its variables from Campaign.dt instead of relying on attach().
boxplot(
  Age ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot",
  ylab = "age",
  col = c("lightblue", "red")
)

Average Latency of the customers by redemption

# Mean Latency (rounded to 2 decimals) per redemption status, sorted by status
avgLat <- Campaign.dt[
  ,
  list(Latency = round(mean(Latency), digits = 2)),
  by = "Redemption_Flag"
][order(Redemption_Flag)]
avgLat
##    Redemption_Flag Latency
## 1:               0  164.29
## 2:               1   59.34

Mean Plot for Latency, Split by Redemption

library(gplots)
# Group means of Latency for each Redemption_Flag level, with the mean values
# printed on the plot.
# NOTE(review): `frame` is forwarded to lower-level plotting calls that do not
# accept it; the rendered output shows '"frame" is not a graphical parameter'
# warnings.
plotmeans(
  Latency ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Mean Plot for Latency, Split by Redemption",
  col = "RED",
  mean.labels = TRUE,
  frame = FALSE
)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

Boxplot for Latency, Split by redemption

# Attach the table once more so bare column names resolve to its current
# columns.
# NOTE(review): the table is already on the search path more than once (see
# the masking messages below, which list two earlier positions); the scatter
# plots at the end of the report still rely on bare column names.
attach(Campaign.dt)
## The following objects are masked from Campaign.dt (pos = 3):
## 
##     Age, Average_Bill_Value, Campaign_Date, Customer_ID, Gender,
##     Latency, Min_Purchase_Needed,
##     Number_of_times_previously_purchased, Offer_Type, Recency,
##     Redemption_Flag, Transaction_Count, Vintage
## The following objects are masked from Campaign.dt (pos = 6):
## 
##     Age, Average_Bill_Value, Campaign_Date, Customer_ID, Gender,
##     Latency, Min_Purchase_Needed,
##     Number_of_times_previously_purchased, Offer_Type, Recency,
##     Redemption_Flag, Transaction_Count, Vintage
# Distribution of Latency per redemption status. `data =` makes the formula
# read its variables from Campaign.dt instead of relying on attach().
boxplot(
  Latency ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot for Latency grouped by Redemption",
  col = c("lightblue", "red")
)

Average Recency of the customers by redemption

# Mean Recency (rounded to 2 decimals) per redemption status, sorted by status
avgRec <- Campaign.dt[
  ,
  list(Recency = round(mean(Recency), digits = 2)),
  by = "Redemption_Flag"
][order(Redemption_Flag)]
avgRec
##    Redemption_Flag Recency
## 1:               0  118.94
## 2:               1   41.85

Mean Plot for Recency, Split by Redemption

library(gplots)
# Group means of Recency for each Redemption_Flag level, with the mean values
# printed on the plot.
# NOTE(review): `frame` is forwarded to lower-level plotting calls that do not
# accept it; the rendered output shows '"frame" is not a graphical parameter'
# warnings.
plotmeans(
  Recency ~ Redemption_Flag,
  data = Campaign.dt,
  ylab = "Recency : Days since last visit",
  col = "RED",
  mean.labels = TRUE,
  frame = FALSE
)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

Boxplot of Recency of the customers with redemption

# Distribution of Recency per redemption status.
# Fixes the plot title, which ended with a stray ")".
# `data =` makes the formula read its variables from Campaign.dt instead of
# relying on attach(). The y axis is cut at 400 days.
boxplot(
  Recency ~ Redemption_Flag,
  data = Campaign.dt,
  main = "Boxplot recency of visits by customers",
  ylab = "Recency",
  col = c("lightblue", "red"),
  xlim = c(0.5, 3.5),
  ylim = c(0, 400)
)

Average of Transaction count (number of bills made by the customer)

# Mean Transaction_Count (rounded to 2 decimals) per redemption status,
# sorted by status
TransCount <- Campaign.dt[
  ,
  list(Transaction_Count = round(mean(Transaction_Count), digits = 2)),
  by = "Redemption_Flag"
][order(Redemption_Flag)]
TransCount
##    Redemption_Flag Transaction_Count
## 1:               0              4.63
## 2:               1             14.61

Mean Plot for transaction count split by redemption status

library(gplots)
# Group means of Transaction_Count for each Redemption_Flag level, with the
# mean values printed on the plot.
# NOTE(review): `frame` is forwarded to lower-level plotting calls that do not
# accept it; the rendered output shows '"frame" is not a graphical parameter'
# warnings.
plotmeans(
  Transaction_Count ~ Redemption_Flag,
  data = Campaign.dt,
  ylab = "Transaction Count",
  col = "RED",
  mean.labels = TRUE,
  frame = FALSE
)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

Average Age, Latency, Average Bill Value and Recency of customers, split by redemption and by Gender (Female / Male / Unknown)

# Rounded means of four numeric columns for every combination of redemption
# status and gender. Rows are sorted by redemption status only, so the gender
# order inside each status is whatever the grouping produced.
tab1 <- Campaign.dt[
  ,
  list(
    Age = round(mean(Age), digits = 2),
    Latency = round(mean(Latency), digits = 2),
    Average_Bill_Value = round(mean(Average_Bill_Value), digits = 2),
    Recency = round(mean(Recency), digits = 2)
  ),
  by = c("Redemption_Flag", "Gender")
][order(Redemption_Flag)]
tab1
##    Redemption_Flag  Gender   Age Latency Average_Bill_Value Recency
## 1:               0  Female 29.32  168.11             470.33  118.12
## 2:               0    Male 28.75  162.19             450.74  120.26
## 3:               0 Unknown 29.08  140.05             436.25  115.20
## 4:               1    Male 27.99   56.05             387.53   40.23
## 5:               1  Female 29.01   62.68             415.00   43.41
## 6:               1 Unknown 29.36   55.63             393.68   40.81

Correlation: correlation matrix for all continuous variables

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
##   method         from 
##   [.quosures     rlang
##   c.quosures     rlang
##   print.quosures rlang
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:psych':
## 
##     describe
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Numeric columns used for the correlation analysis, selected by name rather
# than by position (previously columns 5, 6, 7, 8, 9, 11 and 12) so the
# selection survives any change in column order.
mydata <- Campaign.dt[, .(
  Vintage,
  Recency,
  Average_Bill_Value,
  Transaction_Count,
  Latency,
  Age,
  Number_of_times_previously_purchased
)]
#head(mydata, 6)
# rcorr() needs a matrix; the result prints the correlations, the number of
# observations and the p-values.
res <- rcorr(as.matrix(mydata))
res
##                                      Vintage Recency Average_Bill_Value
## Vintage                                 1.00   -0.01               0.00
## Recency                                -0.01    1.00               0.20
## Average_Bill_Value                      0.00    0.20               1.00
## Transaction_Count                       0.04   -0.29              -0.10
## Latency                                -0.06    0.40               0.18
## Age                                     0.07    0.07               0.10
## Number_of_times_previously_purchased    0.03   -0.15              -0.03
##                                      Transaction_Count Latency   Age
## Vintage                                           0.04   -0.06  0.07
## Recency                                          -0.29    0.40  0.07
## Average_Bill_Value                               -0.10    0.18  0.10
## Transaction_Count                                 1.00   -0.44 -0.03
## Latency                                          -0.44    1.00  0.06
## Age                                              -0.03    0.06  1.00
## Number_of_times_previously_purchased              0.52   -0.26 -0.02
##                                      Number_of_times_previously_purchased
## Vintage                                                              0.03
## Recency                                                             -0.15
## Average_Bill_Value                                                  -0.03
## Transaction_Count                                                    0.52
## Latency                                                             -0.26
## Age                                                                 -0.02
## Number_of_times_previously_purchased                                 1.00
## 
## n= 275620 
## 
## 
## P
##                                      Vintage Recency Average_Bill_Value
## Vintage                                      0.0000  0.0326            
## Recency                              0.0000          0.0000            
## Average_Bill_Value                   0.0326  0.0000                    
## Transaction_Count                    0.0000  0.0000  0.0000            
## Latency                              0.0000  0.0000  0.0000            
## Age                                  0.0000  0.0000  0.0000            
## Number_of_times_previously_purchased 0.0000  0.0000  0.0000            
##                                      Transaction_Count Latency Age   
## Vintage                              0.0000            0.0000  0.0000
## Recency                              0.0000            0.0000  0.0000
## Average_Bill_Value                   0.0000            0.0000  0.0000
## Transaction_Count                                      0.0000  0.0000
## Latency                              0.0000                    0.0000
## Age                                  0.0000            0.0000        
## Number_of_times_previously_purchased 0.0000            0.0000  0.0000
##                                      Number_of_times_previously_purchased
## Vintage                              0.0000                              
## Recency                              0.0000                              
## Average_Bill_Value                   0.0000                              
## Transaction_Count                    0.0000                              
## Latency                              0.0000                              
## Age                                  0.0000                              
## Number_of_times_previously_purchased

Plotting correlation matrix

library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 3.6.1
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.6.1
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:data.table':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:gplots':
## 
##     textplot
## The following object is masked from 'package:graphics':
## 
##     legend
# Draw the correlation chart for the seven numeric columns held in mydata
chart.Correlation(mydata)

SCATTER PLOTS: Scatter Plot of Vintage and Transaction Count by Redemption (Converted / Not Converted)

# Vintage vs. Transaction_Count, coloured by redemption status (first level
# red, second level blue). Columns are read directly from Campaign.dt, and the
# flag is forced to a factor so the colour lookup and the legend never depend
# on which attach()ed copy of Redemption_Flag is on the search path.
# Axes are limited to Vintage <= 3500 and Transaction_Count <= 150.
with(Campaign.dt, {
  flag <- as.factor(Redemption_Flag)
  plot(
    Vintage, Transaction_Count,
    col = c("red", "blue")[flag],
    main = "Scatter Plot of Vintage and Transaction Count by Redemption",
    ylim = c(0, 150),
    xlim = c(0, 3500)
  )
  legend(x = "topright", legend = levels(flag), col = c("red", "blue"), pch = 1)
})

Scatter Plot of Vintage and Latency by Redemption (Converted / Not Converted)

# Vintage vs. Latency, coloured by redemption status (first level red, second
# level blue). Columns are read directly from Campaign.dt, and the flag is
# forced to a factor so the colour lookup and the legend never depend on which
# attach()ed copy of Redemption_Flag is on the search path.
# The x axis is limited to Vintage <= 3500.
with(Campaign.dt, {
  flag <- as.factor(Redemption_Flag)
  plot(
    Vintage, Latency,
    col = c("red", "blue")[flag],
    main = "Scatter Plot of Vintage and Latency by Redemption",
    xlim = c(0, 3500)
  )
  legend(x = "topright", legend = levels(flag), col = c("red", "blue"), pch = 1)
})

Scatter Plot of Vintage and Recency by Redemption (Converted / Not Converted)

# Vintage vs. Recency, coloured by redemption status (first level red, second
# level blue). Columns are read directly from Campaign.dt, and the flag is
# forced to a factor so the colour lookup and the legend never depend on which
# attach()ed copy of Redemption_Flag is on the search path.
# The x axis is limited to Vintage <= 3500.
with(Campaign.dt, {
  flag <- as.factor(Redemption_Flag)
  plot(
    Vintage, Recency,
    col = c("red", "blue")[flag],
    main = "Scatter Plot of Vintage and Recency by Redemption",
    xlim = c(0, 3500)
  )
  legend(x = "topright", legend = levels(flag), col = c("red", "blue"), pch = 1)
})