The objective of the study was two-fold: To identify which customers to target first in the next planned campaign To identify the most effective communication channel/s To develop scientific method of communication for the next planned campaigns
Import required libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gmodels)
Data import, Data cleaning, Manipulation and merging
#Campaign Response data
campResponse <-read.csv("Campaign Response Data.csv", stringsAsFactors = F)
head(campResponse)
## CustomerID response n_comp loyalty portal rewards nps n_yrs
## 1 18263 1 2 0 1 0 7 8
## 2 50429 0 1 1 1 1 3 3
## 3 98593 1 0 1 0 0 9 6
## 4 44804 0 4 1 1 1 2 5
## 5 81015 0 4 1 1 1 2 2
## 6 15273 1 2 1 1 1 5 7
campResponse[!complete.cases(campResponse),]
## [1] CustomerID response n_comp loyalty portal rewards
## [7] nps n_yrs
## <0 rows> (or 0-length row.names)
#Customer master data
masterData <- read.csv("MasterLookUp.csv", stringsAsFactors = F)
head(masterData)
## CustomerID Region
## 1 10000 North
## 2 10001 South
## 3 10002 West
## 4 10003 South
## 5 10004 East
## 6 10005 West
masterData[!complete.cases(masterData),]
## [1] CustomerID Region
## <0 rows> (or 0-length row.names)
#Customer Transaction data
transactionData <-
read.csv("Transactions.csv", stringsAsFactors = F)
head(transactionData)
## CustomerID Date Month Year Brand Sales
## 1 10000 5/20/2014 5 2014 B4 21793
## 2 10000 10/24/2014 10 2014 B5 7155
## 3 10000 8/1/2014 8 2014 B1 29630
## 4 10000 10/20/2014 10 2014 B3 1530
## 5 10000 1/11/2013 1 2013 B2 3965
## 6 10000 4/19/2013 4 2013 B2 34608
#Campaign additional details
campDetails <-
read.csv("Campaign Details.csv", stringsAsFactors = F)
head(campDetails)
## CustomerID email sms call
## 1 10048 1 0 0
## 2 10073 1 0 1
## 3 10258 1 0 0
## 4 10416 1 0 1
## 5 10444 0 0 1
## 6 10454 0 1 0
campDetails[!complete.cases(campDetails),]
## [1] CustomerID email sms call
## <0 rows> (or 0-length row.names)
#Derive additional variables
AnnualSalesData <-
transactionData %>% filter(Year == "2014") %>% group_by(CustomerID) %>% summarize(AnnualSales =
sum(Sales))
head(AnnualSalesData)
## # A tibble: 6 x 2
## CustomerID AnnualSales
## <int> <int>
## 1 10000 60108
## 2 10001 68635
## 3 10004 69882
## 4 10005 9180
## 5 10006 93386
## 6 10008 33519
SalesBrand1Data <-
transactionData %>% filter(Brand == "B1" &
Year == "2014") %>% group_by(CustomerID) %>% summarize(SalesBrand1 = sum(Sales))
head(SalesBrand1Data)
## # A tibble: 6 x 2
## CustomerID SalesBrand1
## <int> <int>
## 1 10000 29630
## 2 10001 26477
## 3 10004 6783
## 4 10006 7720
## 5 10010 11048
## 6 10012 67323
SalesBrand1Q4Data <-
transactionData %>% filter(Brand == "B1" &
Year == "2014" &
Month %in% c(10, 11, 12)) %>% group_by(CustomerID) %>% summarize(SalesBrand1Q4 =
sum(Sales))
head(SalesBrand1Q4Data)
## # A tibble: 6 x 2
## CustomerID SalesBrand1Q4
## <int> <int>
## 1 10004 6783
## 2 10017 12974
## 3 10025 58045
## 4 10033 3603
## 5 10035 8150
## 6 10045 9785
BrandEngagementData <-
transactionData %>% filter(Year == "2014") %>% group_by(CustomerID) %>% summarize(BrandEngagement =
length(unique(Brand)))
head(BrandEngagementData)
## # A tibble: 6 x 2
## CustomerID BrandEngagement
## <int> <int>
## 1 10000 4
## 2 10001 2
## 3 10004 3
## 4 10005 2
## 5 10006 4
## 6 10008 2
BuyingFreqBrand12014 <-
transactionData %>% filter(Year == "2014" &
Brand == "B1") %>% group_by(CustomerID) %>% summarize(BuyingFreqBrand1 =
length(Brand))
head(BuyingFreqBrand12014)
## # A tibble: 6 x 2
## CustomerID BuyingFreqBrand1
## <int> <int>
## 1 10000 1
## 2 10001 2
## 3 10004 1
## 4 10006 1
## 5 10010 1
## 6 10012 3
BuyingFreq2014 <-
transactionData %>% filter(Year == "2014") %>% group_by(CustomerID) %>% summarize(BuyingFreq2014 =
length(Brand))
head(BuyingFreq2014)
## # A tibble: 6 x 2
## CustomerID BuyingFreq2014
## <int> <int>
## 1 10000 4
## 2 10001 4
## 3 10004 3
## 4 10005 2
## 5 10006 5
## 6 10008 2
dim(AnnualSalesData)
## [1] 84408 2
dim(SalesBrand1Data)
## [1] 37685 2
dim(SalesBrand1Q4Data)
## [1] 11406 2
dim(BuyingFreqBrand12014)
## [1] 37685 2
dim(BuyingFreq2014)
## [1] 84408 2
transactFinal <- AnnualSalesData %>%
inner_join(SalesBrand1Data, by = c("CustomerID")) %>%
inner_join(SalesBrand1Q4Data, by = c("CustomerID")) %>%
inner_join(BrandEngagementData, by = c("CustomerID")) %>%
inner_join(BuyingFreqBrand12014, by = c("CustomerID")) %>%
inner_join(BuyingFreq2014, by = c("CustomerID"))
transactFinal <-
transactFinal %>% mutate(Brand1PercentageContri = round((SalesBrand1 / AnnualSales) * 100))
head(transactFinal)
## # A tibble: 6 x 8
## CustomerID AnnualSales SalesBrand1 SalesBrand1Q4 BrandEngagement
## <int> <int> <int> <int> <int>
## 1 10004 69882 6783 6783 3
## 2 10017 45797 12974 12974 2
## 3 10025 60271 58045 58045 2
## 4 10033 19174 3603 3603 3
## 5 10035 69617 8150 8150 4
## 6 10045 9785 9785 9785 1
## # ... with 3 more variables: BuyingFreqBrand1 <int>, BuyingFreq2014 <int>,
## # Brand1PercentageContri <dbl>
dim(transactFinal)
## [1] 11406 8
transactionData[!complete.cases(transactionData),]
## [1] CustomerID Date Month Year Brand Sales
## <0 rows> (or 0-length row.names)
Merging of data and NA imputation
#Merge all data in one
data <- campDetails %>%
inner_join(campResponse, by = c("CustomerID")) %>%
inner_join(masterData, by = c("CustomerID")) %>%
left_join(transactFinal, by = c("CustomerID"))
dim(data)
## [1] 1228 19
#Convert NA values of Sales, Brandfrequency to 0
data[which(is.na(data$AnnualSales)), "AnnualSales"] <- 0
data[which(is.na(data$SalesBrand1)), "SalesBrand1"] <- 0
data[which(is.na(data$SalesBrand1Q4)), "SalesBrand1Q4"] <- 0
data[which(is.na(data$BrandEngagement)), "BrandEngagement"] <- 0
data[which(is.na(data$BuyingFreqBrand1)), "BuyingFreqBrand1"] <- 0
data[which(is.na(data$BuyingFreq2014)), "BuyingFreq2014"] <- 0
data[which(is.na(data$Brand1PercentageContri)), "Brand1PercentageContri"] <- 0
dataCopy <- data
dataCopy$Responded<-ifelse(dataCopy$response=="0","No","Yes")
head(dataCopy)
## CustomerID email sms call response n_comp loyalty portal rewards nps
## 1 10048 1 0 0 1 2 0 0 0 5
## 2 10073 1 0 1 1 5 1 0 1 3
## 3 10258 1 0 0 0 0 0 0 0 7
## 4 10416 1 0 1 0 1 1 0 1 1
## 5 10444 0 0 1 0 1 0 0 0 3
## 6 10454 0 1 0 0 0 0 1 0 7
## n_yrs Region AnnualSales SalesBrand1 SalesBrand1Q4 BrandEngagement
## 1 5 South 0 0 0 0
## 2 7 North 0 0 0 0
## 3 9 East 115222 58909 16108 2
## 4 5 South 0 0 0 0
## 5 5 South 112583 6195 6195 5
## 6 3 South 0 0 0 0
## BuyingFreqBrand1 BuyingFreq2014 Brand1PercentageContri Responded
## 1 0 0 0 Yes
## 2 0 0 0 Yes
## 3 2 6 51 No
## 4 0 0 0 No
## 5 1 6 6 No
## 6 0 0 0 No
Descriptive stats on response variable Response varibale is dichotomous categorical variable. Measure of frequency can be applied here. Measure of central tendency of mean, median and Measures of dipersion like SD, IQR will not be useful here.
table(dataCopy$response)
##
## 0 1
## 736 492
table(dataCopy$response,dataCopy$loyalty)
##
## 0 1
## 0 379 357
## 1 229 263
table(dataCopy$response,dataCopy$BrandEngagement)
##
## 0 1 2 3 4 5
## 0 637 12 32 32 20 3
## 1 436 8 19 16 10 3
table(dataCopy$response,dataCopy$BuyingFreqBrand1)
##
## 0 1 2 3 4
## 0 637 64 26 9 0
## 1 436 30 20 5 1
table(dataCopy$response,dataCopy$n_yrs)
##
## 2 3 4 5 6 7 8 9
## 0 76 131 110 108 94 76 99 42
## 1 32 50 72 79 81 71 71 36
table(dataCopy$response,dataCopy$n_comp)
##
## 0 1 2 3 4 5
## 0 72 124 147 167 140 86
## 1 48 94 113 92 104 41
Vislauize how customers responded to the campaign using bar plot Also lets see if complaints raised in last 3 months impacts response to the campaign
a <- dataCopy %>% group_by(response) %>% count()
ggplot(data = a, aes(x = response, y = n)) +
geom_bar(stat = "identity", aes(fill = response)) +
geom_text(aes(label = n)) +
labs(title = "How customers responded to the campaign", x =
"Response to the campaign", y = "Number of customers") +
guides(fill = guide_legend("Response to campaign")) +
theme(legend.position = "bottom")
b <- dataCopy %>% group_by(Responded, n_comp) %>% count()
b
## # A tibble: 12 x 3
## # Groups: Responded, n_comp [12]
## Responded n_comp n
## <chr> <int> <int>
## 1 No 0 72
## 2 No 1 124
## 3 No 2 147
## 4 No 3 167
## 5 No 4 140
## 6 No 5 86
## 7 Yes 0 48
## 8 Yes 1 94
## 9 Yes 2 113
## 10 Yes 3 92
## 11 Yes 4 104
## 12 Yes 5 41
ggplot(data = b, aes(x = Responded, y = n)) +
geom_bar(stat = "identity", aes(fill = factor(n_comp)), position = "dodge") +
labs(title = "Relation between complaints raised and customer response to Campaign", x =
"Response to the campaign", y = "Number of customers") +
guides(fill = guide_legend("Number of compaints in last 3 months")) +
theme(legend.position = "bottom")
Visualize how Brand 1 Q4 sales and Annual sales impact Response to Campaign
ggplot(dataCopy,aes(x=SalesBrand1Q4,y=AnnualSales))+
geom_point(aes(color=Responded,size=BuyingFreqBrand1),alpha=0.5)+
labs(title = "Annual Sales v/s Q4 Sales of Brand 1 (Year 2014)", x =
"Q4 Sales of Brand 1", y = "Annual Sales of year 2014") +
guides(fill = guide_legend("Response to the campaign")) +
theme(legend.position = "bottom")
Heatmap of how customers of different region responded to the campaign
c <-dataCopy %>% group_by(Region, Responded) %>%count()
ggplot(data=c,aes(x=Region,y=Responded))+
geom_tile(aes(fill=n),colour="black")+
geom_text(aes(label=n),colour="black")+
scale_fill_gradient(low="lightblue",high="darkblue")+
labs(title = "Heatmap of how customers of different region responded to the campaign", x =
"Zones", y = "Response to Campaign") +
theme_bw()
NPS is ordinal scale. Below is the summary stats for NPS wrt response.
summary(dataCopy$nps)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 4.000 4.406 7.000 10.000
dataCopy$npsCategory <- cut(
dataCopy$nps,
breaks = c(0, 6, 8, Inf),
labels = c("Detractor", "Passive", "Promotors")
)
tail(dataCopy)
## CustomerID email sms call response n_comp loyalty portal rewards nps
## 1223 99912 1 1 1 0 1 1 1 1 8
## 1224 99921 0 1 0 1 4 0 1 0 2
## 1225 99923 1 1 1 0 2 1 1 1 8
## 1226 99972 0 1 0 0 1 0 1 0 2
## 1227 99994 1 2 1 1 1 1 0 1 4
## 1228 99997 1 0 1 1 5 1 0 1 2
## n_yrs Region AnnualSales SalesBrand1 SalesBrand1Q4 BrandEngagement
## 1223 2 West 0 0 0 0
## 1224 7 North 0 0 0 0
## 1225 7 East 0 0 0 0
## 1226 3 South 0 0 0 0
## 1227 2 East 0 0 0 0
## 1228 8 North 0 0 0 0
## BuyingFreqBrand1 BuyingFreq2014 Brand1PercentageContri Responded
## 1223 0 0 0 No
## 1224 0 0 0 Yes
## 1225 0 0 0 No
## 1226 0 0 0 No
## 1227 0 0 0 Yes
## 1228 0 0 0 Yes
## npsCategory
## 1223 Passive
## 1224 Detractor
## 1225 Passive
## 1226 Detractor
## 1227 Detractor
## 1228 Detractor
d1<-dataCopy%>%group_by(Responded)%>%summarize(minNps=min(nps),maxNps=max(nps),medianNps=median(nps))
d1
## # A tibble: 2 x 4
## Responded minNps maxNps medianNps
## <chr> <dbl> <dbl> <dbl>
## 1 No 0. 8. 4.
## 2 Yes 0. 10. 5.
Visualize the relation between NPS and respons eto the campaign using Box plot/Bar plots
#Bar plot
ggplot(d1, aes(Responded, medianNps)) +
geom_bar(stat = "identity", fill = "lightblue") +
labs(title = "Median of NPS v/s Response to the campaign", x = "Response to the campaign", y = "Median of NPS") +
theme_bw()
#BoxPlot
ggplot(dataCopy,aes(x=Responded,y=nps))+
geom_boxplot()+
labs(title = "Box plot to understand distribution of NPS over response to campaign", x = "Response to the campaign", y = "NPS") +
theme_bw()
#Histogram
ggplot(dataCopy,aes(x=nps))+
geom_histogram(binwidth=1,aes(fill=Responded),color="black")+
labs(title = "Histogram to understand distribution of NPS over response to campaign", x = "NPS", y = "Number of customers") +
theme_bw()+
facet_wrap(~Responded)
CrossTable to nalayse distribution of customers wrt to NPS category (Detractor, Passive,Promotors). Also to understand if NPS category impacts the way customer responded to the Campaign
library(gmodels)
CrossTable(dataCopy$response,dataCopy$npsCategory,chisq = T)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1159
##
##
## | dataCopy$npsCategory
## dataCopy$response | Detractor | Passive | Promotors | Row Total |
## ------------------|-----------|-----------|-----------|-----------|
## 0 | 554 | 137 | 0 | 691 |
## | 5.649 | 0.311 | 46.504 | |
## | 0.802 | 0.198 | 0.000 | 0.596 |
## | 0.660 | 0.568 | 0.000 | |
## | 0.478 | 0.118 | 0.000 | |
## ------------------|-----------|-----------|-----------|-----------|
## 1 | 286 | 104 | 78 | 468 |
## | 8.341 | 0.459 | 68.663 | |
## | 0.611 | 0.222 | 0.167 | 0.404 |
## | 0.340 | 0.432 | 1.000 | |
## | 0.247 | 0.090 | 0.067 | |
## ------------------|-----------|-----------|-----------|-----------|
## Column Total | 840 | 241 | 78 | 1159 |
## | 0.725 | 0.208 | 0.067 | |
## ------------------|-----------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 129.9266 d.f. = 2 p = 6.120735e-29
##
##
##
Deriving summary statistics of number of cmplaints wrt response to campaign
summary(dataCopy$n_comp)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 3.000 2.546 4.000 5.000
d2<-dataCopy%>%group_by(Responded)%>%summarize(minN_comp=min(n_comp),maxN_comp=max(n_comp),medianN_comp=median(n_comp),meanN_comp=mean(n_comp))
d2
## # A tibble: 2 x 5
## Responded minN_comp maxN_comp medianN_comp meanN_comp
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 No 0. 5. 3. 2.59
## 2 Yes 0. 5. 2. 2.47
summary(dataCopy$n_yrs)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 5.000 5.344 7.000 9.000
d3<-dataCopy%>%group_by(Responded)%>%summarize(minN_yrs=min(n_yrs),maxN_yrs=max(n_yrs),medianN_yrs=median(n_yrs),meanN_yrs=mean(n_yrs))
d3
## # A tibble: 2 x 5
## Responded minN_yrs maxN_yrs medianN_yrs meanN_yrs
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 No 2. 9. 5. 5.15
## 2 Yes 2. 9. 6. 5.63
Visualise mean n_comp using bar plot
ggplot(d2, aes(Responded, meanN_comp)) +
geom_bar(stat = "identity", fill = "orange") +
labs(title = "Mean of Number of complaints in last 3 months v/s Response to the campaign", x = "Response to the campaign", y = "Mean of Number of complaints") +
theme_bw()
Summary stats of Sales varaibles
summary(dataCopy$AnnualSales)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 9013 0 187991
d4<-dataCopy%>%group_by(Responded)%>%summarize(minAnnualSales=min(AnnualSales),maxAnnualSales=max(AnnualSales),medianAnnualSales=median(AnnualSales),meanAnnualSales=mean(AnnualSales))
d4
## # A tibble: 2 x 5
## Responded minAnnualSales maxAnnualSales medianAnnualSal~ meanAnnualSales
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 No 0. 177421. 0. 9446.
## 2 Yes 0. 187991. 0. 8365.
Visualize Annual Sales, Sales of 2014 Q4 with Boxplots Median of Annual Sales is 0 since around 75% of Customers showed no Annual Sales.
ggplot(dataCopy,aes(x=Responded,y=AnnualSales))+
geom_boxplot()
ggplot(dataCopy,aes(x=Responded,y=SalesBrand1))+
geom_boxplot()
Hypothesis test to see if some variables impact customers response to the campaign
CrossTable(dataCopy$response,dataCopy$portal,chisq = T)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1228
##
##
## | dataCopy$portal
## dataCopy$response | 0 | 1 | Row Total |
## ------------------|-----------|-----------|-----------|
## 0 | 361 | 375 | 736 |
## | 0.005 | 0.005 | |
## | 0.490 | 0.510 | 0.599 |
## | 0.602 | 0.597 | |
## | 0.294 | 0.305 | |
## ------------------|-----------|-----------|-----------|
## 1 | 239 | 253 | 492 |
## | 0.008 | 0.008 | |
## | 0.486 | 0.514 | 0.401 |
## | 0.398 | 0.403 | |
## | 0.195 | 0.206 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 600 | 628 | 1228 |
## | 0.489 | 0.511 | |
## ------------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 0.0262555 d.f. = 1 p = 0.8712779
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 0.01077157 d.f. = 1 p = 0.917339
##
##
CrossTable(dataCopy$response,dataCopy$loyalty,chisq = T)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1228
##
##
## | dataCopy$loyalty
## dataCopy$response | 0 | 1 | Row Total |
## ------------------|-----------|-----------|-----------|
## 0 | 379 | 357 | 736 |
## | 0.585 | 0.573 | |
## | 0.515 | 0.485 | 0.599 |
## | 0.623 | 0.576 | |
## | 0.309 | 0.291 | |
## ------------------|-----------|-----------|-----------|
## 1 | 229 | 263 | 492 |
## | 0.875 | 0.858 | |
## | 0.465 | 0.535 | 0.401 |
## | 0.377 | 0.424 | |
## | 0.186 | 0.214 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 608 | 620 | 1228 |
## | 0.495 | 0.505 | |
## ------------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 2.890214 d.f. = 1 p = 0.08911907
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 2.695593 d.f. = 1 p = 0.100626
##
##
CrossTable(dataCopy$response,dataCopy$rewards,chisq = T)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 1228
##
##
## | dataCopy$rewards
## dataCopy$response | 0 | 1 | Row Total |
## ------------------|-----------|-----------|-----------|
## 0 | 385 | 351 | 736 |
## | 0.529 | 0.537 | |
## | 0.523 | 0.477 | 0.599 |
## | 0.622 | 0.576 | |
## | 0.314 | 0.286 | |
## ------------------|-----------|-----------|-----------|
## 1 | 234 | 258 | 492 |
## | 0.791 | 0.804 | |
## | 0.476 | 0.524 | 0.401 |
## | 0.378 | 0.424 | |
## | 0.191 | 0.210 | |
## ------------------|-----------|-----------|-----------|
## Column Total | 619 | 609 | 1228 |
## | 0.504 | 0.496 | |
## ------------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 2.660127 d.f. = 1 p = 0.1028925
##
## Pearson's Chi-squared test with Yates' continuity correction
## ------------------------------------------------------------
## Chi^2 = 2.473554 d.f. = 1 p = 0.1157759
##
##
Buying frequency 2014 and response to campaign
d5<-dataCopy%>%filter(response=="1")%>%group_by(BuyingFreq2014)%>%summarize(TotalCustomerYes=length(BuyingFreq2014))%>%inner_join(
dataCopy%>%group_by(BuyingFreq2014)%>%summarize(TotalCustomers=length(BuyingFreq2014)),by="BuyingFreq2014")
d6<-d5%>%mutate(ResponseRate=round((ResponseRate=TotalCustomerYes/TotalCustomers)*100))
d6
## # A tibble: 10 x 4
## BuyingFreq2014 TotalCustomerYes TotalCustomers ResponseRate
## <dbl> <int> <int> <dbl>
## 1 0. 436 1073 41.
## 2 1. 6 13 46.
## 3 2. 11 38 29.
## 4 3. 6 26 23.
## 5 4. 13 29 45.
## 6 5. 7 20 35.
## 7 6. 4 16 25.
## 8 7. 7 10 70.
## 9 8. 1 2 50.
## 10 9. 1 1 100.
Brand Engagement 2014 and response to campaign
d7<-dataCopy%>%filter(response=="1")%>%group_by(BrandEngagement)%>%summarize(TotalCustomerYes=length(BrandEngagement))%>%inner_join(
dataCopy%>%group_by(BrandEngagement)%>%summarize(TotalCustomers=length(BrandEngagement)),by="BrandEngagement")
d8<-d7%>%mutate(ResponseRate=round((ResponseRate=TotalCustomerYes/TotalCustomers)*100))
d8
## # A tibble: 6 x 4
## BrandEngagement TotalCustomerYes TotalCustomers ResponseRate
## <dbl> <int> <int> <dbl>
## 1 0. 436 1073 41.
## 2 1. 8 20 40.
## 3 2. 19 51 37.
## 4 3. 16 48 33.
## 5 4. 10 30 33.
## 6 5. 3 6 50.
Region and response rate
d7<-dataCopy%>%filter(response=="1")%>%group_by(Region)%>%summarize(CustomerCount=length(Region))%>%mutate(ResponseRate=round((CustomerCount/sum(CustomerCount))*100))
d7<-d7%>%mutate(label=paste0(Region," ",ResponseRate,"%"))
pie(d7$ResponseRate,labels = d7$label)