# Environment Setup ---------------------------------------------------------------
library(tidyverse)
library(completejourney)
library(lubridate)
library(here)
library(reshape2)
library(gganimate)
library(transformr)
library(ggthemes)
library(scales)
library(heatmaply)
library(GGally)
library(ggrepel)
library(formattable)
options(digits = 5)
options(pillar.sigfig = 10)
theme_plots <- function(base_size = 13, base_family = "", base_line_size = base_size/22,
base_rect_size = base_size/22) {
theme(
axis.title = element_text(size = 13),
axis.text.x = element_text(angle = 45, size = 10, hjust = 1, vjust = 1),
axis.text.y = element_text(size = 10),
axis.text = element_text(face = "bold"),
plot.caption = element_text(size = 10, face = "italic"),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(size = 1, colour = "black"),
strip.background = element_rect(fill = "#cddcdd"),
strip.text = element_text(colour = "black"),
legend.key = element_blank()
)
}
get_upper_tri <- function(CORMAT){
CORMAT[lower.tri(CORMAT)] <- NA
return(CORMAT)
}
AllTransactions <- get_transactions()
# Correlation Station ---------------------------------------------------------------
AgeAlias <- data.frame(age_str = c("19-24","25-34","35-44","45-54","55-64","65+"),
age_int = as.integer(c(1,2,3,4,5,6)),
stringsAsFactors = FALSE)
AgeAlias$age_f <- factor(AgeAlias$age_int, labels = c("19-24","25-34","35-44","45-54","55-64","65+"))
IncomeAlias <- data.frame(income_str = c("Under 15K","15-24K","25-34K","35-49K","50-74K","75-99K","100-124K","125-149K","150-174K","175-199K","200-249K","250K+"),
income_int = as.integer(c(1,2,3,4,5,6,7,8,9,10,11,12)),
stringsAsFactors = FALSE)
IncomeAlias$income_f <- factor(IncomeAlias$income_int, labels = c("Under 15K","15-24K","25-34K","35-49K","50-74K","75-99K","100-124K","125-149K","150-174K","175-199K","200-249K","250K+"))
HouseholdSizeAlias <- data.frame(household_size_str = c("1","2","3","4","5+"),
household_size_int = as.integer(c(1,2,3,4,5)),
stringsAsFactors = FALSE)
HouseholdSizeAlias$household_size_f <- factor(HouseholdSizeAlias$household_size_int, labels = c("1","2","3","4","5+"))
ProductsSubset <- products %>%
filter(!(department %in% c("FUEL"))) %>%
filter(!(product_type %in% c("GASOLINE-REG UNLEADED")))
HHTripTrans <- AllTransactions %>%
filter(quantity != 0) %>%
inner_join(ProductsSubset, by = c("product_id")) %>%
inner_join(demographics, by = c("household_id")) %>%
mutate(transaction_quarter = quarter(transaction_timestamp, type = "year.quarter")) %>%
mutate(transaction_quarter_str = as.character(transaction_quarter)) %>%
group_by(basket_id, age , income, household_size, transaction_quarter, transaction_quarter_str) %>%
summarize(
trip_item_count = sum(quantity),
trip_sales_value = sum(sales_value),
trip_retail_disc = sum(retail_disc),
trip_coupon_disc = sum(coupon_disc)
) %>%
mutate(
age_str = as.character(age),
income_str = as.character(income),
household_size_str = as.character(household_size)
) %>%
inner_join(AgeAlias, by = c("age_str")) %>%
inner_join(IncomeAlias, by = c("income_str")) %>%
inner_join(HouseholdSizeAlias, by = c("household_size_str")) %>%
subset(select = c(
transaction_quarter_str,
age_int,
income_int,
household_size_int,
trip_item_count,
trip_sales_value,
trip_retail_disc,
trip_coupon_disc))
HHTripTrans$transaction_quarter_str <- str_replace_all(HHTripTrans$transaction_quarter_str, pattern = "\\.", replacement = " Q")
HHTripTransPCorr <- ggpairs(
HHTripTrans[, c(1:8)],
aes(colour = transaction_quarter_str, alpha = 0.3),
columnLabels = c("Quarter", "Age", "Income", "Household Size", "Basket Item Count", "Basket Sales Value", "Basket Retail Disc", "Basket Coupon Disc"),
cardinality_threshold = NULL,
progress = FALSE
) +
labs(
title = "Correlation Station",
subtitle = "The data shows correlations between baskets and demographics.",
caption = "Data: https://github.com/bradleyboehmke/completejourney/tree/master/data",
)
AxisTune <- HHTripTransPCorr[1,1]
AxisTune <- AxisTune + coord_flip()
HHTripTransPCorr[1,1] <- AxisTune
AxisTune <- HHTripTransPCorr[8,2]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(2,4,6), labels = c("25-34", "45-54", "65+"))
HHTripTransPCorr[8,2] <- AxisTune
AxisTune <- HHTripTransPCorr[2,1]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(2,4,6), labels = c("25-34", "45-54", "65+"))
HHTripTransPCorr[2,1] <- AxisTune
AxisTune <- HHTripTransPCorr[8,3]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(3,6,9,12), labels = c("25-34K", "75-99K", "150-174K", "250K+"))
HHTripTransPCorr[8,3] <- AxisTune
AxisTune <- HHTripTransPCorr[3,1]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(3,6,9,12), labels = c("25-34K", "75-99K", "150-174K", "250K+"))
HHTripTransPCorr[3,1] <- AxisTune
AxisTune <- HHTripTransPCorr[8,4]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(1,2,3,4,5), labels = c("1", "2", "3", "4", "5+"))
HHTripTransPCorr[8,4] <- AxisTune
AxisTune <- HHTripTransPCorr[4,1]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(1,2,3,4,5), labels = c("1", "2", "3", "4", "5+"))
HHTripTransPCorr[4,1] <- AxisTune
AxisTune <- HHTripTransPCorr[8,1]
AxisTune <- AxisTune + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
HHTripTransPCorr[8,1] <- AxisTune
HHTripTransPCorr

# Coupon Clipping -----------------------------------------------------------------
campaign_begin_order <- c(1:27)
campaigns_ordered <- campaign_descriptions %>%
arrange(start_date) %>%
cbind(campaign_begin_order)
SentCoupons <- campaigns %>%
inner_join(coupons, by = c("campaign_id")) %>%
distinct(campaign_id, household_id, coupon_upc) %>%
add_count(campaign_id, household_id, name = "coupons_sent_per_hh") %>%
distinct(campaign_id, household_id, coupons_sent_per_hh) %>%
add_count(campaign_id, coupons_sent_per_hh, name = "hh_in_campaign") %>%
distinct(campaign_id, coupons_sent_per_hh, hh_in_campaign) %>%
dplyr::mutate(total_coupons_sent = coupons_sent_per_hh * hh_in_campaign)
RedeemedCoupons <- coupon_redemptions %>%
add_count(campaign_id, household_id, name = "hh_cr") %>%
distinct(campaign_id, household_id, hh_cr) %>%
add_count(campaign_id, wt = hh_cr, name = "total_cr") %>%
add_count(campaign_id, name = "total_redeeming_hh") %>%
group_by(campaign_id, total_cr, total_redeeming_hh) %>%
summarise(avg_cr_per_redeeming_hh = mean(hh_cr)) %>%
arrange(campaign_id)
CouponStats <- campaigns_ordered %>%
left_join(SentCoupons, by = c("campaign_id")) %>%
left_join(RedeemedCoupons, by = c("campaign_id")) %>%
select(campaign_id, campaign_begin_order, coupons_sent_per_hh, hh_in_campaign, total_coupons_sent, total_cr, total_redeeming_hh, avg_cr_per_redeeming_hh)
CouponStats[is.na(CouponStats)] <- 0
CouponStatsGG <- CouponStats %>%
melt(id = c("campaign_id","campaign_begin_order"), measure = c("total_coupons_sent","coupons_sent_per_hh", "hh_in_campaign", "total_cr", "total_redeeming_hh", "avg_cr_per_redeeming_hh"))
CouponStatsGG$variable <- as.character(CouponStatsGG$variable)
CouponStatsGG[CouponStatsGG == "coupons_sent_per_hh"] <- "Coupons Sent Per Household"
CouponStatsGG[CouponStatsGG == "hh_in_campaign"] <- "Number of Households Included"
CouponStatsGG[CouponStatsGG == "total_coupons_sent"] <- "Total Coupons Sent"
CouponStatsGG[CouponStatsGG == "total_cr"] <- "Total Coupon Redemptions"
CouponStatsGG[CouponStatsGG == "total_redeeming_hh"] <- "Total Redeeming Households"
CouponStatsGG[CouponStatsGG == "avg_cr_per_redeeming_hh"] <- "Average Coupons Redeemed Per Redeeming Household"
CouponStatsGG$variable <- as.factor(CouponStatsGG$variable)
CouponStatsGG$campaign_id <- as.factor(CouponStatsGG$campaign_id)
CouponStatsGG$campaign_begin_order <- as.integer(CouponStatsGG$campaign_begin_order)
CouponStatsGG$campaign_id <- factor(CouponStatsGG$campaign_id, levels = c("24", "25", "26", "27", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"))
CouponStatsGG$variable <- factor(CouponStatsGG$variable, levels = c("Total Coupons Sent", "Number of Households Included", "Coupons Sent Per Household", "Total Coupon Redemptions", "Total Redeeming Households", "Average Coupons Redeemed Per Redeeming Household"))
ggplot(CouponStatsGG, aes(x = campaign_begin_order, y = value, colour = campaign_id)) +
geom_point() +
facet_wrap(~ variable, scales = "free") +
xlab("Campaign Sequence - December 2016 to December 2017") +
ylab("Number Of Coupons") +
theme(legend.justification = 'centre',
legend.position = 'bottom',
legend.direction = "horizontal",
legend.key.height = unit(0.5, "cm"),
legend.key.width = unit(0.5,"cm")) +
guides(col = guide_legend(nrow = 1)) +
labs(
title = "Coupon Clipping: Are Shoppers Clipped Out?",
subtitle = "The data shows household coupon redemption behaviors for 27 supermarket coupon campaigns in 2017.",
caption = "Data: https://github.com/bradleyboehmke/completejourney/tree/master/data",
color = "Campaign Id")

# Timely Sales --------------------------------------------------------------------
WeekRefactor <- AllTransactions %>%
dplyr::mutate(transaction_date = as_date(transaction_timestamp)) %>%
dplyr::mutate(transaction_week = week(transaction_timestamp)) %>%
distinct(transaction_date,transaction_week) %>%
group_by(transaction_week) %>%
summarise(week_beginning = min(transaction_date))
WeekRefactor$week_beginning <- as.factor(as.character(WeekRefactor$week_beginning))
WeekRefactor$week_beginning <- factor(WeekRefactor$week_beginning, levels = c("2017-01-01","2017-01-08","2017-01-15","2017-01-22","2017-01-29","2017-02-05","2017-02-12","2017-02-19","2017-02-26","2017-03-05","2017-03-12","2017-03-19","2017-03-26","2017-04-02","2017-04-09","2017-04-16","2017-04-23","2017-04-30","2017-05-07","2017-05-14","2017-05-21","2017-05-28","2017-06-04","2017-06-11","2017-06-18","2017-06-25","2017-07-02","2017-07-09","2017-07-16","2017-07-23","2017-07-30","2017-08-06","2017-08-13","2017-08-20","2017-08-27","2017-09-03","2017-09-10","2017-09-17","2017-09-24","2017-10-01","2017-10-08","2017-10-15","2017-10-22","2017-10-29","2017-11-05","2017-11-12","2017-11-19","2017-11-26","2017-12-03","2017-12-10","2017-12-17","2017-12-24"))
AllHHAmountsByWeek <- AllTransactions %>%
dplyr::mutate(customer_amount = (sales_value - (retail_disc + coupon_disc + coupon_match_disc))) %>%
dplyr::mutate(transaction_week = week(transaction_timestamp)) %>%
dplyr::filter(transaction_week != 53) %>%
group_by(transaction_week, household_id) %>%
summarise(customer_spent = sum(customer_amount)) %>%
left_join(WeekRefactor, by = c("transaction_week"))
HHAmountDemoWeeklyAgg <- AllHHAmountsByWeek %>%
left_join(demographics, by = c("household_id")) %>%
select(transaction_week,customer_spent,household_size)
HHAmountDemoWeeklyAgg$household_size <- as.character(HHAmountDemoWeeklyAgg$household_size)
HHAmountDemoWeeklyAgg <- HHAmountDemoWeeklyAgg %>%
dplyr::mutate_at("household_size", ~replace_na(.,"Unknown"))
HHAmountDemoWeeklyAgg$household_size <- as.factor(HHAmountDemoWeeklyAgg$household_size)
HHAmountDemoWeeklyAgg$household_size <- factor(HHAmountDemoWeeklyAgg$household_size, levels = c("1","2","3","4","5+","Unknown"))
HHAmountDemoWeeklyAgg <- HHAmountDemoWeeklyAgg %>%
group_by(transaction_week, household_size) %>%
summarize(
max_hhsize_week_spend = max(customer_spent),
min_hhsize_week_spend = min(customer_spent),
mean_hhsize_week_spend = mean(customer_spent),
n()
) %>%
left_join(WeekRefactor, by = c("transaction_week"))
MaxAnnOne <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 1) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MaxAnnTwo <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 2) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MaxAnnThree <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 3) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MaxAnnFour <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 4) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MaxAnnFive <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == "5+") %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MaxAnnUk <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == "Unknown") %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
AllMax <- rbind(MaxAnnOne, MaxAnnTwo, MaxAnnThree, MaxAnnFour, MaxAnnFive, MaxAnnUk)
AllMax$amount_label <- currency(AllMax$mean_hhsize_week_spend)
MinAnnOne <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 1) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MinAnnTwo <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 2) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MinAnnThree <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 3) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MinAnnFour <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 4) %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MinAnnFive <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == "5+") %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
MinAnnUk <- HHAmountDemoWeeklyAgg %>%
group_by(household_size) %>%
filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == "Unknown") %>%
select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)
AllMin <- rbind(MinAnnOne, MinAnnTwo, MinAnnThree, MinAnnFour, MinAnnFive, MinAnnUk)
AllMin$amount_label <- currency(AllMin$mean_hhsize_week_spend)
ComboPlot <- ggplot(NULL, aes(week_beginning,customer_spent)) +
geom_boxplot(data = AllHHAmountsByWeek, outlier.shape = NA, coef = 0, color = "black", fill = "grey70") +
geom_line(data = HHAmountDemoWeeklyAgg, aes(transaction_week, mean_hhsize_week_spend, colour = household_size), size = 1.5) +
geom_label_repel(data = AllMax, aes(transaction_week, mean_hhsize_week_spend, label = amount_label, colour = household_size),
nudge_y = 110 - AllMax$mean_hhsize_week_spend,
nudge_x = -7,
box.padding = 1,
point.padding = .5,
force = 1,
segment.size = 1,
show.legend = FALSE) +
geom_label_repel(data = AllMin, aes(transaction_week, mean_hhsize_week_spend, label = amount_label, colour = household_size),
nudge_y = 5 - AllMin$mean_hhsize_week_spend,
box.padding = 1,
point.padding = .5,
force = 1,
segment.size = 1,
show.legend = FALSE) +
coord_cartesian(ylim = c(0, 110), expand = FALSE) +
scale_y_continuous(labels = dollar_format()) +
theme_bw() +
theme_plots() +
xlab("Week Beginning") +
ylab("Average Customer Spend") +
labs(
title = "Mo' Mouths, Mo' Money",
subtitle = "The data shows average weekly spending by household size against the average range for all households in 2017.",
caption = "Data: https://github.com/bradleyboehmke/completejourney/tree/master/data",
color = "Household Size")
ComboPlot
