# Environment Setup ---------------------------------------------------------------
library(tidyverse)
library(completejourney)
library(lubridate)
library(here)
library(reshape2)
library(gganimate)
library(transformr)
library(ggthemes)
library(scales)
library(heatmaply)
library(GGally)
library(ggrepel)
library(formattable)

options(digits = 5)
options(pillar.sigfig = 10)

theme_plots <- function(base_size = 13, base_family = "", base_line_size = base_size/22,
                                            base_rect_size = base_size/22) {
    theme(
        axis.title = element_text(size = 13),
        axis.text.x = element_text(angle = 45, size = 10, hjust = 1, vjust = 1),
        axis.text.y = element_text(size = 10),
        axis.text = element_text(face = "bold"),
        plot.caption = element_text(size = 10, face = "italic"),
        panel.grid.major = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(size = 1, colour = "black"),
        strip.background = element_rect(fill = "#cddcdd"),
        strip.text = element_text(colour = "black"),
        legend.key = element_blank()
    )
}

get_upper_tri <- function(CORMAT){
    CORMAT[lower.tri(CORMAT)] <- NA
    return(CORMAT)
}

AllTransactions <- get_transactions()
# Correlation Station ---------------------------------------------------------------
AgeAlias <- data.frame(age_str = c("19-24","25-34","35-44","45-54","55-64","65+"),
                                             age_int = as.integer(c(1,2,3,4,5,6)),
                                             stringsAsFactors = FALSE)
AgeAlias$age_f <- factor(AgeAlias$age_int, labels = c("19-24","25-34","35-44","45-54","55-64","65+"))

IncomeAlias <- data.frame(income_str = c("Under 15K","15-24K","25-34K","35-49K","50-74K","75-99K","100-124K","125-149K","150-174K","175-199K","200-249K","250K+"),
                                                    income_int = as.integer(c(1,2,3,4,5,6,7,8,9,10,11,12)),
                                                    stringsAsFactors = FALSE)
IncomeAlias$income_f <- factor(IncomeAlias$income_int, labels = c("Under 15K","15-24K","25-34K","35-49K","50-74K","75-99K","100-124K","125-149K","150-174K","175-199K","200-249K","250K+"))

HouseholdSizeAlias <- data.frame(household_size_str = c("1","2","3","4","5+"),
                                                                 household_size_int = as.integer(c(1,2,3,4,5)),
                                                                 stringsAsFactors = FALSE)
HouseholdSizeAlias$household_size_f <- factor(HouseholdSizeAlias$household_size_int, labels = c("1","2","3","4","5+"))

ProductsSubset <- products %>%
    filter(!(department %in% c("FUEL"))) %>%
    filter(!(product_type %in% c("GASOLINE-REG UNLEADED")))

HHTripTrans <- AllTransactions %>%
    filter(quantity != 0) %>%
    inner_join(ProductsSubset, by = c("product_id")) %>%
    inner_join(demographics, by = c("household_id")) %>%
    mutate(transaction_quarter = quarter(transaction_timestamp, type = "year.quarter")) %>%
    mutate(transaction_quarter_str = as.character(transaction_quarter)) %>%
    group_by(basket_id, age , income, household_size, transaction_quarter, transaction_quarter_str) %>%
    summarize(
        trip_item_count = sum(quantity),
        trip_sales_value = sum(sales_value),
        trip_retail_disc = sum(retail_disc),
        trip_coupon_disc = sum(coupon_disc)
    ) %>%
    mutate(
        age_str = as.character(age),
        income_str = as.character(income),
        household_size_str = as.character(household_size)
    ) %>%
    inner_join(AgeAlias, by = c("age_str")) %>%
    inner_join(IncomeAlias, by = c("income_str")) %>%
    inner_join(HouseholdSizeAlias, by = c("household_size_str")) %>%
    subset(select = c(
        transaction_quarter_str,
        age_int,
        income_int,
        household_size_int,
        trip_item_count,
        trip_sales_value,
        trip_retail_disc,
        trip_coupon_disc))

HHTripTrans$transaction_quarter_str <- str_replace_all(HHTripTrans$transaction_quarter_str, pattern = "\\.", replacement = " Q")

HHTripTransPCorr <- ggpairs(
    HHTripTrans[, c(1:8)],
    aes(colour = transaction_quarter_str, alpha = 0.3),
    columnLabels = c("Quarter", "Age", "Income", "Household Size", "Basket Item Count", "Basket Sales Value", "Basket Retail Disc", "Basket Coupon Disc"),
    cardinality_threshold = NULL,
    progress = FALSE
    ) +
    labs(
        title = "Correlation Station",
        subtitle = "The data shows correlations between baskets and demographics.",
        caption = "Data: https://github.com/bradleyboehmke/completejourney/tree/master/data",
        )

AxisTune <- HHTripTransPCorr[1,1]
AxisTune <- AxisTune + coord_flip()
HHTripTransPCorr[1,1] <- AxisTune

AxisTune <- HHTripTransPCorr[8,2]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(2,4,6), labels = c("25-34", "45-54", "65+"))
HHTripTransPCorr[8,2] <- AxisTune

AxisTune <- HHTripTransPCorr[2,1]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(2,4,6), labels = c("25-34", "45-54", "65+"))
HHTripTransPCorr[2,1] <- AxisTune

AxisTune <- HHTripTransPCorr[8,3]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(3,6,9,12), labels = c("25-34K", "75-99K", "150-174K", "250K+"))
HHTripTransPCorr[8,3] <- AxisTune

AxisTune <- HHTripTransPCorr[3,1]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(3,6,9,12), labels = c("25-34K", "75-99K", "150-174K", "250K+"))
HHTripTransPCorr[3,1] <- AxisTune

AxisTune <- HHTripTransPCorr[8,4]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(1,2,3,4,5), labels = c("1", "2", "3", "4", "5+"))
HHTripTransPCorr[8,4] <- AxisTune

AxisTune <- HHTripTransPCorr[4,1]
AxisTune <- AxisTune + scale_x_continuous(breaks = c(1,2,3,4,5), labels = c("1", "2", "3", "4", "5+"))
HHTripTransPCorr[4,1] <- AxisTune

AxisTune <- HHTripTransPCorr[8,1]
AxisTune <- AxisTune + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
HHTripTransPCorr[8,1] <- AxisTune

HHTripTransPCorr

# Coupon Clipping -----------------------------------------------------------------
campaign_begin_order <- c(1:27)

campaigns_ordered <- campaign_descriptions %>%
    arrange(start_date) %>%
    cbind(campaign_begin_order)

SentCoupons <- campaigns %>%
    inner_join(coupons, by = c("campaign_id")) %>%
    distinct(campaign_id, household_id, coupon_upc) %>%
    add_count(campaign_id, household_id, name = "coupons_sent_per_hh") %>%
    distinct(campaign_id, household_id, coupons_sent_per_hh) %>%
    add_count(campaign_id, coupons_sent_per_hh, name = "hh_in_campaign") %>%
    distinct(campaign_id, coupons_sent_per_hh, hh_in_campaign) %>%
    dplyr::mutate(total_coupons_sent = coupons_sent_per_hh * hh_in_campaign)

RedeemedCoupons <- coupon_redemptions %>%
    add_count(campaign_id, household_id, name = "hh_cr") %>%
    distinct(campaign_id, household_id, hh_cr) %>%
    add_count(campaign_id, wt = hh_cr, name = "total_cr") %>%
    add_count(campaign_id, name = "total_redeeming_hh") %>%
    group_by(campaign_id, total_cr, total_redeeming_hh) %>%
    summarise(avg_cr_per_redeeming_hh = mean(hh_cr)) %>%
    arrange(campaign_id)

CouponStats <- campaigns_ordered %>%
    left_join(SentCoupons, by = c("campaign_id")) %>%
    left_join(RedeemedCoupons, by = c("campaign_id")) %>%
    select(campaign_id, campaign_begin_order, coupons_sent_per_hh, hh_in_campaign, total_coupons_sent, total_cr, total_redeeming_hh, avg_cr_per_redeeming_hh)

CouponStats[is.na(CouponStats)] <- 0

CouponStatsGG <- CouponStats %>%
    melt(id = c("campaign_id","campaign_begin_order"), measure = c("total_coupons_sent","coupons_sent_per_hh", "hh_in_campaign", "total_cr", "total_redeeming_hh", "avg_cr_per_redeeming_hh"))

CouponStatsGG$variable <- as.character(CouponStatsGG$variable)

CouponStatsGG[CouponStatsGG == "coupons_sent_per_hh"] <- "Coupons Sent Per Household"
CouponStatsGG[CouponStatsGG == "hh_in_campaign"] <- "Number of Households Included"
CouponStatsGG[CouponStatsGG == "total_coupons_sent"] <- "Total Coupons Sent"
CouponStatsGG[CouponStatsGG == "total_cr"] <- "Total Coupon Redemptions"
CouponStatsGG[CouponStatsGG == "total_redeeming_hh"] <- "Total Redeeming Households"
CouponStatsGG[CouponStatsGG == "avg_cr_per_redeeming_hh"] <- "Average Coupons Redeemed Per Redeeming Household"

CouponStatsGG$variable <- as.factor(CouponStatsGG$variable)
CouponStatsGG$campaign_id <- as.factor(CouponStatsGG$campaign_id)
CouponStatsGG$campaign_begin_order <- as.integer(CouponStatsGG$campaign_begin_order)

CouponStatsGG$campaign_id <- factor(CouponStatsGG$campaign_id, levels = c("24", "25", "26", "27", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"))
CouponStatsGG$variable <- factor(CouponStatsGG$variable, levels = c("Total Coupons Sent", "Number of Households Included", "Coupons Sent Per Household", "Total Coupon Redemptions", "Total Redeeming Households", "Average Coupons Redeemed Per Redeeming Household"))

ggplot(CouponStatsGG, aes(x = campaign_begin_order, y = value, colour = campaign_id)) +
    geom_point() +
    facet_wrap(~ variable, scales = "free") +
    xlab("Campaign Sequence - December 2016 to December 2017") +
    ylab("Number Of Coupons") +
    theme(legend.justification = 'centre',
                legend.position = 'bottom',
                legend.direction = "horizontal",
                legend.key.height = unit(0.5, "cm"),
                legend.key.width = unit(0.5,"cm")) +
    guides(col = guide_legend(nrow = 1)) +
    labs(
        title = "Coupon Clipping: Are Shoppers Clipped Out?",
        subtitle = "The data shows household coupon redemption behaviors for 27 supermarket coupon campaigns in 2017.",
        caption = "Data: https://github.com/bradleyboehmke/completejourney/tree/master/data",
        color = "Campaign Id")

# Timely Sales --------------------------------------------------------------------
WeekRefactor <- AllTransactions %>%
    dplyr::mutate(transaction_date = as_date(transaction_timestamp)) %>%
    dplyr::mutate(transaction_week = week(transaction_timestamp)) %>%
    distinct(transaction_date,transaction_week) %>%
    group_by(transaction_week) %>%
    summarise(week_beginning = min(transaction_date))

WeekRefactor$week_beginning <- as.factor(as.character(WeekRefactor$week_beginning))

WeekRefactor$week_beginning <- factor(WeekRefactor$week_beginning, levels = c("2017-01-01","2017-01-08","2017-01-15","2017-01-22","2017-01-29","2017-02-05","2017-02-12","2017-02-19","2017-02-26","2017-03-05","2017-03-12","2017-03-19","2017-03-26","2017-04-02","2017-04-09","2017-04-16","2017-04-23","2017-04-30","2017-05-07","2017-05-14","2017-05-21","2017-05-28","2017-06-04","2017-06-11","2017-06-18","2017-06-25","2017-07-02","2017-07-09","2017-07-16","2017-07-23","2017-07-30","2017-08-06","2017-08-13","2017-08-20","2017-08-27","2017-09-03","2017-09-10","2017-09-17","2017-09-24","2017-10-01","2017-10-08","2017-10-15","2017-10-22","2017-10-29","2017-11-05","2017-11-12","2017-11-19","2017-11-26","2017-12-03","2017-12-10","2017-12-17","2017-12-24"))

AllHHAmountsByWeek <- AllTransactions %>%
    dplyr::mutate(customer_amount = (sales_value - (retail_disc + coupon_disc + coupon_match_disc))) %>%
    dplyr::mutate(transaction_week = week(transaction_timestamp)) %>%
    dplyr::filter(transaction_week != 53)  %>%
    group_by(transaction_week, household_id) %>%
    summarise(customer_spent = sum(customer_amount)) %>%
    left_join(WeekRefactor, by = c("transaction_week"))

HHAmountDemoWeeklyAgg <- AllHHAmountsByWeek %>%
    left_join(demographics, by = c("household_id")) %>%
    select(transaction_week,customer_spent,household_size)

HHAmountDemoWeeklyAgg$household_size <- as.character(HHAmountDemoWeeklyAgg$household_size)

HHAmountDemoWeeklyAgg <- HHAmountDemoWeeklyAgg %>%
    dplyr::mutate_at("household_size", ~replace_na(.,"Unknown"))

HHAmountDemoWeeklyAgg$household_size <- as.factor(HHAmountDemoWeeklyAgg$household_size)

HHAmountDemoWeeklyAgg$household_size <- factor(HHAmountDemoWeeklyAgg$household_size, levels = c("1","2","3","4","5+","Unknown"))

HHAmountDemoWeeklyAgg <- HHAmountDemoWeeklyAgg %>%
    group_by(transaction_week, household_size) %>%
    summarize(
        max_hhsize_week_spend = max(customer_spent),
        min_hhsize_week_spend = min(customer_spent),
        mean_hhsize_week_spend = mean(customer_spent),
        n()
    ) %>%
    left_join(WeekRefactor, by = c("transaction_week"))

MaxAnnOne <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 1) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MaxAnnTwo <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 2) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MaxAnnThree <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 3) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MaxAnnFour <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == 4) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MaxAnnFive <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == "5+") %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MaxAnnUk <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == max(mean_hhsize_week_spend) & household_size == "Unknown") %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

AllMax <- rbind(MaxAnnOne, MaxAnnTwo, MaxAnnThree, MaxAnnFour, MaxAnnFive, MaxAnnUk)

AllMax$amount_label <- currency(AllMax$mean_hhsize_week_spend)

MinAnnOne <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 1) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MinAnnTwo <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 2) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MinAnnThree <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 3) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MinAnnFour <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == 4) %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MinAnnFive <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == "5+") %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

MinAnnUk <- HHAmountDemoWeeklyAgg %>%
    group_by(household_size) %>%
    filter(mean_hhsize_week_spend == min(mean_hhsize_week_spend) & household_size == "Unknown") %>%
    select(transaction_week, week_beginning, household_size, mean_hhsize_week_spend)

AllMin <- rbind(MinAnnOne, MinAnnTwo, MinAnnThree, MinAnnFour, MinAnnFive, MinAnnUk)

AllMin$amount_label <- currency(AllMin$mean_hhsize_week_spend)

ComboPlot <- ggplot(NULL, aes(week_beginning,customer_spent)) +
    geom_boxplot(data = AllHHAmountsByWeek, outlier.shape = NA, coef = 0, color = "black", fill = "grey70") +
    geom_line(data = HHAmountDemoWeeklyAgg, aes(transaction_week, mean_hhsize_week_spend, colour = household_size), size = 1.5) +
    geom_label_repel(data = AllMax, aes(transaction_week, mean_hhsize_week_spend, label = amount_label, colour = household_size),
                                     nudge_y = 110 - AllMax$mean_hhsize_week_spend,
                                     nudge_x = -7,
                                     box.padding = 1,
                                     point.padding = .5,
                                     force = 1,
                                     segment.size  = 1,
                                     show.legend = FALSE) +
    geom_label_repel(data = AllMin, aes(transaction_week, mean_hhsize_week_spend, label = amount_label, colour = household_size),
                                     nudge_y = 5 - AllMin$mean_hhsize_week_spend,
                                     box.padding = 1,
                                     point.padding = .5,
                                     force = 1,
                                     segment.size  = 1,
                                     show.legend = FALSE) +
    coord_cartesian(ylim = c(0, 110), expand = FALSE) +
    scale_y_continuous(labels = dollar_format()) +
    theme_bw() +
    theme_plots() +
    xlab("Week Beginning") +
    ylab("Average Customer Spend") +
    labs(
        title = "Mo' Mouths, Mo' Money",
        subtitle = "The data shows average weekly spending by household size against the average range for all households in 2017.",
        caption = "Data: https://github.com/bradleyboehmke/completejourney/tree/master/data",
        color = "Household Size")

ComboPlot