library(tidyverse)
library(completejourney)
Visualization #1:
transactions <- get_transactions()
redemptions <- coupon_redemptions
households <- demographics
basket_day <- transactions %>%
mutate(day = as.Date(transaction_timestamp)) %>%
group_by(household_id, day) %>%
summarise(basket_sales = sum(sales_value, na.rm = TRUE), .groups = "drop")
redeemed_days <- redemptions %>%
transmute(household_id, day = as.Date(redemption_date), redeemed = TRUE) %>%
distinct()
avg_basket <- basket_day %>%
left_join(redeemed_days, by = c("household_id","day")) %>%
mutate(redeemed = if_else(is.na(redeemed), FALSE, redeemed)) %>%
inner_join(households %>% select(household_id, household_size), by = "household_id") %>%
group_by(household_size, redeemed) %>%
summarise(avg_basket_sales = mean(basket_sales, na.rm = TRUE), .groups = "drop")
ggplot(avg_basket, aes(x = household_size, y = avg_basket_sales, fill = redeemed)) +
geom_col(position = "dodge") +
labs(
title = "Coupon Days vs. Non-Coupon Days: Average Basket Spend by Household Size",
subtitle = "Average total spend per household-day; comparing days with a coupon redemption vs. without",
x = "Household size",
y = "Average basket spend ($)",
fill = "Coupon redeemed?"
)

Visualization #2:
tx <- get_transactions()
cheese_month <- products %>%
filter(stringr::str_to_upper(product_category) == "CHEESE") %>%
inner_join(tx, by = "product_id") %>%
mutate(month = lubridate::month(transaction_timestamp, label = TRUE, abbr = TRUE)) %>%
group_by(month) %>%
summarise(sales = sum(sales_value, na.rm = TRUE), .groups = "drop")
peak_lab <- cheese_month %>% slice_max(sales, n = 1) %>% pull(month) %>% as.character()
ggplot(cheese_month, aes(month, sales, group = 1)) +
geom_line() +
geom_point() +
scale_y_continuous(labels = scales::dollar_format()) +
labs(
title = "Amount of Cheese Bought per Month",
subtitle = paste("Highest sales in", peak_lab),
x = "Month",
y = "Total Cheese Sales ($)",
)

Visualization #3:
tx <- get_transactions()
df <- tx %>%
inner_join(demographics, by = "household_id") %>%
filter(marital_status == "Unmarried") %>%
group_by(household_id, basket_id, income) %>%
summarise(basket_spend = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
group_by(income) %>%
summarise(median_spend = median(basket_spend, na.rm = TRUE),
baskets = n(), .groups = "drop") %>%
filter(!is.na(income)) %>%
mutate(income = forcats::fct_reorder(income, median_spend))
ggplot(df, aes(x = median_spend, y = income)) +
geom_segment(aes(x = 0, xend = median_spend, yend = income), color = "navyblue", linewidth = 1) +
geom_point(size = 3, color = "red") +
scale_x_continuous(labels = scales::dollar_format()) +
labs(
title = "Unmarried Households: Median Basket Spend by Income",
subtitle = "Each dot is the median total spend per basket; line shows distance from $0",
x = "Median basket spend ($)", y = "Income",
)
