library(tidyverse)
library(completejourney)

Visualization #1:

transactions <- get_transactions()
redemptions  <- coupon_redemptions
households   <- demographics

basket_day <- transactions %>%
  mutate(day = as.Date(transaction_timestamp)) %>%
  group_by(household_id, day) %>%
  summarise(basket_sales = sum(sales_value, na.rm = TRUE), .groups = "drop")

redeemed_days <- redemptions %>%
  transmute(household_id, day = as.Date(redemption_date), redeemed = TRUE) %>%
  distinct()

avg_basket <- basket_day %>%
  left_join(redeemed_days, by = c("household_id","day")) %>%
  mutate(redeemed = if_else(is.na(redeemed), FALSE, redeemed)) %>%
  inner_join(households %>% select(household_id, household_size), by = "household_id") %>%
  group_by(household_size, redeemed) %>%
  summarise(avg_basket_sales = mean(basket_sales, na.rm = TRUE), .groups = "drop")

ggplot(avg_basket, aes(x = household_size, y = avg_basket_sales, fill = redeemed)) +
  geom_col(position = "dodge") +
  labs(
    title    = "Coupon Days vs. Non-Coupon Days: Average Basket Spend by Household Size",
    subtitle = "Average total spend per household-day; comparing days with a coupon redemption vs. without",
    x = "Household size",
    y = "Average basket spend ($)",
    fill = "Coupon redeemed?"
  )

Visualization #2:

tx <- get_transactions()

cheese_month <- products %>%
  filter(stringr::str_to_upper(product_category) == "CHEESE") %>%
  inner_join(tx, by = "product_id") %>%
  mutate(month = lubridate::month(transaction_timestamp, label = TRUE, abbr = TRUE)) %>%
  group_by(month) %>%
  summarise(sales = sum(sales_value, na.rm = TRUE), .groups = "drop")

peak_lab <- cheese_month %>% slice_max(sales, n = 1) %>% pull(month) %>% as.character()

ggplot(cheese_month, aes(month, sales, group = 1)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Amount of Cheese Bought per Month",
    subtitle = paste("Highest sales in", peak_lab),
    x = "Month",
    y = "Total Cheese Sales ($)",
  )

Visualization #3:

tx <- get_transactions()

df <- tx %>%
  inner_join(demographics, by = "household_id") %>%
  filter(marital_status == "Unmarried") %>%
  group_by(household_id, basket_id, income) %>%
  summarise(basket_spend = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  group_by(income) %>%
  summarise(median_spend = median(basket_spend, na.rm = TRUE),
            baskets = n(), .groups = "drop") %>%
  filter(!is.na(income)) %>%
  mutate(income = forcats::fct_reorder(income, median_spend))

ggplot(df, aes(x = median_spend, y = income)) +
  geom_segment(aes(x = 0, xend = median_spend, yend = income), color = "navyblue", linewidth = 1) +
  geom_point(size = 3, color = "red") +
  scale_x_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Unmarried Households: Median Basket Spend by Income",
    subtitle = "Each dot is the median total spend per basket; line shows distance from $0",
    x = "Median basket spend ($)", y = "Income",
  )