Plot 1 — Top Product Groups by Household Income Tier

# Total sales per (income tier, department), keeping the five best-selling
# departments within each income tier. Transactions whose household has no
# income on record are dropped after the demographics join.
plot1_df <- transactions %>%
  left_join(
    # Only the department is used downstream, so no other product column is
    # carried through the join.
    products %>% select(product_id, product_group = department),
    by = "product_id"
  ) %>%
  left_join(
    demographics %>% select(household_id, income),
    by = "household_id"
  ) %>%
  filter(!is.na(income)) %>%
  group_by(income, product_group) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  group_by(income) %>%
  # with_ties = FALSE caps every tier at five rows, so the result always
  # matches a "Top 5" chart even if two departments tie for fifth place.
  slice_max(total_sales, n = 5, with_ties = FALSE) %>%
  ungroup()

# Horizontal bar chart of the top product groups, one facet per income tier.
#
# A department can appear in several tiers with very different totals, so
# reordering the bare department name would give one global order and leave
# the bars inside a facet unsorted. The x value is therefore a per-facet key
# ("<department>___<income>"): every key is unique, so fct_reorder() sorts
# it by that facet's own total. scale_x_discrete() strips the suffix again so
# the axis still shows plain department names.
ggplot(
  plot1_df,
  aes(
    x = fct_reorder(paste(product_group, income, sep = "___"), total_sales),
    y = total_sales,
    fill = income
  )
) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~ income, scales = "free_y") +
  scale_x_discrete(labels = function(key) sub("___.*$", "", key)) +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Top 5 Product Groups Purchased by Income Tier",
    subtitle = "Higher-income households spend more on premium categories,\nwhile lower-income households lean toward essentials.",
    x = "Product Group",
    y = "Total Sales ($)",
    caption = "Data: completejourney dataset"
  ) +
  theme_minimal(base_size = 20) +
  theme(
    plot.title = element_text(size = 30, face = "bold"),
    plot.subtitle = element_text(size = 20),
    axis.title = element_text(size = 19),
    axis.text = element_text(size = 20),
    strip.text = element_text(size = 20, face = "bold")
  )

Plot 2 — Weekly Sales Trend for the Top 3 Product Groups

# Weekly sales totals per department: each transaction is assigned to the
# week its timestamp falls in (via floor_date), then sales are summed per
# (week, department) pair.
weekly_sales <- transactions %>%
  left_join(products, by = "product_id") %>%
  mutate(
    week = floor_date(transaction_timestamp, "week"),
    product_group = department
  ) %>%
  group_by(week, product_group) %>%
  summarise(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# Names of the three departments with the largest sales summed over all
# weeks; used below to restrict the trend chart to those departments.
top_groups <- weekly_sales %>%
  group_by(product_group) %>%
  summarise(total = sum(total_sales), .groups = "drop") %>%
  # with_ties = FALSE keeps exactly the first three rows of the descending
  # ranking, as arrange(desc(total)) followed by slice_head(n = 3) would.
  slice_max(total, n = 3, with_ties = FALSE) %>%
  pull(product_group)

# Line chart of weekly sales for the three top-selling departments, one
# coloured line per department.
weekly_sales %>%
  filter(product_group %in% top_groups) %>%
  ggplot(aes(x = week, y = total_sales, color = product_group)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated as of ggplot2 3.4.0 and emits a warning.
  geom_line(linewidth = 2) +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Weekly Sales Trend (Top 3 Product Groups)",
    subtitle = "Total sales per week",
    x = "Week",
    y = "Total Sales ($)",
    caption = "Data: completejourney dataset"
  ) +
  theme_minimal(base_size = 22) +
  theme(
    plot.title = element_text(size = 32, face = "bold"),
    plot.subtitle = element_text(size = 22),
    axis.title = element_text(size = 22),
    axis.text = element_text(size = 18),
    legend.title = element_text(size = 22),
    legend.text = element_text(size = 18)
  )

Plot 3 — Basket Spend vs. Discounts by Household Income

library(plotly)

# One row per basket: total spend and total discount (retail + coupon),
# tagged with the household's income tier. Baskets from households with no
# income on record are dropped.
basket_vs_discount <- transactions %>%
  group_by(household_id, basket_id) %>%
  summarise(
    basket_spend = sum(sales_value, na.rm = TRUE),
    # Sum each discount column on its own. Adding the columns element-wise
    # first would turn a row into NA whenever either component is missing,
    # and na.rm would then discard the other, known discount as well.
    discount = sum(retail_disc, na.rm = TRUE) +
      sum(coupon_disc, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  left_join(
    demographics %>% select(household_id, income),
    by = "household_id"
  ) %>%
  filter(!is.na(income))

# Interactive scatter plot of basket spend against basket discount, coloured
# by income tier. The `text` aesthetic is not drawn by ggplot2 itself; it
# only supplies the hover label that ggplotly() is told to use below.
#
# NOTE(review): the discount is negated for both the x axis and the tooltip,
# which presumably assumes discounts are stored as negative amounts. If the
# data stores them as positive values the axis will show negative dollars;
# confirm against the dataset.
p <- basket_vs_discount %>%
  ggplot(aes(
    x = -discount,
    y = basket_spend,
    color = income,
    text = paste0(
      "Basket Spend: $", round(basket_spend, 2),
      "<br>Discount: $", round(-discount, 2),
      "<br>Income: ", income
    )
  )) +
  geom_point(size = 3, alpha = 0.6) +
  scale_x_continuous(name = "Total Discount per Basket ($)", labels = dollar) +
  scale_y_continuous(name = "Basket Spend ($)", labels = dollar) +
  labs(
    title = "Basket Spend vs. Discounts by Household Income",
    subtitle = "Hover points to see spend & discount values",
    color = "Income Tier",
    caption = "Data: completejourney dataset"
  ) +
  theme_minimal(base_size = 16)

# Convert to a plotly widget, showing only the custom hover text.
# NOTE(review): check that the subtitle and caption survive the conversion.
ggplotly(p, tooltip = "text")