# Setup ----

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Build the analysis table by attaching product and household attributes to
# each row of `transactions_sample`.
#
# Both joins are inner joins, so rows without a match are dropped. In
# particular, only transactions from households that have a record in
# `demographics` survive, and every plot built from `df` inherits that
# restriction.
#
# `relationship = "many-to-one"` makes dplyr raise an error if a lookup table
# ever contains duplicate keys, instead of silently duplicating transaction
# rows and inflating every count computed from `df`.
df <- transactions_sample %>%
  inner_join(
    products,
    by = "product_id",
    relationship = "many-to-one"
  ) %>%
  inner_join(
    demographics,
    by = "household_id",
    relationship = "many-to-one"
  )

# Plot 1 ----
# Histograms of positive sales values, one panel for each of the three
# departments with the most rows.

# Rank departments on the same rows that are plotted (sales_value > 0) so the
# "three busiest" selection and the histograms are based on identical data.
top3_depts <- df %>%
  filter(sales_value > 0) %>%
  count(department, sort = TRUE) %>%
  slice_head(n = 3) %>%
  pull(department)

df %>%
  filter(department %in% top3_depts, sales_value > 0) %>%
  ggplot(aes(sales_value)) +
  geom_histogram(bins = 40, fill = "steelblue", color = "white") +
  # Free y-axes let each department's panel use its own count scale; the
  # x-axis stays shared so sales values are comparable across panels.
  facet_wrap(~ department, nrow = 1, scales = "free_y") +
  labs(
    title    = "Typical sales amounts depending on department",
    subtitle = "Distribution of transaction-level sales values in the three busiest departments",
    x        = "Sales value ($)",
    y        = "Count of transactions",
    # `df` is inner-joined to `demographics`, so the caption names it too.
    caption  = "Data: Complete Journey (transactions_sample + products + demographics)."
  ) +
  scale_x_continuous(labels = scales::dollar) +
  theme_minimal(base_size = 12)

# Plot 2 ----
# Bar chart of the number of rows in `df` per brand, after dropping rows with
# a missing brand. `geom_bar()` counts rows; `quantity` is not used as a
# weight.
# NOTE(review): the labels speak of "items" while the bars count rows --
# confirm that row counts are the intended measure.
df %>%
  filter(!is.na(brand)) %>%
  ggplot(aes(brand)) +
  geom_bar(fill = "gray40") +
  labs(
    title    = "Brand mix across all transactions",
    subtitle = "Count of items purchased by brand type",
    x        = "Brand",
    y        = "Number of items",
    # `df` is inner-joined to `demographics`, so the caption names it too.
    caption  = "Data: Complete Journey (transactions_sample + products + demographics)."
  ) +
  theme_minimal(base_size = 12)

# Plot 3 ----
# Scatter plot of sales value against quantity, coloured by brand. Rows with a
# missing brand, or with a non-positive quantity or sales value, are removed
# before plotting. Points are semi-transparent to soften overplotting.
df %>%
  drop_na(brand) %>%
  filter(quantity > 0 & sales_value > 0) %>%
  ggplot(mapping = aes(x = quantity, y = sales_value, colour = brand)) +
  geom_point(alpha = 0.3) +
  scale_y_continuous(labels = scales::dollar) +
  theme_minimal(base_size = 12) +
  labs(
    title    = "Quantity vs. sales value",
    subtitle = "Larger quantities tend to correspond to higher spend; color shows brand on each transaction",
    x        = "Quantity (units in transaction)",
    y        = "Sales value ($)",
    colour   = "Brand",
    caption  = "Data: Complete Journey (transactions_sample + products + demographics)."
  )