library(tidyverse)
library(completejourney)
library(lubridate)

transactions <- get_transactions()

Plot 1

# Creating the dataset that maps coupon redemptions with the household id
consumer_behavior <- coupon_redemptions %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(income, kids_count) %>%
  summarize(
    total_redemptions = n(),
    household_count = n_distinct(household_id),
    redemptions_per_household = total_redemptions / household_count
  )

kid_labels <- c(
  "0" = "No Children",
  "1" = "1 Child",
  "2" = "2 Children",
  "3+" = "3+ Children"
)

# Plotting Bar Chart with Data Points and Line Curve Trend
ggplot(consumer_behavior, aes(x = income, y = redemptions_per_household)) +
  geom_col(aes(fill = income), alpha = 0.3) +
  geom_smooth(aes(x = as.numeric(income)), method = "loess", color = "black", linewidth = 1, se = FALSE) +
  geom_point(aes(color = income), size = 3) +
  facet_wrap(~kids_count, labeller = labeller(kids_count = kid_labels)) +
  scale_fill_viridis_d(option = "plasma", guide = "none") +
  scale_color_viridis_d(option = "plasma", guide = "none") +
  labs(
    title = "Coupon Usage Trends Across Income and Household Size",
    subtitle = "Presence of children drives strategic shopping behavior in middle-income families more than in households without children.",
    x = "Household Income Bracket",
    y = "Average Redemptions per Household"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    strip.background = element_rect(fill = "gray95"),
    strip.text = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 16)
  )

Plot 2

# Creating the dataset that identifies the transaction counts based on categories and timestamp
time_data_pattern <- transactions %>%
  inner_join(products, by = "product_id") %>%
  mutate(hour = hour(transaction_timestamp)) %>%
  filter(hour >= 6 & hour <= 23) %>% 
  group_by(hour, product_category) %>%
  summarize(transaction_count = n()) %>%
  group_by(product_category) %>%
  mutate(total_cat_vol = sum(transaction_count)) %>%
  ungroup() %>%
  slice_max(order_by = total_cat_vol, n = 1000) %>% 
  filter(product_category %in% c("SOFT DRINKS", "BEEF", "CHEESE", "MILK BY-PRODUCTS", "FROZEN PIZZA"))

# Plotting Time Series Data
ggplot(time_data_pattern, aes(x = hour, y = transaction_count, color = product_category)) +
  geom_line(linewidth = 1, alpha = 0.6) +
  geom_smooth(aes(), method = "loess", se = FALSE, span = 0.4) +
  geom_point(size = 2) +
  scale_color_viridis_d(option = "turbo") +
  scale_x_continuous(breaks = seq(6, 23, by = 1)) +
  labs(
    title = "The Circadian Rhythm of the Grocery Store (6 am - 11 pm)",
    subtitle = "Hourly volume reveals distinct morning vs. evening purchasing patterns",
    x = "Hour of Day (24h Clock)",
    y = "Total Transactions",
    color = "Product Category"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )

Plot 3

# Creating the dataset to calculate savings rate 
savings_data <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  filter(product_category %in% c("SOFT DRINKS", "BEEF", "CHEESE", "FROZEN PIZZA", "MILK BY-PRODUCTS")) %>%
  group_by(income, product_category) %>%
  summarize(
    total_retail = sum(sales_value + retail_disc + coupon_disc, na.rm = TRUE),
    total_disc = sum(retail_disc + coupon_disc, na.rm = TRUE),
    savings_rate = (total_disc / total_retail)
  )

# Plotting Heatmap visualization
ggplot(savings_data, aes(x = income, y = product_category, fill = savings_rate)) +
  geom_tile(color = "white") +
  scale_fill_distiller(palette = "Spectral", labels = scales::percent) +
  labs(
    title = "Strategic Savings: Coupon Sensitivity Across Income Brackets",
    subtitle = "Soft Drinks and Cheese emerge as the primary drivers of household discount savings almost across all income brackets.",
    x = "Household Income Bracket",
    y = "Product Category",
    fill = "Avg. Savings %",
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )