library(tidyverse)
library(completejourney)
library(lubridate)
transactions <- get_transactions()
Plot 1
# Creating the dataset that maps coupon redemptions with the household id
consumer_behavior <- coupon_redemptions %>%
inner_join(demographics, by = "household_id") %>%
group_by(income, kids_count) %>%
summarize(
total_redemptions = n(),
household_count = n_distinct(household_id),
redemptions_per_household = total_redemptions / household_count
)
kid_labels <- c(
"0" = "No Children",
"1" = "1 Child",
"2" = "2 Children",
"3+" = "3+ Children"
)
# Plotting Bar Chart with Data Points and Line Curve Trend
ggplot(consumer_behavior, aes(x = income, y = redemptions_per_household)) +
geom_col(aes(fill = income), alpha = 0.3) +
geom_smooth(aes(x = as.numeric(income)), method = "loess", color = "black", linewidth = 1, se = FALSE) +
geom_point(aes(color = income), size = 3) +
facet_wrap(~kids_count, labeller = labeller(kids_count = kid_labels)) +
scale_fill_viridis_d(option = "plasma", guide = "none") +
scale_color_viridis_d(option = "plasma", guide = "none") +
labs(
title = "Coupon Usage Trends Across Income and Household Size",
subtitle = "Presence of children drives strategic shopping behavior in middle-income families more than in households without children.",
x = "Household Income Bracket",
y = "Average Redemptions per Household"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
strip.background = element_rect(fill = "gray95"),
strip.text = element_text(face = "bold"),
plot.title = element_text(face = "bold", size = 16)
)

Plot 2
# Creating the dataset that identifies the transaction counts based on categories and timestamp
time_data_pattern <- transactions %>%
inner_join(products, by = "product_id") %>%
mutate(hour = hour(transaction_timestamp)) %>%
filter(hour >= 6 & hour <= 23) %>%
group_by(hour, product_category) %>%
summarize(transaction_count = n()) %>%
group_by(product_category) %>%
mutate(total_cat_vol = sum(transaction_count)) %>%
ungroup() %>%
slice_max(order_by = total_cat_vol, n = 1000) %>%
filter(product_category %in% c("SOFT DRINKS", "BEEF", "CHEESE", "MILK BY-PRODUCTS", "FROZEN PIZZA"))
# Plotting Time Series Data
ggplot(time_data_pattern, aes(x = hour, y = transaction_count, color = product_category)) +
geom_line(linewidth = 1, alpha = 0.6) +
geom_smooth(aes(), method = "loess", se = FALSE, span = 0.4) +
geom_point(size = 2) +
scale_color_viridis_d(option = "turbo") +
scale_x_continuous(breaks = seq(6, 23, by = 1)) +
labs(
title = "The Circadian Rhythm of the Grocery Store (6 am - 11 pm)",
subtitle = "Hourly volume reveals distinct morning vs. evening purchasing patterns",
x = "Hour of Day (24h Clock)",
y = "Total Transactions",
color = "Product Category"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
panel.grid.minor = element_blank(),
legend.position = "bottom"
)

Plot 3
# Creating the dataset to calculate savings rate
savings_data <- transactions %>%
inner_join(products, by = "product_id") %>%
inner_join(demographics, by = "household_id") %>%
filter(product_category %in% c("SOFT DRINKS", "BEEF", "CHEESE", "FROZEN PIZZA", "MILK BY-PRODUCTS")) %>%
group_by(income, product_category) %>%
summarize(
total_retail = sum(sales_value + retail_disc + coupon_disc, na.rm = TRUE),
total_disc = sum(retail_disc + coupon_disc, na.rm = TRUE),
savings_rate = (total_disc / total_retail)
)
# Plotting Heatmap visualization
ggplot(savings_data, aes(x = income, y = product_category, fill = savings_rate)) +
geom_tile(color = "white") +
scale_fill_distiller(palette = "Spectral", labels = scales::percent) +
labs(
title = "Strategic Savings: Coupon Sensitivity Across Income Brackets",
subtitle = "Soft Drinks and Cheese emerge as the primary drivers of household discount savings almost across all income brackets.",
x = "Household Income Bracket",
y = "Product Category",
fill = "Avg. Savings %",
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16),
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank()
)
