# Aggregate total quantity by age group.
# The inner join keeps only transactions whose household_id also appears in
# demographics, so households without a demographic record are excluded.
age_quantity <- transactions_sample %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age) %>%
  # na.rm = TRUE so a single missing quantity does not turn a whole age
  # group's total into NA (same convention as the sales_value aggregation
  # used for the top-products chart).
  summarise(total_quantity = sum(quantity, na.rm = TRUE), .groups = "drop")

# Plot: one bar per age group. `fill` repeats the x mapping purely for colour,
# so the legend would be redundant and is suppressed.
ggplot(
  age_quantity,
  aes(x = factor(age), y = total_quantity, fill = factor(age))
) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Total Quantity Purchased by Age Group",
    subtitle = "Shows how different age groups contribute to purchases",
    x = "Age Group",
    y = "Total Quantity",
    caption = "Source: transactions_sample & demographics"
  ) +
  theme_minimal(base_size = 12)
Younger and middle-aged groups appear to purchase higher quantities overall compared to older groups.
library(dplyr)
library(ggplot2)
# Aggregate total sales_value by product and keep the 7 highest-selling ones.
top_products <- transactions_sample %>%
  group_by(product_id) %>% # or "product_category" if that column exists
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_sales)) %>%
  slice_head(n = 7) # Top 7 products

# Plot horizontal bar chart.
# reorder() sorts the bars by total_sales so the largest ends up at the top
# after coord_flip().
ggplot(
  top_products,
  aes(
    x = reorder(product_id, total_sales),
    y = total_sales,
    fill = total_sales
  )
) +
  geom_col(show.legend = FALSE) +
  # hjust = -0.2 places each value label just past the end of its bar.
  geom_text(aes(label = round(total_sales, 0)), hjust = -0.2, size = 4) +
  coord_flip() +
  # Add headroom beyond the longest bar; with the default expansion the
  # label of the top product is cut off at the panel edge.
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_gradient(low = "skyblue", high = "darkblue") +
  labs(
    title = "Top 7 Products by Total Sales",
    x = "Product",
    y = "Total Sales ($)"
  ) +
  theme_minimal(base_size = 14)
A few products account for a disproportionate share of total sales, indicating strong consumer preference for those specific items.
# Box plot of sales_value, one box per household_size.
# Transactions are matched to demographics on household_id; rows without a
# match on either side are dropped by the inner join before plotting.
ggplot(
  data = inner_join(transactions_sample, demographics, by = "household_id"),
  mapping = aes(
    x = factor(household_size),
    y = sales_value,
    fill = factor(household_size)
  )
) +
  # fill only mirrors the x axis, so no legend is drawn
  geom_boxplot(show.legend = FALSE) +
  theme_minimal(base_size = 12) +
  labs(
    x = "Household Size",
    y = "Sales Value ($)",
    title = "Distribution of Sales Value by Household Size",
    subtitle = "Shows how sales per transaction vary across different household sizes",
    caption = "Source: transactions_sample & demographics"
  )
This box plot compares the distribution of sales value per transaction across household sizes; the median line and spread of each box show whether larger households tend to spend more per transaction.