Which Age Groups Buy the Most?

# Total quantity purchased per age group.
# The inner join keeps only transactions whose household_id also appears in
# demographics, so households without a demographics row are excluded.
age_quantity <- transactions_sample %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age) %>%
  # na.rm = TRUE so a single missing quantity does not turn a whole age
  # group's total into NA (matches the sales_value aggregation further down)
  summarise(total_quantity = sum(quantity, na.rm = TRUE), .groups = "drop")

# Bar chart: one bar per age group, bar height = total quantity.
# Bars are filled by age group; the legend is suppressed on the bar layer.
age_quantity %>%
  ggplot(
    aes(
      x = factor(age),
      y = total_quantity,
      fill = factor(age)
    )
  ) +
  theme_minimal(base_size = 12) +
  geom_col(show.legend = FALSE) +
  labs(
    x = "Age Group",
    y = "Total Quantity",
    title = "Total Quantity Purchased by Age Group",
    subtitle = "Shows how different age groups contribute to purchases",
    caption = "Source: transactions_sample & demographics"
  )

Younger and middle-aged groups appear to purchase higher quantities overall compared to older groups.

Top 7 Best-Selling Products by Total Sales Value

library(dplyr)
library(ggplot2)

# Sum sales_value per product (missing values ignored), then keep the seven
# products with the largest totals, ordered from highest to lowest.
# Group by "product_category" instead if that column exists.
top_products <- transactions_sample %>%
  group_by(product_id) %>%
  summarise(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_sales)) %>%
  slice_head(n = 7)

# Horizontal bar chart of the top products, largest total at the top.
# reorder() sorts the product axis by total_sales; coord_flip() turns the
# vertical bars into horizontal ones.
ggplot(top_products, aes(x = reorder(product_id, total_sales),
                         y = total_sales, fill = total_sales)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(title = "Top 7 Products by Total Sales",
       x = "Product",
       y = "Total Sales ($)") +
  theme_minimal(base_size = 14) +
  scale_fill_gradient(low = "skyblue", high = "darkblue") +
  # Value labels sit just past the end of each bar (negative hjust).
  geom_text(aes(label = round(total_sales, 0)),
            hjust = -0.2, size = 4) +
  # Widen the upper end of the value axis so the label on the longest bar is
  # not clipped at the panel edge; the lower end keeps the default 5% padding.
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15)))

A few products dominate overall sales, highlighting strong consumer preference for specific products.

How Does Household Size Influence Sales Value?

# Box plot of per-transaction sales_value, one box per household size.
# Only transactions whose household_id is present in demographics are plotted
# (inner join). Boxes are filled by household size; the legend is suppressed.
ggplot(
  data = inner_join(transactions_sample, demographics, by = "household_id"),
  mapping = aes(
    x = factor(household_size),
    y = sales_value,
    fill = factor(household_size)
  )
) +
  theme_minimal(base_size = 12) +
  geom_boxplot(show.legend = FALSE) +
  labs(
    x = "Household Size",
    y = "Sales Value ($)",
    title = "Distribution of Sales Value by Household Size",
    subtitle = "Shows how sales per transaction vary across different household sizes",
    caption = "Source: transactions_sample & demographics"
  )

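The box plot compares the distribution of per-transaction sales value across household sizes; differences in the medians and spreads of the boxes show how spending per transaction varies with household size.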