# Packages & Data

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Load the completejourney data sets used by the plots below into the
# workspace in a single call.
data(list = c(
  "transactions_sample",
  "demographics",
  "coupons",
  "products",
  "campaigns"
))

# Plot 1

# Coupon redemptions by income and age group.
#
# Redemption events live in `coupon_redemptions` (one row per redeemed
# coupon). Joining transactions to `coupons` on product_id is many-to-many:
# every purchase row is repeated once per coupon that lists the product, so
# counting the joined rows measures purchase-by-coupon pairs rather than
# redemptions.
data("coupon_redemptions")

# Attach household demographics to each redemption. `relationship` makes the
# join fail loudly if `demographics` ever holds more than one row per
# household, instead of silently inflating the counts.
coupon_data <- coupon_redemptions %>%
  inner_join(
    demographics,
    by = "household_id",
    relationship = "many-to-one"
  )

# One row per income/age combination; `.groups = "drop"` returns an
# ungrouped tibble and avoids the regrouping message.
coupon_summary <- coupon_data %>%
  group_by(income, age) %>%
  summarise(coupons_redeemed = n(), .groups = "drop")

# Dodged bars: income on the x-axis, one bar per age group.
ggplot(coupon_summary, aes(x = income, y = coupons_redeemed, fill = age)) +
  geom_col(position = "dodge") +
  labs(
    title = "Coupon Redemption by Demographic Group",
    subtitle = "Number of coupons redeemed by income and age group",
    x = "Income Level",
    y = "Coupons Redeemed",
    fill = "Age Group",
    caption = "Data Source: completejourney"
  ) +
  theme_minimal()

# Plot 2

# Sampled transaction rows with the purchasing household's demographics
# attached; rows for households without a demographics record are dropped.
spending_data <- inner_join(
  transactions_sample,
  demographics,
  by = "household_id"
)

# Mean sales_value of the joined rows for every income/age combination,
# returned as an ungrouped tibble.
spending_summary <- summarise(
  group_by(spending_data, income, age),
  avg_spending = mean(sales_value),
  .groups = "drop"
)

# Dodged bars: income on the x-axis, one bar per age group.
ggplot(spending_summary, aes(income, avg_spending, fill = age)) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    x = "Income Level",
    y = "Average Spending",
    fill = "Age Group",
    title = "Average Spending per Transaction by Demographic Group",
    subtitle = "Average spending per transaction for different income levels and age groups",
    caption = "Data Source: completejourney"
  )

# Plot 3

# Top five products by total sales value within each income level.

# Transactions enriched with product and household attributes. No column of
# `products` is used below; the inner join only restricts the rows to
# product_ids that are present in that table.
product_data <- transactions_sample %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id")

# Total sales per income/product, then the five largest per income level.
# `.groups = "drop"` silences the regrouping message and the final
# `ungroup()` keeps the grouping from leaking into later code.
# `bar_id` is unique per (product, income) row so bars can be ordered
# independently inside each facet.
top_products <- product_data %>%
  group_by(income, product_id) %>%
  summarise(total_sales = sum(sales_value), .groups = "drop") %>%
  arrange(income, desc(total_sales)) %>%
  group_by(income) %>%
  slice_head(n = 5) %>%
  ungroup() %>%
  mutate(bar_id = paste(product_id, income, sep = "___"))

# Ordering by `product_id` directly builds one global factor, so a product
# that is a top seller in several income levels is positioned by its mean
# across facets and the bars inside a panel come out unsorted. Ordering the
# per-facet `bar_id` fixes that; the scale strips the "___<income>" suffix so
# the axis still shows the plain product id.
ggplot(
  top_products,
  aes(x = reorder(bar_id, total_sales), y = total_sales, fill = income)
) +
  geom_col() +
  facet_wrap(~ income, scales = "free_x") +
  scale_x_discrete(labels = function(ids) sub("___.*$", "", ids)) +
  labs(
    title = "Top 5 Products by Income Level",
    subtitle = "Total sales value for the top 5 products purchased by each income level",
    x = "Product ID",
    y = "Total Sales Value",
    fill = "Income Level",
    caption = "Data Source: completejourney"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))