library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(lubridate)
ls("package:completejourney")
##  [1] "%<-%"                  "%>%"                   "campaign_descriptions"
##  [4] "campaigns"             "coupon_redemptions"    "coupons"              
##  [7] "demographics"          "get_data"              "get_promotions"       
## [10] "get_transactions"      "products"              "promotions_sample"    
## [13] "transactions_sample"
# Lookup tables bundled with completejourney, loaded into the workspace.
data(list = c("demographics", "products"))

# Transaction and promotion tables from the package's getter helpers.
# NOTE(review): presumably the full (non-sample) versions, since the package
# also exports `transactions_sample` / `promotions_sample` -- confirm.
promotions <- get_promotions()
transactions <- get_transactions()

Plot 1: Household Income Impact on Spending

# Total spending per household, carried alongside the household's income
# level. The inner join keeps only transactions whose household also has a
# row in `demographics`.
income_spending <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(household_id, income) %>%
  summarise(
    total_spent = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# One box per income level. The fill colour only mirrors the x axis, so the
# legend is suppressed.
ggplot(
  data = income_spending,
  mapping = aes(x = income, y = total_spent, fill = income)
) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal() +
  labs(
    title = "Does Household Income Impact Spending?",
    subtitle = "Comparing Total Spending per Household Across Income Levels",
    x = "Household Income Level",
    y = "Total Spending ($)",
    caption = "Data: completejourney"
  )

Plot 2: Discount Impact on Product Sales

# Relate each product's average discount per transaction row to its total
# sales, with a fitted linear trend.
transactions %>%
  # A missing discount component counts as zero, so the sum is never NA.
  mutate(
    total_discount =
      coalesce(retail_disc, 0) +
      coalesce(coupon_disc, 0) +
      coalesce(coupon_match_disc, 0)
  ) %>%
  group_by(product_id) %>%
  summarise(
    avg_discount = mean(total_discount, na.rm = TRUE),
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Keep only products strictly below the 99th percentile of total sales so
  # the largest sellers do not dominate the y axis.
  filter(total_sales < quantile(total_sales, probs = 0.99)) %>%
  ggplot(mapping = aes(x = avg_discount, y = total_sales)) +
  geom_point(colour = "blue", alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red") +
  theme_minimal() +
  labs(
    title = "Do Discounts Boost Sales?",
    subtitle = "Comparing Total Sales with Average Discounts Applied",
    x = "Average Discount ($)",
    y = "Total Sales ($)",
    caption = "Data: completejourney"
  )
## `geom_smooth()` using formula = 'y ~ x'