library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(lubridate)
# List the objects the attached completejourney package makes available.
ls("package:completejourney")
## [1] "%<-%" "%>%" "campaign_descriptions"
## [4] "campaigns" "coupon_redemptions" "coupons"
## [7] "demographics" "get_data" "get_promotions"
## [10] "get_transactions" "products" "promotions_sample"
## [13] "transactions_sample"
# Load the inputs used below: demographics and products are loaded with
# data(); transactions and promotions are fetched through the package's
# get_*() helpers.
data("demographics", "products")
promotions <- get_promotions()
transactions <- get_transactions()
Plot 1: Household Income Impact on Spending
# Total spend per household, tagged with that household's income level.
# The inner join keeps only households that appear in `demographics`.
income_spending <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(household_id, income) %>%
  summarise(
    total_spent = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# One box per income level showing the spread of household totals; the fill
# legend is hidden because it would repeat the x axis.
ggplot(income_spending, aes(x = income, y = total_spent, fill = income)) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal() +
  labs(
    title = "Does Household Income Impact Spending?",
    subtitle = "Comparing Total Spending per Household Across Income Levels",
    x = "Household Income Level",
    y = "Total Spending ($)",
    caption = "Data: completejourney"
  )

Plot 2: Discount Impact on Product Sales
# Per-product discount vs. revenue. Each transaction row's three discount
# columns are summed (a missing discount counts as 0); products are then
# summarised by their mean discount and their total sales.
transactions %>%
  mutate(
    total_discount = coalesce(retail_disc, 0) +
      coalesce(coupon_disc, 0) +
      coalesce(coupon_match_disc, 0)
  ) %>%
  group_by(product_id) %>%
  summarise(
    avg_discount = mean(total_discount, na.rm = TRUE),
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Keep only products whose total sales fall below the 99th percentile.
  filter(total_sales < quantile(total_sales, 0.99)) %>%
  ggplot(aes(x = avg_discount, y = total_sales)) +
  geom_point(alpha = 0.5, color = "blue") +
  # Linear fit of total sales on average discount.
  geom_smooth(method = "lm", color = "red") +
  theme_minimal() +
  labs(
    title = "Do Discounts Boost Sales?",
    subtitle = "Comparing Total Sales with Average Discounts Applied",
    x = "Average Discount ($)",
    y = "Total Sales ($)",
    caption = "Data: completejourney"
  )
## `geom_smooth()` using formula = 'y ~ x'

Plot 3: Seasonal Spending Trends
# Total sales for each calendar month, drawn as a line with point markers.
# NOTE(review): months are pooled by label only, so data spanning more than
# one year would merge the same month across years -- confirm the date range.
transactions %>%
  mutate(month = lubridate::month(transaction_timestamp, label = TRUE)) %>%
  group_by(month) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  # group = 1 puts every month in a single group so geom_line() connects
  # them; with a discrete x each month would otherwise be its own group.
  ggplot(aes(x = month, y = total_sales, group = 1)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raises a lifecycle warning.
  geom_line(color = "darkgreen", linewidth = 1) +
  geom_point(color = "black", size = 3) +
  labs(
    title = "Seasonal Spending Trends",
    subtitle = "Tracking total sales per month",
    x = "Month",
    y = "Total Sales ($)",
    caption = "Data: CompleteJourney"
  ) +
  theme_minimal()
