Graph 1 - Top 10 Product Categories Purchased with Coupons by Number
of Kids
library(ggplot2)
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# List the objects made available by the attached completejourney package.
ls("package:completejourney")
## [1] "%<-%" "%>%" "campaign_descriptions"
## [4] "campaigns" "coupon_redemptions" "coupons"
## [7] "demographics" "get_data" "get_promotions"
## [10] "get_transactions" "products" "promotions_sample"
## [13] "transactions_sample"
# Load the transaction data through the package's get_transactions() helper.
transactions <- completejourney::get_transactions()

# Keep only purchases where a coupon discount was applied, then attach the
# product attributes and each household's kids_count.
coupon_purchases <- transactions %>%
  filter(coupon_disc > 0) %>%
  left_join(products, by = "product_id") %>%
  left_join(
    demographics %>% select(household_id, kids_count),
    by = "household_id"
  )

# Count coupon purchases per (product_category, kids_count), keeping rows whose
# kids_count is above 0 (filter() also drops rows where kids_count is missing).
# Note the plural `.groups`: a misspelled `.group = "drop"` is treated as a new
# summary column and leaves the result grouped by product_category.
coupon_summary_ <- coupon_purchases %>%
  filter(kids_count > 0) %>%
  group_by(product_category, kids_count) %>%
  summarize(purchase_count = n(), .groups = "drop")
# Rank categories by their total coupon purchases and keep the ten largest.
# Missing categories are removed *before* ranking so an NA bucket cannot take
# one of the ten slots and leave fewer than ten categories to plot.
top_10_categories <- coupon_summary_ %>%
  filter(!is.na(product_category)) %>%
  group_by(product_category) %>%
  summarize(total_purchase_count = sum(purchase_count), .groups = "drop") %>%
  arrange(desc(total_purchase_count)) %>%
  slice_head(n = 10)

# Restrict the per-kids_count summary to those ten categories.
top_10_data <- coupon_summary_ %>%
  filter(
    !is.na(product_category),
    product_category %in% top_10_categories$product_category
  )
# Stacked bar chart of coupon purchases per product category, with one fill
# colour per kids_count value.
ggplot(
  top_10_data,
  aes(
    # Order categories by their summed purchase_count so the axis order
    # follows the full height of each stacked bar (reorder() would otherwise
    # use the mean of the individual segments).
    x = reorder(product_category, purchase_count, FUN = sum),
    y = purchase_count,
    fill = as.factor(kids_count)
  )
) +
  # geom_col() plots the y values as given, i.e. geom_bar(stat = "identity").
  geom_col(show.legend = TRUE) +
  labs(
    title = "Top 10 Product Categories Purchased with Coupons by Number of Kids",
    x = "Products",
    y = "Total Number of Purchases",
    fill = "Kids Count"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(
    # Tilt the category labels so long names do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Centre the plot title.
    plot.title = element_text(hjust = 0.5)
  )

Graph 2 - Top Daily Purchases for Unmarried Homeowners for February
2017
# List the objects made available by the attached completejourney package.
ls("package:completejourney")
## [1] "%<-%" "%>%" "campaign_descriptions"
## [4] "campaigns" "coupon_redemptions" "coupons"
## [7] "demographics" "get_data" "get_promotions"
## [10] "get_transactions" "products" "promotions_sample"
## [13] "transactions_sample"
# For each day of February 2017, find the product category (or categories, on
# ties) bought most often by households that are both "Unmarried" and
# "Homeowner".
feb_data <- transactions %>%
  # Restrict to February first so the joins below only touch that month's rows.
  filter(
    as.Date(transaction_timestamp) >= as.Date("2017-02-01"),
    as.Date(transaction_timestamp) <= as.Date("2017-02-28")
  ) %>%
  left_join(demographics, by = "household_id") %>%
  # filter() drops rows where a condition is NA, so households with missing
  # marital_status or home_ownership are excluded without a separate NA check.
  filter(marital_status == "Unmarried", home_ownership == "Homeowner") %>%
  left_join(products, by = "product_id") %>%
  group_by(
    day = as.Date(transaction_timestamp),
    product_category,
    marital_status,
    home_ownership
  ) %>%
  summarize(purchase_count = n(), .groups = "drop") %>%
  arrange(day, desc(purchase_count)) %>%
  group_by(day) %>%
  # n = 1 keeps the highest count per day; tied categories are all kept.
  slice_max(order_by = purchase_count, n = 1) %>%
  # Return a plain, ungrouped tibble so later steps are not silently per-day.
  ungroup()
# Line-and-point chart of the top daily purchase counts, one colour per
# product category.
ggplot(
  feb_data,
  aes(
    x = day,
    y = purchase_count,
    group = product_category,
    color = product_category
  )
) +
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # since ggplot2 3.4.0.
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  # One axis break per day of the month, labelled with the day number.
  scale_x_date(
    breaks = seq(as.Date("2017-02-01"), as.Date("2017-02-28"), by = "1 day"),
    date_labels = "%d"
  ) +
  labs(
    title = "Top Daily Purchases for Unmarried Homeowners (Feb 2017)",
    x = "Day of February",
    y = "Number of Purchases",
    color = "Product Category"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Graph 3 - Total Soft Drinks Purchased by Income Level
# Total quantity of "SOFT DRINKS" purchased per income level, for transactions
# whose household has a recorded income.
top_purchases_income <- transactions %>%
  left_join(products, by = "product_id") %>%
  # Narrow to soft drinks before joining demographics to keep that join small.
  filter(product_category == "SOFT DRINKS") %>%
  left_join(demographics, by = "household_id") %>%
  filter(!is.na(income)) %>%
  group_by(income, product_category) %>%
  # `.groups = "drop"` returns an ungrouped result instead of one that is still
  # grouped by income.
  summarize(total_soft_drinks = sum(quantity), .groups = "drop")
# Line-and-point chart of total soft-drink quantity across income levels.
# `group = 1` joins all income levels into a single line.
ggplot(
  top_purchases_income,
  aes(x = income, y = total_soft_drinks, group = 1)
) +
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # since ggplot2 3.4.0.
  geom_line(linewidth = 1, color = "pink") +
  geom_point() +
  labs(
    title = "Total Soft Drinks Purchased by Income Level (2017)",
    x = "Income Level",
    y = "Total Soft Drinks Purchased"
  ) +
  # Y-axis ticks every 1000 units from 0 up to the largest total.
  scale_y_continuous(
    breaks = seq(0, max(top_purchases_income$total_soft_drinks), by = 1000)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
