Graph 1 - Top 10 Product Categories Purchased with Coupons by Number of Kids

library(ggplot2)
library(completejourney)

## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# List the objects exported by completejourney to see which data sets and
# helper functions are available.
ls("package:completejourney")

##  [1] "%<-%"                  "%>%"                   "campaign_descriptions"
##  [4] "campaigns"             "coupon_redemptions"    "coupons"              
##  [7] "demographics"          "get_data"              "get_promotions"       
## [10] "get_transactions"      "products"              "promotions_sample"    
## [13] "transactions_sample"

# Load the transactions table through the package's get_transactions() helper.
transactions <- completejourney::get_transactions()

# Keep only line items with a coupon discount, then attach product attributes
# and each household's kids_count.
coupon_purchases <- transactions %>%
  filter(coupon_disc > 0) %>%
  left_join(products, by = "product_id") %>%
  left_join(
    demographics %>% select(household_id, kids_count),
    by = "household_id"
  )

# Number of coupon purchases per (product_category, kids_count).
# filter() also drops rows whose kids_count is NA (e.g. households with no
# demographics match after the left join).
# `.groups = "drop"` returns an ungrouped tibble; the previous `.group`
# spelling was not recognised as that argument, so it created a literal
# `.group` column and left the result grouped by product_category.
coupon_summary_ <- coupon_purchases %>%
  filter(kids_count > 0) %>%
  group_by(product_category, kids_count) %>%
  summarize(purchase_count = n(), .groups = "drop")

## `summarise()` has grouped output by 'product_category'. You can override using
## the `.groups` argument.

# Rank categories by total coupon purchases across all kids_count values and
# keep the ten largest.
# Rows with a missing product_category are removed BEFORE ranking; filtering
# them afterwards could leave fewer than ten categories if NA ranked in the
# top ten.
top_10_categories <- coupon_summary_ %>%
  filter(!is.na(product_category)) %>%
  group_by(product_category) %>%
  summarize(total_purchase_count = sum(purchase_count), .groups = "drop") %>%
  arrange(desc(total_purchase_count)) %>%
  slice_head(n = 10)

# Per-kids_count rows for those ten categories only. `%in%` is never TRUE for
# NA here because NA was excluded from top_10_categories above.
# ungroup() guards against any grouping still attached to coupon_summary_.
top_10_data <- coupon_summary_ %>%
  ungroup() %>%
  filter(product_category %in% top_10_categories$product_category)


# Stacked bar chart of coupon purchases for the top 10 categories, with one
# segment per kids_count value.
# reorder() defaults to FUN = mean, which would order categories by their
# average per-segment count; FUN = sum orders them by total (stacked) height.
ggplot(
  top_10_data,
  aes(
    x = reorder(product_category, purchase_count, FUN = sum),
    y = purchase_count,
    fill = as.factor(kids_count)
  )
) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(show.legend = TRUE) +
  labs(
    title = "Top 10 Product Categories Purchased with Coupons by Number of Kids",
    x = "Products",
    y = "Total Number of Purchases",
    fill = "Kids Count"
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal() +
  theme(
    # Rotate category labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Centre the title.
    plot.title = element_text(hjust = 0.5)
  )

Graph 2 - Top Daily Purchases for Unmarried Homeowners for February 2017

# List the objects exported by completejourney again (same call as earlier in
# this document) as a reminder of the available tables.
ls("package:completejourney")

##  [1] "%<-%"                  "%>%"                   "campaign_descriptions"
##  [4] "campaigns"             "coupon_redemptions"    "coupons"              
##  [7] "demographics"          "get_data"              "get_promotions"       
## [10] "get_transactions"      "products"              "promotions_sample"    
## [13] "transactions_sample"

# For each day of February 2017, find the product category (or categories, on
# ties) bought most often by unmarried homeowner households.
feb_data <- transactions %>%
  # Derive the calendar day once and restrict to February before joining, so
  # the joins only touch the rows that are needed.
  mutate(day = as.Date(transaction_timestamp)) %>%
  filter(day >= as.Date("2017-02-01"), day <= as.Date("2017-02-28")) %>%
  left_join(demographics, by = "household_id") %>%
  # filter() already drops rows where a condition evaluates to NA, so no
  # separate is.na() guard is required for households without demographics.
  filter(marital_status == "Unmarried", home_ownership == "Homeowner") %>%
  left_join(products, by = "product_id") %>%
  group_by(day, product_category, marital_status, home_ownership) %>%
  summarize(purchase_count = n(), .groups = "drop") %>%
  # Keep the top category per day; slice_max() keeps ties by default, so a
  # day can contribute more than one row.
  group_by(day) %>%
  slice_max(order_by = purchase_count, n = 1) %>%
  # Return an ungrouped tibble so later steps are not silently grouped by day.
  ungroup() %>%
  arrange(day, desc(purchase_count))

# Line/point chart of the daily top-category purchase counts, coloured by
# product category. Points belonging to the same category are connected.
ggplot(
  feb_data,
  aes(
    x = day,
    y = purchase_count,
    group = product_category,
    color = product_category
  )
) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  # One tick per day, labelled with the day of the month.
  scale_x_date(
    breaks = seq(as.Date("2017-02-01"), as.Date("2017-02-28"), by = "1 day"),
    date_labels = "%d"
  ) +
  labs(
    title = "Top Daily Purchases for Unmarried Homeowners (Feb 2017)",
    x = "Day of February",
    y = "Number of Purchases",
    color = "Product Category"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Graph 3 - Total Soft Drinks Purchased by Income Level

# Total soft-drink quantity purchased per household income level.
# Rows whose income is NA (e.g. no demographics match after the left join)
# are excluded.
top_purchases_income <- transactions %>%
  left_join(demographics, by = "household_id") %>%
  left_join(products, by = "product_id") %>%
  filter(product_category == "SOFT DRINKS", !is.na(income)) %>%
  group_by(income, product_category) %>%
  # `.groups = "drop"` returns an ungrouped tibble instead of leaving the
  # result grouped by income (and silences the summarise() grouping message).
  summarize(total_soft_drinks = sum(quantity), .groups = "drop")

## `summarise()` has grouped output by 'income'. You can override using the
## `.groups` argument.

# Line chart of total soft-drink quantity across income levels.
# `group = 1` joins all income levels into a single line.
ggplot(
  top_purchases_income,
  aes(x = income, y = total_soft_drinks, group = 1)
) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1, color = "pink") +
  geom_point() +
  labs(
    title = "Total Soft Drinks Purchased by Income Level (2017)",
    x = "Income Level",
    y = "Total Soft Drinks Purchased"
  ) +
  # Y-axis ticks every 1000 units from 0 up to the largest total.
  scale_y_continuous(
    breaks = seq(0, max(top_purchases_income$total_soft_drinks), by = 1000)
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Graphs

Sydney Bodenstein

2025-02-16

Graph 1 - Top 10 Product Categories Purchased with Coupons by Number of Kids

Graph 2 - Top Daily Purchases for Unmarried Homeowners for February 2017

Graph 3 - Total Soft Drinks Purchased by Income Level