library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(RColorBrewer)
library(viridisLite)
PLOT 1
# Derive the weekday label (ordered factor, Sun..Sat) before inspecting it.
# Summarising `day_of_week` before this step only produces an
# "Unknown or uninitialised column" warning and a NULL summary.
transactions_sample <- transactions_sample %>%
  mutate(day_of_week = wday(transaction_timestamp, label = TRUE))

# Quick look at the three variables used in the first plot.
summary(transactions_sample$transaction_timestamp)
## Min. 1st Qu.
## "2017-01-01 07:30:27.0000" "2017-04-01 13:23:49.0000"
## Median Mean
## "2017-07-02 11:43:04.0000" "2017-07-02 09:39:57.5612"
## 3rd Qu. Max.
## "2017-10-02 16:28:50.2500" "2017-12-31 22:47:38.0000"
summary(transactions_sample$day_of_week)
summary(demographics$income)
## Under 15K 15-24K 25-34K 35-49K 50-74K 75-99K 100-124K 125-149K
## 61 74 77 172 192 96 34 38
## 150-174K 175-199K 200-249K 250K+
## 30 11 5 11
# Total sales value for every weekday x income-bracket combination.
# The left join keeps every transaction; any household that is missing from
# `demographics` ends up with an NA income bracket.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs are not
# silently applied per weekday and summarise() emits no regrouping message.
weekly_sales <- transactions_sample %>%
  left_join(demographics, by = "household_id") %>%
  group_by(day_of_week, income) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )
head(weekly_sales)
## # A tibble: 6 × 3
## day_of_week income total_sales_value
## <ord> <ord> <dbl>
## 1 Sun Under 15K 1569.
## 2 Sun 15-24K 1639.
## 3 Sun 25-34K 2004.
## 4 Sun 35-49K 4295.
## 5 Sun 50-74K 6151.
## 6 Sun 75-99K 3340.
# Tallest bar in the chart; used below as the upper y-axis limit.
max_sales_value <- max(weekly_sales$total_sales_value, na.rm = TRUE)

# Dodged bar chart: one cluster per weekday, one bar per income bracket.
ggplot(
  weekly_sales,
  aes(x = day_of_week, y = total_sales_value, fill = income)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Weekly Sales Trend by Household Income Levels",
    subtitle = "Total sales value by day of the week segmented by income",
    x = "Day of the Week",
    y = "Total Sales Value",
    fill = "Income Level"
  ) +
  # A fixed upper limit of 8000 made ggplot2 drop every bar above it (7 rows
  # were removed with a warning); bounding by the data keeps all bars visible.
  scale_y_continuous(limits = c(0, max_sales_value)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

PLOT 2
# Attach household demographics to every coupon redemption record.
coupon_income_data <- left_join(
  coupon_redemptions,
  demographics,
  by = "household_id"
)

# Number of redemptions recorded for each income bracket.
coupon_redemption_rate <- coupon_income_data %>%
  count(income, name = "total_redemptions")

# One bar per income bracket, with height equal to the redemption count.
ggplot(
  coupon_redemption_rate,
  aes(x = income, y = total_redemptions, fill = income)
) +
  geom_col() +
  labs(
    title = "Coupon Redemption Rate vs Household Income",
    subtitle = "Mostly Around 50 - 74K",
    x = "Household Income",
    y = "Total Coupon Redemptions"
  ) +
  theme_minimal()

PLOT 3
# Attach product and household attributes to each sampled transaction.
data_merged <- transactions_sample %>%
  left_join(products, by = "product_id") %>%
  left_join(demographics, by = "household_id")

# Sales value per product category and income bracket.
category_sales <- data_merged %>%
  group_by(product_category, income) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# The ten categories with the highest sales summed over all income brackets.
# slice_max() replaces the superseded top_n(); both keep ties by default.
top_categories <- category_sales %>%
  group_by(product_category) %>%
  summarise(total_sales_value = sum(total_sales_value, na.rm = TRUE)) %>%
  slice_max(total_sales_value, n = 10) %>%
  pull(product_category)

filtered_data <- category_sales %>%
  filter(product_category %in% top_categories)

# Dodged bar chart: one cluster per category, one bar per income bracket.
ggplot(
  filtered_data,
  aes(
    # Order categories by their summed sales (reorder() defaults to the mean),
    # which is the same measure that selected the top ten above.
    x = reorder(product_category, -total_sales_value, FUN = sum),
    y = total_sales_value,
    fill = income
  )
) +
  geom_col(position = position_dodge(width = 1)) +
  labs(
    title = "Top 10 Product Categories vs Household Income Levels",
    subtitle = "Total sales value for top product categories across income levels",
    x = "Product Category",
    y = "Total Sales Value",
    fill = "Income Level"
  ) +
  # A fixed upper limit of 3000 made ggplot2 drop every bar above it (3 rows
  # were removed with a warning); NA lets the upper limit follow the data.
  scale_y_continuous(limits = c(0, NA)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_brewer(palette = "Set3")
