Load Packages and Data
library(completejourney)
library(tidyverse)
data("transactions_sample")
data("products")
data("demographics")
data("campaigns")
transactions <- transactions_sample
products <- products
demographics <- demographics
campaigns <- campaigns
head(transactions)
## # A tibble: 6 × 11
## household_id store_id basket_id product_id quantity sales_value retail_disc
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2261 309 31625220889 940996 1 3.86 0.43
## 2 2131 368 32053127496 873902 1 1.59 0.9
## 3 511 316 32445856036 847901 1 1 0.69
## 4 400 388 31932241118 13094913 2 11.9 2.9
## 5 918 340 32074655895 1085604 1 1.29 0
## 6 718 324 32614612029 883203 1 2.5 0.49
## # ℹ 4 more variables: coupon_disc <dbl>, coupon_match_disc <dbl>, week <int>,
## # transaction_timestamp <dttm>
head(products)
## # A tibble: 6 × 7
## product_id manufacturer_id department brand product_category product_type
## <chr> <chr> <chr> <fct> <chr> <chr>
## 1 25671 2 GROCERY Nation… FRZN ICE ICE - CRUSH…
## 2 26081 2 MISCELLANEOUS Nation… <NA> <NA>
## 3 26093 69 PASTRY Private BREAD BREAD:ITALI…
## 4 26190 69 GROCERY Private FRUIT - SHELF S… APPLE SAUCE
## 5 26355 69 GROCERY Private COOKIES/CONES SPECIALTY C…
## 6 26426 69 GROCERY Private SPICES & EXTRAC… SPICES & SE…
## # ℹ 1 more variable: package_size <chr>
head(demographics)
## # A tibble: 6 × 8
## household_id age income home_ownership marital_status household_size
## <chr> <ord> <ord> <ord> <ord> <ord>
## 1 1 65+ 35-49K Homeowner Married 2
## 2 1001 45-54 50-74K Homeowner Unmarried 1
## 3 1003 35-44 25-34K <NA> Unmarried 1
## 4 1004 25-34 15-24K <NA> Unmarried 1
## 5 101 45-54 Under 15K Homeowner Married 4
## 6 1012 35-44 35-49K <NA> Married 5+
## # ℹ 2 more variables: household_comp <ord>, kids_count <ord>
head(campaigns)
## # A tibble: 6 × 2
## campaign_id household_id
## <chr> <chr>
## 1 1 105
## 2 1 1238
## 3 1 1258
## 4 1 1483
## 5 1 2200
## 6 1 293
Plot 1: Top Product Categories by Sales in Each Income Bracket
transactions_joined <- transactions %>%
left_join(products, by = "product_id") %>%
left_join(demographics, by = "household_id") %>%
filter(!is.na(income) & !is.na(product_category))
top_categories <- transactions_joined %>%
group_by(income, product_category) %>%
summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%
group_by(income) %>%
slice_max(total_sales, n = 3, with_ties = FALSE) %>%
ungroup() %>%
mutate(product_category = fct_reorder(product_category, total_sales))
ggplot(top_categories, aes(x = total_sales, y = product_category)) +
geom_col(fill = "steelblue", width = 0.8) +
facet_wrap(~income, scales = "free_y", ncol = 3) +
labs(
title = "Top-Selling Product Categories by Income Bracket",
subtitle = "The three product categories with the highest total sales in each income group.",
x = "Total Sales Value ($)",
y = NULL,
caption = "Data: CompleteJourney"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 10, face = "bold"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10),
panel.spacing = unit(1.5, "lines")
)

Plot 2: Average Sales per Household Member by Household Size
transactions_demo <- transactions %>%
left_join(demographics, by = "household_id") %>%
mutate(
household_size = factor(household_size, levels = c("1", "2", "3", "4", "5+")),
household_size_numeric = case_when(
household_size == "5+" ~ 5,
TRUE ~ as.numeric(as.character(household_size))
)
)
household_sales <- transactions_demo %>%
group_by(household_id, household_size, household_size_numeric) %>%
summarise(total_sales = sum(sales_value)) %>%
mutate(sales_per_member = total_sales / household_size_numeric)
sales_summary <- household_sales %>%
group_by(household_size) %>%
summarise(avg_sales_per_member = mean(sales_per_member))
ggplot(sales_summary, aes(x = household_size, y = avg_sales_per_member)) +
geom_col(fill = "darkorange", alpha = 0.8) +
labs(
title = "Average Sales per Household Member",
subtitle = "Total household sales divided by the number of members in each household size group.",
x = "Household Size",
y = "Average Sales per Member ($)",
caption = "Data: CompleteJourney"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10),
axis.text.x = element_text(angle = 45, hjust = 1)
)

Plot 3: Campaign Participation Impact by Income Bracket
data("campaigns")
campaign_participants <- campaigns %>%
distinct(household_id) %>%
mutate(participated = TRUE)
transactions_campaign <- transactions %>%
left_join(campaign_participants, by = "household_id") %>%
mutate(participated = if_else(is.na(participated), FALSE, participated)) %>%
left_join(demographics, by = "household_id")
household_sales_campaign <- transactions_campaign %>%
group_by(household_id, income, participated) %>%
summarise(total_sales = sum(sales_value)) %>%
ungroup()
avg_sales_campaign <- household_sales_campaign %>%
group_by(income, participated) %>%
summarise(avg_sales = mean(total_sales)) %>%
ungroup()
ggplot(avg_sales_campaign, aes(x = income, y = avg_sales, fill = participated)) +
geom_col(position = "dodge", width = 0.7) +
scale_fill_manual(
values = c("FALSE" = "#999999", "TRUE" = "#009E73"),
labels = c("Non-Participant", "Participant")
) +
labs(
title = "Impact of Campaign Participation on Spending",
subtitle = "Comparison of average total sales between campaign participants and non-participants across income levels",
x = "Income Bracket",
y = "Average Total Sales ($)",
fill = "Campaign Participation",
caption = "Data: CompleteJourney"
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 10)
)
