# Candy Spending by Age and Marital Status
# Scatter of packaged-candy spending: one point per
# (age, household_id, marital_status) group, age on the x-axis,
# one facet column per marital status.
products %>%
  # Keep products whose category matches "CANDY - PACKAGED",
  # ignoring case.
  filter(
    str_detect(
      product_category,
      regex("CANDY - PACKAGED", ignore_case = TRUE)
    )
  ) %>%
  # Inner joins: rows without a matching transaction or without a
  # matching demographics record are dropped.
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, household_id, marital_status) %>%
  summarise(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(factor(age), total_sales)) +
  geom_point() +
  facet_grid(cols = vars(marital_status)) +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    title = "Which Age Group Spends the Most on Candy?",
    subtitle = "People aged 45-54 spend the most money on Candy",
    x = "Age",
    y = "Total Sales for Candy"
  )

# Household Spending on Retail Discounts
# Bar chart of summed retail discounts by number of kids, one panel per
# income value. Each panel has its own y-axis range (scales = "free_y"),
# so bar heights are not directly comparable across panels.
demographics %>%
  # Inner join: only households that have both a demographics record and
  # at least one sampled transaction contribute.
  inner_join(
    completejourney::transactions_sample,
    by = "household_id"
  ) %>%
  group_by(kids_count, income) %>%
  summarise(
    total_retail_disc = sum(retail_disc, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(
    aes(
      factor(kids_count),
      total_retail_disc,
      fill = factor(kids_count)
    )
  ) +
  geom_col() +
  facet_wrap(vars(income), scales = "free_y") +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    title = "Who Uses the Most Retail Discounts?",
    subtitle = "Households with 2 kids and income ranging from $50-74k use the most retail discounts",
    x = "Number of Kids in Household",
    y = "Total Retail Discount"
  ) +
  # Fill colour duplicates the x-axis, so the legend is suppressed.
  theme_minimal() +
  theme(legend.position = "none")
