library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Source tables ----
# `transactions` comes from completejourney's get_transactions() helper;
# `demographics` and `products` are copied out of the attached completejourney
# package so that every table used below lives in the global environment.
transactions <- get_transactions()
demographics <- completejourney::demographics
products <- completejourney::products
# Plot 1: top product categories by income ----
# Attach household demographics and product attributes to every transaction.
# Inner joins keep only transactions that have a matching row in both
# `demographics` and `products`.
data1 <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id")

# Total sales value per (income, product_category), then the five
# highest-selling categories inside each income level. `.groups = "drop_last"`
# keeps the result grouped by `income` so that slice_max() ranks within each
# income level; slice_max() replaces the superseded top_n() and, like it,
# keeps ties.
top_products_income <- data1 %>%
  group_by(income, product_category) %>%
  summarize(total_sales = sum(sales_value), .groups = "drop_last") %>%
  slice_max(total_sales, n = 5) %>%
  ungroup()

# Dodged bars: one bar per income level within each product category.
ggplot(
  top_products_income,
  aes(x = product_category, y = total_sales, fill = income)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Top 5 Product Preferences by Income Level",
    subtitle = "Comparing the total sales value for the most purchased products across income groups",
    x = "Product Category",
    y = "Total Sales Value"
  ) +
  theme_minimal() +
  # Rotate the category labels instead of shrinking them to an unreadable size.
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

# Plot 2: monthly spending by household size ----
# Label every transaction with its calendar month and attach household
# demographics (inner join: households without a demographics row are dropped).
# NOTE(review): month() discards the year, so transactions from different
# years would be pooled into the same month -- confirm the data covers a
# single year.
data2 <- transactions %>%
  mutate(month = lubridate::month(transaction_timestamp, label = TRUE)) %>%
  inner_join(demographics, by = "household_id")

# Total sales value per (month, household_size). The result is returned
# ungrouped so later code does not inherit a leftover `month` grouping.
monthly_spending <- data2 %>%
  group_by(month, household_size) %>%
  summarize(total_spending = sum(sales_value), .groups = "drop")

# One line per household size across the months.
ggplot(
  monthly_spending,
  aes(
    x = month,
    y = total_spending,
    color = household_size,
    group = household_size
  )
) +
  # `linewidth` replaces the `size` aesthetic for lines (deprecated in
  # ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  labs(
    title = "Monthly Spending Patterns by Household Size",
    subtitle = "Tracking total spending across different household sizes over the year",
    x = "Month",
    y = "Total Spending"
  ) +
  theme_minimal()

# Plot 3: top product categories by household size ----
# Attach household demographics and product attributes to every transaction.
# Inner joins keep only transactions that have a matching row in both
# `demographics` and `products`.
data3 <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id")

# Total quantity per (household_size, product_category), then exactly five
# categories with the largest quantity inside each household size.
# slice_max() ranks within the remaining `household_size` grouping directly,
# rather than relying on a global arrange() followed by a grouped slice_head();
# `with_ties = FALSE` keeps the "exactly five rows per group" behaviour.
top_products_household_size <- data3 %>%
  group_by(household_size, product_category) %>%
  summarize(total_quantity = sum(quantity), .groups = "drop_last") %>%
  slice_max(total_quantity, n = 5, with_ties = FALSE) %>%
  ungroup()

ggplot(
  top_products_household_size,
  aes(
    x = reorder(product_category, total_quantity),
    y = total_quantity,
    fill = as.factor(household_size)
  )
) +
  geom_col(position = "dodge", width = 0.7) +
  # Dodge width matches the bar width so each label lines up with its bar;
  # the negative hjust pushes the label just past the end of the bar.
  geom_text(
    aes(label = scales::comma(total_quantity)),
    position = position_dodge(width = 0.7),
    hjust = -0.2,
    size = 4
  ) +
  coord_flip() + # Horizontal bars keep the category names readable
  scale_fill_viridis_d(option = "plasma") + # Improve color contrast
  # Log scale for better visibility; the extra upper expansion leaves room
  # for the value labels drawn beyond the longest bars.
  scale_y_log10(
    labels = scales::comma,
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  labs(
    title = "🏠 Top 5 Products Purchased by Household Size",
    subtitle = "Larger households tend to buy more of certain products",
    x = "Product Category",
    y = "Total Quantity Purchased (Log Scale)",
    fill = "Household Size",
    caption = "Data: completejourney"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right", # Legend at the side of the panel
    legend.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12, margin = margin(b = 10))
  )
