library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# Bind the three completejourney data sets used by this script to top-level
# names. get_transactions() is the package accessor for the transaction
# records; products and demographics are data objects shipped with the package.
transactions <- completejourney::get_transactions()
products <- completejourney::products
demographics <- completejourney::demographics

# Transactions enriched with household demographics and product attributes.
# Both joins are inner joins, so rows without a matching household or product
# are dropped.
data1 <- inner_join(
  inner_join(transactions, demographics, by = join_by(household_id)),
  products,
  by = join_by(product_id)
)

# Total sales per (income, product_category), keeping the five best-selling
# product categories within each income bracket.
top_products_income <- data1 %>%
  group_by(income, product_category) %>%
  # Peel off only the innermost grouping so the ranking below is done per
  # income bracket; stating it explicitly also avoids the summarise() message.
  summarise(total_sales = sum(sales_value), .groups = "drop_last") %>%
  # slice_max() replaces the superseded top_n(); ties are kept by default
  # (with_ties = TRUE), matching what top_n() did.
  slice_max(total_sales, n = 5) %>%
  ungroup()
# Dodged bar chart: one cluster per product category, one bar per income
# bracket, bar height = total sales value.
ggplot(
  top_products_income,
  aes(x = product_category, y = total_sales, fill = income)
) +
  geom_col(position = "dodge") +
  # Plain thousands separators instead of the default axis number format.
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Top 5 Product Preferences by Income Level",
    subtitle = paste(
      "Comparing the total sales value for the most purchased products",
      "across income groups"
    ),
    x = "Product Category",
    y = "Total Sales Value",
    fill = "Income"
  ) +
  theme_minimal() +
  # Tilt the category labels so they remain legible without overlapping;
  # the previous 3 pt font avoided overlap only by being unreadable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Transactions tagged with the calendar month of the purchase (as a labelled
# month factor via label = TRUE) and joined to household demographics.
data2 <- inner_join(
  mutate(
    transactions,
    month = lubridate::month(transaction_timestamp, label = TRUE)
  ),
  demographics,
  by = join_by(household_id)
)

# Total sales value per (month, household_size).
monthly_spending <- data2 %>%
  group_by(month, household_size) %>%
  # Return an ungrouped result so later verbs are not silently applied per
  # month; this also avoids the "grouped output" message.
  summarise(total_spending = sum(sales_value), .groups = "drop")
# Line chart of total spending by month, one line per household size.
ggplot(
  monthly_spending,
  aes(
    x = month,
    y = total_spending,
    color = household_size,
    # Explicit group so each household size is drawn as one connected line
    # across the discrete month axis.
    group = household_size
  )
) +
  # `linewidth` replaces the `size` aesthetic for lines (deprecated in
  # ggplot2 3.4.0).
  geom_line(linewidth = 1.2) +
  labs(
    title = "Monthly Spending Patterns by Household Size",
    subtitle = paste(
      "Tracking total spending across different household sizes",
      "over the year"
    ),
    x = "Month",
    y = "Total Spending",
    color = "Household Size"
  ) +
  theme_minimal()


# Same transactions + demographics + products join as data1; reuse that result
# instead of recomputing two joins over the full transaction table.
data3 <- data1

# Total quantity purchased per (household_size, product_category), keeping the
# five highest-quantity product categories for each household size.
top_products_household_size <- data3 %>%
  group_by(household_size, product_category) %>%
  # Keep the household_size grouping explicitly so the ranking below is
  # per household size rather than relying on summarise()'s implicit default.
  summarise(total_quantity = sum(quantity), .groups = "drop_last") %>%
  # Exactly five rows per group, as the earlier arrange() + slice_head() gave;
  # with_ties = FALSE keeps the count fixed even if quantities tie.
  slice_max(total_quantity, n = 5, with_ties = FALSE) %>%
  ungroup()
# Horizontal dodged bar chart of the top product categories per household
# size, with the quantity printed at the end of each bar.
ggplot(
  top_products_household_size,
  aes(
    # Order categories by their total_quantity (reorder() uses the mean per
    # category by default) so bars are sorted rather than alphabetical.
    x = reorder(product_category, total_quantity),
    y = total_quantity,
    fill = as.factor(household_size)
  )
) +
  geom_col(position = "dodge", width = 0.7) +
  geom_text(
    aes(label = scales::comma(total_quantity)),
    # Same dodge width as the bars so each label lines up with its own bar.
    position = position_dodge(width = 0.7),
    hjust = -0.2, # Nudge labels just past the end of the bars
    size = 4
  ) +
  coord_flip() + # Horizontal bars keep long category names readable
  scale_fill_viridis_d(option = "plasma") + # Improve color contrast
  scale_y_log10(
    labels = scales::comma,
    # Extra room at the upper end: the labels sit beyond the bar ends and
    # would otherwise be clipped at the panel edge on the longest bars.
    expand = expansion(mult = c(0.05, 0.2))
  ) + # Log scale so small and very large quantities are both visible
  labs(
    title = "🏠 Top 5 Products Purchased by Household Size",
    subtitle = "Larger households tend to buy more of certain products",
    x = "Product Category",
    y = "Total Quantity Purchased (Log Scale)",
    fill = "Household Size",
    caption = "Data: completejourney"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right", # Legend beside the panel
    legend.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12, margin = margin(b = 10))
  )