library(tidyverse)
library(completejourney)
# Load the transactions and promotions tables via the completejourney
# helpers. The other tables referenced later in this script (`products`,
# `demographics`) are not assigned anywhere in this file, so they are
# presumably datasets exported by the package itself.
# NOTE(review): `promotions` is not used in the visible part of this script;
# confirm whether it is needed before keeping the (potentially slow) load.
transactions <- completejourney::get_transactions()
promotions <- completejourney::get_promotions()

# Plot 1: Who’s Buying 7 LB & 22 LB Ice?

# Products for Plot 1: frozen ice sold in 7 LB or 22 LB bags.
# `package_size` alone is not specific to ice, so the selection is also
# restricted to the "FRZN ICE" category (the same category the weekday plot
# further down filters on). Without it, any product that happens to share
# those package sizes would be counted as ice.
selected_products <- products %>%
  filter(
    product_category == "FRZN ICE",
    # Whole-string, case-insensitive match on the two bag sizes.
    str_detect(package_size, regex("^(7 LB|22 LB)$", ignore_case = TRUE))
  )

# Total sales value per marital status and package size for the selected
# products.
# Uses the full `transactions` table loaded at the top of the script instead
# of `transactions_sample`, so the totals cover the same data as the other
# plots in this file rather than a subset of it.
# The inner join on `demographics` keeps only households that have a
# demographics record.
sales_summary <- selected_products %>%
  inner_join(transactions, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(marital_status, package_size) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  # Show a missing marital status as an explicit "Unknown" category instead
  # of an NA bar.
  mutate(
    marital_status = fct_na_value_to_level(marital_status, level = "Unknown")
  )

# Dodged bar chart: total sales by marital status, one bar per package size,
# with the y axis formatted as dollars.
sales_plot <- ggplot(
  sales_summary,
  aes(x = marital_status, y = total_sales, fill = package_size)
) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Total Sales for Ice Products by Marital Status",
    subtitle = "Comparison of total sales for 7 LB and 22 LB ice products",
    x = "Marital Status",
    y = "Total Sales",
    fill = "Package Size"
  )

sales_plot

# Plot 2: Which Days of the Week Are People Buying the Most Ice? Which Type of Bag?

# Weekday names in display order, Sunday first. The order matches the 0-6
# numbering of `POSIXlt$wday` (0 = Sunday), which is used below to index it.
day_order <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)

# Transactions for 7 LB / 22 LB frozen-ice products, tagged with the weekday
# of purchase.
# The weekday is derived from the numeric day-of-week rather than
# `weekdays()`, whose output follows the session locale; on a non-English
# locale the names would not match `day_order` and every weekday would turn
# into NA when converted to a factor.
ice_data <- transactions %>%
  inner_join(
    products %>%
      filter(
        package_size %in% c("7 LB", "22 LB"),
        product_category == "FRZN ICE"
      ),
    by = "product_id"
  ) %>%
  mutate(
    weekday = day_order[as.POSIXlt(as.Date(transaction_timestamp))$wday + 1L]
  )

# Units purchased per weekday and bag size. `na.rm = TRUE` mirrors the sales
# aggregation used for the first plot. The weekday is turned into a factor
# only after summarising so the bars follow `day_order` instead of
# alphabetical order.
ice_summary <- ice_data %>%
  group_by(weekday, package_size) %>%
  summarise(total_quantity = sum(quantity, na.rm = TRUE), .groups = "drop") %>%
  mutate(weekday = factor(weekday, levels = day_order))

# Bar chart of units purchased per weekday, one facet row per bag size with
# independent y scales.
# A previous extra layer with the constant fill "Total Quantity Purchased"
# was dropped: within each facet it drew exactly the same bars as the
# package-size layer and was completely covered by it, so its only visible
# effect was a misleading extra key in the "Package Size" legend.
# Note that with only the two package sizes left on the fill scale, the
# default bar colours differ from the earlier three-key version.
combined_plot <- ggplot(
  ice_summary,
  aes(x = weekday, y = total_quantity, fill = package_size)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Distribution of Ice Purchases Across Days of the Week",
    x = "Day of the Week",
    y = "Total Quantity Purchased",
    fill = "Package Size"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  facet_grid(rows = vars(package_size), scales = "free_y")

combined_plot

# Plot 3: Which Age Ranges Are Buying 7 LB and 22 LB Bags of Ice?

# Re-code `age` as a factor with the brackets in ascending order so the
# facets of the age plot appear youngest to oldest. Values outside these six
# labels become NA.
demographics <- demographics %>%
  mutate(
    age = factor(
      age,
      levels = c("19-24", "25-34", "35-44", "45-54", "55-64", "65+")
    )
  )

# Attach household demographics to the ice transactions; households without
# a demographics record are dropped by the inner join.
ice_data_with_age <- inner_join(ice_data, demographics, by = "household_id")

# Units purchased per age bracket and bag size.
ice_summary_age <- summarise(
  group_by(ice_data_with_age, age, package_size),
  total_quantity = sum(quantity),
  .groups = "drop"
)

# One panel per age bracket comparing units purchased of each bag size.
# The y axis is free per panel, so bar heights are comparable within a panel
# but not across panels.
age_plots <- ggplot(
  ice_summary_age,
  aes(x = package_size, y = total_quantity, fill = package_size)
) +
  geom_col(position = "dodge") +
  facet_wrap(~age, scales = "free_y") +
  labs(
    title = "Distribution of Ice Purchases by Age Group and Package Size",
    x = "Package Size",
    y = "Total Quantity Purchased",
    fill = "Package Size"
  ) +
  theme_minimal()

age_plots