library(dplyr)
library(ggplot2)
library(scales)
library(lubridate)
# Plot 1: Top 10 Grocery Departments by Total Spending ----
# Joins every sampled transaction to its product record, sums `sales_value`
# per department and keeps the ten departments with the largest totals.
top_grocery_departments <- completejourney::transactions_sample %>%
  inner_join(completejourney::products, by = "product_id") %>%
  # FUEL and MISCELLANEOUS are excluded from the ranking
  # (presumably because they are not grocery departments -- confirm).
  filter(!department %in% c("FUEL", "MISCELLANEOUS")) %>%
  group_by(department) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  # No arrange() needed: slice_max() picks the largest totals itself and the
  # plot order is set by reorder() below. with_ties = FALSE caps the result at
  # exactly 10 rows, matching the plot title.
  slice_max(order_by = total_sales, n = 10, with_ties = FALSE)

ggplot(
  top_grocery_departments,
  aes(
    x = reorder(department, total_sales),
    y = total_sales,
    fill = department
  )
) +
  geom_col() +
  # Horizontal bars; after the flip the y scale is drawn along the bottom.
  coord_flip() +
  scale_y_continuous(labels = dollar) + # Format sales axis with dollar signs
  labs(
    title = "Top 10 Grocery Departments by Total Sales",
    subtitle = "Departments that generate the highest revenue",
    x = "Grocery Department",
    y = "Total Sales ($)",
    caption = "Data Source: CompleteJourney"
  ) +
  theme_minimal() +
  # Each bar is already labelled on the axis, so the fill legend is redundant.
  theme(legend.position = "none")

# Plot 2: Popular Product Categories for Each Age Group ----
# Sums the purchased `quantity` per (age group, department) and keeps the five
# departments with the largest totals inside every age group.
# NOTE(review): unlike Plot 1, FUEL and MISCELLANEOUS are not filtered out
# here -- confirm that this is intended.
category_sales <- completejourney::transactions_sample %>%
  inner_join(completejourney::products, by = "product_id") %>%
  # Inner join: transactions whose household has no demographics row drop out.
  inner_join(completejourney::demographics, by = "household_id") %>%
  group_by(age, department) %>%
  # "drop_last" states explicitly that the result stays grouped by `age`,
  # which is what the per-age-group top 5 below relies on.
  summarise(
    total_quantity = sum(quantity, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  # with_ties = FALSE caps every age group at exactly 5 departments.
  slice_max(order_by = total_quantity, n = 5, with_ties = FALSE) %>%
  ungroup()

ggplot(
  category_sales,
  aes(
    # reorder() ranks departments by their mean total_quantity over all age
    # groups (its default summary), so one shared order is used in every panel
    # rather than a separate sort per panel.
    x = reorder(department, total_quantity),
    y = total_quantity,
    fill = age
  )
) +
  # One bar per department in each facet, so no dodging is required.
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Most Popular Product Departments by Age Group",
    subtitle = "Top 5 most frequently purchased departments per age group",
    x = "Product Department",
    y = "Total Quantity Purchased",
    caption = "Data Source: CompleteJourney"
  ) +
  theme_minimal() +
  # The facet strips already name the age group; the fill legend repeats them.
  theme(legend.position = "none") +
  facet_wrap(~age)

# Plot 3: Most Popular Days of the Week for Shopping ----
# Counts the rows of the transaction sample per weekday of their timestamp.
# NOTE(review): every row is counted; if one shopping trip spans several rows
# this measures purchased line items rather than trips -- confirm.
shopping_days <- completejourney::transactions_sample %>%
  mutate(
    day_of_week = wday(transaction_timestamp, label = TRUE, abbr = FALSE)
  ) %>%
  # wday(label = TRUE) returns an ordered factor and count() returns its rows
  # in factor-level order, so no manual sort against hard-coded English day
  # names is needed (such a match would find nothing in other locales).
  count(day_of_week, name = "total_transactions")

ggplot(
  shopping_days,
  aes(x = day_of_week, y = total_transactions, fill = day_of_week)
) +
  geom_col() +
  scale_y_continuous(labels = comma) + # Thousands separators on the count axis
  labs(
    title = "Most Popular Shopping Days",
    subtitle = "Total number of transactions per day of the week",
    x = "Day of the Week",
    y = "Total Transactions",
    caption = "Data Source: CompleteJourney"
  ) +
  theme_minimal() +
  # Days are labelled on the x axis; the fill legend would only repeat them.
  theme(legend.position = "none")