### Peak Shopping Times for Each Age Range

# Give the source tables short working names
transactions <- transactions_sample
demographics <- demographics

# Attach household demographics to each transaction by household id.
# A left join keeps transactions whose household has no demographics row.
joined_data <- left_join(transactions, demographics, by = "household_id")

# Parse the transaction timestamp, extract the hour of day, and bucket the
# hour into 3-hour blocks.
#
# Adds three columns to joined_data:
#   t_time     - timestamp parsed with ymd_hms()
#   t_hour     - hour component of t_time
#   hour_block - factor naming the 3-hour block that contains t_hour
joined_data <- joined_data %>%
  mutate(
    t_time = ymd_hms(transaction_timestamp),
    t_hour = hour(t_time),
    hour_block = cut(
      t_hour,
      breaks = seq(0, 24, by = 3),
      labels = c("Midnight-3 AM", "3 AM-6 AM", "6 AM-9 AM", "9 AM-Noon", "Noon- 3 PM", "3 PM-6 PM", "6 PM-9 PM", "9 PM-Midnight"),
      # Left-closed intervals [0,3), [3,6), ...: an hour of 3 means 3:00-3:59
      # and must land in "3 AM-6 AM". With cut()'s default right-closed
      # intervals the boundary hours 3, 6, ..., 21 fall into the earlier block.
      right = FALSE,
      # Closes the final interval on the right as well, i.e. [21, 24]
      include.lowest = TRUE
    )
  )

# Number of transactions for every (age range, 3-hour block) pair,
# returned as an ungrouped table with the count in t_per_block
blocked_data <- summarise(
  group_by(joined_data, age, hour_block),
  t_per_block = n(),
  .groups = "drop"
)

# Dodged bar chart: transactions per 3-hour block, one bar per age range
ggplot(blocked_data, aes(x = hour_block, y = t_per_block, fill = age)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Zoom the y axis with coord_cartesian() instead of ylim(): ylim() sets
  # scale limits, which removes any bar taller than 5000 from the plot,
  # whereas a coordinate zoom keeps every bar and only clips the view.
  coord_cartesian(ylim = c(0, 5000)) +
  labs(
    title = "Transactions by 3-Hour Time Blocks for Different Age Ranges",
    subtitle = "Insights Into the Shopping Habits of Each Age Range",
    x = "Time Block",
    y = "Total Transactions",
    fill = "Age Range",
    caption = "Source: Complete Journey Dataset (Demographics and Transactions Data)"
  ) +
  theme_minimal() +
  # Tilt the block labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Give the source tables short working names
products <- products
transactions <- transactions_sample

# Pair each transaction with its product record; the inner join drops
# transactions whose product_id has no match in products
sales_category <- inner_join(transactions, products, by = "product_id")

# Box plot of sales value within each department
sales_category %>%
  ggplot(aes(x = department, y = sales_value)) +
  geom_boxplot(
    fill = "purple",
    outlier.colour = "green",
    outlier.size = 1.5
  ) +
  theme_minimal() +
  # Tilt the department labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Distribution of Sales Value by Product Category",
    subtitle = "Visualizing price variance within product categories",
    x = "Product Category",
    y = "Sales Value ($)",
    caption = "Source: Complete Journey Datasets (Products and Transactions Data)"
  )

# Give the transaction sample a short working name
transactions <- transactions_sample

# Mean discount per department, rounded to cents.
#
# The discount is taken from retail_disc alone. Subtracting sales_value from
# it would measure the gap between the discount and the amount paid, which is
# not a discount.
# NOTE(review): assumes retail_disc is the per-line retailer discount in
# dollars, as described in the Complete Journey data dictionary. If total
# discount is wanted, coupon_disc and coupon_match_disc presumably need to be
# added as well - confirm.
average_discount <- transactions %>%
  inner_join(products, by = "product_id") %>%
  # abs() keeps the value a non-negative magnitude whichever sign
  # convention the column uses
  mutate(discount = abs(retail_disc)) %>%
  group_by(department) %>%
  summarise(average_discount = round(mean(discount, na.rm = TRUE), 2))

# Bar chart of the average discount per department, with departments
# ordered along the x axis by their average discount
average_discount %>%
  ggplot(
    aes(
      x = reorder(department, average_discount),
      y = average_discount,
      fill = department
    )
  ) +
  geom_bar(stat = "identity", show.legend = FALSE) +
  theme_minimal() +
  # Tilt the department labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Average Discount for Each Product Category",
    subtitle = "Comparing average discounts across various product categories",
    x = "Product Category",
    y = "Average Discount ($)",
    caption = "Source: Complete Journey Data Set (Products and Transactions Data)"
  )