library(tidyverse)
library(completejourney)

## Plot 1: Coupon Redemption Rate by Age and Income Group
#
# Redemption rate here means: coupons redeemed per campaign mailing received,
# aggregated over all households in an age/income group.

# Number of coupon_redemptions rows per household per campaign.
# NOTE(review): presumably one row per redeemed coupon -- confirm against the
# completejourney data dictionary.
redemptions_by_mailing <- coupon_redemptions %>%
  count(campaign_id, household_id, name = "coupons_redeemed")

# Start from every campaign a household received and LEFT join the
# redemptions. An inner join would silently drop mailings that produced no
# redemption, so the denominator would only ever contain redeeming households.
engagement <- campaigns %>%
  distinct(campaign_id, household_id) %>%
  inner_join(demographics, by = "household_id") %>%
  left_join(redemptions_by_mailing, by = c("campaign_id", "household_id")) %>%
  group_by(age, income) %>%
  summarise(
    # One row per (campaign, household) pair at this point.
    campaigns_received = n(),
    # Mailings without a redemption come through the left join as NA.
    coupons_redeemed = sum(coalesce(coupons_redeemed, 0L)),
    redemption_rate = coupons_redeemed / campaigns_received,
    .groups = "drop"
  )

ggplot(engagement, aes(x = income, y = redemption_rate, fill = age)) +
  geom_col(position = "dodge") +
  labs(title = "Coupon Redemption Rate by Age and Income Group",
       subtitle = "Comparing the effectiveness of marketing campaigns across demographics",
       x = "Income Group",
       y = "Redemption Rate",
       fill = "Age Group") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        plot.title = element_text(size = 14, face = "bold"),
        plot.subtitle = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 8)) +
  scale_fill_brewer(palette = "Spectral")

## Plot 2: Product Categories Targeted by Campaigns vs. Actual Purchases

# Number of promotion rows per product category.
promoted_counts <- promotions_sample %>%
  inner_join(products, by = "product_id") %>%
  count(product_category, name = "promoted_count")

# Number of transaction rows per product category.
purchased_counts <- transactions_sample %>%
  inner_join(products, by = "product_id") %>%
  count(product_category, name = "purchased_count")

# Keep only categories that appear in both the promotions and the purchases.
product_categories_comparison <- promoted_counts %>%
  inner_join(purchased_counts, by = "product_category")

# The 20 categories with the most promotion rows.
top_categories <- product_categories_comparison %>%
  arrange(desc(promoted_count)) %>%
  head(20)

# Reshape to one row per (category, count type). position_dodge() only
# separates bars that belong to the same layer, so drawing the two counts as
# two separate geom_col() layers stacks them on top of each other instead of
# placing them side by side.
top_categories_long <- top_categories %>%
  # Fix the category order by promoted count before the column is pivoted away.
  mutate(product_category = reorder(product_category, promoted_count)) %>%
  rename(Promoted = promoted_count, Purchased = purchased_count) %>%
  pivot_longer(c(Promoted, Purchased),
               names_to = "count_type",
               values_to = "count")

ggplot(top_categories_long,
       aes(x = product_category, y = count, fill = count_type)) +
  geom_col(position = position_dodge(width = 0.9)) +
  labs(title = "Top 20 Promoted Product Categories vs. Purchases",
       subtitle = "Comparing Promotion Focus vs. Actual Purchases",
       x = "Product Category",
       y = "Count",
       fill = "Count Type",
       caption = "Data from Complete Journey Dataset") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## Plot 3: Scatter Plot of Average Sales Value vs. Quantity Purchased by
## Demographic Group

# One row per age/income combination, holding the mean of sales_value and the
# mean of quantity over the joined transaction rows.
average_metrics <- transactions_sample %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, income) %>%
  summarise(
    # Yields average_sales_value and average_quantity, in that order.
    across(c(sales_value, quantity), mean, .names = "average_{.col}"),
    .groups = "drop"
  )

# Each point is one demographic group: colour encodes age, size encodes income.
ggplot(average_metrics) +
  geom_point(
    aes(x = average_sales_value,
        y = average_quantity,
        color = age,
        size = income),
    alpha = 0.7
  ) +
  labs(title = "Average Sales Value vs. Quantity Purchased by Demographic Group",
       subtitle = "Exploring consumer spending behavior across demographics",
       x = "Average Sales Value ($)",
       y = "Average Quantity Purchased",
       color = "Age Group",
       size = "Income Level") +
  scale_color_viridis_d() + # discrete viridis palette for the age groups
  theme_minimal() +
  theme(legend.position = "right")