# Comparison of Sales of Top Product Types by Household Size #

# Loading necessary libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(completejourney)

# Importing data sets needed for analysis
# `transactions` comes from get_transactions(); `products` and `demographics`
# are data sets that ship with completejourney and are copied into the
# workspace here.
transactions <- get_transactions()
products <- products
demographics <- demographics


# Joining, mutating, and cleaning data sets
# The join keys are spelled out so the joins do not depend on whichever
# column names the tables happen to share (and no "Joining with" message is
# printed). Inner joins keep only transactions that have both a matching
# product and a matching household.
tran_prod_dem <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Calendar date of each transaction, derived from its timestamp
  mutate(Transaction_date = date(transaction_timestamp))


# Data manipulation and visualization
# Total sales per household size and product type, drawn as horizontal
# stacked bars (one bar per product type, one segment per household size).
#
# `.groups = "drop_last"` keeps the summary grouped by household_size, so the
# slice() below picks rows *within each household size* after the global sort.
# NOTE(review): rows 6-10 skip the five best sellers of every household size,
# and the product types selected can differ between household sizes --
# confirm this is the intended meaning of "top product types".
tran_prod_dem %>%
  group_by(household_size, product_type) %>%
  summarize(total = sum(sales_value, na.rm = TRUE), .groups = "drop_last") %>%
  arrange(desc(total)) %>%
  slice(6:10) %>%  # Focusing on a few product types to keep the plot compact
  ungroup() %>%
  ggplot(aes(
    x = total,
    # Order the bars by their full stacked length (sum over household sizes)
    # rather than by the mean segment size, which is reorder()'s default
    y = reorder(product_type, total, FUN = sum),
    fill = household_size
  )) +
  geom_col(position = "stack", width = 0.6) +  # Stacked bars with narrower width
  geom_text(
    aes(label = scales::dollar(total, accuracy = 1)),  # Whole-dollar labels
    position = position_stack(vjust = 0.5),  # Centre each label in its segment
    color = "black",
    size = 2.5
  ) +
  scale_x_continuous(
    labels = scales::dollar,  # Dollar formatting on x-axis
    limits = c(0, NA),
    expand = c(0, 0)  # Bars start flush with the axis
  ) +
  scale_fill_viridis_d(option = "plasma", begin = 0.2, end = 0.8) +  # Vibrant color palette
  theme_minimal(base_size = 12) +  # Compact, clean theme
  theme(
    axis.text.x = element_text(size = 8, color = "gray30", face = "bold"),  # Smaller x-axis labels (total sales)
    axis.text.y = element_text(size = 8, color = "gray30", face = "bold"),  # Smaller y-axis labels (product type)
    plot.title = element_text(size = 15.5, face = "bold", hjust = 0.5, color = "blue"),  # Smaller title
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray40"),  # Smaller subtitle
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),  # Smaller x-axis title
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue"),  # Smaller y-axis title
    legend.position = "top",  # Keep legend at the top for space-saving
    legend.title = element_blank(),  # Remove legend title for compactness
    legend.text = element_text(size = 10),  # Smaller legend text
    panel.grid.major.y = element_blank(),  # Remove y-axis grid lines for a cleaner look
    # NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
    # `linewidth`; switch once that minimum version can be assumed.
    panel.grid.major.x = element_line(size = 0.4, linetype = "dotted", color = "gray80"),  # Minimal x-axis grid lines
    panel.grid.minor = element_blank(),
    plot.margin = margin(2, 2, 2, 2)  # Tighter margins
  ) +
  labs(
    title = "Sales of Top Product Types by Household Size",
    subtitle = "Compact view of total sales for selected product types",
    x = "Total Sales",  # Total sales on X-axis
    y = "Product Type",  # Product type on Y-axis
    fill = "Household Size"
  )




# Total Sales by Department and Marital Status (Top 5 Departments Annotated) #

# Loading necessary libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(completejourney)

# Importing data sets needed for analysis
# Same setup as the first section of this file, repeated here (presumably so
# this section can be run on its own).
transactions <- get_transactions()
products <- products
demographics <- demographics

# Joining, mutating, and cleaning data sets
# Explicit join keys: no reliance on implicitly shared column names, and no
# "Joining with" messages. Inner joins drop transactions without a matching
# product or household.
tran_prod_dem <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Calendar date of each transaction, derived from its timestamp
  mutate(Transaction_date = date(transaction_timestamp))

# Total sales per department and marital status
# Rows with a missing marital status are excluded. Aggregating before
# plotting means the chart draws one segment per department/status pair
# instead of stacking one rectangle per transaction row.
dept_status_sales <- tran_prod_dem %>%
  filter(!is.na(marital_status)) %>%
  group_by(department, marital_status) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop")

# Finding top 5 departments by sales
# (totals over all marital statuses, i.e. the full height of each bar)
top_5_sales <- dept_status_sales %>%
  group_by(department) %>%
  summarize(total_sales = sum(total_sales)) %>%
  arrange(desc(total_sales)) %>%
  slice(1:5)  # Get the top 5 departments by sales

# Stacked bar chart
dept_status_sales %>%
  ggplot(aes(x = department, y = total_sales, fill = marital_status)) +

  # Stacked bars to show the breakdown by marital status
  geom_col(position = "stack") +

  # Adding annotations for the top 5 sales departments
  geom_text(
    data = top_5_sales,
    aes(x = department, y = total_sales, label = scales::dollar(total_sales)),
    inherit.aes = FALSE,  # This layer has no marital_status column to fill by
    color = "blue", size = 2, vjust = -0.5  # Place the label just above the bar
  ) +

  # Caption explaining what the blue figures above the bars are
  annotate(
    "text",
    x = 18,  # Position on x-axis (adjust based on your data)
    y = 900000,  # Position on y-axis
    label = "Top 5 sales by department",
    color = "blue", size = 3, hjust = 0
  ) +

  # Adding labels and formatting
  labs(
    title = "Total Sales by Department and Marital Status",
    subtitle = "Top 5 Departments Highlighted with Sales Figures",
    x = "Department",
    y = "Total Sales",
    fill = "Marital Status"
  ) +

  # Adjust y-axis limit and set y-axis intervals to 100,000.
  # A single y scale is defined; adding a second one would replace the first
  # and print a "Scale for y is already present" message.
  coord_cartesian(ylim = c(0, 1200000)) +  # Adjust the y-axis limit for more space
  scale_y_continuous(
    name = "Total Sales",
    labels = scales::dollar,  # Format sales as dollar values
    breaks = seq(0, 1200000, by = 100000)  # Set y-axis breaks
  ) +

  # Customize the theme
  theme_minimal(base_size = 14) +  # Clean and minimal theme
  theme(
    plot.title = element_text(size = 15.5, face = "bold", color = "blue", hjust = 0.5),  # Bold and centered title
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray50"),  # Subtitle formatting
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),  # Rotate department labels for readability
    axis.text.y = element_text(size = 8, face = "bold", color = "gray30"),  # Y-axis label formatting
    legend.position = "top",  # Move legend to the top for better visibility
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),  # Smaller x-axis title
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue")  # Smaller y-axis title
  )




# Comparison of Total Sales Across Kids Count and Income Range #

# Loading necessary libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(completejourney)

# Importing data sets needed for analysis
# Same setup as the earlier sections of this file, repeated here (presumably
# so this section can be run on its own).
transactions <- get_transactions()
products <- products
demographics <- demographics

# Joining, mutating, and cleaning data sets
# Explicit join keys: no reliance on implicitly shared column names, and no
# "Joining with" messages.
# NOTE(review): the chart in this section reads `demographics` and
# `transactions` directly, so `tran_prod_dem` looks unused here -- confirm
# before removing it.
tran_prod_dem <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Calendar date of each transaction, derived from its timestamp
  mutate(Transaction_date = date(transaction_timestamp))


# Total sales per kids count and income range, drawn as clustered bars.
# Households are matched to their transactions first, then sales are summed
# for every kids_count / income combination.
demographics %>%
  inner_join(transactions, by = "household_id") %>%
  group_by(kids_count, income) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%

  # Identify the top 3 largest total sales and prepare their labels.
  # Every row keeps a (possibly empty) label so the text layer has the same
  # groups as the bars and is dodged to exactly the same x positions.
  mutate(
    sales_rank = rank(-total_sales),
    top_label = if_else(sales_rank <= 3, scales::dollar(total_sales), "")
  ) %>%

  ggplot(aes(x = factor(kids_count), y = total_sales, fill = income)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +  # Clustered bar chart

  # Add annotations only for the top 3 total sales.
  # The labels use the same dodge as the bars so each one sits over its own
  # bar; the vertical offset is added to y because nudge_y cannot be combined
  # with a position adjustment.
  geom_text(
    aes(y = total_sales + 14000, label = top_label),
    position = position_dodge(width = 0.8),
    vjust = -0.1, size = 2.5, color = "blue"
  ) +

  # Adding annotation about the "200-249K" income range
  annotate(
    "text",
    x = 3,  # Position on x-axis (adjust based on your data)
    y = 500000,  # Position on y-axis
    label = "Spending is less for 200-249K income\nacross all kids categories",
    color = "grey20", size = 2.5, hjust = 0
  ) +

  # Caption explaining what the blue figures above the bars are
  annotate(
    "text",
    x = 1,  # Position on x-axis (adjust based on your data)
    y = 400000,  # Position on y-axis
    label = "Top 3 total sales",
    color = "blue", size = 3, hjust = 0
  ) +

  # Adjust y-axis limit and set y-axis intervals to 100,000
  coord_cartesian(ylim = c(0, 600000)) +  # Adjust the y-axis limit for more space
  scale_y_continuous(
    name = "Total Sales",
    labels = scales::dollar,
    breaks = seq(0, 600000, by = 100000)  # Set y-axis breaks
  ) +

  # Color palette for bars
  scale_fill_brewer(palette = "Paired") +  # Use "Paired" palette

  # Custom labels for title, subtitle, and axes
  labs(
    title = "Total Sales by Kids Count and Income Range",
    subtitle = "Bar chart comparing total sales across income ranges for families with different kids counts",
    x = "Number of Kids",
    y = "Total Sales",
    fill = "Income Range"
  ) +

  # Theme settings with reduced title size and blue color
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(size = 15.5, face = "bold", color = "blue", hjust = 0.5),  # Adjust title size and color
    plot.subtitle = element_text(size = 10, color = "gray40", hjust = 0.5),  # Adjust subtitle size and color
    axis.text.x = element_text(size = 12, face = "bold", color = "gray30"),  # Adjust x-axis labels
    axis.text.y = element_text(size = 12, face = "bold", color = "gray30"),  # Adjust y-axis labels
    legend.position = "top",  # Move legend to the top
    legend.title = element_text(size = 12, face = "bold"),  # Bold legend title
    legend.text = element_text(size = 10),  # Adjust legend text size
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),  # Smaller x-axis title
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue")  # Smaller y-axis title
  )




# Comparison of Coupon Redemption by Income Group #

# Load necessary libraries
library(tidyverse)
library(ggplot2)

# Assumes the 'demographics' and 'coupon_redemptions' datasets are available
# (both ship with the completejourney package loaded earlier in this file)

# Number of coupon redemptions per income group: keep only households that
# appear in both tables, then tally the redemption rows for each income level
coupon_by_income <-
  inner_join(demographics, coupon_redemptions, by = "household_id") %>%
  count(income, name = "coupon_redemptions")

# Bar chart of redemption counts, one bar per income group, with the bars
# ordered by their count
coupon_by_income %>%
  ggplot(aes(
    x = reorder(income, coupon_redemptions),
    y = coupon_redemptions,
    fill = income
  )) +
  geom_col(width = 0.7) +

  # Print each bar's redemption count just above it
  geom_text(
    aes(label = coupon_redemptions),
    color = "black",
    size = 3,
    vjust = -0.5
  ) +

  # Base theme first, then the per-element overrides
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",  # Fill repeats the x-axis, so no legend
    plot.title = element_text(size = 15.5, face = "bold", color = "blue", hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray50"),
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue"),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),  # Slanted income labels
    axis.text.y = element_text(size = 12)
  ) +

  # Titles and axis names
  labs(
    title = "Coupon Redemptions by Income Group",
    subtitle = "Analysis of coupon redemption trends across different income levels",
    x = "Income Group",
    y = "Number of Coupon Redemptions",
    fill = "Income Group"
  )