# Comparison of Sales of Top Product Types by Household Size #
# Loading necessary libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(completejourney)
# Importing data sets needed for analysis
# transactions comes from get_transactions(); products and demographics are
# copied into the workspace under the same names (presumably the datasets
# exported by completejourney - confirm if another source is attached).
transactions <- get_transactions()
products <- products
demographics <- demographics
# Joining, mutating, and cleaning data sets
# The join keys are spelled out so the result does not depend on whichever
# column names the tables happen to share, and no "Joining with ..." message is
# printed. Inner joins keep only transactions that have both a matching product
# and a matching household in demographics.
tran_prod_dem <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Calendar date of each transaction, derived from its timestamp.
  mutate(Transaction_date = date(transaction_timestamp))
# Data manipulation and visualization
# Horizontal stacked bars: one bar per product type, one segment per household
# size, segment length = total sales.
tran_prod_dem %>%
  group_by(household_size, product_type) %>%
  # Keep the household_size grouping on purpose; slice() below works per group.
  summarize(total = sum(sales_value, na.rm = TRUE), .groups = "drop_last") %>%
  arrange(desc(total)) %>%
  # Focusing on a few product types to keep the plot compact: rows 6-10 of each
  # household size once sorted by total.
  # NOTE(review): this skips the five best sellers of every household size even
  # though the title says "Top Product Types" - confirm that is intended.
  slice(6:10) %>%
  ungroup() %>%
  ggplot(aes(
    x = total,
    # Order bars by their full stacked length; reorder() defaults to the mean
    # of the segments, which does not match what the bar shows.
    y = reorder(product_type, total, FUN = sum),
    fill = household_size
  )) +
  # Stacked bars with narrower width
  geom_col(position = "stack", width = 0.6) +
  # Whole-dollar annotations centred inside each segment
  geom_text(
    aes(label = scales::dollar(total, accuracy = 1)),
    position = position_stack(vjust = 0.5),
    color = "black",
    size = 2.5
  ) +
  # Dollar formatting on x-axis, starting exactly at zero
  scale_x_continuous(
    labels = scales::dollar,
    limits = c(0, NA),
    expand = c(0, 0)
  ) +
  # Vibrant color palette
  scale_fill_viridis_d(option = "plasma", begin = 0.2, end = 0.8) +
  # Compact, clean theme
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(size = 8, color = "gray30", face = "bold"),
    axis.text.y = element_text(size = 8, color = "gray30", face = "bold"),
    plot.title = element_text(
      size = 15.5, face = "bold", hjust = 0.5, color = "blue"
    ),
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray40"),
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue"),
    # Legend on top, without a title, to save space
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 10),
    # Only faint vertical guide lines
    panel.grid.major.y = element_blank(),
    # linewidth replaces the deprecated size argument for line elements
    panel.grid.major.x = element_line(
      linewidth = 0.4, linetype = "dotted", color = "gray80"
    ),
    panel.grid.minor = element_blank(),
    # Tighter margins
    plot.margin = margin(2, 2, 2, 2)
  ) +
  labs(
    title = "Sales of Top Product Types by Household Size",
    subtitle = "Compact view of total sales for selected product types",
    x = "Total Sales",
    y = "Product Type",
    fill = "Household Size"
  )

# Total Sales by Department and Marital Status (Top 5 Departments Highlighted) #
# Loading necessary libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(completejourney)
# Importing data sets needed for analysis
# transactions comes from get_transactions(); products and demographics are
# copied into the workspace under the same names (presumably the datasets
# exported by completejourney - confirm if another source is attached).
transactions <- get_transactions()
products <- products
demographics <- demographics
# Joining, mutating, and cleaning data sets
# Explicit join keys: the joins no longer depend on whichever column names the
# tables share, and dplyr prints no "Joining with ..." message. Inner joins drop
# transactions without a matching product or household.
tran_prod_dem <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Calendar date of each transaction, derived from its timestamp.
  mutate(Transaction_date = date(transaction_timestamp))
# Finding top 5 departments by sales
# Rows with a missing marital status are dropped first, the same filter the
# stacked bar chart applies, so these totals equal the bar heights.
top_5_sales <- tran_prod_dem %>%
  # is.na() tests missingness directly; comparing against the string "NA" only
  # removed missing values as a side effect of NA propagation in filter().
  filter(!is.na(marital_status)) %>%
  group_by(department) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  arrange(desc(total_sales)) %>%
  slice(1:5) # Get the top 5 departments by sales
# Stacked bar chart
# Total sales per department, split by marital status, with the dollar totals
# of the five best-selling departments printed above their bars.
tran_prod_dem %>%
  # Drop rows with a missing marital status (is.na() instead of != "NA").
  filter(!is.na(marital_status)) %>%
  # Aggregate before plotting so each bar segment is one rectangle instead of
  # one rectangle per transaction row.
  # NOTE(review): assumes sales_value is never negative; negative rows would
  # have been stacked below zero by the row-level version - confirm.
  group_by(department, marital_status) %>%
  summarize(sales_value = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = department, y = sales_value, fill = marital_status)) +
  # Stacked bars to show the breakdown by marital status
  geom_col(position = "stack") +
  # Adding annotations for the top 5 sales departments
  geom_text(
    data = top_5_sales,
    aes(x = department, y = total_sales, label = scales::dollar(total_sales)),
    inherit.aes = FALSE, # top_5_sales has no marital_status column to map
    color = "blue",
    size = 2,
    vjust = -0.5 # Sit just above the bar
  ) +
  # Free-floating caption explaining what the blue figures are
  annotate(
    "text",
    x = 18, # Position on the discrete x-axis (adjust based on your data)
    y = 900000, # Position on y-axis
    label = "Top 5 sales by department",
    color = "blue",
    size = 3,
    hjust = 0
  ) +
  # Adding labels and formatting
  labs(
    title = "Total Sales by Department and Marital Status",
    subtitle = "Top 5 Departments Highlighted with Sales Figures",
    x = "Department",
    y = "Total Sales",
    fill = "Marital Status"
  ) +
  # Adjust the y-axis limit for more space above the tallest bar
  coord_cartesian(ylim = c(0, 1200000)) +
  # Single y scale: dollar labels with a break every 100,000. (A second
  # scale_y_continuous() would only replace the first and print a message.)
  scale_y_continuous(
    name = "Total Sales",
    labels = scales::dollar,
    breaks = seq(0, 1200000, by = 100000)
  ) +
  # Customize the theme
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(
      size = 15.5, face = "bold", color = "blue", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray50"),
    # Rotate department labels for readability
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 8, face = "bold", color = "gray30"),
    legend.position = "top",
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue")
  )

# Comparison of Total Sales Across Kids Count and Income Range #
# Loading necessary libraries
library(tidyverse)
library(ggplot2)
library(dplyr)
library(lubridate)
library(completejourney)
# Importing data sets needed for analysis
# transactions comes from get_transactions(); products and demographics are
# copied into the workspace under the same names (presumably the datasets
# exported by completejourney - confirm if another source is attached).
transactions <- get_transactions()
products <- products
demographics <- demographics
# Joining, mutating, and cleaning data sets
# Explicit join keys: the joins no longer depend on whichever column names the
# tables share, and dplyr prints no "Joining with ..." message.
# NOTE(review): the chart in this section joins demographics to transactions
# directly, so tran_prod_dem looks unused here - confirm before removing.
tran_prod_dem <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Calendar date of each transaction, derived from its timestamp.
  mutate(Transaction_date = date(transaction_timestamp))
# Clustered bars: total sales per kids count, one bar per income range, with
# the three largest totals labelled.
demographics %>%
  inner_join(transactions, by = "household_id") %>%
  group_by(kids_count, income) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # 1 = largest total
    sales_rank = rank(-total_sales),
    # Only the top 3 get a visible label. Every row keeps an (empty) label so
    # the text layer is dodged across the same set of bars as geom_col().
    top_label = ifelse(sales_rank <= 3, scales::dollar(total_sales), "")
  ) %>%
  ggplot(aes(x = factor(kids_count), y = total_sales, fill = income)) +
  # Clustered bar chart
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  # Labels for the top 3 totals, dodged with the same width as the bars so each
  # one sits above its own bar rather than at the centre of the kids-count
  # cluster. The 14000 offset lifts the text clear of the bar top.
  geom_text(
    aes(y = total_sales + 14000, label = top_label, group = income),
    position = position_dodge(width = 0.8),
    vjust = -0.1,
    size = 2.5,
    color = "blue"
  ) +
  # Free-floating note about the "200-249K" income range (no arrow is drawn)
  annotate(
    "text",
    x = 3, # Position on x-axis (adjust based on your data)
    y = 500000, # Position on y-axis
    label = "Spending is less for 200-249K income\nacross all kids categories",
    color = "grey20",
    size = 2.5,
    hjust = 0
  ) +
  # Free-floating caption explaining what the blue figures are
  annotate(
    "text",
    x = 1, # Position on x-axis (adjust based on your data)
    y = 400000, # Position on y-axis
    label = "Top 3 total sales",
    color = "blue",
    size = 3,
    hjust = 0
  ) +
  # Adjust the y-axis limit for more space and put a break every 100,000
  coord_cartesian(ylim = c(0, 600000)) +
  scale_y_continuous(
    name = "Total Sales",
    labels = scales::dollar,
    breaks = seq(0, 600000, by = 100000)
  ) +
  # Color palette for bars
  scale_fill_brewer(palette = "Paired") +
  # Custom labels for title, subtitle, and axes
  labs(
    title = "Total Sales by Kids Count and Income Range",
    subtitle = "Bar chart comparing total sales across income ranges for families with different kids counts",
    x = "Number of Kids",
    y = "Total Sales",
    fill = "Income Range"
  ) +
  # Theme settings with reduced title size and blue color
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(
      size = 15.5, face = "bold", color = "blue", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, color = "gray40", hjust = 0.5),
    axis.text.x = element_text(size = 12, face = "bold", color = "gray30"),
    axis.text.y = element_text(size = 12, face = "bold", color = "gray30"),
    legend.position = "top",
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10),
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue")
  )

# Comparison of Coupon Redemption by Income Group #
# Load necessary libraries
library(tidyverse)
library(ggplot2)
# Requires 'demographics' and 'coupon_redemptions' to be available already.
# Match every coupon redemption to its household's demographics, then count the
# matched rows per income group (one output row per income level).
coupon_by_income <- summarize(
  group_by(
    inner_join(demographics, coupon_redemptions, by = "household_id"),
    income
  ),
  coupon_redemptions = n()
)
# Plot the data
# One bar per income group, ordered by redemption count, with the count
# printed above each bar. Fill is mapped to income but the legend is hidden
# because the x-axis already names the groups.
coupon_by_income %>%
  ggplot(
    aes(
      x = reorder(income, coupon_redemptions),
      y = coupon_redemptions,
      fill = income
    )
  ) +
  geom_col(width = 0.7) +
  # Redemption counts just above the bars
  geom_text(
    aes(label = coupon_redemptions),
    vjust = -0.5,
    size = 3,
    color = "black"
  ) +
  # Clean base theme, then title/axis styling on top of it
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(
      size = 15.5, face = "bold", color = "blue", hjust = 0.5
    ),
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray50"),
    # Slanted income labels so they do not overlap
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 12, face = "bold", color = "darkblue"),
    axis.title.y = element_text(size = 12, face = "bold", color = "darkblue")
  ) +
  # Titles and axis names
  labs(
    title = "Coupon Redemptions by Income Group",
    subtitle = "Analysis of coupon redemption trends across different income levels",
    x = "Income Group",
    y = "Number of Coupon Redemptions",
    fill = "Income Group"
  )
