Understanding Customer Purchasing Behavior
Plot 1: Total Sales by Age and Income Segments
# Plot 1 pipeline: household spend -> demographic profile -> segment totals.

# Bring the demographics table into the workspace and fetch the transactions.
data("demographics")
transactions <- get_transactions()

# Collapse the transactions to a single total sales_value per household.
aggregated_transactions <- aggregate(
  sales_value ~ household_id,
  data = transactions,
  FUN = sum
)

# Inner merge: only households present in both tables are kept.
merged_data1 <- merge(
  demographics,
  aggregated_transactions,
  by = "household_id"
)

# One row per (age, income) combination holding the summed sales_value.
aggregated_data1 <- aggregate(
  sales_value ~ age + income,
  data = merged_data1,
  FUN = sum
)

# Stacked bars: one bar per age segment, split by income band.
ggplot(aggregated_data1, aes(x = age, y = sales_value, fill = income)) +
  geom_col(position = "stack") +
  labs(
    title = "Total Sales by Age and Income Segments",
    x = "Age Segment",
    y = "Total Sales Value"
  ) +
  theme_minimal() # minimal theme keeps the chart uncluttered

Plot 2: Frequency of Purchases by Product Categories and Income Levels
# Plot 2 pipeline: transactions joined to demographics and products, counted
# per (product_category, income), restricted to the 10 busiest categories.

# Load the dplyr package
library(dplyr)

# Load the data
data("demographics")
data("products")
transactions <- get_transactions()

# Attach household demographics and product attributes to every transaction.
# left_join keeps all transactions, so rows whose household_id has no match in
# demographics carry NA income.
merged_data2 <- transactions %>%
  left_join(demographics, by = "household_id") %>%
  left_join(products, by = "product_id")

# Number of transaction rows per product category and income level.
# `.groups = "drop"` returns an ungrouped result directly, which avoids the
# "summarise() has grouped output" message and the need for ungroup().
aggregated_data2 <- merged_data2 %>%
  group_by(product_category, income) %>%
  summarise(count = n(), .groups = "drop")

# Names of the 10 product categories with the highest overall count.
top_categories <- aggregated_data2 %>%
  group_by(product_category) %>%
  summarise(total_count = sum(count)) %>%
  arrange(desc(total_count)) %>%
  head(10) %>%
  pull(product_category)

# Keep only the rows that belong to those top 10 categories.
filtered_data2 <- aggregated_data2 %>%
  filter(product_category %in% top_categories)

# Dodged bars: one cluster per category, one bar per income level.
# The y axis is capped with coord_cartesian() rather than ylim(): ylim() sets
# scale limits, which turns every count above 10000 into a missing value and
# drops that bar with a "Removed rows" warning. coord_cartesian() only zooms
# the view, so bars taller than the cap are drawn clipped at the top instead
# of silently disappearing.
ggplot(filtered_data2, aes(x = product_category, y = count, fill = income)) +
  geom_col(position = "dodge") +
  labs(
    title = "Frequency of Purchases by Top 10 Product Categories and Income Levels",
    x = "Product Category",
    y = "Frequency of Purchases"
  ) +
  coord_cartesian(ylim = c(0, 10000)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Plot 3: Coupon Redemption Rates by Household Size and Marital Status
# Plot 3 pipeline: redemptions counted per household, joined to demographics,
# then averaged per (household_size, marital_status) segment.

# Only demographics is needed here; this block reads neither products nor
# transactions, so they are not reloaded.
data("demographics")

# Number of coupon redemptions recorded for each household.
# Assumes the coupon_redemptions dataset is already available in the session.
aggregated_redemptions <- aggregate(
  coupon_upc ~ household_id,
  data = coupon_redemptions,
  FUN = length
)

# Keep every household in demographics (all.y = TRUE). Households with no
# match in aggregated_redemptions come back as NA and are counted as zero
# redemptions, so they still contribute to their segment's rate.
merged_data3 <- merge(
  aggregated_redemptions,
  demographics,
  by = "household_id",
  all.y = TRUE
)
merged_data3$coupon_upc[is.na(merged_data3$coupon_upc)] <- 0L

# Redemption rate = mean redemptions per household within each segment.
# A plain sum mostly reflects how many households fall in a segment, which is
# not a rate; the mean makes segments of different sizes comparable.
aggregated_data3 <- aggregate(
  coupon_upc ~ household_size + marital_status,
  data = merged_data3,
  FUN = mean
)

# Heatmap: tile colour encodes the per-household redemption rate.
ggplot(
  aggregated_data3,
  aes(x = household_size, y = marital_status, fill = coupon_upc)
) +
  geom_tile() +
  labs(
    title = "Coupon Redemption Rates by Household Size and Marital Status",
    x = "Household Size",
    y = "Marital Status",
    fill = "Redemptions per household"
  ) +
  scale_fill_gradient(low = "white", high = "red") +
  theme_minimal() # minimal theme keeps the chart uncluttered
