Understanding Customer Purchasing Behavior

Plot 1: Total Sales by Age and Income Segments

# Load household demographics and the full transaction data
data("demographics")
transactions <- get_transactions()

# Total sales_value per household, then attach each household's demographics.
# merge() keeps only households present in both tables.
aggregated_transactions <- aggregate(
  sales_value ~ household_id,
  data = transactions,
  FUN = sum
)
merged_data1 <- merge(
  x = demographics,
  y = aggregated_transactions,
  by = "household_id"
)

# Total sales_value for every age x income combination
aggregated_data1 <- aggregate(
  sales_value ~ age + income,
  data = merged_data1,
  FUN = sum
)

# Stacked bars: one bar per age segment, split by income segment
ggplot(aggregated_data1, aes(x = age, y = sales_value, fill = income)) +
  geom_col(position = "stack") +
  labs(
    title = "Total Sales by Age and Income Segments",
    x = "Age Segment",
    y = "Total Sales Value"
  ) +
  theme_minimal()  # minimal theme keeps the plot uncluttered

Plot 2: Frequency of Purchases by Top 10 Product Categories and Income Levels

# Load the dplyr package
library(dplyr)

# Load the data
data("demographics")
data("products")
transactions <- get_transactions()

# Attach household demographics and product attributes to every transaction.
# left_join() keeps transaction rows that have no match in the right-hand
# table, so such rows carry NA in the joined columns (e.g. income).
merged_data2 <- transactions %>%
  left_join(demographics, by = "household_id") %>%
  left_join(products, by = "product_id")

# Count transaction rows for each product_category x income combination.
# `.groups = "drop"` returns an ungrouped result directly, so no separate
# ungroup() is needed and the "grouped output" message is not emitted.
aggregated_data2 <- merged_data2 %>%
  group_by(product_category, income) %>%
  summarise(count = n(), .groups = "drop")

# Get the top 10 product categories based on total count
top_categories <- aggregated_data2 %>%
  group_by(product_category) %>%
  summarise(total_count = sum(count)) %>%
  arrange(desc(total_count)) %>%
  head(10) %>%
  pull(product_category)

# Filter the aggregated data to include only the top 10 product categories
filtered_data2 <- aggregated_data2 %>%
  filter(product_category %in% top_categories)

# Create the plot
ggplot(filtered_data2) +
  geom_bar(
    aes(x = product_category, y = count, fill = income),
    stat = "identity",
    position = "dodge"
  ) +
  ggtitle("Frequency of Purchases by Top 10 Product Categories and Income Levels") +
  xlab("Product Category") +
  ylab("Frequency of Purchases") +
  # Zoom with coord_cartesian() instead of ylim(): ylim() sets scale limits,
  # which turns counts above 10000 into NA and drops those bars entirely
  # ("Removed 9 rows containing missing values"). coord_cartesian() keeps
  # every bar and simply clips the ones taller than the visible range.
  coord_cartesian(ylim = c(0, 10000)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Plot 3: Coupon Redemption Rates by Household Size and Marital Status

# Loading the datasets
# Only demographics and coupon_redemptions are used in this section, so the
# products table and the full transactions data are not reloaded here.
data("demographics")

# Assuming coupon_redemptions dataset is available
# Number of coupon_redemptions rows per household (stored in `coupon_upc`)
aggregated_redemptions <- aggregate(
  coupon_upc ~ household_id,
  data = coupon_redemptions,
  FUN = length
)

# Merging the datasets; merge() keeps only households present in both tables
merged_data3 <- merge(aggregated_redemptions, demographics, by = "household_id")

# Total redemptions for each household_size x marital_status combination.
# NOTE(review): this is a summed count, not a rate; households without any
# redemption never enter the aggregation. Confirm whether a true rate
# (e.g. redemptions per household) is what the title intends.
aggregated_data3 <- aggregate(
  coupon_upc ~ household_size + marital_status,
  data = merged_data3,
  FUN = sum
)

# Creating the plot (heatmap)
ggplot(aggregated_data3) +
  geom_tile(aes(x = household_size, y = marital_status, fill = coupon_upc)) +
  ggtitle("Coupon Redemption Rates by Household Size and Marital Status") +
  xlab("Household Size") +
  ylab("Marital Status") +
  # Label the legend explicitly; the default would show the column name
  # "coupon_upc", although the fill value is a redemption count.
  labs(fill = "Redemptions") +
  scale_fill_gradient(low = "white", high = "red") +
  theme_minimal()  # minimal theme keeps the heatmap uncluttered