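For this lab we are using the “completejourney” dataset. I have created the following three plots:
1. Average Spending by Age Group and Household Size
2. Customer Segmentation by Spending and Frequency
3. Total Sales Value per Quantity for Top 5 Products by Income Level
The code and the resulting plot for each of them are shown below.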
Loading the libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Alias the sample table as `transactions`; every analysis below reads from
# this name. Presumably `transactions_sample` is the sampled transactions
# dataset shipped with completejourney — TODO confirm.
transactions <- transactions_sample
1. Average Spending by Age Group and Household Size
# Mean sales_value per transaction row for every age group / household size
# combination. The inner join keeps only households that have a matching
# demographics record.
spending_by_demographics <- summarise(
  group_by(
    inner_join(transactions, demographics, by = "household_id"),
    age, household_size
  ),
  average_spending = mean(sales_value, na.rm = TRUE),
  .groups = "drop"
)

# Dodged bar chart: one cluster of bars per age group, one bar per
# household size, with the y axis formatted as dollars.
ggplot(spending_by_demographics) +
  geom_col(
    aes(x = age, y = average_spending, fill = as.factor(household_size)),
    position = "dodge"
  ) +
  scale_y_continuous(labels = scales::dollar_format()) +
  theme_minimal() +
  labs(
    x = "Age Group",
    y = "Average Spending ($)",
    fill = "Household Size",
    title = "Average Spending by Age Group and Household Size",
    subtitle = "The bar chart displays average spending across different
age groups, differentiated by household size. This helps uncover
trends such as which age groups and household sizes tend to spend more
on purchases."
  )
2. Customer Segmentation by Spending and Frequency
# Per-household spending and shopping frequency, joined to demographics so
# that each point can be coloured by household size. Households without a
# demographics record are dropped by the inner join.
customer_data <- transactions %>%
  group_by(household_id) %>%
  summarize(
    # na.rm = TRUE matches the other aggregations in this analysis, so one
    # missing sales_value cannot turn a household's whole total into NA.
    total_spending = sum(sales_value, na.rm = TRUE),
    # Count distinct baskets rather than rows: per the completejourney
    # documentation each row of `transactions` is one product within a
    # basket, so n() would measure line items, not the "Number of
    # Purchases" shown on the x axis.
    purchase_frequency = n_distinct(basket_id),
    .groups = "drop"
  ) %>%
  inner_join(demographics, by = "household_id")

# Scatter plot: one point per household; semi-transparent points keep
# overlapping households visible.
ggplot(
  customer_data,
  aes(x = purchase_frequency, y = total_spending, color = household_size)
) +
  geom_point(alpha = 0.6, size = 3) +
  labs(
    title = "Customer Segmentation by Spending and Frequency",
    subtitle = "The graph shows how customer spending increases with
purchase frequency, highlighting the impact of household size on
spending behavior. Larger households tend to shop more frequently and
spend more overall.",
    x = "Number of Purchases",
    y = "Total Spending",
    color = "Household Size"
  ) +
  theme_minimal()
3. Total Sales Value per Quantity for Top 5 Products by Income Level
# Sales and units per (income level, product), plus the revenue earned per
# unit sold. Only households with a demographics record are included.
product_sales_by_income <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(income, product_id) %>%
  summarize(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    # Sum the recorded quantity instead of counting rows: a single
    # transaction row can cover several units of the same product.
    total_quantity = sum(quantity, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    # Guard against a zero total quantity, which would otherwise yield
    # Inf/NaN and distort the plot's y scale.
    sales_value_per_quantity = if_else(
      total_quantity > 0,
      total_sales_value / total_quantity,
      NA_real_
    )
  )

# Top 5 products within each income level, ranked by total sales value.
# with_ties = FALSE caps every group at exactly five rows; ungroup() stops
# the grouping from leaking into later operations.
top_products <- product_sales_by_income %>%
  group_by(income) %>%
  slice_max(total_sales_value, n = 5, with_ties = FALSE) %>%
  ungroup()

# Horizontal dodged bars: products ordered by revenue per unit, one bar per
# income level that has the product in its top 5.
ggplot(
  top_products,
  aes(
    x = reorder(product_id, sales_value_per_quantity),
    y = sales_value_per_quantity,
    fill = income
  )
) +
  geom_col(position = position_dodge()) +
  labs(
    title = "Total Sales Value per Quantity for Top 5 Products by Income Level",
    subtitle = "The bar plot shows the sales value per quantity for the
top 5 products purchased by each income level, allowing for insights
into which products are generating higher revenue per item sold within
different income brackets.",
    x = "Product ID",
    y = "Sales Value per Quantity ($)",
    fill = "Income Level"
  ) +
  theme_minimal(base_size = 8) +
  scale_y_continuous(labels = scales::dollar_format()) +
  coord_flip()