library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Load the transaction records used by every plot below.
# NOTE(review): get_transactions() presumably comes from completejourney
# (attached just above) -- confirm whether it fetches data on each call.
transactions <- get_transactions()
# Auto-print the object for a quick look at its contents.
transactions
# Plot 1 ----
# Quantity of vitamins purchased per age group, restricted to the six
# vitamin product types with the highest total quantity.

# Build the joined vitamin purchase records once; both the top-six ranking
# and the per-age summary below start from these same rows. Filtering
# `products` before the join yields the same rows as filtering afterwards,
# without materialising the non-vitamin matches.
vitamin_purchases <- transactions %>%
  inner_join(
    filter(products, product_category == "VITAMINS"),
    by = "product_id"
  ) %>%
  inner_join(demographics, by = "household_id")

# Summarizing total quantity of vitamins by product type and keeping the
# six largest.
top_vitamins <- vitamin_purchases %>%
  group_by(product_type) %>%
  summarise(total_quantity = sum(quantity, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_quantity)) %>%
  slice_head(n = 6)

# Total quantity per (age, product type) for the top six vitamin types.
vitamin_data <- vitamin_purchases %>%
  filter(product_type %in% top_vitamins$product_type) %>%
  group_by(age, product_type) %>%
  summarise(total_quantity = sum(quantity, na.rm = TRUE), .groups = "drop")

# Dodged bar plot: one bar per age group within each product type.
# reorder() sorts the x axis by the mean of total_quantity across the
# age-group rows of each product type (its default summary function).
ggplot(
  vitamin_data,
  aes(
    x = reorder(product_type, total_quantity),
    y = total_quantity,
    fill = as.factor(age)
  )
) +
  geom_col(position = "dodge") +
  labs(
    title = "Types of Vitamin Consumption by Age Group",
    x = "Vitamins Type",
    y = "Total Quantity",
    fill = "Age"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
# Plot 2 ----
# Distribution of the total coupon discount per household, by age group.
# `transactions` is reused from the load at the top of the script rather
# than being fetched a second time.

# Auto-print the demographics table for inspection.
demographics

# Merge transactions with demographics and total the coupon discount for
# each household, keeping the household's age group alongside it.
coupon_data <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, household_id) %>%
  summarise(
    total_coupon_discount = sum(coupon_disc, na.rm = TRUE),
    .groups = "drop"
  )

# Visualization:
# The y axis is zoomed to 0-100 with coord_cartesian() instead of scale
# limits. Scale limits drop out-of-range rows *before* the boxplot
# statistics are computed (the previous version warned that 38 rows were
# removed), which distorts the medians, quartiles and whiskers.
# coord_cartesian() only crops the view, so every household still
# contributes to the statistics.
ggplot(coupon_data, aes(x = age, y = total_coupon_discount, fill = age)) +
  geom_boxplot(
    alpha = 0.7,
    width = 0.6,
    outlier.color = "black",
    outlier.size = 1.5
  ) +
  scale_fill_brewer(palette = "Set3") +
  scale_y_continuous(breaks = seq(0, 100, by = 10)) +
  coord_cartesian(ylim = c(0, 100)) +
  labs(
    title = "Total Coupon Discount Redeemed by Age Group",
    x = "Age Group",
    y = "Total Coupon Discount"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold")
  )
# Plot 3 ----
# Quantity per bread purchase for the best-selling bread product types,
# one panel per age group, points coloured by income.

# Bread products matched to their transactions.
bread_sales <- products %>%
  filter(product_category == "BREAD") %>%
  inner_join(transactions, by = "product_id")

# Names of the top 6 bread product types by total quantity sold
# (slice_max() keeps ties, so more than six can be returned).
top_bread_types <- bread_sales %>%
  group_by(product_type) %>%
  summarise(total_quantity = sum(quantity, na.rm = TRUE), .groups = "drop") %>%
  slice_max(total_quantity, n = 6) %>%
  pull(product_type)

# Keep only the top types and attach the demographics of each household.
top_bread_purchases <- bread_sales %>%
  filter(product_type %in% top_bread_types) %>%
  inner_join(demographics, by = "household_id")

# Scatter plot of quantity against product type.
ggplot(top_bread_purchases, aes(x = quantity, y = product_type)) +
  geom_point(aes(color = income), alpha = 0.7) +
  facet_wrap(vars(age)) +
  scale_color_brewer(palette = "Set3") +
  guides(color = guide_legend(title = "Income Group")) +
  labs(
    title = "Top Bread Products Purchased By Different Age-Groups",
    x = "Quantity Purchased",
    y = "Different Product Types of Bread"
  ) +
  theme_minimal()
```