#Loading library files
library(tidyverse)
library(completejourney)
library(dplyr)
data(package = 'completejourney')
# Plot 1 ----
# Monthly purchase quantity of the households that redeem the most coupons,
# broken down by income range.
p1_products <- products
p1_transactions <- transactions_sample
p1_coupons <- coupon_redemptions
p1_hshld_info <- demographics

# Coupon redemptions per household, keeping the five heaviest redeemers inside
# each income range. Each row of `p1_coupons` is counted once; joining it onto
# the transactions by household would repeat every redemption once per
# transaction and inflate the count. The semi join only keeps households that
# also appear in the transaction sample.
top_five_hslds <- p1_coupons %>%
  semi_join(p1_transactions, by = "household_id") %>%
  inner_join(p1_hshld_info, by = "household_id") %>%
  count(income, household_id, name = "total_coupons_redeemed") %>%
  arrange(desc(total_coupons_redeemed)) %>%
  group_by(income) %>%
  slice_head(n = 5)

# Quantity purchased per month by each selected household.
# * `%in%` tests membership; `==` would recycle the household vector
#   element-wise and keep only accidental positional matches.
# * Semi joins keep the same product restrictions as before (product is listed
#   in `products` and is covered by at least one coupon) without duplicating
#   transaction rows, so `sum(quantity)` is not multiplied by the join fan-out.
category_nums <- p1_transactions %>%
  filter(household_id %in% top_five_hslds$household_id) %>%
  semi_join(p1_products, by = "product_id") %>%
  semi_join(coupons, by = "product_id") %>%
  group_by(
    Months = lubridate::month(transaction_timestamp, label = TRUE),
    household_id
  ) %>%
  summarize(n_transactions = sum(quantity), .groups = "drop_last") %>%
  arrange(desc(n_transactions))

# One row per selected household and month, carrying the household's income.
data1 <- top_five_hslds %>%
  inner_join(category_nums, by = "household_id")
# Column chart of `n_transactions` per income range, one panel per month.
# The x tick labels are hidden; the fill legend carries the income ranges.
p1 <- ggplot(
  data = data1,
  mapping = aes(x = income, y = n_transactions, fill = income)
) +
  geom_col() +
  facet_wrap(vars(Months)) +
  theme(axis.text.x = element_blank()) +
  labs(
    x = "Income Ranges",
    y = "Number of Transactions",
    fill = "Income",
    title = "Transactions Made Each Month",
    subtitle = "Grouped by each Income Range",
    caption = paste0(
      "This takes data from the top 5\n",
      "households that redeem the most coupons,\n",
      "and their number of transactions made each month."
    )
  )
p1

# Plot 2 ----
# Sales value of the three best-selling product categories of every age group,
# with the total quantity sold printed next to each category's bar.
p2_transactions <- transactions_sample
p2_demographics <- demographics
p2_produtcs <- products

# Quantity and sales value per (age, product category). After `summarize()` the
# result is still grouped by `age`, so `slice(1:3)` keeps the three categories
# with the highest quantity within each age group.
data2 <- p2_transactions %>%
  inner_join(p2_demographics, by = "household_id") %>%
  inner_join(p2_produtcs, by = "product_id") %>%
  group_by(age, product_category) %>%
  summarize(
    total_sales = sum(quantity),
    total_value = sum(sales_value),
    .groups = "drop_last"
  ) %>%
  arrange(desc(total_sales)) %>%
  slice(1:3)

# Same rows as `data2`, reduced to the quantity column. Derived from `data2`
# instead of re-running the identical join/aggregate pipeline.
data_pc <- data2 %>%
  select(age, product_category, total_sales_ = total_sales)

# Totals per product category across all age groups. Computed from the data
# rather than from hard-coded row positions, so the labels stay attached to
# the right category when the underlying data or its ordering changes.
category_totals <- data2 %>%
  group_by(product_category) %>%
  summarize(
    total_sales = sum(total_sales),
    total_value = sum(total_value),
    .groups = "drop"
  )

# Per-category quantity totals as a named vector (names are the product
# categories), kept under the original `label_data` name.
label_data <- setNames(
  category_totals$total_sales,
  category_totals$product_category
)

p2 <- ggplot(data2, aes(y = product_category, x = total_value, fill = age)) +
  geom_col() +
  labs(
    title = "Total Transactions Quantity and Value",
    subtitle = "Sorted by Top 3 Product Categories for Each Age Group",
    caption = paste0(
      "This takes data from the top 3 product categories based on number\n",
      "of transactions, and their total sale value graphed by each age group.\n",
      "The total number of transactions is labeled in black for its own product category."
    ),
    x = "Sales Value (in $)",
    y = "Type of Product",
    fill = "Age Group"
  ) +
  xlim(c(0, 15000)) +
  theme(axis.text.y = element_text(angle = 0, vjust = .5, hjust = 1)) +
  # One label per category, placed at the end of the stacked bar (the summed
  # sales value). The layer has its own data, so label and position are matched
  # by category instead of by vector position.
  geom_text(
    data = category_totals,
    mapping = aes(x = total_value, y = product_category, label = total_sales),
    inherit.aes = FALSE,
    vjust = .5, hjust = -.5, size = 3, color = "black", fontface = "bold"
  )
p2

# Plot 3 ----
# Weekly sales value of the five highest-grossing departments, split by
# marital status.
p3_transactions <- transactions_sample
p3_demographics <- demographics
p3_produtcs <- products
# Replace every missing demographic value with the literal "Unknown".
# NOTE(review): this relies on "Unknown" being an acceptable value for each
# column that contains NA (factor columns need it as an existing level) -
# confirm against the `demographics` column types.
p3_demographics[is.na(p3_demographics)] <- "Unknown"

# Transactions enriched with household demographics and product attributes;
# built once and shared by both summaries below.
p3_sales <- p3_transactions %>%
  inner_join(p3_demographics, by = "household_id") %>%
  inner_join(p3_produtcs, by = "product_id")

# The five departments with the highest total sales value.
top_dept <- p3_sales %>%
  group_by(department) %>%
  summarize(total_sales = sum(sales_value)) %>%
  arrange(desc(total_sales)) %>%
  slice_head(n = 5)

# Weekly sales value per department and marital status, restricted to the
# departments selected in `top_dept` rather than a hard-coded list of names,
# so the chart keeps showing the actual top five if the data changes.
data3 <- p3_sales %>%
  filter(department %in% top_dept$department) %>%
  group_by(department, marital_status, week) %>%
  summarize(total_value = sum(sales_value), .groups = "drop_last") %>%
  arrange(desc(total_value))
# Line chart of weekly sales value, one line per department and one panel per
# marital status.
p3 <- ggplot(data3, aes(x = week, y = total_value, color = department)) +
  geom_line() +
  labs(
    title = "Total Transaction Value Throughout the Year",
    subtitle = "Sorted by Product Department and Marital Status",
    caption = paste0(
      "This takes data from the top 5 product departments\n",
      "based on total sales value of the transactions, and their total\n",
      "sale value graphed for the year (52 Weeks) for each marital status."
    ),
    x = "Week Number",
    y = "Sales Value (in $)",
    color = "Product Department"
  ) +
  # A single x scale carrying the limits. Adding `scale_x_continuous()` and
  # then `xlim()` registers two x scales, the second replacing the first with
  # a "Scale for x is already present" message.
  scale_x_continuous(limits = c(0, 52)) +
  facet_wrap(~marital_status)
p3
