# Load necessary packages
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Plot 1
# Total cheese sales per household income bracket.
# Sampled transactions are joined to product and demographic data, rows whose
# product category mentions "cheese" (case-insensitive) are kept, and
# sales_value is summed per income level. The bracket with the largest total
# is flagged so it can be coloured differently and labelled with its value.
cheese_sales_income <- transactions_sample %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  filter(grepl("cheese", product_category, ignore.case = TRUE)) %>%
  group_by(income) %>%
  summarise(total_sales_value = sum(sales_value, na.rm = TRUE)) %>%
  arrange(desc(total_sales_value)) %>%
  mutate(
    highlight = ifelse(
      total_sales_value == max(total_sales_value),
      "highlight",
      "normal"
    )
  )

# Bars are ordered from highest to lowest total. The title and y-axis say
# "Total" because the plotted value is a sum, not a mean.
ggplot(
  cheese_sales_income,
  aes(
    x = reorder(income, -total_sales_value),
    y = total_sales_value,
    fill = highlight
  )
) +
  geom_col() +
  scale_fill_manual(values = c("highlight" = "pink", "normal" = "blue")) +
  # Only the top bracket gets a dollar label above its bar.
  geom_text(
    aes(label = scales::dollar(total_sales_value)),
    data = cheese_sales_income %>% filter(highlight == "highlight"),
    vjust = -0.5,
    size = 4,
    color = "black"
  ) +
  labs(
    title = "Total Spent on Cheese by Income Level (2017)",
    x = "Income Level",
    y = "Total Spent"
  ) +
  scale_y_continuous(labels = scales::dollar_format()) +
  theme_minimal() +
  theme(legend.position = "none")

# Plot 2
# Monthly count of cheese coupon redemptions.
# A coupon is treated as a cheese coupon when at least one product it covers
# has "cheese" in its product category (case-insensitive).
# NOTE(review): relies on coupons having coupon_upc/product_id/campaign_id and
# coupon_redemptions having coupon_upc/campaign_id/redemption_date, as in the
# completejourney documentation - confirm against the installed version.
cheese_product_ids <- products %>%
  filter(grepl("cheese", product_category, ignore.case = TRUE)) %>%
  distinct(product_id)

# semi_join() only tests for a match, so a coupon that covers many cheese
# products still appears once per (coupon_upc, campaign_id).
cheese_coupon_keys <- coupons %>%
  semi_join(cheese_product_ids, by = "product_id") %>%
  distinct(coupon_upc, campaign_id)

# Exactly one row per redemption: filtering with semi_join() never duplicates
# rows, unlike a many-to-many inner_join() through the coupon/product table.
cheese_redemptions <- coupon_redemptions %>%
  semi_join(cheese_coupon_keys, by = c("coupon_upc", "campaign_id"))

# Bucket by the month in which the coupon was redeemed. Joining to the
# transaction table for a date would multiply each household's count by its
# number of transaction rows and date the usage by shopping activity instead.
cheese_coupons <- cheese_redemptions %>%
  mutate(month = floor_date(redemption_date, "month")) %>%
  group_by(month) %>%
  summarise(coupon_redemptions = n())

# Month(s) with the highest redemption count; ties are all kept.
peak_month <- cheese_coupons %>%
  filter(coupon_redemptions == max(coupon_redemptions))

ggplot(cheese_coupons, aes(x = month, y = coupon_redemptions)) +
  geom_line(color = "purple", linewidth = 1.2) +
  # Mark and label the peak month on top of the line.
  geom_point(
    data = peak_month,
    aes(x = month, y = coupon_redemptions),
    color = "orange",
    size = 5
  ) +
  geom_text(
    data = peak_month,
    aes(x = month, y = coupon_redemptions, label = coupon_redemptions),
    vjust = -1,
    size = 3,
    color = "black"
  ) +
  labs(
    title = "Total Cheese Coupon Usage by Month (2017)",
    x = "Month",
    y = "Coupon Uses"
  ) +
  theme_minimal()

# Plot 3
# Mean sales_value of cheese transaction rows, grouped by household size.
# Sampled transactions are joined to product and demographic data and limited
# to rows whose product category mentions "cheese" (case-insensitive). The
# "5+" household size is always the highlighted bar, whatever its rank.
avg_cheese_spending_hh <- transactions_sample %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  filter(grepl("cheese", product_category, ignore.case = TRUE)) %>%
  group_by(household_size) %>%
  summarise(avg_spending = mean(sales_value, na.rm = TRUE)) %>%
  mutate(
    highlight = ifelse(household_size == "5+", "highlight", "normal")
  ) %>%
  arrange(desc(avg_spending))

# Bars run from highest to lowest average; the legend is hidden because the
# fill colour only marks the highlighted bar.
ggplot(
  avg_cheese_spending_hh,
  aes(
    x = reorder(household_size, -avg_spending),
    y = avg_spending,
    fill = highlight
  )
) +
  geom_col() +
  scale_fill_manual(
    values = c("highlight" = "hotpink", "normal" = "lightblue")
  ) +
  # The label layer narrows the plot data to the highlighted row only.
  geom_text(
    aes(label = scales::dollar(avg_spending)),
    data = function(d) filter(d, highlight == "highlight"),
    color = "black",
    size = 4,
    vjust = -0.5
  ) +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Cheese Spending by Household Size (2017)",
    x = "Household Size",
    y = "Average Spent per Transaction"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
