library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## Warning: package 'ggplot2' was built under R version 4.2.2
## Warning: package 'tidyr' was built under R version 4.2.2
## Warning: package 'readr' was built under R version 4.2.2
## Warning: package 'purrr' was built under R version 4.2.2
## Warning: package 'dplyr' was built under R version 4.2.2
## Warning: package 'stringr' was built under R version 4.2.2
## Warning: package 'forcats' was built under R version 4.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2) # for plotting capabilities
library(completejourney) # for data
## Warning: package 'completejourney' was built under R version 4.2.2
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(dplyr) # for additional data wrangling
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.2.2
## Loading required package: timechange
## Warning: package 'timechange' was built under R version 4.2.2
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
transactions <- get_transactions()
demographics <- completejourney::demographics
products <- completejourney::products
coupons <- completejourney::coupons
coupon_redemptions <- completejourney::coupon_redemptions
campaigns <- completejourney::campaigns
promotions <- completejourney::get_promotions()
campaign_descriptions <- completejourney::campaign_descriptions
top_selling_prods <- products %>%
merge(transactions, how = "left", on = "product_id") %>%
group_by(product_category, product_type) %>%
summarize(total_sales = sum(sales_value)) %>%
arrange(desc(total_sales))
## `summarise()` has grouped output by 'product_category'. You can override using
## the `.groups` argument.
top_selling_prods
total_df <- transactions %>%
merge(products, how = "inner", on = "product_id") %>%
merge(demographics, how = "inner", on = "household_id")
p <- total_df %>%
filter(str_detect(product_category, regex("Cheese", ignore_case = TRUE))) %>%
mutate(transaction_date = date(transaction_timestamp),
trans_week = week(transaction_timestamp),
trans_month = month(transaction_timestamp)) %>%
group_by(trans_week) %>%
summarize(total_sales = sum(sales_value),
total_quantity = sum(quantity),
total_coupon = sum(coupon_disc),
total_retail_disc = sum(retail_disc)) %>%
arrange(trans_week) %>%
ggplot(mapping = aes(trans_week, total_sales)) +
geom_line() +
geom_smooth() +
coord_cartesian(clip = "off") +
ggtitle("Cheese Sales by Week in 2017", subtitle="After decreasing for the first 20 weeks, cheese sales rebound and peak in late December.") +
scale_x_continuous(name = "Transaction Week", breaks = c(5,10,15,20,25,30,35,40,45,50)) +
scale_y_continuous(name = "Total Cheese Sales")
p +
annotate("text", x = 49.5, y = 2300, label = "Sales peak in week 51",size = 3.5) +
annotate("text", x = 25, y = 1100, label = "Sales reach a minimum in week 25", size = 3.5)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# theme(aspect.ratio=5/10, plot.subtitle = element_text(size = 11))
#, plot.margin = margin(2,2,2,2, "mm"))
total_df %>%
group_by(household_id) %>%
mutate(total_sales = sum(sales_value),
total_quantity = sum(quantity),
total_coupon = sum(coupon_disc),
total_retail_disc = sum(retail_disc)) %>%
ungroup() %>%
filter(str_detect(product_category, regex("Cheese", ignore_case = TRUE))) %>%
group_by(household_id) %>%
mutate(cheese_sales = sum(sales_value),
cheese_quantity = sum(quantity),
cheese_coupon = sum(coupon_disc),
cheese_retail_disc = sum(retail_disc)) %>%
mutate(perc_cheese_sales = cheese_sales/total_sales,
perc_cheese_quantity = cheese_quantity/total_quantity,
perc_cheese_coupon = cheese_coupon/total_coupon,
perc_cheese_rdisc = cheese_retail_disc/total_retail_disc) %>%
ggplot(mapping = aes(x = perc_cheese_sales, y = perc_cheese_coupon)) +
geom_point() +
scale_x_log10(name = "Percent Spend on Cheese", labels = scales::percent) +
scale_y_log10(name = "Percent Coupon Discount on Cheese", labels = scales::percent) +
geom_smooth(se=FALSE) +
ggtitle("Percent Coupon Discount versus Percent Spend on Cheese by Household", subtitle = "Cheese coupons appear to be effective in enlarging household share of cheese spend") +
theme(plot.margin = margin(0,2,2,2, "mm"))
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 19891 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2850 rows containing missing values (`geom_point()`).
## Plot 3
total_df %>%
filter(str_detect(product_category, regex("Cheese", ignore_case = TRUE))) %>%
group_by(product_type) %>%
summarize(cheese_sales = sum(sales_value),
cheese_quantity = sum(quantity),
cheese_coupon = sum(coupon_disc),
cheese_retail_disc = sum(retail_disc)) %>%
arrange(desc(cheese_sales)) %>%
ggplot(mapping = aes(fct_reorder(product_type, cheese_sales), cheese_sales, color = cheese_coupon)) +
geom_point() +
guides(color = guide_legend(title = "Cheese Coupon Discount")) +
scale_x_discrete(name = "Product Type") +
scale_y_continuous(name = "Total Sales on Cheese", labels = scales::dollar) +
coord_flip(clip = "off") +
ggtitle("Cheese Sales and Coupon Discounts: Top 25 Products", subtitle = "Coupons are most volume-effective on shredded, single, & cream cheese") +
theme(axis.title = element_text(size = 10),
plot.subtitle = element_text(size = 9))