Load in packages

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.2.2

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2

## Warning: package 'ggplot2' was built under R version 4.2.2

## Warning: package 'tidyr' was built under R version 4.2.2

## Warning: package 'readr' was built under R version 4.2.2

## Warning: package 'purrr' was built under R version 4.2.2

## Warning: package 'dplyr' was built under R version 4.2.2

## Warning: package 'stringr' was built under R version 4.2.2

## Warning: package 'forcats' was built under R version 4.2.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggplot2)          # for plotting capabilities
library(completejourney)  # for data

## Warning: package 'completejourney' was built under R version 4.2.2

## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.

library(dplyr)            # for additional data wrangling
library(lubridate)

## Warning: package 'lubridate' was built under R version 4.2.2

## Loading required package: timechange

## Warning: package 'timechange' was built under R version 4.2.2

## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Load in data

transactions <- get_transactions()
demographics <- completejourney::demographics
products <- completejourney::products
coupons <- completejourney::coupons
coupon_redemptions <- completejourney::coupon_redemptions
campaigns <- completejourney::campaigns
promotions <- completejourney::get_promotions()
campaign_descriptions <- completejourney::campaign_descriptions

Choose product category to focus in on

top_selling_prods <- products %>%
  merge(transactions, how = "left", on = "product_id") %>%
  group_by(product_category, product_type) %>%
  summarize(total_sales = sum(sales_value)) %>%
  arrange(desc(total_sales))

## `summarise()` has grouped output by 'product_category'. You can override using
## the `.groups` argument.

top_selling_prods

Cheese is in the top 10 selling product categories: Analysis on Cheese Sales

Create dataframe with products & demographics data

total_df <- transactions %>%
  merge(products, how = "inner", on = "product_id") %>%
  merge(demographics, how = "inner", on = "household_id")

Create dataframe with cheese sales by Date

p <- total_df %>%
  filter(str_detect(product_category, regex("Cheese", ignore_case = TRUE))) %>%
  mutate(transaction_date = date(transaction_timestamp),
         trans_week = week(transaction_timestamp),
         trans_month = month(transaction_timestamp)) %>%
  group_by(trans_week) %>%
  summarize(total_sales = sum(sales_value),
            total_quantity = sum(quantity),
            total_coupon = sum(coupon_disc),
            total_retail_disc = sum(retail_disc)) %>%
  arrange(trans_week) %>%
  ggplot(mapping = aes(trans_week, total_sales)) +
  geom_line() +
  geom_smooth() +
  coord_cartesian(clip = "off") +
  ggtitle("Cheese Sales by Week in 2017", subtitle="After decreasing for the first 20 weeks, cheese sales rebound and peak in late December.") +
  scale_x_continuous(name = "Transaction Week", breaks = c(5,10,15,20,25,30,35,40,45,50)) +
  scale_y_continuous(name = "Total Cheese Sales")

Plot 1

p +
  annotate("text", x = 49.5, y = 2300, label = "Sales peak in week 51",size = 3.5) +
  annotate("text", x = 25, y = 1100, label = "Sales reach a minimum in week 25", size = 3.5)

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

 # theme(aspect.ratio=5/10, plot.subtitle = element_text(size = 11)) 
        #, plot.margin = margin(2,2,2,2, "mm"))

Plot 2

total_df %>%
  group_by(household_id) %>%
  mutate(total_sales = sum(sales_value),
         total_quantity = sum(quantity),
          total_coupon = sum(coupon_disc),
          total_retail_disc = sum(retail_disc)) %>%
  ungroup() %>%
  filter(str_detect(product_category, regex("Cheese", ignore_case = TRUE))) %>%
  group_by(household_id) %>%
  mutate(cheese_sales = sum(sales_value),
            cheese_quantity = sum(quantity),
            cheese_coupon = sum(coupon_disc),
            cheese_retail_disc = sum(retail_disc)) %>%
  mutate(perc_cheese_sales = cheese_sales/total_sales,
         perc_cheese_quantity = cheese_quantity/total_quantity,
         perc_cheese_coupon = cheese_coupon/total_coupon,
         perc_cheese_rdisc = cheese_retail_disc/total_retail_disc) %>%
  ggplot(mapping = aes(x = perc_cheese_sales, y = perc_cheese_coupon)) +
  geom_point() +
  scale_x_log10(name = "Percent Spend on Cheese", labels = scales::percent) +
  scale_y_log10(name = "Percent Coupon Discount on Cheese", labels = scales::percent) +
  geom_smooth(se=FALSE) +
  ggtitle("Percent Coupon Discount versus Percent Spend on Cheese by Household", subtitle = "Cheese coupons appear to be effective in enlarging household share of cheese spend") +
  theme(plot.margin = margin(0,2,2,2, "mm"))

## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

## Warning: Removed 19891 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 2850 rows containing missing values (`geom_point()`).

## Plot 3

total_df %>%
  filter(str_detect(product_category, regex("Cheese", ignore_case = TRUE))) %>%
  group_by(product_type) %>%
  summarize(cheese_sales = sum(sales_value),
            cheese_quantity = sum(quantity),
            cheese_coupon = sum(coupon_disc),
            cheese_retail_disc = sum(retail_disc)) %>%
  arrange(desc(cheese_sales)) %>%
  ggplot(mapping = aes(fct_reorder(product_type, cheese_sales), cheese_sales, color = cheese_coupon)) +
  geom_point() +
  guides(color = guide_legend(title = "Cheese Coupon Discount")) +
  scale_x_discrete(name = "Product Type") +
  scale_y_continuous(name = "Total Sales on Cheese", labels = scales::dollar) +
  coord_flip(clip = "off") +
  ggtitle("Cheese Sales and Coupon Discounts: Top 25 Products", subtitle = "Coupons are most volume-effective on shredded, single, & cream cheese") +
  theme(axis.title = element_text(size = 10),
        plot.subtitle = element_text(size = 9))

Module 5 Lab: Laura Neltner

Load in packages

Load in data

Choose product category to focus in on

Cheese is in the top 10 selling product categories: Analysis on Cheese Sales

Create dataframe with products & demographics data

Create dataframe with cheese sales by Date

Plot 1

Plot 2