library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(stringr)
library(tidyr)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Fetch the complete transaction and promotion tables. The helpers are called
# through the package namespace so their origin is obvious at the call site.
transactions <- completejourney::get_transactions()
promotions <- completejourney::get_promotions()
# Bar chart: total discount (retail + coupon + coupon-match) per age group.
#
# The per-transaction discounts are summed inside `summarize()` so each age
# group yields exactly one row. Without the `sum()` the summary returned one
# row per transaction and the chart relied on stacking hundreds of thousands
# of bar segments to arrive at the same height.
demographics %>%
  inner_join(transactions, by = "household_id") %>%
  # NOTE(review): `products` does not appear to contribute any column used
  # below; the join is kept so the set of transactions counted is unchanged.
  # Confirm whether it can be dropped.
  inner_join(products, by = "product_id") %>%
  group_by(age) %>%
  summarize(
    total_discount = sum(
      retail_disc + coupon_disc + coupon_match_disc,
      # Rows with a missing discount are skipped rather than turning the
      # whole age group into NA.
      na.rm = TRUE
    ),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = age, y = total_discount, fill = age)) +
  geom_col() +
  scale_y_continuous(name = "Total Discount", labels = scales::dollar) +
  scale_x_discrete(name = "Age Group") +
  labs(title = "Total Discount by Age Group")

# Stacked bar chart: summed coupon discount per age group, split by household
# composition.
#
# Uses the full `transactions` table, like the other charts in this file,
# instead of `transactions_sample`, so the bars are real totals rather than
# totals over a sample.
#
# NOTE(review): only `coupon_disc` is summed here although the labels say
# "Total Discount"; confirm whether retail and coupon-match discounts should
# be included as well.
transactions %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, household_comp) %>%
  summarize(
    total_discount = sum(coupon_disc, na.rm = TRUE),
    # Drop the grouping explicitly: nothing downstream needs it, and it
    # silences the "grouped output" message.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = age, y = total_discount, fill = household_comp)) +
  geom_col() +
  guides(fill = guide_legend(title = "Household Composition")) +
  labs(
    title = "Total Discount by each Age Group classified by Household composition",
    x = "Age Group",
    y = "Total Discount"
  )

# Faceted bar chart: total discount per age group, one panel per department.
#
# Discounts are aggregated to one row per department/age pair before plotting.
# The y-axis is zoomed with `coord_cartesian()` rather than with scale limits:
# scale limits censor every stacked segment that ends above the limit (the
# original chart dropped 204954 rows that way), whereas a coordinate zoom only
# changes the visible window. Bars taller than 15000 are therefore cut off at
# the top of their panel instead of being silently altered.
transactions %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id") %>%
  mutate(total_discount = retail_disc + coupon_disc + coupon_match_disc) %>%
  # Keeps only discounted transactions; this also removes rows whose discount
  # is NA, so the sum below needs no `na.rm`.
  filter(total_discount > 0) %>%
  group_by(department, age) %>%
  summarize(total_discount = sum(total_discount), .groups = "drop") %>%
  ggplot(aes(x = age, y = total_discount, fill = age)) +
  geom_col() +
  facet_wrap(~department) +
  scale_y_continuous(breaks = seq(0, 15000, by = 3000)) +
  coord_cartesian(ylim = c(0, 15000)) +
  labs(
    title = "Total Discount Per Age Group across departments",
    x = "Age Group",
    y = "Total Discount"
  ) +
  # Age labels are hidden on the x-axis; the fill legend identifies the groups.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Linear trend of sales value against coupon discount, one fitted line per
# `kids_count` value, restricted to transactions that used a coupon.
#
# The join key and the smoothing formula are spelled out so the result does
# not depend on which column names the two tables happen to share, and so no
# "Joining, by" / "using formula" messages are emitted.
transactions %>%
  inner_join(demographics, by = "household_id") %>%
  filter(coupon_disc > 0) %>%
  ggplot(aes(x = coupon_disc, y = sales_value, color = kids_count)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  ggtitle("Transaction Sales Value by Coupon Discount and Number of Kids")

# Point chart: summed coupon discount per age range, coloured by brand.
#
# The subtitle previously said "per income level" although the x-axis is
# `age`; it now matches what is plotted.
#
# NOTE(review): `total_discount` is the sum of `coupon_disc` only; confirm
# whether retail and coupon-match discounts belong in this "total" too.
transactions %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id") %>%
  group_by(age, brand) %>%
  summarize(
    total_discount = sum(coupon_disc, na.rm = TRUE),
    # The plot does not need grouped data, so the grouping is dropped.
    .groups = "drop"
  ) %>%
  ggplot(aes(x = age, y = total_discount, color = brand)) +
  geom_point() +
  scale_y_continuous(name = "Total Discount", labels = scales::dollar) +
  labs(
    title = "National vs Private Total Discount per Age Range",
    subtitle = "Comparing total discount of two competing brands per age range",
    x = "Age Range",
    color = "Brand"
  )

# Linear trend of sales value against retail discount, one panel per
# department, for transactions with a sales value above 10.
#
# The former `group_by(manufacturer_id)` was removed: the filter is a plain
# row-wise condition and ggplot ignores dplyr grouping, so it had no effect.
# The join key and smoothing formula are explicit to avoid relying on shared
# column names and to suppress the informational messages.
transactions %>%
  inner_join(products, by = "product_id") %>%
  filter(sales_value > 10) %>%
  ggplot(aes(x = retail_disc, y = sales_value)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_wrap(~department) +
  ggtitle("Retail Discount vs Sales Value by Department")
