library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
The bar plot shows the total quantity purchased by each age group, with each bar broken down by household size.
# Stacked horizontal bar chart: total quantity purchased per age group,
# with each bar split by household size.
transactions_sample %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, household_size) %>%
  # Drop the grouping explicitly so the result is ungrouped and summarise()
  # does not emit its "has grouped output by" message.
  summarize(total_quant = sum(quantity), .groups = "drop") %>%
  # No arrange() step: sorting the rows does not change how the bars are drawn.
  ggplot(aes(x = total_quant, y = age, fill = household_size)) +
  # geom_col() draws bars whose length is the value in the data.
  geom_col() +
  scale_x_continuous(breaks = seq(0, 1500000, by = 300000)) +
  labs(
    title = "Total Quantity Purchased by each age-group & Household Size division",
    x = "Total Quantity Purchased",
    y = "Age Group",
    fill = "Household Size"
  )
## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.
The violin plot shows the distribution of loyalty price per age group for the Grocery department, faceted by brand.
# Violin plot: distribution of the per-unit loyalty price by age group for
# GROCERY products, with one facet column per brand.
transactions_sample %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id") %>%
  filter(department == "GROCERY") %>%
  mutate(loyalty_price = (sales_value + coupon_match_disc) / quantity) %>%
  # A zero quantity makes the division return Inf (or NaN). Inf would pass a
  # plain `> 0` test, so also require a finite price before plotting.
  filter(is.finite(loyalty_price), loyalty_price > 0) %>%
  ggplot(aes(x = age, y = loyalty_price, fill = age)) +
  geom_violin(color = "red") +
  facet_grid(department ~ brand) +
  scale_y_continuous(breaks = seq(0, 65, by = 10)) +
  labs(
    title = "Loyalty Price Per Age Group for Grocery Department with Brand Division",
    x = "Age Group",
    y = "Loyalty Price"
  ) +
  # The fill duplicates the x axis, so the legend is hidden.
  theme(legend.position = "none")
The scatter plot shows the quantity purchased of each pizza product type, with one panel per age group and points colored by income group.
# Scatter plot: quantity purchased for every product type whose name contains
# "PIZZA", one panel per age group, points coloured by income group.
products %>%
  filter(str_detect(product_type, "PIZZA")) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  ggplot(aes(x = quantity, y = product_type)) +
  geom_point(aes(color = income)) +
  facet_wrap(~ age) +
  # Twelve colours are supplied. "cream" is not a recognised R colour name and
  # fails with "invalid color name" when it is used, so the last entry is
  # given as a hex code for a cream shade instead.
  scale_color_manual(
    values = c(
      "red", "blue", "green", "darkmagenta", "maroon", "firebrick",
      "gold", "orange", "black", "darkblue", "darkgreen", "#FFFDD0"
    )
  ) +
  labs(
    title = "Pizza Products Quantity Purchased By Different Age-Groups",
    x = "Quantity Purchased",
    y = "Different Product Types of Pizza",
    color = "Income Group"
  )