library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

The bar plot shows the total quantity purchased by each age group, with each bar broken down by household size.

# Stacked horizontal bar chart: total quantity purchased per age group,
# with each bar split by household size. The inner join keeps only
# transactions whose household has a row in `demographics`.
transactions_sample %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, household_size) %>%
  # `.groups = "drop"` returns an ungrouped result and avoids the
  # "`summarise()` has grouped output by 'age'" message.
  summarize(total_quant = sum(quantity), .groups = "drop") %>%
  ggplot(aes(x = total_quant, y = age, fill = household_size)) +
  # geom_col() plots the pre-computed totals as-is; it is the idiomatic
  # replacement for geom_bar(stat = "identity").
  geom_col() +
  guides(fill = guide_legend(title = "Household Size")) +
  scale_x_continuous(breaks = seq(0, 1500000, by = 300000)) +
  labs(
    title = "Total Quantity Purchased by each age-group & Household Size division",
    x = "Total Quantity Purchased",
    y = "Age Group"
  )


The violin plot shows the distribution of the loyalty price per age group for the Grocery department, faceted by brand.

# Violin plot of per-unit loyalty price by age group for GROCERY items,
# faceted by brand. Transactions are matched to household demographics
# and product details with inner joins, so unmatched rows are dropped.
transactions_sample %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id") %>%
  # Drop zero-quantity rows before dividing: x / 0 yields Inf (or NaN),
  # and Inf would otherwise pass the `loyalty_price > 0` filter below.
  filter(department == "GROCERY", quantity != 0) %>%
  mutate(loyalty_price = (sales_value + coupon_match_disc) / quantity) %>%
  filter(loyalty_price > 0) %>%
  ggplot(aes(x = age, y = loyalty_price, fill = age)) +
  geom_violin(color = "red") +
  facet_grid(department ~ brand) +
  scale_y_continuous(breaks = seq(0, 65, by = 10)) +
  labs(
    title = "Loyalty Price Per Age Group for Grocery Department with Brand Division",
    x = "Age Group",
    y = "Loyalty Price"
  ) +
  # The fill colour only mirrors the x axis, so the legend is redundant.
  theme(legend.position = "none")


The scatter plot shows the quantity purchased of each pizza product type, with one panel per age group and points colored by income group.

# Scatter plot of quantity purchased for every product type containing
# "PIZZA", with one panel per age group and points coloured by income.
products %>%
  filter(str_detect(product_type, "PIZZA")) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  ggplot(aes(x = quantity, y = product_type)) +
  geom_point(aes(color = income)) +
  facet_wrap(~ age) +
  # One colour per income level. The last entry was 'cream', which is not
  # a colour name R recognises and raises an "invalid color name" error as
  # soon as it is needed; the hex code for cream is used instead.
  scale_color_manual(
    values = c(
      "red", "blue", "green", "darkmagenta", "maroon", "firebrick",
      "gold", "orange", "black", "dark blue", "dark green", "#FFFDD0"
    )
  ) +
  labs(
    title = "Pizza Products Quantity Purchased By Different Age-Groups",
    x = "Quantity Purchased",
    y = "Different Product Types of Pizza"
  ) +
  guides(color = guide_legend(title = "Income Group"))