Prerequisites

library(ggplot2)
library(forcats)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggrepel)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(wesanderson)

Spending and Quantity by Month

# Fetch the transactions table with get_transactions() (presumably provided
# by the completejourney package attached above).
transactions <- get_transactions()

# Monthly totals of sales value and quantity. The inner joins keep only
# transactions that have a matching row in both `demographics` and `products`.
df <- demographics %>%
  inner_join(transactions, by = 'household_id') %>%
  inner_join(products, by = 'product_id') %>%
  # Grouping on a computed column creates `month` (labelled month of the
  # transaction timestamp) and groups by it in a single step.
  group_by(month = month(transaction_timestamp, label = TRUE)) %>%
  summarise(spend = sum(sales_value), qty = sum(quantity))

# Dual-axis chart of monthly spend (columns) and quantity sold (points joined
# by a path). Quantity is divided by a fixed ratio so it can be drawn on the
# spend axis; the secondary axis multiplies by the same ratio to label it in
# original units.
qty_axis_ratio <- 50

ggplot(df, aes(x = month)) +
  geom_col(aes(y = spend, fill = 'Total Net Spend ($)')) +
  geom_point(aes(y = qty / qty_axis_ratio)) +
  # group = 1 joins all months into a single path.
  geom_path(
    aes(y = qty / qty_axis_ratio, group = 1, colour = 'Quantity Sold')
  ) +
  scale_y_continuous(
    name = 'Total Net Spend ($)',
    sec.axis = sec_axis(~ . * qty_axis_ratio, name = 'Quantity Sold')
  ) +
  # The constant-valued fill/colour aesthetics exist only to produce legend
  # entries; the manual scales give them their actual colours.
  scale_fill_manual(name = '', values = c('Total Net Spend ($)' = 'navy')) +
  scale_colour_manual(name = '', values = c('Quantity Sold' = 'orange')) +
  labs(
    title = 'Spending and Quantity by Month',
    subtitle =
'The data below shows the total net amount in dollars spent
and the net quantity sold by month in the year 2017.',
    x = 'Month'
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    axis.title = element_text(size = 9),
    axis.text = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key.width = unit(0.5, 'cm')
  )

Top Product Categories

# Net sales for every (product category, age group) pair, keeping the 75
# pairs with the highest sales for the chart that follows.
# NOTE(review): head(75) truncates on category/age pairs, not on whole
# categories, so a category can be plotted with only some of its age groups
# and its bar will understate the category total -- confirm this is intended.
df <- transactions %>%
  inner_join(products, by = 'product_id') %>%
  inner_join(demographics, by = 'household_id') %>%
  group_by(product_category, age) %>%
  # Drop the grouping explicitly: the result no longer stays grouped by
  # product_category and summarise() stops emitting its regrouping message.
  summarise(sales = sum(sales_value), .groups = 'drop') %>%
  arrange(desc(sales)) %>%
  head(75)
# Horizontal stacked bars: one bar per product category, ordered by the
# category's summed sales, with segments filled by age group.
ggplot(
  data = df,
  mapping = aes(
    x = reorder(product_category, sales, FUN = sum),
    y = sales,
    fill = age
  )
) +
  geom_col() +
  # Flip the axes so the category names run along the vertical axis.
  coord_flip() +
  guides(fill = guide_legend(title = "Age Group")) +
  labs(
    title = 'Top Product Categories',
    subtitle = 
'The data shows the age groups for the most popular product 
categories sold in 2017.',
    # x/y are named before the flip, so x still refers to the categories.
    x = 'Product Category',
    y = 'Total Net Sales ($)'
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    plot.subtitle = element_text(size = 9),
    axis.text = element_text(size = 8)
  )

Number of Products Purchased by Hour of Day

# Items bought per basket, tagged with the hour of purchase and the
# household size. `transactions` is reused from the get_transactions() call
# made earlier in this script instead of being fetched a second time.
df <- transactions %>%
  inner_join(products, by = 'product_id') %>%
  inner_join(demographics, by = 'household_id') %>%
  mutate(hour = hour(transaction_timestamp)) %>%
  # NOTE(review): this removes every transaction whose hour is 0 -- confirm
  # the exclusion is intended.
  filter(hour != 0) %>%
  group_by(basket_id, hour, household_size) %>%
  # Drop the grouping explicitly so the result does not stay grouped by
  # basket_id and hour, and summarise() emits no regrouping message.
  summarise(qty = sum(quantity), .groups = 'drop') %>%
  arrange(desc(qty))
# Stacked columns of purchased quantity for each hour, filled by household
# size. Each row of `df` contributes one segment to its hour's column.
ggplot(df, aes(x = hour, y = qty, fill = household_size)) +
  geom_col() +
  # One axis break per whole hour.
  scale_x_continuous(breaks = seq(1, 24, by = 1)) +
  labs(
    title = 'Number of Products Purchased by Hour of Day',
    subtitle = 
'The data below shows the net quantity of products purchased in 2017
by the hour of the day for each household size.',
    x = 'Hour of Day',
    y = 'Net Items Purchased'
  ) +
  theme(
    plot.title = element_text(face = 'bold', size = 20),
    axis.text.x = element_text(size = 7)
  )