Prerequisites
library(ggplot2)
library(forcats)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggrepel)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(wesanderson)
Spending and Quantity by Month
# Retrieve the transactions table used throughout the report.
transactions <- get_transactions()

# Monthly totals of sales value and quantity.
# Both joins are inner joins, so only transactions that match a row in
# `demographics` (on household_id) and a row in `products` (on product_id)
# contribute to the totals.
df <- demographics %>%
  inner_join(transactions, by = 'household_id') %>%
  inner_join(products, by = 'product_id') %>%
  # Derive the labelled month inside group_by(); the groups (and therefore
  # the output rows) follow the month factor's level order.
  group_by(month = month(transaction_timestamp, label = TRUE)) %>%
  summarise(spend = sum(sales_value), qty = sum(quantity))
# Dual-axis chart of monthly spend (bars, primary axis) and quantity sold
# (points + line, secondary axis).
#
# ggplot2 secondary axes are a pure transformation of the primary axis, so
# quantity is divided by `qty_scale` to be drawn on the spend axis and the
# secondary axis multiplies by the same factor to label it in real units.
# Keeping the factor in one place guarantees the line and its axis agree.
qty_scale <- 50

df %>%
  ggplot(aes(x = month)) +
  # Constant string aesthetics create single-entry legends for each series.
  geom_col(aes(y = spend, fill = 'Total Net Spend ($)')) +
  geom_point(aes(y = qty / qty_scale)) +
  # group = 1 joins all months into a single path (month is discrete).
  geom_path(aes(y = qty / qty_scale, group = 1, color = 'Quantity Sold')) +
  scale_y_continuous(
    name = 'Total Net Spend ($)',
    sec.axis = sec_axis(~ . * qty_scale, name = 'Quantity Sold')
  ) +
  labs(
    title = 'Spending and Quantity by Month',
    x = 'Month',
    subtitle =
      'The data below shows the total net amount in dollars spent
and the net quantity sold by month in the year 2017.'
  ) +
  # Manual scales pin the colours and blank out the legend titles.
  scale_fill_manual(
    name = '', values = c('Total Net Spend ($)' = 'navy')
  ) +
  scale_colour_manual(
    name = '', values = c('Quantity Sold' = 'orange')
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    legend.key.width = unit(0.5, 'cm'),
    legend.text = element_text(size = 6),
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 9)
  )

Top Product Categories
# Sales by product category and age group, restricted to the best-selling
# categories.
#
# The cut-off is applied to whole categories (ranked by their total sales
# across all age groups) rather than to individual category/age rows, so
# every retained category keeps all of its age segments and its stacked bar
# reflects its full total.
n_top_categories <- 15

df <- transactions %>%
  inner_join(products, by = 'product_id') %>%
  inner_join(demographics, by = 'household_id') %>%
  group_by(product_category, age) %>%
  summarise(sales = sum(sales_value)) %>%
  # summarise() peels off only the last grouping variable, so the data are
  # still grouped by product_category: sum(sales) is the category total.
  mutate(category_sales = sum(sales)) %>%
  ungroup() %>%
  # dense_rank() on the descending totals gives rank 1 to the top category;
  # all rows of a category share the same rank.
  filter(dense_rank(desc(category_sales)) <= n_top_categories) %>%
  select(-category_sales) %>%
  arrange(desc(sales))
## `summarise()` has grouped output by 'product_category'. You can override using
## the `.groups` argument.
# Horizontal stacked bar chart of sales per product category, with each bar
# split by age group.
ggplot(
  data = df,
  mapping = aes(
    # Order the categories by their summed sales so the bars are sorted.
    x = reorder(product_category, sales, FUN = sum),
    y = sales,
    fill = age
  )
) +
  geom_col() +
  # Flip the axes so the category names sit on the vertical axis.
  coord_flip() +
  guides(fill = guide_legend(title = "Age Group")) +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    plot.subtitle = element_text(size = 9),
    axis.text = element_text(size = 8)
  ) +
  # x/y labels refer to the pre-flip aesthetics: x is the category.
  labs(
    title = 'Top Product Categories',
    subtitle =
      'The data shows the age groups for the most popular product
categories sold in 2017.',
    x = 'Product Category',
    y = 'Total Net Sales ($)'
  )

Number of Products Purchased by Hour of Day
# Reuse the transactions table if it has already been loaded earlier in the
# session; only fetch it when this section is run on its own. This avoids
# repeating an expensive load of the full data set.
if (!exists('transactions', inherits = FALSE)) {
  transactions <- get_transactions()
}

# Quantity purchased per basket, tagged with the hour of the transaction and
# the household size. Inner joins keep only transactions that match a row in
# `products` and in `demographics`.
df <- transactions %>%
  inner_join(products, by = 'product_id') %>%
  inner_join(demographics, by = 'household_id') %>%
  mutate(hour = hour(transaction_timestamp)) %>%
  # Excludes transactions whose hour component is 0.
  # NOTE(review): this drops the midnight hour entirely - confirm intended.
  filter(hour != 0) %>%
  group_by(basket_id, hour, household_size) %>%
  summarise(qty = sum(quantity)) %>%
  arrange(desc(qty))
## `summarise()` has grouped output by 'basket_id', 'hour'. You can override using
## the `.groups` argument.
# Stacked bar chart of quantity purchased for each hour of the day, with the
# bars split by household size.
ggplot(df, aes(hour, qty, fill = household_size)) +
  geom_col() +
  # One tick per hour on the x axis.
  scale_x_continuous(breaks = seq(1, 24, by = 1)) +
  theme(
    plot.title = element_text(face = 'bold', size = 20),
    axis.text.x = element_text(size = 7)
  ) +
  labs(
    title = 'Number of Products Purchased by Hour of Day',
    x = 'Hour of Day',
    y = 'Net Items Purchased',
    subtitle =
      'The data below shows the net quantity of products purchased in 2017
by the hour of the day for each household size.'
  )
