library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(forcats)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
# Load the transactions data returned by get_transactions() into
# `transactions`; the later summaries in this script join against it.
transactions <- get_transactions()
# Bare name at top level: auto-prints a preview of the tibble.
transactions
## # A tibble: 1,469,307 × 11
## household_id store_id basket_id product_id quantity sales_value retail_disc
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 900 330 31198570044 1095275 1 0.5 0
## 2 900 330 31198570047 9878513 1 0.99 0.1
## 3 1228 406 31198655051 1041453 1 1.43 0.15
## 4 906 319 31198705046 1020156 1 1.5 0.29
## 5 906 319 31198705046 1053875 2 2.78 0.8
## 6 906 319 31198705046 1060312 1 5.49 0.5
## 7 906 319 31198705046 1075313 1 1.5 0.29
## 8 1058 381 31198676055 985893 1 1.88 0.21
## 9 1058 381 31198676055 988791 1 1.5 1.29
## 10 1058 381 31198676055 9297106 1 2.69 0
## # ℹ 1,469,297 more rows
## # ℹ 4 more variables: coupon_disc <dbl>, coupon_match_disc <dbl>, week <int>,
## # transaction_timestamp <dttm>
# Monthly totals: join demographics to transactions and products (inner joins,
# so only rows present in all three tables survive), then sum sales value and
# quantity within each labelled month of the transaction timestamp.
monthly_spending_and_quantity <- demographics %>%
  inner_join(transactions, by = "household_id") %>%
  inner_join(products, by = "product_id") %>%
  # Derive the labelled month directly in the grouping step.
  group_by(month = month(transaction_timestamp, label = TRUE)) %>%
  summarise(
    spend = sum(sales_value),
    qty = sum(quantity)
  )
# Auto-print the 12-row summary.
monthly_spending_and_quantity
## # A tibble: 12 × 3
## month spend qty
## <ord> <dbl> <dbl>
## 1 Jan 210096. 7284739
## 2 Feb 196315. 6884626
## 3 Mar 210758. 7527520
## 4 Apr 210276. 7515694
## 5 May 227715. 8482626
## 6 Jun 207592. 7596975
## 7 Jul 229790. 8778121
## 8 Aug 223957. 9158135
## 9 Sep 219468. 8571793
## 10 Oct 228174. 9317935
## 11 Nov 217088. 8377593
## 12 Dec 243656. 8497607
# Dual-axis chart: bars show monthly spend against the primary y axis, and a
# point-and-line series shows monthly quantity. Quantity is divided by 50 so it
# can be drawn on the spend scale; the secondary axis multiplies by 50 to label
# that series in its original units.
ggplot(monthly_spending_and_quantity, aes(x = month)) +
  # Constant fill/colour strings inside aes() create the legend entries that
  # the manual scales below assign colours to.
  geom_col(aes(y = spend, fill = "Total Net Spend ($)")) +
  geom_point(aes(y = qty / 50)) +
  # group = 1 connects all months as a single path on the discrete x axis.
  geom_path(aes(y = qty / 50, colour = "Quantity Sold", group = 1)) +
  scale_y_continuous(
    name = "Total Net Spend ($)",
    sec.axis = sec_axis(~ . * 50, name = "Quantity Sold")
  ) +
  scale_fill_manual(
    name = "",
    values = c("Total Net Spend ($)" = "pink")
  ) +
  scale_colour_manual(
    name = "",
    values = c("Quantity Sold" = "brown")
  ) +
  labs(
    title = "Spending and Quantity by Month",
    subtitle = paste0(
      "The data below shows the total net amount in dollars spent\n",
      "and the net quantity sold by month in the year 2017."
    ),
    x = "Month"
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    axis.title = element_text(size = 9),
    axis.text = element_text(size = 8),
    legend.text = element_text(size = 6),
    legend.key.width = unit(0.5, "cm")
  )

# Sales by product category and age band, restricted (via inner joins) to
# transactions that match both a product and a household with demographics.
# Keeps the 75 largest category x age cells.
product_sales_by_age <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(product_category, age) %>%
  # Drop the grouping explicitly: by default summarise() leaves the result
  # grouped by product_category (and emits a message), which would make any
  # later group-aware verb operate per category instead of on the whole table.
  summarise(sales = sum(sales_value), .groups = "drop") %>%
  arrange(desc(sales)) %>%
  # NOTE(review): this keeps the top 75 category x age rows, not the top
  # categories with all of their age bands, so a category can appear with only
  # some of its age bands - confirm this is the intended cut.
  head(75)
# Auto-print the result.
product_sales_by_age
## # A tibble: 75 × 3
## product_category age sales
## <chr> <ord> <dbl>
## 1 COUPON/MISC ITEMS 45-54 89171.
## 2 COUPON/MISC ITEMS 35-44 72982.
## 3 COUPON/MISC ITEMS 25-34 43606.
## 4 SOFT DRINKS 45-54 38801.
## 5 BEEF 45-54 36485.
## 6 SOFT DRINKS 35-44 27337.
## 7 BEEF 35-44 27124.
## 8 FLUID MILK PRODUCTS 45-54 25563.
## 9 CHEESE 45-54 21791.
## 10 FRZN MEAT/MEAT DINNERS 45-54 18892.
## # ℹ 65 more rows
# Horizontal stacked bars: one bar per product category, ordered by the
# category's summed sales, with segments filled by age band.
ggplot(
  product_sales_by_age,
  aes(
    # Order categories by their total sales across the rows present.
    x = reorder(product_category, sales, FUN = sum),
    y = sales,
    fill = age
  )
) +
  geom_col() +
  # Flip so the category labels run down the vertical axis.
  coord_flip() +
  labs(
    title = "Top Product Categories",
    subtitle = paste0(
      "The data shows the age groups for the most popular product\n",
      "categories sold in 2017."
    ),
    x = "Product Category",
    y = "Total Net Sales ($)",
    fill = "Age Group"
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 20),
    plot.subtitle = element_text(size = 9),
    axis.text = element_text(size = 8)
  )

# Bar colours keyed by the exact department values being plotted. The same
# vector drives both the row filter and the fill scale, so the selected
# departments and their colours cannot drift apart.
department_colours <- c(
  "GROCERY" = "orange",
  "DRUG GM" = "green",
  "MISCELLANEOUS" = "purple",
  "PASTRY" = "red"
)

# Total sales for the selected departments, drawn as horizontal bars ordered
# by total sales.
products %>%
  # %in% tests set membership for every row. `==` against a length-4 vector
  # recycles it element by element, silently dropping most matching rows and
  # warning when the lengths do not divide evenly.
  filter(department %in% names(department_colours)) %>%
  inner_join(transactions, by = "product_id") %>%
  group_by(department) %>%
  summarise(total_sales = sum(sales_value)) %>%
  ggplot(
    aes(
      x = total_sales,
      y = fct_reorder(department, total_sales),
      # Map fill so the manual colour scale below actually applies.
      fill = department
    )
  ) +
  # The y axis already names each department, so a fill legend is redundant.
  geom_col(show.legend = FALSE) +
  scale_fill_manual(values = department_colours) +
  scale_y_discrete(name = "Departments") +
  labs(title = "Total Sales by Department")
