library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(forcats)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
# Load the transactions table through completejourney's accessor and print
# the tibble preview.
transactions <- completejourney::get_transactions()
print(transactions)
## # A tibble: 1,469,307 × 11
##    household_id store_id basket_id   product_id quantity sales_value retail_disc
##    <chr>        <chr>    <chr>       <chr>         <dbl>       <dbl>       <dbl>
##  1 900          330      31198570044 1095275           1        0.5         0   
##  2 900          330      31198570047 9878513           1        0.99        0.1 
##  3 1228         406      31198655051 1041453           1        1.43        0.15
##  4 906          319      31198705046 1020156           1        1.5         0.29
##  5 906          319      31198705046 1053875           2        2.78        0.8 
##  6 906          319      31198705046 1060312           1        5.49        0.5 
##  7 906          319      31198705046 1075313           1        1.5         0.29
##  8 1058         381      31198676055 985893            1        1.88        0.21
##  9 1058         381      31198676055 988791            1        1.5         1.29
## 10 1058         381      31198676055 9297106           1        2.69        0   
## # ℹ 1,469,297 more rows
## # ℹ 4 more variables: coupon_disc <dbl>, coupon_match_disc <dbl>, week <int>,
## #   transaction_timestamp <dttm>
# Total sales value (`spend`) and total quantity (`qty`) per calendar month.
# Both joins are inner joins, so only transactions with a matching row in
# `demographics` and in `products` contribute to the totals.
monthly_spending_and_quantity <- demographics %>%
  inner_join(transactions, by = join_by(household_id)) %>%
  inner_join(products, by = join_by(product_id)) %>%
  # label = TRUE yields an ordered factor of month abbreviations.
  mutate(month = lubridate::month(transaction_timestamp, label = TRUE)) %>%
  group_by(month) %>%
  summarise(spend = sum(sales_value), qty = sum(quantity))
print(monthly_spending_and_quantity)
## # A tibble: 12 × 3
##    month   spend     qty
##    <ord>   <dbl>   <dbl>
##  1 Jan   210096. 7284739
##  2 Feb   196315. 6884626
##  3 Mar   210758. 7527520
##  4 Apr   210276. 7515694
##  5 May   227715. 8482626
##  6 Jun   207592. 7596975
##  7 Jul   229790. 8778121
##  8 Aug   223957. 9158135
##  9 Sep   219468. 8571793
## 10 Oct   228174. 9317935
## 11 Nov   217088. 8377593
## 12 Dec   243656. 8497607
# Dual-axis chart: monthly spend as bars against the left axis, with quantity
# overlaid as points joined by a line. Quantity is divided by 50 so it fits on
# the spend axis; the secondary axis multiplies by 50 to label it in its
# original units.
ggplot(monthly_spending_and_quantity, aes(x = month)) +
  geom_col(aes(y = spend, fill = "Total Net Spend ($)")) +
  geom_point(aes(y = qty / 50)) +
  # group = 1 joins all months into a single path across the discrete x axis.
  geom_path(aes(y = qty / 50, group = 1, colour = "Quantity Sold")) +
  labs(
    title = "Spending and Quantity by Month",
    subtitle = paste0(
      "The data below shows the total net amount in dollars spent\n",
      "and the net quantity sold by month in the year 2017."
    ),
    x = "Month"
  ) +
  scale_y_continuous(
    name = "Total Net Spend ($)",
    sec.axis = sec_axis(~ . * 50, name = "Quantity Sold")
  ) +
  # Constant-valued fill/colour aesthetics exist only to produce legend keys.
  scale_fill_manual(name = "", values = c("Total Net Spend ($)" = "pink")) +
  scale_colour_manual(name = "", values = c("Quantity Sold" = "brown")) +
  theme(
    axis.text = element_text(size = 8),
    axis.title = element_text(size = 9),
    legend.key.width = unit(0.5, "cm"),
    legend.text = element_text(size = 6),
    plot.title = element_text(face = "bold", size = 20)
  )

# Sales per (product category, age group) pair, keeping the 75 pairs with the
# highest sales. Inner joins restrict the data to transactions that match a
# row in both `products` and `demographics`.
product_sales_by_age <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(product_category, age) %>%
  # Drop the grouping explicitly: nothing downstream uses it, and leaving the
  # result grouped by product_category triggers a summarise() message and
  # makes later group-aware verbs behave unexpectedly.
  summarise(sales = sum(sales_value), .groups = "drop") %>%
  arrange(desc(sales)) %>%
  head(75)
product_sales_by_age
## # A tibble: 75 × 3
##    product_category       age    sales
##    <chr>                  <ord>  <dbl>
##  1 COUPON/MISC ITEMS      45-54 89171.
##  2 COUPON/MISC ITEMS      35-44 72982.
##  3 COUPON/MISC ITEMS      25-34 43606.
##  4 SOFT DRINKS            45-54 38801.
##  5 BEEF                   45-54 36485.
##  6 SOFT DRINKS            35-44 27337.
##  7 BEEF                   35-44 27124.
##  8 FLUID MILK PRODUCTS    45-54 25563.
##  9 CHEESE                 45-54 21791.
## 10 FRZN MEAT/MEAT DINNERS 45-54 18892.
## # ℹ 65 more rows
# Stacked horizontal bars: one bar per product category, ordered by the sum of
# its rows' sales, with segments filled by age group.
ggplot(
  product_sales_by_age,
  aes(
    x = reorder(product_category, sales, FUN = sum),
    y = sales,
    fill = age
  )
) +
  geom_col() +
  # Flip so the category names read horizontally along the vertical axis.
  coord_flip() +
  guides(fill = guide_legend(title = "Age Group")) +
  labs(
    title = "Top Product Categories",
    subtitle = paste0(
      "The data shows the age groups for the most popular product \n",
      "categories sold in 2017."
    ),
    x = "Product Category",
    y = "Total Net Sales ($)"
  ) +
  theme(
    axis.text = element_text(size = 8),
    plot.subtitle = element_text(size = 9),
    plot.title = element_text(face = "bold", size = 20)
  )

# Total sales for four selected departments, drawn as horizontal bars ordered
# by total sales and coloured per department.
products %>%
  # `%in%` tests set membership for every row. Comparing with `==` against a
  # length-4 vector recycles that vector down the column, which silently
  # drops most matching rows and raises a length-mismatch warning.
  filter(
    department %in% c("GROCERY", "DRUG GM", "MISCELLANEOUS", "PASTRY")
  ) %>%
  inner_join(transactions, by = "product_id") %>%
  group_by(department) %>%
  summarise(total_sales = sum(sales_value)) %>%
  ggplot(
    aes(
      x = total_sales,
      y = fct_reorder(department, total_sales),
      # Map fill so that scale_fill_manual() below actually takes effect.
      fill = department
    )
  ) +
  # The y axis already names each department, so a fill legend is redundant.
  geom_col(show.legend = FALSE) +
  # Keys must equal the department values exactly for the colours to apply.
  scale_fill_manual(
    values = c(
      "GROCERY" = "orange",
      "DRUG GM" = "green",
      "MISCELLANEOUS" = "purple",
      "PASTRY" = "red"
    )
  ) +
  scale_y_discrete(name = "Departments") +
  labs(title = "Total Sales by Department", x = "Total Sales ($)")