library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v tibble  3.1.8     v dplyr   1.0.9
## v tidyr   1.2.0     v stringr 1.4.1
## v readr   2.1.2     v forcats 0.5.2
## v purrr   0.3.4
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(completejourney)
## Warning: package 'completejourney' was built under R version 4.1.3
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(stringr)
library(tidyr)
library(scales)
## Warning: package 'scales' was built under R version 4.1.3
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# Working copies of the completejourney data: the transactions table returned
# by get_transactions(), plus short aliases for the demographics and products
# data sets.
trans <- get_transactions()
demo <- demographics
prods <- products

# Best-selling department for each of the ten stores with the largest
# top-department sales, ordered by department for plotting.
#
# Steps: total sales per store/department -> keep each store's single best
# department -> rank those rows across all stores and keep the ten largest.
# NOTE(review): stores are ranked by their best department's sales, not by
# the store's sales summed over every department -- confirm this is intended.
store_sales <- trans %>%
  inner_join(products, by = "product_id") %>%
  group_by(store_id, department) %>%
  summarize(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  # Still grouped by store_id here, so this keeps one row per store;
  # with_ties = FALSE guarantees a single department even on exact ties.
  slice_max(order_by = total_sales, n = 1, with_ties = FALSE) %>%
  # Slicing a grouped tibble returns rows in group (store_id) order, so the
  # grouping must be dropped and the rows ranked explicitly; simply taking
  # the first ten rows would select by store id rather than by sales.
  ungroup() %>%
  slice_max(order_by = total_sales, n = 10, with_ties = FALSE) %>%
  arrange(department)
# Lollipop-style chart of the rows in store_sales: a thin bar and a point per
# store, coloured by department and labelled with the sales total. Stores are
# ordered on the y axis by total_sales.
store_plot <- store_sales %>%
  ggplot(
    aes(total_sales, fct_reorder(store_id, total_sales), color = department)
  ) +
  geom_col(width = .02, size = .4) +
  geom_point(size = 3) +
  geom_text(aes(label = total_sales), nudge_x = 7.5) +
  ggtitle(
    "Top 10 Stores in Total Sales",
    subtitle = "With top selling departments"
  ) +
  # The axis shows unscaled amounts (dollar_format() is used without a scale
  # factor), so the axis title must not claim the values are in millions.
  labs(
    x = "Sales ($)",
    y = "Store ID",
    color = "Department"
  ) +
  scale_x_continuous(labels = scales::dollar_format()) +
  theme_classic()

store_plot

# Demographic rows with no missing value in any column.
# NOTE(review): complete.cases() checks every column, so a household is also
# dropped for an NA in a field this analysis never uses -- confirm that is
# the intended filter.
demo.rm <- demo[complete.cases(demo), ]

# Total quantity purchased for each income / marital-status combination,
# largest first. Only households present in demo.rm contribute (inner join).
trans_income <- trans %>%
  inner_join(demo.rm, by = "household_id") %>%
  group_by(income, marital_status) %>%
  summarize(
    total_quantity = sum(quantity, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_quantity))
# Quantity purchased per income level, with one point per marital status.
# coord_flip() puts income on the vertical axis, which is why the x/y labels
# below look swapped relative to the aes() mapping.
trans_plot <- trans_income %>%
  ggplot(aes(income, total_quantity)) +
  geom_line() +
  geom_point(aes(color = marital_status), size = 2) +
  coord_flip() +
  labs(
    title = "Quantity of Goods sold by Income",
    subtitle = "with regards to marriage",
    y = "Number of Goods",
    x = "Income ($)",
    color = "Marital Status"
  ) +
  scale_y_continuous(labels = comma) +
  scale_colour_discrete(na.translate = FALSE) +
  # theme_replace() is deliberately not added here: it modifies the
  # session-wide theme and returns the previous one, so adding it to a plot
  # would override theme_bw() with that global theme.
  theme_bw()

trans_plot

# Total quantity purchased for each age group and day of the week, largest
# first. The weekday label is derived from the transaction timestamp, and
# only households with a demographics record contribute (inner join).
trans_age <- trans %>%
  mutate(day = lubridate::wday(transaction_timestamp, label = TRUE)) %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age, day) %>%
  summarize(
    total_quantity = sum(quantity, na.rm = TRUE),
    # Return an ungrouped tibble so later verbs are not silently applied
    # per age group.
    .groups = "drop"
  ) %>%
  arrange(desc(total_quantity))
# Point chart of quantity sold on each weekday, drawn as one panel per age
# group.
Age_plot <- ggplot(trans_age, aes(x = day, y = total_quantity)) +
  geom_point(color = "thistle4") +
  facet_wrap(~age) +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Day",
    y = "Quantity Sold",
    title = "Quantity Sold by Age group",
    subtitle = "Each Day of the Week"
  ) +
  theme_minimal()

Age_plot