library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.2 --
## v tibble 3.1.8 v dplyr 1.0.9
## v tidyr 1.2.0 v stringr 1.4.1
## v readr 2.1.2 v forcats 0.5.2
## v purrr 0.3.4
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(completejourney)
## Warning: package 'completejourney' was built under R version 4.1.3
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(stringr)
library(tidyr)
library(scales)
## Warning: package 'scales' was built under R version 4.1.3
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
# Source tables used throughout the script: the transactions returned by
# get_transactions(), plus short aliases for the products and demographics
# data sets.
trans <- completejourney::get_transactions()
prods <- completejourney::products
demo <- completejourney::demographics
# Top-selling department for each store, restricted to the 10 stores whose
# top department has the highest sales, ordered by department for plotting.
#
# Ranking must happen AFTER ungrouping: slice_max() on data grouped by
# store_id returns rows in group order, so sorting before it and calling
# head(10) afterwards would pick the first 10 store ids, not the 10 largest
# totals.
#
# NOTE(review): total_sales is the sales of each store's best department, not
# the store's overall sales; stores are ranked on that figure. Confirm this is
# the intended meaning of "top 10 stores".
store_sales <- trans %>%
  inner_join(products, by = "product_id") %>%
  group_by(store_id, department) %>%
  summarize(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop_last" # remain grouped by store_id for the next step
  ) %>%
  # Best department within each store (exactly one row per store).
  slice_max(order_by = total_sales, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  # Ten largest rows overall; with_ties = FALSE guarantees at most 10 rows.
  slice_max(order_by = total_sales, n = 10, with_ties = FALSE) %>%
  arrange(department)
# Lollipop chart of the stores in store_sales: one stem + dot per store,
# coloured by that store's top-selling department and labelled with the
# dollar amount.
store_plot <- store_sales %>%
  ggplot(aes(total_sales, fct_reorder(store_id, total_sales),
             color = department)) +
  geom_col(width = .02, size = .4) + # very thin bar acts as the stem
  geom_point(size = 3) +
  geom_text(
    aes(label = scales::dollar(total_sales)),
    # Offset relative to the label width; a nudge in data units (dollars)
    # is too small to move the text off the point.
    hjust = -0.25,
    show.legend = FALSE
  ) +
  labs(
    title = "Top 10 Stores in Total Sales",
    subtitle = "With top selling departments",
    # The axis shows raw dollar values, so no "in millions" qualifier.
    x = "Sales ($)",
    y = "Store ID",
    color = "Department"
  ) +
  scale_x_continuous(
    labels = scales::dollar_format(),
    # Extra room on the right so the value labels are not clipped.
    expand = expansion(mult = c(0.05, 0.2))
  ) +
  theme_classic()
store_plot

# Demographic rows with no missing values in any column.
demo.rm <- demo[complete.cases(demo), ]

# Total quantity purchased for every income / marital-status combination,
# largest first. Only households present in demo.rm contribute.
trans_income <- trans %>%
  inner_join(demo.rm, by = "household_id") %>%
  group_by(income, marital_status) %>%
  summarize(
    total_quantity = sum(quantity, na.rm = TRUE),
    .groups = "drop_last" # same grouping (by income) as the dplyr default
  ) %>%
  arrange(desc(total_quantity))
# Total quantity by income bracket (flipped so income reads down the side),
# with one point per marital status.
#
# NOTE(review): with a discrete income axis ggplot2 groups geom_line() by
# income, so each line joins the marital-status points of one bracket;
# presumably that is the intent, confirm.
trans_plot <- trans_income %>%
  ggplot(aes(income, total_quantity)) +
  geom_line() +
  geom_point(aes(color = marital_status), size = 2) +
  coord_flip() +
  # Do not append theme_replace() here: it changes the global theme and
  # returns the previous complete theme, which would override theme_bw().
  theme_bw() +
  labs(
    title = "Quantity of Goods sold by Income",
    subtitle = "with regards to marriage",
    y = "Number of Goods",
    x = "Income ($)",
    color = "Marital Status"
  ) +
  scale_y_continuous(labels = comma) +
  scale_colour_discrete(na.translate = FALSE)
trans_plot

# Total quantity purchased per age group and day of the week, largest first.
trans_age <- trans %>%
  # Join first so the weekday is only computed for households that have
  # demographic data.
  inner_join(demo, by = "household_id") %>%
  group_by(
    age,
    day = lubridate::wday(transaction_timestamp, label = TRUE)
  ) %>%
  summarize(
    total_quantity = sum(quantity, na.rm = TRUE),
    .groups = "drop_last" # same grouping (by age) as the dplyr default
  ) %>%
  arrange(desc(total_quantity))
# One panel per age group: quantity sold on each day of the week.
Age_plot <- ggplot(trans_age, aes(x = day, y = total_quantity)) +
  geom_point(colour = "thistle4") +
  facet_wrap(vars(age)) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Quantity Sold by Age group", subtitle = "Each Day of the Week") +
  xlab("Day") +
  ylab("Quantity Sold") +
  theme_minimal()
Age_plot
