library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Rank product categories by their total sales in the sampled transactions.
# The join key is left implicit, so dplyr joins on the column the two tables
# share and reports it in a message.
inner_join(products, transactions_sample) %>%
  group_by(product_category) %>%
  summarise(total_sales = sum(sales_value)) %>%
  arrange(desc(total_sales))
## Joining with `by = join_by(product_id)`
I am going to exclude the category that is just general miscellaneous items.
# Compare total spending on five selected product categories across household
# compositions, drawn as dodged bars (one bar per category within each
# household composition).
products %>%
  inner_join(transactions_sample) %>%
  inner_join(demographics) %>%
  # Match the category names exactly. A regex alternation would also keep any
  # category that merely contains one of these names as a substring.
  filter(
    product_category %in% c(
      "SOFT DRINKS",
      "BEEF",
      "FLUID MILK PRODUCTS",
      "CHEESE",
      "FRZN MEAT/MEAT DINNERS"
    )
  ) %>%
  group_by(product_category, household_comp) %>%
  summarize(total_sales = sum(sales_value)) %>%
  ggplot(aes(x = household_comp, y = total_sales, fill = product_category)) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(position = "dodge") +
  labs(
    title = "Spending on Product Categories by Household Composition",
    subtitle = "Looking at the Top 5 Product Categories",
    x = "Household Composition",
    y = "Total Sales ($)",
    fill = "Product Category"
  )
## Joining with `by = join_by(product_id)`
## Joining with `by = join_by(household_id)`
## `summarise()` has grouped output by 'product_category'. You can override using
## the `.groups` argument.
# Plot weekly candy spending as one line per age group.
transactions_sample %>%
  inner_join(demographics) %>%
  inner_join(products) %>%
  # Case-insensitive match keeps every category whose name contains "candy".
  filter(str_detect(product_category, regex("CANDY", ignore_case = TRUE))) %>%
  group_by(week, age) %>%
  summarize(total_sales = sum(sales_value)) %>%
  ggplot(aes(x = week, y = total_sales, group = age, color = age)) +
  geom_line() +
  labs(
    title = "Candy Spending Across the Year by Age Group",
    x = "Week",
    y = "Total Sales ($)",
    color = "Age Group"
  ) +
  # Label placed at hand-picked coordinates (week 15, $42) next to the spike.
  annotate(
    geom = "label",
    label = "Largest Spike in Sales (for Easter?)",
    x = 15,
    y = 42
  )
## Joining with `by = join_by(household_id)`
## Joining with `by = join_by(product_id)`
## `summarise()` has grouped output by 'week'. You can override using the
## `.groups` argument.
# Rank individual products by their total sales in the sampled transactions.
# The join key is left implicit, so dplyr joins on the column the two tables
# share and reports it in a message.
inner_join(transactions_sample, products) %>%
  group_by(product_id) %>%
  summarise(total_sales = sum(sales_value)) %>%
  arrange(desc(total_sales))
## Joining with `by = join_by(product_id)`
# Total sales of ten selected products among households with at least one
# child, drawn as one bar per product.
demographics %>%
  inner_join(transactions_sample) %>%
  inner_join(products) %>%
  filter(kids_count >= 1) %>%
  # Select the products by exact ID. A regex alternation matches substrings,
  # so a short ID such as "995242" would also pull in any longer ID that
  # happens to contain those digits.
  filter(
    product_id %in% c(
      "6534178", "6533889", "6533765", "1029743", "6534166",
      "1106523", "1082185", "995242", "916122", "1044078"
    )
  ) %>%
  group_by(product_id) %>%
  summarize(total_sales = sum(sales_value)) %>%
  ggplot(aes(x = product_id, y = total_sales)) +
  geom_col() +
  labs(
    title = "Spending on Products by Households with Kids",
    subtitle = "Looking at the Top 10 Products",
    x = "Product ID",
    y = "Total Sales ($)"
  ) +
  # Label placed at hand-picked coordinates (8th bar position, $4200).
  annotate(
    geom = "label",
    label = "~8 Times Greater Sales than Any Other",
    x = 8,
    y = 4200
  )
## Joining with `by = join_by(household_id)`
## Joining with `by = join_by(product_id)`