# In-class example plot: Which income level and marital status spend
# the most on pet food products?
library(tidyverse)
library(completejourney)
# Total cat- and dog-food sales by household income band and marital status.
#
# Products whose `product_category` matches DOG FOOD or CAT FOOD are joined to
# the sampled transactions and then to household demographics. Both joins are
# inner joins, so only transactions from households that have a demographics
# record contribute to the totals.
products %>%
  filter(str_detect(product_category, regex("(DOG FOOD)|(CAT FOOD)"))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(income, marital_status) %>%
  summarize(
    total_sales = sum(sales_value, na.rm = TRUE),
    # Drop the leftover `income` grouping so the mutate() below runs on the
    # whole table instead of group-by-group (and no regrouping message is
    # printed).
    .groups = "drop"
  ) %>%
  # Turn missing marital status into an explicit "Unknown" level so those
  # households get a labelled bar rather than an NA category.
  mutate(
    marital_status = fct_na_value_to_level(marital_status, level = "Unknown")
  ) %>%
  ggplot(aes(x = marital_status, y = total_sales)) +
  geom_col() +
  facet_wrap(~ income) +
  scale_y_continuous("Total Sales for Cat & Dog Food", labels = scales::dollar) +
  scale_x_discrete("Marital Status") +
  ggtitle(
    "Who's buying Cat & Dog Food",
    subtitle = "Married households with income ranging from $50-99K buy the most cat and dog food."
  )

# Plot 1: Which age group buys the most ice cream?
library(tidyverse)
library(completejourney)
# Total ice-cream sales by the age band of the purchasing household.
#
# Products whose `product_category` contains "ICE CREAM" (case-sensitive) are
# joined to the sampled transactions and to household demographics, then sales
# are summed per age band and shown as one bar per band.
products %>%
  filter(str_detect(product_category, regex("ICE CREAM", ignore_case = FALSE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Aggregate by age only: the question is per age band, and grouping by
  # product as well would make every bar a stack of one segment per product.
  group_by(age) %>%
  summarise(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  ggplot(aes(x = age, y = total_sales)) +
  geom_col() +
  scale_y_continuous("Total Sales for Ice Cream", labels = scales::dollar) +
  scale_x_discrete("Age") +
  ggtitle("What age is buying Ice Cream")

# Plot 2: Do people buy more medicine and vitamins closer to
# winter?
library(tidyverse)
library(completejourney)
# Monthly sales of cold/flu medicine and vitamins, to check for seasonality.
#
# Products whose `product_category` matches COLD AND FLU or VITAMINS
# (case-sensitive) are joined to the sampled transactions. Sales are then
# summed per category and calendar month and plotted over time.
products %>%
  filter(str_detect(product_category, regex("(COLD AND FLU|(VITAMINS))",
                                            ignore_case = FALSE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  # Bucket timestamps by calendar month. Grouping on the raw timestamp gives
  # (nearly) one group per purchase, so nothing is really totalled and a
  # seasonal pattern cannot show up. floor_date() keeps the date-time class,
  # so scale_x_datetime() below still applies.
  mutate(
    sales_month = lubridate::floor_date(transaction_timestamp, unit = "month")
  ) %>%
  group_by(product_category, sales_month) %>%
  summarize(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Colour by category so the separately summed categories stay tell-apart.
  ggplot(aes(x = sales_month, y = total_sales, color = product_category)) +
  geom_point() +
  scale_y_continuous(
    "Total Sales for Cold&Flu Medicine and Vitamins",
    labels = scales::dollar
  ) +
  scale_x_datetime("Dates") +
  labs(color = "Product Category") +
  # NOTE(review): the subtitle states a conclusion drawn from the data;
  # re-check it against the monthly totals before publishing.
  ggtitle(
    "Dates when people buy medicine and vitamins",
    subtitle = paste0(
      "There isn't much correlation between time of year and when people buy medicine and vitamins.\n",
      "Theoretically, people would buy more during the winter season"
    )
  )

# Plot 3: Which household size, by income level, buys the most
# cereal?
library(tidyverse)
library(completejourney)
# Total cereal sales by household size, one facet per income band.
#
# Products whose `product_type` contains "CEREAL" (case-sensitive) are joined
# to the sampled transactions and to household demographics, then sales are
# summed for every income band / household size combination.
products %>%
  filter(str_detect(product_type, regex("CEREAL", ignore_case = FALSE))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(income, household_size) %>%
  summarise(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # `group = income` is required for the lines: the x axis is discrete, so
  # ggplot2's default grouping would put each point in its own group and
  # geom_line() would have nothing to connect.
  ggplot(aes(
    x = household_size,
    y = total_sales,
    color = income,
    group = income
  )) +
  geom_point() +
  geom_line() +
  facet_wrap(~ income) +
  scale_y_continuous("Total Sales of Cereal", labels = scales::dollar) +
  scale_x_discrete("Household Size") +
  ggtitle(
    "Who buys the most cereal?",
    subtitle = "Is it odd that smaller households buy more cereal?"
  )
