# IN CLASS EXAMPLE PLOT: Which income level and marital status spends the most on pet food products?

library(tidyverse)
library(completejourney)
# Total cat & dog food sales by household income bracket and marital status.
#
# Pipeline: keep products whose category mentions dog or cat food, attach the
# sampled transactions for those products and the demographics of the
# purchasing household (inner joins, so rows without a match are dropped),
# sum sales per (income, marital status) pair, and draw one bar panel per
# income bracket.
products %>%
  filter(str_detect(product_category, regex("(DOG FOOD)|(CAT FOOD)"))) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Relabel missing marital status *before* aggregating so that households
  # with no recorded status end up in a single explicit "Unknown" bar.
  mutate(
    marital_status = fct_na_value_to_level(marital_status, level = "Unknown")
  ) %>%
  group_by(income, marital_status) %>%
  # .groups = "drop" returns an ungrouped tibble and avoids the implicit
  # "grouped output by 'income'" behaviour of summarize().
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = marital_status, y = total_sales)) +
  geom_col() +
  facet_wrap(~ income) +
  scale_y_continuous("Total Sales for Cat & Dog Food", labels = scales::dollar) +
  scale_x_discrete("Marital Status") +
  ggtitle("Who's buying Cat & Dog Food",
          subtitle = "Married households with income ranging from $50-99K buy the most cat and dog food.")

# Plot 1: What age buys the most Ice Cream?

library(tidyverse)
library(completejourney)
# Total ice cream sales by age bracket of the purchasing household.
#
# Pipeline: keep products whose category contains "ICE CREAM" (case-sensitive),
# attach the sampled transactions and household demographics via inner joins,
# then sum sales per age bracket and draw one bar per bracket.
products %>%
  # A plain string pattern is a case-sensitive regex, which is what
  # regex("ICE CREAM", ignore_case = FALSE) spelled out.
  filter(str_detect(product_category, "ICE CREAM")) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  # Aggregate per age bracket only. Grouping by product_id as well produced
  # one row per product, which geom_col() then silently stacked back into a
  # single bar per age; the bar heights are the same, the data is just tidy.
  group_by(age) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  # No arrange() here: row order does not influence bar heights or the order
  # of a discrete axis.
  ggplot(aes(x = age, y = total_sales)) +
  geom_col() +
  scale_y_continuous("Total Sales for Ice Cream", labels = scales::dollar) +
  scale_x_discrete("Age") +
  ggtitle("What age is buying Ice Cream")

# Plot 2: Do people buy medicine and vitamins closer towards winter?

library(tidyverse)
library(completejourney)
# Cold & flu medicine and vitamin sales plotted against transaction time.
#
# Pipeline: keep products whose category contains "COLD AND FLU" or
# "VITAMINS" (case-sensitive), attach the sampled transactions via an inner
# join, sum sales per (category, timestamp) pair, and draw one point per pair.
products %>%
  filter(str_detect(product_category, "COLD AND FLU|VITAMINS")) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  # NOTE(review): grouping by the raw timestamp means each point is the total
  # for one exact timestamp within a category, not a daily/weekly/monthly
  # total. Binning the timestamp first would make seasonality easier to see -
  # confirm whether that is the intended analysis before changing it.
  group_by(product_category, transaction_timestamp) %>%
  # na.rm = TRUE matches the other plots in this file, so a missing sales
  # value cannot turn a whole total into NA.
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = transaction_timestamp, y = total_sales)) +
  geom_point() +
  scale_y_continuous("Total Sales for Cold&Flu Medicine and Vitamins",
                     labels = scales::dollar) +
  scale_x_datetime("Dates") +
  ggtitle("Dates when people buy medicine and vitamins",
          subtitle = paste0(
            "There isn't much correlation between time of year and when ",
            "people buy medicine and vitamins. \n",
            "Theoretically, people would buy more during the winter season"
          ))

# Plot 3: What size household based on income buys the most cereal?

library(tidyverse)
library(completejourney)
# Total cereal sales by household size, one panel per income bracket.
#
# Pipeline: keep products whose type contains "CEREAL" (case-sensitive),
# attach the sampled transactions and household demographics via inner joins,
# sum sales per (income, household size) pair, and draw a point-and-line
# profile across household sizes for each income bracket.
products %>%
  filter(str_detect(product_type, "CEREAL")) %>%
  inner_join(transactions_sample, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(income, household_size) %>%
  # na.rm = TRUE matches the other plots in this file; .groups = "drop"
  # returns an ungrouped tibble instead of one still grouped by income.
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  # group = income is required for geom_line(): with a discrete x axis ggplot2
  # otherwise puts every (income, household_size) combination in its own
  # group, so each "line" has a single point and nothing is drawn.
  ggplot(aes(x = household_size, y = total_sales,
             color = income, group = income)) +
  geom_point() +
  geom_line() +
  facet_wrap(~ income) +
  scale_y_continuous("Total Sales of Cereal", labels = scales::dollar) +
  scale_x_discrete("Household Size") +
  ggtitle("Who buys the most cereal?",
          subtitle = "Is it odd that smaller households buy more cereal?")