library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Fetch the transactions table used by all three plots below.
# NOTE(review): get_transactions() is presumably provided by completejourney
# (attached above) and may download data on each run -- confirm.
transactions <- get_transactions()

# Plot 1 ----

# Bar chart of total beer sales per `age` group.
# Steps: keep products whose category mentions "beer" (any case), attach
# their transactions and the buying household's demographics, then sum
# sales_value within each age group.
products %>%
  filter(
    str_detect(product_category, regex("beer", ignore_case = TRUE))
  ) %>%
  inner_join(transactions, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = age, y = total_sales, group = 1)) +
  geom_col(fill = "orange", color = "black") +
  scale_x_discrete(name = "Age") +
  # Format the sales axis as currency.
  scale_y_continuous(name = "Total Beer Sales", labels = scales::dollar) +
  labs(
    title = "Which Age Group Spends the Most on Beer?",
    subtitle = "Analyzing beer spending patterns across different age groups"
  )

# Plot 2 ----

# Bar chart of the number of vitamin purchase records per household size.
# Products are narrowed to the vitamin category *before* the joins so the
# full transactions table is never joined against every product; the
# resulting counts are the same as filtering after the joins.
products %>%
  filter(str_detect(product_category, 'VITAMIN')) %>%
  inner_join(transactions, by = 'product_id') %>%
  inner_join(demographics, by = 'household_id') %>%
  # Each joined row is one transaction line, so this counts purchase records,
  # not units bought.
  # NOTE(review): the title says "Quantity"; if units are intended, sum a
  # quantity column (if transactions has one) instead of counting -- confirm.
  count(household_size, name = 'total_purchases') %>%
  # No sorting step: bar order on a discrete axis follows the factor levels,
  # not the row order of the data.
  ggplot(aes(x = factor(household_size), y = total_purchases, fill = factor(household_size))) +
  geom_col() +
  labs(title = 'Quantity of Vitamins Purchased by Household Size',
       subtitle = 'Exploring how household size influences vitamin purchases',
       x = 'Household Size',
       y = 'Total Purchases',
       fill = 'Household Size')

# Plot 3 ----

# Box plot of daily cookie sales totals, one box per day of the week.
# Each observation in a box is the summed sales_value of one calendar date.
products %>%
  inner_join(transactions, by = 'product_id') %>%
  filter(str_detect(product_type, regex('cookies', ignore_case = TRUE))) %>%
  mutate(
    # as_date() converts in the timestamp's own time zone, whereas base
    # as.Date() on POSIXct defaults to UTC. Deriving the weekday from the
    # same date keeps the two grouping keys consistent, so one local day
    # cannot be split across two (weekday, date) groups.
    # NOTE(review): a no-op if the timestamps are already UTC -- confirm.
    transaction_date = as_date(transaction_timestamp),
    day_of_week = wday(transaction_date, label = TRUE)
  ) %>%
  group_by(day_of_week, transaction_date) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = 'drop') %>%
  # wday(label = TRUE) already yields a factor, so no factor() wrapper needed.
  ggplot(aes(x = day_of_week, y = total_sales)) +
  geom_boxplot(color = 'red', fill = 'black') +
  # Currency-formatted sales axis, matching the beer sales plot.
  scale_y_continuous(labels = scales::dollar) +
  # Flip so the weekdays run down the vertical axis.
  coord_flip() +
  labs(title = 'Total Cookie Sales by Day of Week',
       subtitle = 'Analyzing daily sales patterns to uncover peak cookie demand',
       x = 'Day of Week',
       y = 'Total Sales')