library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Load the transactions table that the plots in this script join against on
# product_id and household_id.
# NOTE(review): get_transactions() presumably comes from the completejourney
# package attached above — confirm.
transactions <- get_transactions()
# Plot 1: Beer sales by age group
# Total beer spend per age bracket.
# Beer products are picked with a case-insensitive match on product_category,
# joined to their transactions and to the purchasing household's demographics,
# and sales_value is then summed within each age group.
beer_sales_by_age <- products %>%
  filter(str_detect(product_category, regex("beer", ignore_case = TRUE))) %>%
  inner_join(transactions, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(age) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE))

# One bar per age group; the y axis is formatted as dollars.
ggplot(beer_sales_by_age, aes(x = age, y = total_sales, group = 1)) +
  geom_col(fill = "orange", color = "black") +
  scale_x_discrete(name = "Age") +
  scale_y_continuous(name = "Total Beer Sales", labels = scales::dollar) +
  labs(
    title = "Which Age Group Spends the Most on Beer?",
    subtitle = "Analyzing beer spending patterns across different age groups"
  )

# Plot 2: Vitamin purchases by household size
# Number of vitamin purchase records per household size.
# Products whose product_category contains "VITAMIN" (case-sensitive) are
# joined to their transactions and to household demographics, then the
# matching transaction rows are counted for each household_size.
products %>%
  # Filter the product table first so the joins below only carry vitamin rows
  # instead of every transaction; the resulting row set is the same.
  filter(str_detect(product_category, "VITAMIN")) %>%
  inner_join(transactions, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(household_size) %>%
  # n() counts transaction rows; it does not sum units bought.
  # NOTE(review): the title says "Quantity" while the metric is a row count —
  # confirm which one is intended.
  summarize(total_purchases = n()) %>%
  # No arrange() step: bar order follows the x-axis factor levels, not the row
  # order of the summary table.
  ggplot(aes(
    x = factor(household_size),
    y = total_purchases,
    fill = factor(household_size)
  )) +
  geom_col() +
  labs(
    title = "Quantity of Vitamins Purchased by Household Size",
    subtitle = "Exploring how household size influences vitamin purchases",
    x = "Household Size",
    y = "Total Purchases",
    fill = "Household Size"
  )

# Plot 3: Daily cookie sales by day of week
# Distribution of daily cookie sales for each day of the week.
# Products whose product_type matches "cookies" (case-insensitive) are joined
# to their transactions, sales_value is totalled per calendar date, and the
# daily totals are shown as one box per weekday.
products %>%
  # Filter the product table before joining so only cookie rows are carried.
  filter(str_detect(product_type, regex("cookies", ignore_case = TRUE))) %>%
  inner_join(transactions, by = "product_id") %>%
  mutate(
    # as_date() converts using the timestamp's own time zone, whereas base
    # as.Date() on a POSIXct defaults to UTC and can shift late-night
    # transactions onto the next calendar day.
    transaction_date = as_date(transaction_timestamp),
    # Derive the weekday from that same date so the two grouping keys can
    # never disagree.
    day_of_week = wday(transaction_date, label = TRUE)
  ) %>%
  group_by(day_of_week, transaction_date) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = day_of_week, y = total_sales)) +
  geom_boxplot(color = "red", fill = "black") +
  # Flip so weekdays run down the vertical axis and sales along the horizontal.
  coord_flip() +
  labs(
    title = "Total Cookie Sales by Day of Week",
    subtitle = "Analyzing daily sales patterns to uncover peak cookie demand",
    x = "Day of Week",
    y = "Total Sales"
  )
