library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Fetch the transactions data with get_transactions() and keep it in
# `transactions`; the plots below all start from or join against this object.
transactions <- get_transactions()
# Print the object at top level for a quick look at its contents.
transactions
# Fetch the promotions data with get_promotions() and print it as well.
# NOTE(review): `promotions` is not referenced by any of the plots visible in
# this part of the script -- confirm it is needed elsewhere before keeping it.
promotions <- get_promotions()
promotions
# Plot 1: total fruit and vegetable sales by household income and age.
#
# Transactions are matched to household demographics and to product details,
# restricted to product categories whose name contains "FRUIT" or "VEGETABLE",
# and summed per income/age combination. Each income range gets one bar,
# stacked and coloured by age group.
transactions %>%
  # Join keys are spelled out so the pipeline does not rely on (or announce)
  # dplyr's guess at the common columns.
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id") %>%
  filter(str_detect(product_category, "FRUIT|VEGETABLE")) %>%
  group_by(income, age) %>%
  # Drop the grouping once the totals exist; nothing downstream needs it.
  summarize(total = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = income, y = total, fill = age)) +
  # The totals are already computed, so draw them directly with geom_col().
  # geom_histogram(stat = "identity") gave the same stacked bars but warned
  # about ignored binning parameters, since no binning ever took place.
  geom_col() +
  scale_y_continuous(name = "Total Cost", labels = scales::dollar) +
  # Small tick labels so the income ranges fit along the x axis.
  theme(axis.text.x = element_text(size = 5)) +
  labs(x = "Income Range") +
  ggtitle("Total Cost of Fruits and Vegetables Based on Income and Age")

# Plot 2: monthly total sales for food departments only.
#
# Department name fragments that identify food departments, collapsed into a
# single regular expression of alternatives.
# NOTE(review): str_detect() matches substrings, so e.g. "GROCERY" also
# matches "FROZEN GROCERY" and "MEAT" also matches "MEAT-PCKGD". If only these
# exact department names are wanted, filter with `department %in% c(...)`
# instead -- confirm the intent before switching, as it may change the result.
food_departments_regex <- str_c(
  c(
    "DELI", "FROZEN GROCERY", "GROCERY", "MEAT", "MEAT-PCKGD", "NUTRITION",
    "PASTRY", "PROD-WHS SALES", "PRODUCE", "SALAD BAR", "SEAFOOD",
    "SEAFOOD-PCKGD"
  ),
  collapse = "|"
)

products %>%
  # Filter the product table first so the join only carries food products.
  filter(str_detect(department, food_departments_regex)) %>%
  inner_join(transactions, by = "product_id") %>%
  # Labelled month (Jan, Feb, ...) taken from the transaction timestamp.
  mutate(month = month(transaction_timestamp, label = TRUE)) %>%
  group_by(month, department) %>%
  summarize(total = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = month, y = total, color = department)) +
  geom_point() +
  # Log scale on the y axis so departments with very different totals can
  # share one panel.
  scale_y_log10(name = "Total Cost", labels = scales::dollar) +
  labs(x = "Month") +
  ggtitle("Total Cost of Food Per Month Based on Department")

# Plot 3: spread of coupon savings on frozen items for each day of the week.
#
# For every weekday, coupon discounts are totalled per frozen product category
# (categories whose name contains "FROZEN" or "FRZN"); the boxplot then shows
# how those per-category totals are distributed within each weekday.
transactions %>%
  # Labelled day of the week (Sun, Mon, ...) from the transaction timestamp.
  mutate(Weekday = wday(transaction_timestamp, label = TRUE)) %>%
  inner_join(products, by = "product_id") %>%
  filter(str_detect(product_category, "FROZEN|FRZN")) %>%
  # Group by weekday and category only. The discount column itself must not be
  # a grouping key: grouping by coupon_disc puts identical discount values in
  # one group, so the "total" degenerates into value * number of occurrences.
  group_by(Weekday, product_category) %>%
  summarize(totals = sum(coupon_disc, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = Weekday, y = totals)) +
  geom_boxplot() +
  labs(x = "Weekday", y = "Total Savings") +
  ggtitle("Range of Total Amount Saved Each Day on Frozen Items")
