# Calling Packages and Importing Data -------------------------------------------------------
library(tidyverse)
library(completejourney)
library(ggplot2)
library(scales)
library(dplyr)
# Load the two tables that the display-location analysis joins together.
# The getters are namespace-qualified to make their origin explicit.
transactions <- completejourney::get_transactions()
promotions <- completejourney::get_promotions()
# Display Location vs Sales -------------------------------------------------------
# Total sales per promotional display location.
#
# Promotions are matched to transactions on product, store and week, and
# sales_value is summed within each display_location code. The codes are then
# replaced by the readable labels defined in display_location_names.
display_location_names <- c(
  "1" = "Store Front",
  "2" = "Store Rear",
  "3" = "Front End Cap",
  "4" = "Mid-aisle End Cap",
  "5" = "Rear End Cap",
  "6" = "Side Aisle End Cap",
  "7" = "In-aisle",
  "9" = "Secondary Location Display",
  "A" = "In-shelf"
)
display_locations <- promotions %>%
  # Keep only codes that have a label in the lookup above. This excludes the
  # unlabelled code by name (presumably "0" = not on display -- confirm against
  # the data dictionary) instead of dropping whichever row happens to sort
  # first after summarising. Filtering before the join also shrinks the join.
  filter(display_location %in% names(display_location_names)) %>%
  inner_join(transactions, by = c("product_id", "store_id", "week")) %>%
  group_by(display_location) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  mutate(
    display_location = factor(
      display_location,
      levels = names(display_location_names),
      labels = display_location_names
    )
  )
# Plot total sales per display location, one point per location.
# Axis labels are set once in labs(); the y axis is formatted as dollars and
# the x tick labels are rotated 45 degrees and right-aligned (hjust = 1).
ggplot(display_locations, aes(x = display_location, y = total_sales)) +
  geom_point() +
  scale_y_continuous(labels = label_dollar()) +
  labs(
    title = "Which Display Location Causes the Most Sales?",
    subtitle = "Display Location vs. Sales",
    x = "Display Location",
    y = "Total Sales ($)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Coupon Usage by Age Group -------------------------------------------------------
# Coupon users per age bracket: the number of distinct households that appear
# in coupon_redemptions and also have a demographics record.
couponers <- inner_join(
  coupon_redemptions,
  demographics,
  by = "household_id"
) %>%
  group_by(age) %>%
  summarise(coupon_users = n_distinct(household_id)) %>%
  ungroup()
# Distinct households per age bracket; used as the denominator when turning
# coupon-user counts into proportions.
total_households_by_age <- summarise(
  group_by(demographics, age),
  total_households = n_distinct(household_id)
)
# Coupon users vs. non-users per age bracket, as counts and as proportions of
# all households in that bracket.
couponers_and_noncouponers <- couponers %>%
  # right_join keeps every age bracket present in the household totals, so a
  # bracket with no coupon users still shows up (as 0%) instead of silently
  # disappearing from the chart, as it would with an inner join.
  right_join(total_households_by_age, by = "age") %>%
  mutate(
    # Brackets without any match in couponers come through as NA; treat as 0.
    coupon_users = coalesce(coupon_users, 0L),
    non_coupon_users = total_households - coupon_users,
    proportion_coupon_users = coupon_users / total_households,
    proportion_non_coupon_users = non_coupon_users / total_households
  ) %>%
  # Unmatched rows are appended at the end by right_join; restore age order.
  arrange(age)
# Long format for plotting: one row per age bracket and user type, with the
# original proportion column name stored in `type`.
age_group_long <- couponers_and_noncouponers %>%
  select(age, starts_with("proportion_")) %>%
  pivot_longer(
    cols = -age,
    names_to = "type",
    values_to = "proportion"
  )
# Stacked bar chart: share of coupon users vs. non-users within each age
# bracket. The y axis is shown as a percentage.
ggplot(age_group_long, aes(x = factor(age), y = proportion, fill = type)) +
  geom_col() +
  scale_y_continuous(labels = label_percent()) +
  scale_fill_manual(
    values = c(
      "proportion_coupon_users" = "blue",
      "proportion_non_coupon_users" = "gray"
    ),
    labels = c("Coupon Users", "Non-Coupon Users")
  ) +
  labs(
    title = "Which Age Group Uses the Most Coupons?",
    subtitle = "Coupon Usage by Age Group",
    x = "Age Group",
    y = "Proportion of Households"
  )

# Popular Departments ---------------------------------------------
# Transaction rows for a fixed set of departments, with product attributes
# attached. Nothing is aggregated here: the plot below bins the individual
# sales_value entries, so the result is returned ungrouped.
sales_for_departments <- products %>%
  # Restrict products first so the join only carries the departments of
  # interest; %in% replaces the chain of == comparisons.
  filter(
    department %in% c("GROCERY", "DRUG GM", "FUEL", "PRODUCE", "MEAT", "TOYS")
  ) %>%
  inner_join(transactions, by = "product_id")
# Frequency polygons of transaction sales values, one line per department.
# xlim(0, 20) restricts the plot to sales values between 0 and 20; rows
# outside that range are removed before binning.
ggplot(sales_for_departments, aes(x = sales_value, color = department)) +
  # bins = 30 is the default spelled out, which avoids the "pick a better
  # value" message; a frequency polygon is drawn as lines, so no fill mapping.
  geom_freqpoly(bins = 30) +
  xlim(0, 20) +
  labs(
    title = "At Which Prices are Departments Most Successful?",
    subtitle = "Number of transactions at a sales value.",
    x = "Sales Value",
    y = "Number of Transactions"
  )
