# Calling Packages and Importing Data -------------------------------------------------------
library(tidyverse)
library(completejourney)
library(ggplot2)
library(scales)
library(dplyr)

# Pull the promotions and transactions tables through the completejourney
# getters; later sections join against these two objects.
promotions <- completejourney::get_promotions()

transactions <- completejourney::get_transactions()
# Display Location vs Sales -------------------------------------------------------

# Lookup from display-location code to the label shown on the plot axis.
# Names are the raw codes; values are the human-readable labels.
display_location_names <- c(
  "1" = "Store Front",
  "2" = "Store Rear",
  "3" = "Front End Cap",
  "4" = "Mid-aisle End Cap",
  "5" = "Rear End Cap",
  "6" = "Side Aisle End Cap",
  "7" = "In-aisle",
  "9" = "Secondary Location Display",
  "A" = "In-shelf"
)

# Total sales per promoted display location.
# Promotions are matched to transactions on product, store and week, then
# sales_value is summed within each display location.
display_locations <- promotions %>%
  # Drop code "0" by value rather than by row position. The label lookup has
  # no entry for "0" (presumably "not on display" -- confirm against the
  # completejourney docs), so it cannot be labelled on the plot anyway.
  # Filtering before the join also keeps those rows out of the join.
  filter(display_location != "0") %>%
  inner_join(transactions, by = c("product_id", "store_id", "week")) %>%
  group_by(display_location) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  # Replace the raw codes with readable labels, keeping the lookup's order.
  mutate(
    display_location = factor(
      display_location,
      levels = names(display_location_names),
      labels = display_location_names
    )
  )

# Plot total sales for each display location, one point per location.
ggplot(display_locations, aes(x = display_location, y = total_sales)) +
  geom_point() +
  # Format the y axis as dollar amounts.
  scale_y_continuous(labels = label_dollar()) +
  # One labs() call: an earlier duplicate set x/y to values that this call
  # always overrode, so only these labels ever reached the plot.
  labs(
    title = "Which Display Location Causes the Most Sales?",
    subtitle = "Display Location vs. Sales",
    x = "Display Location",
    y = "Total Sales ($)"
  ) +
  # Tilt the location labels so the longer names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Coupon Usage by Age Group -------------------------------------------------------

# Number of distinct households per age group that redeemed at least one
# coupon. Only households present in both demographics and
# coupon_redemptions are counted.
couponers <- demographics %>%
  semi_join(coupon_redemptions, by = "household_id") %>%
  group_by(age) %>%
  summarise(coupon_users = n_distinct(household_id), .groups = "drop")

# Number of distinct households in each age group of the demographics table.
total_households_by_age <- demographics %>%
  distinct(age, household_id) %>%
  count(age, name = "total_households")

# Coupon users and non-coupon users per age group, as counts and proportions.
# Start from the household totals so that an age group with no redeeming
# household is kept with zero coupon users instead of being dropped by the
# join.
couponers_and_noncouponers <- total_households_by_age %>%
  left_join(couponers, by = "age") %>%
  mutate(
    # Age groups missing from couponers have no coupon users.
    coupon_users = coalesce(coupon_users, 0L),
    non_coupon_users = total_households - coupon_users,
    proportion_coupon_users = coupon_users / total_households,
    proportion_non_coupon_users = non_coupon_users / total_households
  ) %>%
  # Keep the column order the table had when couponers was the left side.
  select(age, coupon_users, total_households, everything())

# Long format for plotting: one row per age group and user type, with the
# source column name in `type` and its value in `proportion`.
age_group_long <- couponers_and_noncouponers %>%
  select(age, proportion_coupon_users, proportion_non_coupon_users) %>%
  pivot_longer(
    cols = -age,
    names_to = "type",
    values_to = "proportion"
  )

# Stacked bar chart: share of coupon users vs. non-coupon users per age group.
ggplot(age_group_long, aes(x = factor(age), y = proportion, fill = type)) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "proportion_coupon_users" = "blue",
      "proportion_non_coupon_users" = "gray"
    ),
    labels = c("Coupon Users", "Non-Coupon Users")
  ) +
  # Show the proportions as percentages on the y axis.
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Which Age Group Uses the Most Coupons?",
    subtitle = "Coupon Usage by Age Group",
    x = "Age Group",
    y = "Proportion of Households"
  )

# Popular Departments ---------------------------------------------

# Transaction rows for the six departments compared in the plot below.
# Products are narrowed to those departments first, so only their
# transactions are joined. The result is an ungrouped tibble: nothing here
# aggregates by department, so no grouping is left attached.
sales_for_departments <- products %>%
  filter(
    department %in% c("GROCERY", "DRUG GM", "FUEL", "PRODUCE", "MEAT", "TOYS")
  ) %>%
  inner_join(transactions, by = "product_id")

# Frequency polygons of transaction counts by sales value, one line per
# department, with the x axis limited to sales values from 0 to 20.
ggplot(
  sales_for_departments,
  aes(x = sales_value, color = department, fill = department)
) +
  geom_freqpoly() +
  labs(
    title = "At Which Prices are Departments Most Successful?",
    subtitle = "Number of transactions at a sales value.",
    x = "Sales Value"
  ) +
  xlim(0, 20)