# Libraries
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(stringr)
library(ggrepel)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Get full datasets
transactions <- get_transactions()
promotions <- get_promotions()
#Narrowing down the dataset to a samples of 50 stores and assigning all endcap locations 
#to one variable

endcap_codes <- c("3", "4", "5", "6")

# Sample 50 stores due to processing constraints
set.seed(20260222) #So that the same 50 stores are chosen reproducibly

sample_stores <- promotions %>%
  distinct(store_id) %>%
  sample_n(100)

transactions_50 <- transactions %>%
  filter(store_id %in% sample_stores$store_id)

promo_type <- promotions %>%
  filter(store_id %in% sample_stores$store_id) %>%
  group_by(product_id, week, store_id)

df <- transactions_50 %>%
  left_join(products, by = "product_id") %>%
  left_join(promotions, by = c("product_id", "week", "store_id"), 
             relationship = "many-to-many") %>% #This is to make sure that transactions are not duplicated
  mutate(display_group = ifelse(display_location %in% endcap_codes, "endcap", "no_endcap"))
#KEY QUESTION: Can we identify endcap products that don't perform well to make
#a recommendation for better use of that space?

#Question 1: What department(s) get the most endcap exposure timewise?

endcap_time_by_dept <- df %>%
  filter(display_group == "endcap") %>%
  group_by(department) %>%
  summarize(n_store_weeks = n_distinct(str_c(store_id, week, sep = "_"))) %>%
    arrange(desc(n_store_weeks))

ggplot(endcap_time_by_dept, aes(x = reorder(department, n_store_weeks), y = n_store_weeks)) +
  geom_col() + 
  coord_flip() +
  labs(title = "Endcap Allocation by Department",
       subtitle = "Measured in store-weeks across 50 stores",
       x = "Department",
       y = "Store-weeks on endcap") +
  theme_minimal()

endcap_revenue_by_dept <- df %>%
   filter(display_group == "endcap", !is.na(department)) %>%
   group_by(department) %>%
   summarize(endcap_sales = sum(sales_value, na.rm = TRUE),
             n_store_weeks = n_distinct(str_c(store_id, week, sep = "_")),
             revenue_per_store_week = endcap_sales / n_store_weeks) %>%
     arrange(desc(revenue_per_store_week))

ggplot(endcap_revenue_by_dept, aes(x = reorder(department, revenue_per_store_week), 
                                   y = revenue_per_store_week)) +
  geom_col() + 
  coord_flip() +
  labs(title = "Endcap Revenue by Department",
       subtitle = "Measured across 50 stores",
       x = "Department",
       y = "Revenue from endcap sales") +
  theme_minimal()

#
scatter_data <- df %>%
  filter(display_group == "endcap",
         department %in% c("GROCERY", "DRUG GM"),
         !is.na(product_category)) %>%
  group_by(product_category, department) %>%
  summarise(
    avg_revenue_per_transaction = mean(sales_value, na.rm = TRUE),
    total_revenue = sum(sales_value, na.rm = TRUE),
    n_transactions = n(),
    n_store_weeks = n_distinct(str_c(store_id, week, sep = "_")),
    .groups = "drop"
  ) %>%
  filter(n_transactions >= 20)

ggplot(scatter_data, aes(x = n_store_weeks, 
                          y = avg_revenue_per_transaction,
                          color = department)) +
  geom_vline(xintercept = median(scatter_data$n_store_weeks), 
             linetype = "dashed", color = "gray50") +
  geom_hline(yintercept = median(scatter_data$avg_revenue_per_transaction), 
             linetype = "dashed", color = "gray50") +
  geom_point(aes(size = total_revenue), alpha = 0.7) +
  geom_text(aes(label = product_category), 
            size = 2.5, vjust = -0.8, check_overlap = TRUE) +
  annotate("text", x = max(scatter_data$n_store_weeks) * 0.75, 
           y = max(scatter_data$avg_revenue_per_transaction) * 0.95,
           label = "High Revenue\nLow Exposure", 
           color = "darkgreen", size = 3, fontface = "bold") +
  annotate("text", x = max(scatter_data$n_store_weeks) * 0.75,
           y = min(scatter_data$avg_revenue_per_transaction) * 1.0,
           label = "Low Revenue\nHigh Exposure", 
           color = "red", size = 3, fontface = "bold") +
  scale_size_continuous(name = "Total Revenue ($)") +
  labs(
    title = "Endcap Exposure vs Revenue per Transaction",
    subtitle = "Bubble size represents total revenue",
    x = "Endcap Store-Weeks (Exposure)",
    y = "Avg Revenue per Transaction ($)",
    color = "Department"
  ) +
  theme_minimal()

#MAKE THIS AN INTERACTIVE BUBBLE GRAPH WITH PLOTLY
#What are the best and worst performers on endcaps from the GROCERY department?

grocery_endcap <- df %>%
  filter(display_group == "endcap", department == "GROCERY") %>%
  group_by(product_category) %>%
  summarize(avg_rev_per_tx = mean(sales_value, na.rm = TRUE),
            total_revenue = sum(sales_value, na.rm = TRUE),
            n_transactions = n(),
            n_store_weeks = n_distinct(str_c(store_id, week, sep = "_"))) %>%
  filter(n_transactions >= 10) %>%
  arrange(desc(avg_rev_per_tx))

grocery_top_bottom <- rbind(
  head(grocery_endcap, 10) %>% mutate(group = "Top 10"),
  tail(grocery_endcap, 10) %>% mutate(group = "Bottom 10")
)

ggplot(grocery_top_bottom, aes(x = reorder(product_category, avg_rev_per_tx), 
                                y = avg_rev_per_tx,
                                fill = group)) +
  geom_col() +
  scale_fill_manual(values = c("Bottom 10" = "tomato", "Top 10" = "steelblue")) +
  coord_flip() +
  labs(title = "Grocery Department: Best vs Worst Endcap Performers",
       subtitle = "By average revenue per transaction",
       x = "Product Category",
       y = "Avg Revenue per Transaction ($)",
       fill = "") +
  theme_minimal()

grocery_endcap_raw <- df %>%
  filter(display_group == "endcap", department == "GROCERY",
         !is.na(product_category)) %>%
  group_by(product_category) %>%
  mutate(n_transactions = n()) %>%
  filter(n_transactions >= 10) %>%
  ungroup()

# Get top and bottom 10 categories by median
grocery_order <- grocery_endcap_raw %>%
  group_by(product_category) %>%
  summarise(med = median(sales_value)) %>%
  arrange(desc(med))

top_bottom <- c(head(grocery_order$product_category, 10),
                tail(grocery_order$product_category, 10))

grocery_endcap_raw %>%
  filter(product_category %in% top_bottom) %>%
  mutate(group = ifelse(product_category %in% head(grocery_order$product_category, 10),
                        "Top 10", "Bottom 10")) %>%
  ggplot(aes(x = reorder(product_category, sales_value, median),
             y = sales_value,
             fill = group)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = c("Bottom 10" = "tomato", "Top 10" = "steelblue")) +
  coord_flip() +
  labs(title = "Grocery Department: Best vs Worst Endcap Performers",
       subtitle = "Distribution of transaction values, 50 stores 2017",
       x = "Product Category",
       y = "Sales Value per Transaction ($)",
       fill = "") +
  theme_minimal()

drug_endcap_raw <- df %>%
  filter(display_group == "endcap", department == "DRUG GM",
         !is.na(product_type)) %>%
  group_by(product_type) %>%
  mutate(n_transactions = n()) %>%
  filter(n_transactions >= 5) %>% # low threshold - noted limitation
  ungroup()

drug_order <- drug_endcap_raw %>%
  group_by(product_type) %>%
  summarise(med = median(sales_value)) %>%
  arrange(desc(med))

top_bottom_drug <- c(head(drug_order$product_type, 10),
                     tail(drug_order$product_type, 10))

drug_endcap_raw %>%
  filter(product_type %in% top_bottom_drug) %>%
  mutate(group = ifelse(product_type %in% head(drug_order$product_type, 10),
                        "Top 10", "Bottom 10")) %>%
  ggplot(aes(x = reorder(product_type, sales_value, median),
             y = sales_value,
             fill = group)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_manual(values = c("Bottom 10" = "tomato", "Top 10" = "steelblue")) +
  coord_flip() +
  labs(title = "Drug GM Department: Best vs Worst Endcap Performers",
       subtitle = "Distribution of transaction values, 50 stores 2017. Note: low transaction threshold.",
       x = "Product Type",
       y = "Sales Value per Transaction ($)",
       fill = "") +
  theme_minimal()

# How many total grocery endcap transactions do we have?
df %>%
  filter(display_group == "endcap", department == "GROCERY") %>%
  nrow()
## [1] 48996
# Compare to total grocery transactions
df %>%
  filter(department == "GROCERY") %>%
  nrow()
## [1] 829301
#What are the best and worst performers on endcaps from the DRUG GM department?

drug_endcap <- df %>%
  filter(display_group == "endcap", department == "DRUG GM") %>%
  group_by(product_type) %>%
  summarize(avg_rev_per_tx = mean(sales_value, na.rm = TRUE),
            total_revenue = sum(sales_value, na.rm = TRUE),
            n_transactions = n(),
            n_store_weeks = n_distinct(str_c(store_id, week, sep = "_"))) %>%
  filter(n_transactions >= 5) %>% #this is a MAJOR limitation
  arrange(desc(avg_rev_per_tx))

drug_top_bottom <- rbind(
  head(drug_endcap, 10) %>% mutate(group = "Top 10"),
  tail(drug_endcap, 10) %>% mutate(group = "Bottom 10")
)

ggplot(drug_top_bottom, aes(x = reorder(product_type, avg_rev_per_tx), y = avg_rev_per_tx)) +
  geom_col() +
    coord_flip() +
  labs(    
    title = "Drug Department: Best vs Worst Endcap Performers",
    subtitle = "By average revenue per transaction",
    x = "Product Category",
    y = "Avg Revenue per Transaction ($)")

categories_final <- c(
  "BEERS/ALES", "DOMESTIC WINE", "LAUNDRY DETERGENTS", "COFFEE", "PAPER TOWELS",
  "EGGS", "SOUP", "COLD CEREAL", "FLUID MILK PRODUCTS", "COOKIES/CONES",
  "DIAPERS & DISPOSABLES", "SOFT DRINKS", "FROZEN PIZZA",
  "CANDY - PACKAGED", "BAG SNACKS"
)

results <- data.frame()
for(cat in categories_final) {
  temp <- df %>%
    filter(store_id == "319", product_category == cat) %>%
    group_by(week) %>%
    summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
              had_endcap = any(display_group == "endcap"),
              .groups = "drop") %>%
    mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
           product_category = cat)
  results <- rbind(results, temp)
}

category_lift <- results %>%
  group_by(product_category, week_type) %>%
  summarise(
    avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
    n_weeks = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(names_from = week_type,
              values_from = c(avg_weekly_sales, n_weeks)) %>%
  mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
                           `avg_weekly_sales_No Display Week` * 100) %>%
  filter(!is.na(sales_lift_pct),
         `n_weeks_Endcap Week` >= 4,
         `n_weeks_No Display Week` >= 4) %>%
  arrange(desc(sales_lift_pct))

category_lift
## # A tibble: 11 × 6
##    product_category `avg_weekly_sales_Endcap Week` avg_weekly_sales_No Display…¹
##    <chr>                                     <dbl>                         <dbl>
##  1 COFFEE                                    20.1                          10.8 
##  2 DOMESTIC WINE                             32.0                          18.7 
##  3 CANDY - PACKAGED                          10.9                           6.99
##  4 SOFT DRINKS                               47.1                          33.9 
##  5 BAG SNACKS                                21.3                          15.6 
##  6 COOKIES/CONES                              6.81                          5.45
##  7 SOUP                                      13.9                          11.8 
##  8 FROZEN PIZZA                              17.5                          15.7 
##  9 BEERS/ALES                                24.9                          22.7 
## 10 COLD CEREAL                               15.8                          17.6 
## 11 EGGS                                       3.32                          5.66
## # ℹ abbreviated name: ¹​`avg_weekly_sales_No Display Week`
## # ℹ 3 more variables: `n_weeks_Endcap Week` <int>,
## #   `n_weeks_No Display Week` <int>, sales_lift_pct <dbl>
ggplot(category_lift, aes(x = reorder(product_category, sales_lift_pct),
                           y = sales_lift_pct,
                           fill = sales_lift_pct > 20)) +
  geom_col() +
  scale_fill_manual(values = c("TRUE" = "steelblue", "FALSE" = "tomato"),
                    guide = "none") +
  coord_flip() +
  geom_hline(yintercept = 20, linetype = "dashed", color = "gray50") +
  geom_text(aes(label = paste0(round(sales_lift_pct, 1), "%")), 
            hjust = -0.2, size = 3) +
  labs(
    title = "Endcap Sales Lift by Product Category at Store",
    subtitle = "Dashed line at 20% lift threshold. Red bars = low lift categories.",
    x = "Product Category",
    y = "Sales Lift (%)"
  ) +
  theme_minimal() +
  expand_limits(y = max(category_lift$sales_lift_pct) * 1.15)

categories_final <- c(
  "BEERS/ALES", "DOMESTIC WINE", "LAUNDRY DETERGENTS", "COFFEE", "PAPER TOWELS",
  "EGGS", "SOUP", "COLD CEREAL", "FLUID MILK PRODUCTS", "COOKIES/CONES",
  "DIAPERS & DISPOSABLES", "SOFT DRINKS", "FROZEN PIZZA",
  "CANDY - PACKAGED", "BAG SNACKS"
)

results <- data.frame()
for(cat in categories_final) {
  temp <- df %>%
    filter(store_id == "334", product_category == cat) %>%
    group_by(week) %>%
    summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
              had_endcap = any(display_group == "endcap"),
              .groups = "drop") %>%
    mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
           product_category = cat)
  results <- rbind(results, temp)
}

category_lift <- results %>%
  group_by(product_category, week_type) %>%
  summarise(
    avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
    n_weeks = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(names_from = week_type,
              values_from = c(avg_weekly_sales, n_weeks)) %>%
  mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
                           `avg_weekly_sales_No Display Week` * 100) %>%
  filter(!is.na(sales_lift_pct),
         `n_weeks_Endcap Week` >= 4,
         `n_weeks_No Display Week` >= 4) %>%
  arrange(desc(sales_lift_pct))

category_lift
## # A tibble: 7 × 6
##   product_category `avg_weekly_sales_Endcap Week` avg_weekly_sales_No Display …¹
##   <chr>                                     <dbl>                          <dbl>
## 1 CANDY - PACKAGED                           8.67                           6.29
## 2 SOFT DRINKS                               36.1                           27.1 
## 3 COLD CEREAL                               12.9                            9.71
## 4 BAG SNACKS                                10.6                            8.61
## 5 FROZEN PIZZA                              16.0                           13.0 
## 6 SOUP                                       7.70                          10.2 
## 7 COOKIES/CONES                              3.76                           5.36
## # ℹ abbreviated name: ¹​`avg_weekly_sales_No Display Week`
## # ℹ 3 more variables: `n_weeks_Endcap Week` <int>,
## #   `n_weeks_No Display Week` <int>, sales_lift_pct <dbl>
ggplot(category_lift, aes(x = reorder(product_category, sales_lift_pct),
                           y = sales_lift_pct,
                           fill = sales_lift_pct > 20)) +
  geom_col() +
  scale_fill_manual(values = c("TRUE" = "steelblue", "FALSE" = "tomato"),
                    guide = "none") +
  coord_flip() +
  geom_hline(yintercept = 20, linetype = "dashed", color = "gray50") +
  geom_text(aes(label = paste0(round(sales_lift_pct, 1), "%")), 
            hjust = -0.2, size = 3) +
  labs(
    title = "Endcap Sales Lift by Product Category at Store 334",
    subtitle = "Dashed line at 20% lift threshold. Red bars = low lift categories.",
    x = "Product Category",
    y = "Sales Lift (%)"
  ) +
  theme_minimal() +
  expand_limits(y = max(category_lift$sales_lift_pct) * 1.15)

# Find stores in our sample with good transaction counts
df %>%
  filter(product_category %in% categories_final) %>%
  group_by(store_id) %>%
  summarise(
    n_transactions = n(),
    n_categories = n_distinct(product_category),
    has_endcap = any(display_group == "endcap")
  ) %>%
  filter(has_endcap == TRUE) %>%
  arrange(desc(n_categories))
## # A tibble: 100 × 4
##    store_id n_transactions n_categories has_endcap
##    <chr>             <int>        <int> <lgl>     
##  1 289                2939           15 TRUE      
##  2 292                5596           15 TRUE      
##  3 296                2515           15 TRUE      
##  4 297                2262           15 TRUE      
##  5 299                1245           15 TRUE      
##  6 300                2000           15 TRUE      
##  7 304                3206           15 TRUE      
##  8 306                2170           15 TRUE      
##  9 309                1731           15 TRUE      
## 10 310                2284           15 TRUE      
## # ℹ 90 more rows
stores_to_check <- df %>%
  distinct(store_id) %>%
  pull(store_id)

all_results <- data.frame()
for(store in stores_to_check) {
  for(cat in categories_final) {
    temp <- df %>%
      filter(store_id == store, product_category == cat) %>%
      group_by(week) %>%
      summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
                had_endcap = any(display_group == "endcap"),
                .groups = "drop") %>%
      mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
             product_category = cat,
             store_id = store)
    all_results <- rbind(all_results, temp)
  }
}

# Get lift per store per category
lift_by_store <- all_results %>%
  group_by(store_id, product_category, week_type) %>%
  summarise(avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
            n_weeks = n(),
            .groups = "drop") %>%
  pivot_wider(names_from = week_type,
              values_from = c(avg_weekly_sales, n_weeks)) %>%
  mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
                           `avg_weekly_sales_No Display Week` * 100) %>%
  filter(!is.na(sales_lift_pct),
         `n_weeks_Endcap Week` >= 2,
         `n_weeks_No Display Week` >= 2)

# Then average lift across stores
lift_summary <- lift_by_store %>%
  group_by(product_category) %>%
  summarise(
    avg_lift = mean(sales_lift_pct),
    median_lift = median(sales_lift_pct),
    n_stores = n()
  ) %>%
  arrange(desc(avg_lift))

lift_summary
## # A tibble: 15 × 4
##    product_category      avg_lift median_lift n_stores
##    <chr>                    <dbl>       <dbl>    <int>
##  1 PAPER TOWELS             56.8        42.4        60
##  2 FROZEN PIZZA             56.3        50.8        95
##  3 SOUP                     49.4        42.9        93
##  4 DOMESTIC WINE            49.2        37.0        35
##  5 BEERS/ALES               48.0        40.0        66
##  6 COFFEE                   47.0        33.2        77
##  7 CANDY - PACKAGED         41.7        34.7        88
##  8 DIAPERS & DISPOSABLES    40.6        29.9        35
##  9 COOKIES/CONES            40.4        32.2        95
## 10 BAG SNACKS               39.7        32.1       100
## 11 COLD CEREAL              33.4        31.3        99
## 12 LAUNDRY DETERGENTS       24.6        10.6        77
## 13 SOFT DRINKS              23.3        19.9        87
## 14 EGGS                     14.0        10.2        24
## 15 FLUID MILK PRODUCTS       8.63        6.30       34
all_results_full <- data.frame()
for(store in stores_to_check) {
  temp <- df %>%
    filter(store_id == store, !is.na(product_category)) %>%
    group_by(week, product_category) %>%
    summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
              had_endcap = any(display_group == "endcap"),
              .groups = "drop") %>%
    mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
           store_id = store)
  all_results_full <- rbind(all_results_full, temp)
}

lift_full <- all_results_full %>%
  group_by(store_id, product_category, week_type) %>%
  summarise(avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
            n_weeks = n(),
            .groups = "drop") %>%
  pivot_wider(names_from = week_type,
              values_from = c(avg_weekly_sales, n_weeks)) %>%
  mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
                           `avg_weekly_sales_No Display Week` * 100) %>%
  filter(!is.na(sales_lift_pct),
         `n_weeks_Endcap Week` >= 5,
         `n_weeks_No Display Week` >= 5) %>%
group_by(product_category) %>%
  summarise(
    avg_lift = mean(sales_lift_pct),
    median_lift = median(sales_lift_pct),
    n_stores = n()
  ) %>%
  filter(n_stores >= 5) %>%
  arrange(desc(median_lift))

print(lift_full, n = Inf)
## # A tibble: 65 × 4
##    product_category               avg_lift median_lift n_stores
##    <chr>                             <dbl>       <dbl>    <int>
##  1 BAKING NEEDS                     157.        136.         15
##  2 FROZEN PIE/DESSERTS               83.6        76.7        35
##  3 VEGETABLES - SHELF STABLE         56.5        61.0        48
##  4 FROZEN PIZZA                      55.5        49.2        88
##  5 DOMESTIC WINE                     57.8        48.6        11
##  6 FRUIT - SHELF STABLE              45.8        44.3        13
##  7 MEAT - SHELF STABLE               47.9        43.5        53
##  8 CHIPS&SNACKS                      52.6        42.1         5
##  9 DRY BN/VEG/POTATO/RICE            43.4        41.8        13
## 10 YOGURT                            47.8        41.4        17
## 11 REFRGRATD DOUGH PRODUCTS          48.8        41.1         8
## 12 SOUP                              43.8        40.7        69
## 13 CRACKERS/MISC BKD FD              46.5        40.0        92
## 14 BEERS/ALES                        46.0        39.5        43
## 15 FRZN MEAT/MEAT DINNERS            46.1        39.3        86
## 16 CONVENIENT BRKFST/WHLSM SNACKS    37.8        39.1        60
## 17 PAPER TOWELS                      45.0        37.9        14
## 18 CANDY - PACKAGED                  42.1        37.8        63
## 19 MEAT - MISC                       40.4        37.3         9
## 20 ICE CREAM/MILK/SHERBTS            37.3        36.8        93
## 21 COFFEE                            38.8        36.2        28
## 22 BEANS - CANNED GLASS & MW         40.5        35.0        10
## 23 FRZN VEGETABLE/VEG DSH            36.1        34.9        58
## 24 COOKIES/CONES                     41.1        34.2        77
## 25 BAG SNACKS                        43.6        34.0        88
## 26 MILK BY-PRODUCTS                  36.7        33.8         6
## 27 CANNED JUICES                     34.7        33.1        58
## 28 DRY MIX DESSERTS                  57.7        32.7        18
## 29 COLD CEREAL                       36.3        32.6        87
## 30 WATER - CARBONATED/FLVRD DRINK    38.4        31.3        38
## 31 FRZN NOVELTIES/WTR ICE            40.2        31.2        63
## 32 CANDY - CHECKLANE                 41.6        30.6        40
## 33 FROZEN MEAT                       46.6        30.1        14
## 34 BUTTER                            49.3        29.0         6
## 35 BATH TISSUES                      35.1        28.6        60
## 36 WAREHOUSE SNACKS                  34.4        27.0        20
## 37 BAKING MIXES                      34.0        26.4        21
## 38 PASTA SAUCE                       30.2        26.4        11
## 39 SNACK NUTS                        24.7        25.8         6
## 40 ISOTONIC DRINKS                   30.2        25.5        41
## 41 CONDIMENTS/SAUCES                 30.3        25.4        43
## 42 FRZN POTATOES                     18.6        23.3        16
## 43 SUGARS/SWEETNERS                  28.9        21.4        11
## 44 SALD DRSNG/SNDWCH SPRD            27.1        20.9        68
## 45 SOFT DRINKS                       25.7        20.3        72
## 46 DINNER MXS:DRY                    22.2        19.4        34
## 47 PNT BTR/JELLY/JAMS                19.0        17.7        16
## 48 PAPER HOUSEWARES                  35.6        17.4         6
## 49 REFRGRATD JUICES/DRNKS            18.7        15.6        23
## 50 FD WRAPS/BAGS/TRSH BG             20.0        15.3         6
## 51 CHEESE                            17.0        14.6        28
## 52 BAKED BREAD/BUNS/ROLLS            11.2        14.1        31
## 53 LAUNDRY DETERGENTS                23.5        14.1        38
## 54 SOAP - LIQUID & BAR               16.0        12.6        14
## 55 HAIR CARE PRODUCTS                30.7        11.5        17
## 56 ORAL HYGIENE PRODUCTS             25.1        11.4        17
## 57 EGGS                              18.2        11.2        15
## 58 PWDR/CRYSTL DRNK MX               25.4        10.1        21
## 59 CEREAL/BREAKFAST                  13.8         9.28        9
## 60 SEAFOOD - FROZEN                  15.3         7.62        7
## 61 FLUID MILK PRODUCTS                9.44        6.85       31
## 62 DIAPERS & DISPOSABLES             19.2         3.44        5
## 63 COLD AND FLU                      31.9         1.62        5
## 64 FRZN BREAKFAST FOODS               4.24        1.49       29
## 65 SHORTENING/OIL                    -9.81      -17.9         8
lift_by_store %>%
  filter(product_category %in% lift_full$product_category) %>%
  ggplot(aes(x = reorder(product_category, sales_lift_pct, median), 
             y = sales_lift_pct)) +
  geom_boxplot() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  coord_flip() +
  labs(title = "Distribution of Endcap Sales Lift Across Stores",
       x = "Product Category",
       y = "Sales Lift (%)")

mailer_quantity <- df %>%
  filter(display_group == "endcap",
         department %in% c("GROCERY", "DRUG GM"),
         !is.na(product_category)) %>%
  mutate(mailer_flag = ifelse(!is.na(mailer_location) & mailer_location != "0",
                               TRUE, FALSE)) %>%
  group_by(product_category, department, mailer_flag) %>%
  summarise(
    avg_revenue = mean(sales_value, na.rm = TRUE),
    avg_quantity = mean(quantity, na.rm = TRUE),
    n_transactions = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(names_from = mailer_flag,
              values_from = c(avg_revenue, avg_quantity, n_transactions)) %>%
  rename(
    avg_revenue_endcap_only = avg_revenue_FALSE,
    avg_revenue_endcap_mailer = avg_revenue_TRUE,
    avg_quantity_endcap_only = avg_quantity_FALSE,
    avg_quantity_endcap_mailer = avg_quantity_TRUE
  ) %>%
  filter(!is.na(avg_revenue_endcap_only) & !is.na(avg_revenue_endcap_mailer)) %>%
  mutate(
    quantity_effect = avg_quantity_endcap_mailer - avg_quantity_endcap_only,
    revenue_effect = avg_revenue_endcap_mailer - avg_revenue_endcap_only
  )

p <- ggplot(mailer_quantity, aes(x = revenue_effect, 
                                  y = quantity_effect,
                                  color = department,
                                  text = product_category)) +
  geom_point(alpha = 0.7) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  annotate("text", x = max(mailer_quantity$revenue_effect) * 0.7,
           y = max(mailer_quantity$quantity_effect) * 0.9,
           label = "More units\nMore revenue", color = "darkgreen", 
           size = 3, fontface = "bold") +
  annotate("text", x = min(mailer_quantity$revenue_effect) * 0.7,
           y = max(mailer_quantity$quantity_effect) * 0.9,
           label = "More units\nLess revenue", color = "steelblue", 
           size = 3, fontface = "bold") +
  annotate("text", x = max(mailer_quantity$revenue_effect) * 0.7,
           y = min(mailer_quantity$quantity_effect) * 0.9,
           label = "Fewer units\nMore revenue", color = "darkorange", 
           size = 3, fontface = "bold") +
  annotate("text", x = min(mailer_quantity$revenue_effect) * 0.7,
           y = min(mailer_quantity$quantity_effect) * 0.9,
           label = "Fewer units\nLess revenue", color = "red", 
           size = 3, fontface = "bold") +
  labs(
    title = "Mailer Effect on Quantity vs Revenue When on Endcap",
    subtitle = "Each point represents a product category. 50 stores, 2017.",
    x = "Change in Avg Revenue per Transaction ($)",
    y = "Change in Avg Quantity per Transaction",
    color = "Department"
  ) +
  theme_minimal()

ggplotly(p, tooltip = "text")
#SHOULD FILTER THIS DOWN TO SOMETHING LIKE TOP 10 BOTTOM 10


lift_by_store_full <- all_results_full %>%
  group_by(store_id, product_category, week_type) %>%
  summarise(avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
            n_weeks = n(),
            .groups = "drop") %>%
  pivot_wider(names_from = week_type,
              values_from = c(avg_weekly_sales, n_weeks)) %>%
  mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
                           `avg_weekly_sales_No Display Week` * 100) %>%
  filter(!is.na(sales_lift_pct),
         `n_weeks_Endcap Week` >= 5,
         `n_weeks_No Display Week` >= 5)

lift_by_store_full %>%
  filter(product_category %in% lift_full$product_category) %>%
  ggplot(aes(x = reorder(product_category, sales_lift_pct, median), 
             y = sales_lift_pct)) +
  geom_boxplot() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  coord_flip() +
  labs(title = "Distribution of Endcap Sales Lift Across Stores",
       x = "Product Category",
       y = "Sales Lift (%)") +
  theme_minimal()

library(knitr)

lift_by_store_full %>%
  filter(product_category %in% lift_full$product_category) %>%
  group_by(product_category) %>%
  summarise(
    median_lift = median(sales_lift_pct),
    avg_lift = mean(sales_lift_pct),
    pct_stores_positive = mean(sales_lift_pct > 0) * 100,
    n_stores = n()
  ) %>%
  arrange(desc(median_lift)) %>%
  kable(digits = 1,
        col.names = c("Product Category", "Median Lift (%)", 
                      "Avg Lift (%)", "% Stores Positive", "N Stores"),
        caption = "Endcap Sales Lift by Product Category (All Stores)")
Endcap Sales Lift by Product Category (All Stores)
Product Category Median Lift (%) Avg Lift (%) % Stores Positive N Stores
BAKING NEEDS 136.2 157.3 100.0 15
FROZEN PIE/DESSERTS 76.7 83.6 85.7 35
VEGETABLES - SHELF STABLE 61.0 56.5 87.5 48
FROZEN PIZZA 49.2 55.5 94.3 88
DOMESTIC WINE 48.6 57.8 81.8 11
FRUIT - SHELF STABLE 44.3 45.8 100.0 13
MEAT - SHELF STABLE 43.5 47.9 96.2 53
CHIPS&SNACKS 42.1 52.6 100.0 5
DRY BN/VEG/POTATO/RICE 41.8 43.4 84.6 13
YOGURT 41.4 47.8 100.0 17
REFRGRATD DOUGH PRODUCTS 41.1 48.8 87.5 8
SOUP 40.7 43.8 97.1 69
CRACKERS/MISC BKD FD 40.0 46.5 92.4 92
BEERS/ALES 39.5 46.0 93.0 43
FRZN MEAT/MEAT DINNERS 39.3 46.1 93.0 86
CONVENIENT BRKFST/WHLSM SNACKS 39.1 37.8 86.7 60
PAPER TOWELS 37.9 45.0 85.7 14
CANDY - PACKAGED 37.8 42.1 76.2 63
MEAT - MISC 37.3 40.4 88.9 9
ICE CREAM/MILK/SHERBTS 36.8 37.3 89.2 93
COFFEE 36.2 38.8 85.7 28
BEANS - CANNED GLASS & MW 35.0 40.5 80.0 10
FRZN VEGETABLE/VEG DSH 34.9 36.1 86.2 58
COOKIES/CONES 34.2 41.1 88.3 77
BAG SNACKS 34.0 43.6 94.3 88
MILK BY-PRODUCTS 33.8 36.7 100.0 6
CANNED JUICES 33.1 34.7 82.8 58
DRY MIX DESSERTS 32.7 57.7 72.2 18
COLD CEREAL 32.6 36.3 89.7 87
WATER - CARBONATED/FLVRD DRINK 31.3 38.4 81.6 38
FRZN NOVELTIES/WTR ICE 31.2 40.2 88.9 63
CANDY - CHECKLANE 30.6 41.6 85.0 40
FROZEN MEAT 30.1 46.6 71.4 14
BUTTER 29.0 49.3 100.0 6
BATH TISSUES 28.6 35.1 81.7 60
WAREHOUSE SNACKS 27.0 34.4 70.0 20
BAKING MIXES 26.4 34.0 85.7 21
PASTA SAUCE 26.4 30.2 72.7 11
SNACK NUTS 25.8 24.7 66.7 6
ISOTONIC DRINKS 25.5 30.2 70.7 41
CONDIMENTS/SAUCES 25.4 30.3 86.0 43
FRZN POTATOES 23.3 18.6 68.8 16
SUGARS/SWEETNERS 21.4 28.9 90.9 11
SALD DRSNG/SNDWCH SPRD 20.9 27.1 80.9 68
SOFT DRINKS 20.3 25.7 79.2 72
DINNER MXS:DRY 19.4 22.2 70.6 34
PNT BTR/JELLY/JAMS 17.7 19.0 68.8 16
PAPER HOUSEWARES 17.4 35.6 66.7 6
REFRGRATD JUICES/DRNKS 15.6 18.7 73.9 23
FD WRAPS/BAGS/TRSH BG 15.3 20.0 83.3 6
CHEESE 14.6 17.0 78.6 28
BAKED BREAD/BUNS/ROLLS 14.1 11.2 74.2 31
LAUNDRY DETERGENTS 14.1 23.5 73.7 38
SOAP - LIQUID & BAR 12.6 16.0 64.3 14
HAIR CARE PRODUCTS 11.5 30.7 76.5 17
ORAL HYGIENE PRODUCTS 11.4 25.1 64.7 17
EGGS 11.2 18.2 80.0 15
PWDR/CRYSTL DRNK MX 10.1 25.4 71.4 21
CEREAL/BREAKFAST 9.3 13.8 77.8 9
SEAFOOD - FROZEN 7.6 15.3 57.1 7
FLUID MILK PRODUCTS 6.9 9.4 64.5 31
DIAPERS & DISPOSABLES 3.4 19.2 60.0 5
COLD AND FLU 1.6 31.9 60.0 5
FRZN BREAKFAST FOODS 1.5 4.2 51.7 29
SHORTENING/OIL -17.9 -9.8 37.5 8