# Libraries
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(stringr)
library(ggrepel)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Get full datasets
transactions <- get_transactions()
promotions <- get_promotions()
#Narrowing down the dataset to a samples of 50 stores and assigning all endcap locations
#to one variable
endcap_codes <- c("3", "4", "5", "6")
# Sample 50 stores due to processing constraints
set.seed(20260222) #So that the same 50 stores are chosen reproducibly
sample_stores <- promotions %>%
distinct(store_id) %>%
sample_n(100)
transactions_50 <- transactions %>%
filter(store_id %in% sample_stores$store_id)
promo_type <- promotions %>%
filter(store_id %in% sample_stores$store_id) %>%
group_by(product_id, week, store_id)
df <- transactions_50 %>%
left_join(products, by = "product_id") %>%
left_join(promotions, by = c("product_id", "week", "store_id"),
relationship = "many-to-many") %>% #This is to make sure that transactions are not duplicated
mutate(display_group = ifelse(display_location %in% endcap_codes, "endcap", "no_endcap"))
#KEY QUESTION: Can we identify endcap products that don't perform well to make
#a recommendation for better use of that space?
#Question 1: What department(s) get the most endcap exposure timewise?
endcap_time_by_dept <- df %>%
filter(display_group == "endcap") %>%
group_by(department) %>%
summarize(n_store_weeks = n_distinct(str_c(store_id, week, sep = "_"))) %>%
arrange(desc(n_store_weeks))
ggplot(endcap_time_by_dept, aes(x = reorder(department, n_store_weeks), y = n_store_weeks)) +
geom_col() +
coord_flip() +
labs(title = "Endcap Allocation by Department",
subtitle = "Measured in store-weeks across 50 stores",
x = "Department",
y = "Store-weeks on endcap") +
theme_minimal()

endcap_revenue_by_dept <- df %>%
filter(display_group == "endcap", !is.na(department)) %>%
group_by(department) %>%
summarize(endcap_sales = sum(sales_value, na.rm = TRUE),
n_store_weeks = n_distinct(str_c(store_id, week, sep = "_")),
revenue_per_store_week = endcap_sales / n_store_weeks) %>%
arrange(desc(revenue_per_store_week))
ggplot(endcap_revenue_by_dept, aes(x = reorder(department, revenue_per_store_week),
y = revenue_per_store_week)) +
geom_col() +
coord_flip() +
labs(title = "Endcap Revenue by Department",
subtitle = "Measured across 50 stores",
x = "Department",
y = "Revenue from endcap sales") +
theme_minimal()

#
scatter_data <- df %>%
filter(display_group == "endcap",
department %in% c("GROCERY", "DRUG GM"),
!is.na(product_category)) %>%
group_by(product_category, department) %>%
summarise(
avg_revenue_per_transaction = mean(sales_value, na.rm = TRUE),
total_revenue = sum(sales_value, na.rm = TRUE),
n_transactions = n(),
n_store_weeks = n_distinct(str_c(store_id, week, sep = "_")),
.groups = "drop"
) %>%
filter(n_transactions >= 20)
ggplot(scatter_data, aes(x = n_store_weeks,
y = avg_revenue_per_transaction,
color = department)) +
geom_vline(xintercept = median(scatter_data$n_store_weeks),
linetype = "dashed", color = "gray50") +
geom_hline(yintercept = median(scatter_data$avg_revenue_per_transaction),
linetype = "dashed", color = "gray50") +
geom_point(aes(size = total_revenue), alpha = 0.7) +
geom_text(aes(label = product_category),
size = 2.5, vjust = -0.8, check_overlap = TRUE) +
annotate("text", x = max(scatter_data$n_store_weeks) * 0.75,
y = max(scatter_data$avg_revenue_per_transaction) * 0.95,
label = "High Revenue\nLow Exposure",
color = "darkgreen", size = 3, fontface = "bold") +
annotate("text", x = max(scatter_data$n_store_weeks) * 0.75,
y = min(scatter_data$avg_revenue_per_transaction) * 1.0,
label = "Low Revenue\nHigh Exposure",
color = "red", size = 3, fontface = "bold") +
scale_size_continuous(name = "Total Revenue ($)") +
labs(
title = "Endcap Exposure vs Revenue per Transaction",
subtitle = "Bubble size represents total revenue",
x = "Endcap Store-Weeks (Exposure)",
y = "Avg Revenue per Transaction ($)",
color = "Department"
) +
theme_minimal()

#MAKE THIS AN INTERACTIVE BUBBLE GRAPH WITH PLOTLY
#What are the best and worst performers on endcaps from the GROCERY department?
grocery_endcap <- df %>%
filter(display_group == "endcap", department == "GROCERY") %>%
group_by(product_category) %>%
summarize(avg_rev_per_tx = mean(sales_value, na.rm = TRUE),
total_revenue = sum(sales_value, na.rm = TRUE),
n_transactions = n(),
n_store_weeks = n_distinct(str_c(store_id, week, sep = "_"))) %>%
filter(n_transactions >= 10) %>%
arrange(desc(avg_rev_per_tx))
grocery_top_bottom <- rbind(
head(grocery_endcap, 10) %>% mutate(group = "Top 10"),
tail(grocery_endcap, 10) %>% mutate(group = "Bottom 10")
)
ggplot(grocery_top_bottom, aes(x = reorder(product_category, avg_rev_per_tx),
y = avg_rev_per_tx,
fill = group)) +
geom_col() +
scale_fill_manual(values = c("Bottom 10" = "tomato", "Top 10" = "steelblue")) +
coord_flip() +
labs(title = "Grocery Department: Best vs Worst Endcap Performers",
subtitle = "By average revenue per transaction",
x = "Product Category",
y = "Avg Revenue per Transaction ($)",
fill = "") +
theme_minimal()

grocery_endcap_raw <- df %>%
filter(display_group == "endcap", department == "GROCERY",
!is.na(product_category)) %>%
group_by(product_category) %>%
mutate(n_transactions = n()) %>%
filter(n_transactions >= 10) %>%
ungroup()
# Get top and bottom 10 categories by median
grocery_order <- grocery_endcap_raw %>%
group_by(product_category) %>%
summarise(med = median(sales_value)) %>%
arrange(desc(med))
top_bottom <- c(head(grocery_order$product_category, 10),
tail(grocery_order$product_category, 10))
grocery_endcap_raw %>%
filter(product_category %in% top_bottom) %>%
mutate(group = ifelse(product_category %in% head(grocery_order$product_category, 10),
"Top 10", "Bottom 10")) %>%
ggplot(aes(x = reorder(product_category, sales_value, median),
y = sales_value,
fill = group)) +
geom_boxplot(alpha = 0.7) +
scale_fill_manual(values = c("Bottom 10" = "tomato", "Top 10" = "steelblue")) +
coord_flip() +
labs(title = "Grocery Department: Best vs Worst Endcap Performers",
subtitle = "Distribution of transaction values, 50 stores 2017",
x = "Product Category",
y = "Sales Value per Transaction ($)",
fill = "") +
theme_minimal()

drug_endcap_raw <- df %>%
filter(display_group == "endcap", department == "DRUG GM",
!is.na(product_type)) %>%
group_by(product_type) %>%
mutate(n_transactions = n()) %>%
filter(n_transactions >= 5) %>% # low threshold - noted limitation
ungroup()
drug_order <- drug_endcap_raw %>%
group_by(product_type) %>%
summarise(med = median(sales_value)) %>%
arrange(desc(med))
top_bottom_drug <- c(head(drug_order$product_type, 10),
tail(drug_order$product_type, 10))
drug_endcap_raw %>%
filter(product_type %in% top_bottom_drug) %>%
mutate(group = ifelse(product_type %in% head(drug_order$product_type, 10),
"Top 10", "Bottom 10")) %>%
ggplot(aes(x = reorder(product_type, sales_value, median),
y = sales_value,
fill = group)) +
geom_boxplot(alpha = 0.7) +
scale_fill_manual(values = c("Bottom 10" = "tomato", "Top 10" = "steelblue")) +
coord_flip() +
labs(title = "Drug GM Department: Best vs Worst Endcap Performers",
subtitle = "Distribution of transaction values, 50 stores 2017. Note: low transaction threshold.",
x = "Product Type",
y = "Sales Value per Transaction ($)",
fill = "") +
theme_minimal()

# How many total grocery endcap transactions do we have?
df %>%
filter(display_group == "endcap", department == "GROCERY") %>%
nrow()
## [1] 48996
# Compare to total grocery transactions
df %>%
filter(department == "GROCERY") %>%
nrow()
## [1] 829301
#What are the best and worst performers on endcaps from the DRUG GM department?
drug_endcap <- df %>%
filter(display_group == "endcap", department == "DRUG GM") %>%
group_by(product_type) %>%
summarize(avg_rev_per_tx = mean(sales_value, na.rm = TRUE),
total_revenue = sum(sales_value, na.rm = TRUE),
n_transactions = n(),
n_store_weeks = n_distinct(str_c(store_id, week, sep = "_"))) %>%
filter(n_transactions >= 5) %>% #this is a MAJOR limitation
arrange(desc(avg_rev_per_tx))
drug_top_bottom <- rbind(
head(drug_endcap, 10) %>% mutate(group = "Top 10"),
tail(drug_endcap, 10) %>% mutate(group = "Bottom 10")
)
ggplot(drug_top_bottom, aes(x = reorder(product_type, avg_rev_per_tx), y = avg_rev_per_tx)) +
geom_col() +
coord_flip() +
labs(
title = "Drug Department: Best vs Worst Endcap Performers",
subtitle = "By average revenue per transaction",
x = "Product Category",
y = "Avg Revenue per Transaction ($)")

categories_final <- c(
"BEERS/ALES", "DOMESTIC WINE", "LAUNDRY DETERGENTS", "COFFEE", "PAPER TOWELS",
"EGGS", "SOUP", "COLD CEREAL", "FLUID MILK PRODUCTS", "COOKIES/CONES",
"DIAPERS & DISPOSABLES", "SOFT DRINKS", "FROZEN PIZZA",
"CANDY - PACKAGED", "BAG SNACKS"
)
results <- data.frame()
for(cat in categories_final) {
temp <- df %>%
filter(store_id == "319", product_category == cat) %>%
group_by(week) %>%
summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
had_endcap = any(display_group == "endcap"),
.groups = "drop") %>%
mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
product_category = cat)
results <- rbind(results, temp)
}
category_lift <- results %>%
group_by(product_category, week_type) %>%
summarise(
avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
n_weeks = n(),
.groups = "drop"
) %>%
pivot_wider(names_from = week_type,
values_from = c(avg_weekly_sales, n_weeks)) %>%
mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
`avg_weekly_sales_No Display Week` * 100) %>%
filter(!is.na(sales_lift_pct),
`n_weeks_Endcap Week` >= 4,
`n_weeks_No Display Week` >= 4) %>%
arrange(desc(sales_lift_pct))
category_lift
## # A tibble: 11 × 6
## product_category `avg_weekly_sales_Endcap Week` avg_weekly_sales_No Display…¹
## <chr> <dbl> <dbl>
## 1 COFFEE 20.1 10.8
## 2 DOMESTIC WINE 32.0 18.7
## 3 CANDY - PACKAGED 10.9 6.99
## 4 SOFT DRINKS 47.1 33.9
## 5 BAG SNACKS 21.3 15.6
## 6 COOKIES/CONES 6.81 5.45
## 7 SOUP 13.9 11.8
## 8 FROZEN PIZZA 17.5 15.7
## 9 BEERS/ALES 24.9 22.7
## 10 COLD CEREAL 15.8 17.6
## 11 EGGS 3.32 5.66
## # ℹ abbreviated name: ¹`avg_weekly_sales_No Display Week`
## # ℹ 3 more variables: `n_weeks_Endcap Week` <int>,
## # `n_weeks_No Display Week` <int>, sales_lift_pct <dbl>
ggplot(category_lift, aes(x = reorder(product_category, sales_lift_pct),
y = sales_lift_pct,
fill = sales_lift_pct > 20)) +
geom_col() +
scale_fill_manual(values = c("TRUE" = "steelblue", "FALSE" = "tomato"),
guide = "none") +
coord_flip() +
geom_hline(yintercept = 20, linetype = "dashed", color = "gray50") +
geom_text(aes(label = paste0(round(sales_lift_pct, 1), "%")),
hjust = -0.2, size = 3) +
labs(
title = "Endcap Sales Lift by Product Category at Store",
subtitle = "Dashed line at 20% lift threshold. Red bars = low lift categories.",
x = "Product Category",
y = "Sales Lift (%)"
) +
theme_minimal() +
expand_limits(y = max(category_lift$sales_lift_pct) * 1.15)

categories_final <- c(
"BEERS/ALES", "DOMESTIC WINE", "LAUNDRY DETERGENTS", "COFFEE", "PAPER TOWELS",
"EGGS", "SOUP", "COLD CEREAL", "FLUID MILK PRODUCTS", "COOKIES/CONES",
"DIAPERS & DISPOSABLES", "SOFT DRINKS", "FROZEN PIZZA",
"CANDY - PACKAGED", "BAG SNACKS"
)
results <- data.frame()
for(cat in categories_final) {
temp <- df %>%
filter(store_id == "334", product_category == cat) %>%
group_by(week) %>%
summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
had_endcap = any(display_group == "endcap"),
.groups = "drop") %>%
mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
product_category = cat)
results <- rbind(results, temp)
}
category_lift <- results %>%
group_by(product_category, week_type) %>%
summarise(
avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
n_weeks = n(),
.groups = "drop"
) %>%
pivot_wider(names_from = week_type,
values_from = c(avg_weekly_sales, n_weeks)) %>%
mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
`avg_weekly_sales_No Display Week` * 100) %>%
filter(!is.na(sales_lift_pct),
`n_weeks_Endcap Week` >= 4,
`n_weeks_No Display Week` >= 4) %>%
arrange(desc(sales_lift_pct))
category_lift
## # A tibble: 7 × 6
## product_category `avg_weekly_sales_Endcap Week` avg_weekly_sales_No Display …¹
## <chr> <dbl> <dbl>
## 1 CANDY - PACKAGED 8.67 6.29
## 2 SOFT DRINKS 36.1 27.1
## 3 COLD CEREAL 12.9 9.71
## 4 BAG SNACKS 10.6 8.61
## 5 FROZEN PIZZA 16.0 13.0
## 6 SOUP 7.70 10.2
## 7 COOKIES/CONES 3.76 5.36
## # ℹ abbreviated name: ¹`avg_weekly_sales_No Display Week`
## # ℹ 3 more variables: `n_weeks_Endcap Week` <int>,
## # `n_weeks_No Display Week` <int>, sales_lift_pct <dbl>
ggplot(category_lift, aes(x = reorder(product_category, sales_lift_pct),
y = sales_lift_pct,
fill = sales_lift_pct > 20)) +
geom_col() +
scale_fill_manual(values = c("TRUE" = "steelblue", "FALSE" = "tomato"),
guide = "none") +
coord_flip() +
geom_hline(yintercept = 20, linetype = "dashed", color = "gray50") +
geom_text(aes(label = paste0(round(sales_lift_pct, 1), "%")),
hjust = -0.2, size = 3) +
labs(
title = "Endcap Sales Lift by Product Category at Store 334",
subtitle = "Dashed line at 20% lift threshold. Red bars = low lift categories.",
x = "Product Category",
y = "Sales Lift (%)"
) +
theme_minimal() +
expand_limits(y = max(category_lift$sales_lift_pct) * 1.15)

# Find stores in our sample with good transaction counts
df %>%
filter(product_category %in% categories_final) %>%
group_by(store_id) %>%
summarise(
n_transactions = n(),
n_categories = n_distinct(product_category),
has_endcap = any(display_group == "endcap")
) %>%
filter(has_endcap == TRUE) %>%
arrange(desc(n_categories))
## # A tibble: 100 × 4
## store_id n_transactions n_categories has_endcap
## <chr> <int> <int> <lgl>
## 1 289 2939 15 TRUE
## 2 292 5596 15 TRUE
## 3 296 2515 15 TRUE
## 4 297 2262 15 TRUE
## 5 299 1245 15 TRUE
## 6 300 2000 15 TRUE
## 7 304 3206 15 TRUE
## 8 306 2170 15 TRUE
## 9 309 1731 15 TRUE
## 10 310 2284 15 TRUE
## # ℹ 90 more rows
stores_to_check <- df %>%
distinct(store_id) %>%
pull(store_id)
all_results <- data.frame()
for(store in stores_to_check) {
for(cat in categories_final) {
temp <- df %>%
filter(store_id == store, product_category == cat) %>%
group_by(week) %>%
summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
had_endcap = any(display_group == "endcap"),
.groups = "drop") %>%
mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
product_category = cat,
store_id = store)
all_results <- rbind(all_results, temp)
}
}
# Get lift per store per category
lift_by_store <- all_results %>%
group_by(store_id, product_category, week_type) %>%
summarise(avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
n_weeks = n(),
.groups = "drop") %>%
pivot_wider(names_from = week_type,
values_from = c(avg_weekly_sales, n_weeks)) %>%
mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
`avg_weekly_sales_No Display Week` * 100) %>%
filter(!is.na(sales_lift_pct),
`n_weeks_Endcap Week` >= 2,
`n_weeks_No Display Week` >= 2)
# Then average lift across stores
lift_summary <- lift_by_store %>%
group_by(product_category) %>%
summarise(
avg_lift = mean(sales_lift_pct),
median_lift = median(sales_lift_pct),
n_stores = n()
) %>%
arrange(desc(avg_lift))
lift_summary
## # A tibble: 15 × 4
## product_category avg_lift median_lift n_stores
## <chr> <dbl> <dbl> <int>
## 1 PAPER TOWELS 56.8 42.4 60
## 2 FROZEN PIZZA 56.3 50.8 95
## 3 SOUP 49.4 42.9 93
## 4 DOMESTIC WINE 49.2 37.0 35
## 5 BEERS/ALES 48.0 40.0 66
## 6 COFFEE 47.0 33.2 77
## 7 CANDY - PACKAGED 41.7 34.7 88
## 8 DIAPERS & DISPOSABLES 40.6 29.9 35
## 9 COOKIES/CONES 40.4 32.2 95
## 10 BAG SNACKS 39.7 32.1 100
## 11 COLD CEREAL 33.4 31.3 99
## 12 LAUNDRY DETERGENTS 24.6 10.6 77
## 13 SOFT DRINKS 23.3 19.9 87
## 14 EGGS 14.0 10.2 24
## 15 FLUID MILK PRODUCTS 8.63 6.30 34
all_results_full <- data.frame()
for(store in stores_to_check) {
temp <- df %>%
filter(store_id == store, !is.na(product_category)) %>%
group_by(week, product_category) %>%
summarise(total_weekly_sales = sum(sales_value, na.rm = TRUE),
had_endcap = any(display_group == "endcap"),
.groups = "drop") %>%
mutate(week_type = ifelse(had_endcap, "Endcap Week", "No Display Week"),
store_id = store)
all_results_full <- rbind(all_results_full, temp)
}
lift_full <- all_results_full %>%
group_by(store_id, product_category, week_type) %>%
summarise(avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
n_weeks = n(),
.groups = "drop") %>%
pivot_wider(names_from = week_type,
values_from = c(avg_weekly_sales, n_weeks)) %>%
mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
`avg_weekly_sales_No Display Week` * 100) %>%
filter(!is.na(sales_lift_pct),
`n_weeks_Endcap Week` >= 5,
`n_weeks_No Display Week` >= 5) %>%
group_by(product_category) %>%
summarise(
avg_lift = mean(sales_lift_pct),
median_lift = median(sales_lift_pct),
n_stores = n()
) %>%
filter(n_stores >= 5) %>%
arrange(desc(median_lift))
print(lift_full, n = Inf)
## # A tibble: 65 × 4
## product_category avg_lift median_lift n_stores
## <chr> <dbl> <dbl> <int>
## 1 BAKING NEEDS 157. 136. 15
## 2 FROZEN PIE/DESSERTS 83.6 76.7 35
## 3 VEGETABLES - SHELF STABLE 56.5 61.0 48
## 4 FROZEN PIZZA 55.5 49.2 88
## 5 DOMESTIC WINE 57.8 48.6 11
## 6 FRUIT - SHELF STABLE 45.8 44.3 13
## 7 MEAT - SHELF STABLE 47.9 43.5 53
## 8 CHIPS&SNACKS 52.6 42.1 5
## 9 DRY BN/VEG/POTATO/RICE 43.4 41.8 13
## 10 YOGURT 47.8 41.4 17
## 11 REFRGRATD DOUGH PRODUCTS 48.8 41.1 8
## 12 SOUP 43.8 40.7 69
## 13 CRACKERS/MISC BKD FD 46.5 40.0 92
## 14 BEERS/ALES 46.0 39.5 43
## 15 FRZN MEAT/MEAT DINNERS 46.1 39.3 86
## 16 CONVENIENT BRKFST/WHLSM SNACKS 37.8 39.1 60
## 17 PAPER TOWELS 45.0 37.9 14
## 18 CANDY - PACKAGED 42.1 37.8 63
## 19 MEAT - MISC 40.4 37.3 9
## 20 ICE CREAM/MILK/SHERBTS 37.3 36.8 93
## 21 COFFEE 38.8 36.2 28
## 22 BEANS - CANNED GLASS & MW 40.5 35.0 10
## 23 FRZN VEGETABLE/VEG DSH 36.1 34.9 58
## 24 COOKIES/CONES 41.1 34.2 77
## 25 BAG SNACKS 43.6 34.0 88
## 26 MILK BY-PRODUCTS 36.7 33.8 6
## 27 CANNED JUICES 34.7 33.1 58
## 28 DRY MIX DESSERTS 57.7 32.7 18
## 29 COLD CEREAL 36.3 32.6 87
## 30 WATER - CARBONATED/FLVRD DRINK 38.4 31.3 38
## 31 FRZN NOVELTIES/WTR ICE 40.2 31.2 63
## 32 CANDY - CHECKLANE 41.6 30.6 40
## 33 FROZEN MEAT 46.6 30.1 14
## 34 BUTTER 49.3 29.0 6
## 35 BATH TISSUES 35.1 28.6 60
## 36 WAREHOUSE SNACKS 34.4 27.0 20
## 37 BAKING MIXES 34.0 26.4 21
## 38 PASTA SAUCE 30.2 26.4 11
## 39 SNACK NUTS 24.7 25.8 6
## 40 ISOTONIC DRINKS 30.2 25.5 41
## 41 CONDIMENTS/SAUCES 30.3 25.4 43
## 42 FRZN POTATOES 18.6 23.3 16
## 43 SUGARS/SWEETNERS 28.9 21.4 11
## 44 SALD DRSNG/SNDWCH SPRD 27.1 20.9 68
## 45 SOFT DRINKS 25.7 20.3 72
## 46 DINNER MXS:DRY 22.2 19.4 34
## 47 PNT BTR/JELLY/JAMS 19.0 17.7 16
## 48 PAPER HOUSEWARES 35.6 17.4 6
## 49 REFRGRATD JUICES/DRNKS 18.7 15.6 23
## 50 FD WRAPS/BAGS/TRSH BG 20.0 15.3 6
## 51 CHEESE 17.0 14.6 28
## 52 BAKED BREAD/BUNS/ROLLS 11.2 14.1 31
## 53 LAUNDRY DETERGENTS 23.5 14.1 38
## 54 SOAP - LIQUID & BAR 16.0 12.6 14
## 55 HAIR CARE PRODUCTS 30.7 11.5 17
## 56 ORAL HYGIENE PRODUCTS 25.1 11.4 17
## 57 EGGS 18.2 11.2 15
## 58 PWDR/CRYSTL DRNK MX 25.4 10.1 21
## 59 CEREAL/BREAKFAST 13.8 9.28 9
## 60 SEAFOOD - FROZEN 15.3 7.62 7
## 61 FLUID MILK PRODUCTS 9.44 6.85 31
## 62 DIAPERS & DISPOSABLES 19.2 3.44 5
## 63 COLD AND FLU 31.9 1.62 5
## 64 FRZN BREAKFAST FOODS 4.24 1.49 29
## 65 SHORTENING/OIL -9.81 -17.9 8
lift_by_store %>%
filter(product_category %in% lift_full$product_category) %>%
ggplot(aes(x = reorder(product_category, sales_lift_pct, median),
y = sales_lift_pct)) +
geom_boxplot() +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
coord_flip() +
labs(title = "Distribution of Endcap Sales Lift Across Stores",
x = "Product Category",
y = "Sales Lift (%)")

mailer_quantity <- df %>%
filter(display_group == "endcap",
department %in% c("GROCERY", "DRUG GM"),
!is.na(product_category)) %>%
mutate(mailer_flag = ifelse(!is.na(mailer_location) & mailer_location != "0",
TRUE, FALSE)) %>%
group_by(product_category, department, mailer_flag) %>%
summarise(
avg_revenue = mean(sales_value, na.rm = TRUE),
avg_quantity = mean(quantity, na.rm = TRUE),
n_transactions = n(),
.groups = "drop"
) %>%
pivot_wider(names_from = mailer_flag,
values_from = c(avg_revenue, avg_quantity, n_transactions)) %>%
rename(
avg_revenue_endcap_only = avg_revenue_FALSE,
avg_revenue_endcap_mailer = avg_revenue_TRUE,
avg_quantity_endcap_only = avg_quantity_FALSE,
avg_quantity_endcap_mailer = avg_quantity_TRUE
) %>%
filter(!is.na(avg_revenue_endcap_only) & !is.na(avg_revenue_endcap_mailer)) %>%
mutate(
quantity_effect = avg_quantity_endcap_mailer - avg_quantity_endcap_only,
revenue_effect = avg_revenue_endcap_mailer - avg_revenue_endcap_only
)
p <- ggplot(mailer_quantity, aes(x = revenue_effect,
y = quantity_effect,
color = department,
text = product_category)) +
geom_point(alpha = 0.7) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
annotate("text", x = max(mailer_quantity$revenue_effect) * 0.7,
y = max(mailer_quantity$quantity_effect) * 0.9,
label = "More units\nMore revenue", color = "darkgreen",
size = 3, fontface = "bold") +
annotate("text", x = min(mailer_quantity$revenue_effect) * 0.7,
y = max(mailer_quantity$quantity_effect) * 0.9,
label = "More units\nLess revenue", color = "steelblue",
size = 3, fontface = "bold") +
annotate("text", x = max(mailer_quantity$revenue_effect) * 0.7,
y = min(mailer_quantity$quantity_effect) * 0.9,
label = "Fewer units\nMore revenue", color = "darkorange",
size = 3, fontface = "bold") +
annotate("text", x = min(mailer_quantity$revenue_effect) * 0.7,
y = min(mailer_quantity$quantity_effect) * 0.9,
label = "Fewer units\nLess revenue", color = "red",
size = 3, fontface = "bold") +
labs(
title = "Mailer Effect on Quantity vs Revenue When on Endcap",
subtitle = "Each point represents a product category. 50 stores, 2017.",
x = "Change in Avg Revenue per Transaction ($)",
y = "Change in Avg Quantity per Transaction",
color = "Department"
) +
theme_minimal()
ggplotly(p, tooltip = "text")
#SHOULD FILTER THIS DOWN TO SOMETHING LIKE TOP 10 BOTTOM 10
lift_by_store_full <- all_results_full %>%
group_by(store_id, product_category, week_type) %>%
summarise(avg_weekly_sales = mean(total_weekly_sales, na.rm = TRUE),
n_weeks = n(),
.groups = "drop") %>%
pivot_wider(names_from = week_type,
values_from = c(avg_weekly_sales, n_weeks)) %>%
mutate(sales_lift_pct = (`avg_weekly_sales_Endcap Week` - `avg_weekly_sales_No Display Week`) /
`avg_weekly_sales_No Display Week` * 100) %>%
filter(!is.na(sales_lift_pct),
`n_weeks_Endcap Week` >= 5,
`n_weeks_No Display Week` >= 5)
lift_by_store_full %>%
filter(product_category %in% lift_full$product_category) %>%
ggplot(aes(x = reorder(product_category, sales_lift_pct, median),
y = sales_lift_pct)) +
geom_boxplot() +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
coord_flip() +
labs(title = "Distribution of Endcap Sales Lift Across Stores",
x = "Product Category",
y = "Sales Lift (%)") +
theme_minimal()

library(knitr)
lift_by_store_full %>%
filter(product_category %in% lift_full$product_category) %>%
group_by(product_category) %>%
summarise(
median_lift = median(sales_lift_pct),
avg_lift = mean(sales_lift_pct),
pct_stores_positive = mean(sales_lift_pct > 0) * 100,
n_stores = n()
) %>%
arrange(desc(median_lift)) %>%
kable(digits = 1,
col.names = c("Product Category", "Median Lift (%)",
"Avg Lift (%)", "% Stores Positive", "N Stores"),
caption = "Endcap Sales Lift by Product Category (All Stores)")
Endcap Sales Lift by Product Category (All Stores)
| BAKING NEEDS |
136.2 |
157.3 |
100.0 |
15 |
| FROZEN PIE/DESSERTS |
76.7 |
83.6 |
85.7 |
35 |
| VEGETABLES - SHELF STABLE |
61.0 |
56.5 |
87.5 |
48 |
| FROZEN PIZZA |
49.2 |
55.5 |
94.3 |
88 |
| DOMESTIC WINE |
48.6 |
57.8 |
81.8 |
11 |
| FRUIT - SHELF STABLE |
44.3 |
45.8 |
100.0 |
13 |
| MEAT - SHELF STABLE |
43.5 |
47.9 |
96.2 |
53 |
| CHIPS&SNACKS |
42.1 |
52.6 |
100.0 |
5 |
| DRY BN/VEG/POTATO/RICE |
41.8 |
43.4 |
84.6 |
13 |
| YOGURT |
41.4 |
47.8 |
100.0 |
17 |
| REFRGRATD DOUGH PRODUCTS |
41.1 |
48.8 |
87.5 |
8 |
| SOUP |
40.7 |
43.8 |
97.1 |
69 |
| CRACKERS/MISC BKD FD |
40.0 |
46.5 |
92.4 |
92 |
| BEERS/ALES |
39.5 |
46.0 |
93.0 |
43 |
| FRZN MEAT/MEAT DINNERS |
39.3 |
46.1 |
93.0 |
86 |
| CONVENIENT BRKFST/WHLSM SNACKS |
39.1 |
37.8 |
86.7 |
60 |
| PAPER TOWELS |
37.9 |
45.0 |
85.7 |
14 |
| CANDY - PACKAGED |
37.8 |
42.1 |
76.2 |
63 |
| MEAT - MISC |
37.3 |
40.4 |
88.9 |
9 |
| ICE CREAM/MILK/SHERBTS |
36.8 |
37.3 |
89.2 |
93 |
| COFFEE |
36.2 |
38.8 |
85.7 |
28 |
| BEANS - CANNED GLASS & MW |
35.0 |
40.5 |
80.0 |
10 |
| FRZN VEGETABLE/VEG DSH |
34.9 |
36.1 |
86.2 |
58 |
| COOKIES/CONES |
34.2 |
41.1 |
88.3 |
77 |
| BAG SNACKS |
34.0 |
43.6 |
94.3 |
88 |
| MILK BY-PRODUCTS |
33.8 |
36.7 |
100.0 |
6 |
| CANNED JUICES |
33.1 |
34.7 |
82.8 |
58 |
| DRY MIX DESSERTS |
32.7 |
57.7 |
72.2 |
18 |
| COLD CEREAL |
32.6 |
36.3 |
89.7 |
87 |
| WATER - CARBONATED/FLVRD DRINK |
31.3 |
38.4 |
81.6 |
38 |
| FRZN NOVELTIES/WTR ICE |
31.2 |
40.2 |
88.9 |
63 |
| CANDY - CHECKLANE |
30.6 |
41.6 |
85.0 |
40 |
| FROZEN MEAT |
30.1 |
46.6 |
71.4 |
14 |
| BUTTER |
29.0 |
49.3 |
100.0 |
6 |
| BATH TISSUES |
28.6 |
35.1 |
81.7 |
60 |
| WAREHOUSE SNACKS |
27.0 |
34.4 |
70.0 |
20 |
| BAKING MIXES |
26.4 |
34.0 |
85.7 |
21 |
| PASTA SAUCE |
26.4 |
30.2 |
72.7 |
11 |
| SNACK NUTS |
25.8 |
24.7 |
66.7 |
6 |
| ISOTONIC DRINKS |
25.5 |
30.2 |
70.7 |
41 |
| CONDIMENTS/SAUCES |
25.4 |
30.3 |
86.0 |
43 |
| FRZN POTATOES |
23.3 |
18.6 |
68.8 |
16 |
| SUGARS/SWEETNERS |
21.4 |
28.9 |
90.9 |
11 |
| SALD DRSNG/SNDWCH SPRD |
20.9 |
27.1 |
80.9 |
68 |
| SOFT DRINKS |
20.3 |
25.7 |
79.2 |
72 |
| DINNER MXS:DRY |
19.4 |
22.2 |
70.6 |
34 |
| PNT BTR/JELLY/JAMS |
17.7 |
19.0 |
68.8 |
16 |
| PAPER HOUSEWARES |
17.4 |
35.6 |
66.7 |
6 |
| REFRGRATD JUICES/DRNKS |
15.6 |
18.7 |
73.9 |
23 |
| FD WRAPS/BAGS/TRSH BG |
15.3 |
20.0 |
83.3 |
6 |
| CHEESE |
14.6 |
17.0 |
78.6 |
28 |
| BAKED BREAD/BUNS/ROLLS |
14.1 |
11.2 |
74.2 |
31 |
| LAUNDRY DETERGENTS |
14.1 |
23.5 |
73.7 |
38 |
| SOAP - LIQUID & BAR |
12.6 |
16.0 |
64.3 |
14 |
| HAIR CARE PRODUCTS |
11.5 |
30.7 |
76.5 |
17 |
| ORAL HYGIENE PRODUCTS |
11.4 |
25.1 |
64.7 |
17 |
| EGGS |
11.2 |
18.2 |
80.0 |
15 |
| PWDR/CRYSTL DRNK MX |
10.1 |
25.4 |
71.4 |
21 |
| CEREAL/BREAKFAST |
9.3 |
13.8 |
77.8 |
9 |
| SEAFOOD - FROZEN |
7.6 |
15.3 |
57.1 |
7 |
| FLUID MILK PRODUCTS |
6.9 |
9.4 |
64.5 |
31 |
| DIAPERS & DISPOSABLES |
3.4 |
19.2 |
60.0 |
5 |
| COLD AND FLU |
1.6 |
31.9 |
60.0 |
5 |
| FRZN BREAKFAST FOODS |
1.5 |
4.2 |
51.7 |
29 |
| SHORTENING/OIL |
-17.9 |
-9.8 |
37.5 |
8 |