Business question. Are health‑oriented products (e.g., organic, low‑fat, gluten‑free) growing faster among specific demographic segments, and do promotions/coupons amplify this growth—suggesting segments and timings for targeted expansion?
Why it matters. If we can identify who is shifting into health products—and when offers move the needle—we can target promotions, optimize assortment, and increase basket share of high‑margin categories.
Datasets used. Transactions, products, demographics, promotions, coupons, coupon redemptions, campaigns.
library(tidyverse)
library(lubridate)
library(janitor)
library(scales)
library(glue)
library(gt)
library(patchwork)
library(completejourney) trans <- get_transactions()
prods <- products
house <- demographics
promos <- get_promotions()
cpn <- coupons
redeem <- coupon_redemptions
camps <- campaigns
camp_desc <- campaign_descriptions
# Clean names
trans <- clean_names(trans)
prods <- clean_names(prods)
house <- clean_names(house)
promos <- clean_names(promos)
cpn <- clean_names(cpn)
redeem <- clean_names(redeem)
camps <- clean_names(camps)
camp_desc <- clean_names(camp_desc)
# Timestamp + calendar fields
trans <- trans %>%
mutate(
transaction_timestamp = as.POSIXct(transaction_timestamp, tz = "UTC"),
date = as.Date(transaction_timestamp),
week = lubridate::floor_date(date, "week", week_start = 1),
month = lubridate::floor_date(date, "month"),
dow = wday(date, label = TRUE, abbr = TRUE, week_start = 1)
)
by_time <- if (identical(params$time_granularity, "week")) "week" else "month"
# Quick ranges & sizes
list(
rows = list(
transactions = nrow(trans), products = nrow(prods), households = nrow(house),
promotions = nrow(promos), coupons = nrow(cpn), redemptions = nrow(redeem)
),
date_range = trans %>% summarise(min = min(date), max = max(date))
)## $rows
## $rows$transactions
## [1] 1469307
##
## $rows$products
## [1] 92331
##
## $rows$households
## [1] 801
##
## $rows$promotions
## [1] 20940529
##
## $rows$coupons
## [1] 116204
##
## $rows$redemptions
## [1] 2102
##
##
## $date_range
## # A tibble: 1 × 2
## min max
## <date> <date>
## 1 2017-01-01 2018-01-01
# Consolidate candidate text fields
prods2 <- prods %>%
mutate(
across(everything(), ~.x),
text = paste(
coalesce(brand, ""),
coalesce(product_category, ""),
coalesce(product_type, ""),
coalesce(department, ""),
sep = " "
) %>% str_to_lower()
)
is_health_regex <- params$health_regex %>% tolower()
# Health flag via regex + optional taxonomy nudge
prods2 <- prods2 %>%
mutate(
is_health = str_detect(text, is_health_regex) |
department %in% c("PRODUCE", "NATURAL/ORGANIC", "HEALTH & WELLNESS")
)
# Keep a dictionary
health_dict <- prods2 %>%
dplyr::select(dplyr::any_of(c(
"product_id", "department", "brand",
"product_category", "product_type",
"is_health"
)))Reproducibility. Our is_health
definition is transparent and editable via
params$health_regex and the department whitelist.
# Join transactions to products & households
trx <- trans %>%
left_join(health_dict, by = "product_id") %>%
left_join(house, by = "household_id") %>%
mutate(
spend = (quantity * (retail_disc + coupon_disc + coupon_match_disc) + sales_value) %>% as.numeric(),
spend = if_else(is.na(spend) | spend == 0, sales_value, spend),
price_paid = if_else(quantity > 0, spend/quantity, NA_real_),
level = dplyr::case_when(
params$basket_level == "department" ~ department,
params$basket_level %in% c("commodity", "product_category") ~ product_category,
params$basket_level %in% c("sub_commodity", "product_type") ~ product_type,
TRUE ~ product_category
)
)
# Segment features (coarse bands)
trx <- trx %>%
mutate(
income_band = fct_collapse(as.factor(income),
Low = c("Under 15K","15-24K","25-34K"),
Mid = c("35-49K","50-74K"),
High = c("75-99K","100-124K","125-149K","150K+"),
other_level = "Unknown"
),
age_band = fct_collapse(as.factor(age),
"<35" = c("19-24","25-34"),
"35-54" = c("35-44","45-54"),
"55+" = c("55-64","65+"),
other_level = "Unknown"
),
hh_size_band = case_when(
is.na(household_size) ~ "Unknown",
household_size <= 2 ~ "1-2",
household_size <= 4 ~ "3-4",
TRUE ~ "5+"
),
has_kids = if_else(!is.na(kids_count) & kids_count > 0, "Kids in HH", "No kids")
)# Helper: check columns + coerce flags safely
has_cols <- function(df, cols) all(cols %in% names(df))
flagify <- function(x) { if (!is.null(x)) (!is.na(x) & x != "") else rep(FALSE, nrow(promos)) }
# Build trx_prom depending on promos schema
if (has_cols(promos, c("start_date", "end_date"))) {
promos2 <- promos %>%
mutate(
start_date = as.Date(start_date),
end_date = as.Date(end_date),
# handle string/NA indicators for display/mailer
display = if ("display" %in% names(.)) flagify(display) else FALSE,
mailer = if ("mailer" %in% names(.)) flagify(mailer) else FALSE
) %>%
select(product_id, store_id, start_date, end_date, display, mailer)
if (isTRUE(params$promo_join_by_store)) {
trx_prom <- trx %>%
left_join(promos2, by = c("product_id","store_id")) %>%
mutate(on_promo = !is.na(start_date) & date >= start_date & date <= end_date & (display | mailer))
} else {
promos3 <- promos2 %>% select(product_id, start_date, end_date, display, mailer) %>% distinct()
trx_prom <- trx %>%
left_join(promos3, by = "product_id") %>%
mutate(on_promo = !is.na(start_date) & date >= start_date & date <= end_date & (display | mailer))
}
} else if (has_cols(promos, c("display_location","mailer_location","week"))) {
promos2 <- promos %>%
mutate(
week = as.Date(week),
display = flagify(display_location),
mailer = flagify(mailer_location)
) %>%
select(product_id, store_id, week, display, mailer)
if (isTRUE(params$promo_join_by_store)) {
trx_prom <- trx %>%
left_join(promos2, by = c("product_id","store_id","week")) %>%
mutate(on_promo = coalesce(display | mailer, FALSE))
} else {
promos3 <- promos2 %>% select(product_id, week, display, mailer) %>% distinct()
trx_prom <- trx %>%
left_join(promos3, by = c("product_id","week")) %>%
mutate(on_promo = coalesce(display | mailer, FALSE))
}
} else {
stop("Unknown promotions schema. Columns found: ", paste(names(promos), collapse = ", "))
}
# --- Summaries & viz (works for either schema) ------------------------------
promo_effect <- trx_prom %>%
group_by(on_promo, is_health) %>%
summarise(
revenue = sum(sales_value, na.rm = TRUE),
qty = sum(quantity, na.rm = TRUE),
.groups = "drop"
) %>%
group_by(on_promo) %>%
mutate(share = revenue / sum(revenue))
gt(promo_effect) %>% tab_header(title = "Promo vs. non-promo revenue & share (overall)")| Promo vs. non-promo revenue & share (overall) | |||
| is_health | revenue | qty | share |
|---|---|---|---|
| FALSE | |||
| FALSE | 4207551.64 | 152718359 | 9.154733e-01 |
| TRUE | 388315.37 | 204708 | 8.448913e-02 |
| NA | 172.57 | 41 | 3.754754e-05 |
# Segment-specific: weekend emphasis
promo_seg <- trx_prom %>%
mutate(is_weekend = dow %in% c("Sat","Sun")) %>%
group_by(is_weekend, income_band, on_promo) %>%
summarise(
health_rev = sum(if_else(is_health, sales_value, 0), na.rm = TRUE),
total_rev = sum(sales_value, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(health_share = if_else(total_rev > 0, health_rev / total_rev, NA_real_))
p_promo <- promo_seg %>%
ggplot(aes(x = income_band, y = health_share, fill = on_promo)) +
geom_col(position = position_dodge()) +
facet_wrap(~is_weekend) +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Health share by income band: promo vs. non-promo (weekday/weekend)",
x = NULL, y = "Health share") +
theme(legend.position = "bottom")
p_promo# Identify coupons tied to health products
cpn2 <- cpn %>% left_join(health_dict, by = "product_id") %>% mutate(is_health_coupon = is_health %in% TRUE)
# First redemption date per household
first_red <- redeem %>%
left_join(cpn2 %>% select(coupon_upc, is_health_coupon), by = "coupon_upc") %>%
filter(is_health_coupon %in% TRUE) %>%
mutate(redemption_date = as.Date(redemption_date)) %>%
group_by(household_id) %>%
summarise(first_health_coupon = min(redemption_date), .groups = "drop")
# Compute household-level health share before/after first redemption (4-week windows)
trx_hh <- trx %>% select(household_id, date, sales_value, is_health)
prepost <- first_red %>%
left_join(trx_hh, by = "household_id") %>%
mutate(
rel = as.integer(date - first_health_coupon),
window = case_when(rel >= -28 & rel < 0 ~ "Pre (−4w)", rel >= 0 & rel <= 28 ~ "Post (+4w)", TRUE ~ NA_character_)
) %>%
filter(!is.na(window)) %>%
group_by(household_id, window) %>%
summarise(health_rev = sum(if_else(is_health, sales_value, 0), na.rm=TRUE),
total_rev = sum(sales_value, na.rm=TRUE), .groups = "drop") %>%
mutate(health_share = if_else(total_rev>0, health_rev/total_rev, NA_real_))
prepost_summary <- prepost %>%
group_by(window) %>%
summarise(avg_health_share = mean(health_share, na.rm = TRUE), n = n_distinct(household_id), .groups = "drop")
gt(prepost_summary) %>%
fmt_percent(columns = avg_health_share, decimals = 2) %>%
tab_header(title = "Health coupon redeemers: avg health share pre vs post (4-week windows)")| Health coupon redeemers: avg health share pre vs post (4-week windows) | ||
| window | avg_health_share | n |
|---|---|---|
| Post (+4w) | 9.04% | 267 |
| Pre (−4w) | 9.09% | 265 |
# Compare redeemers vs non-redeemers within income segments (overall period)
redeemer_flag <- first_red %>% mutate(redeemer = TRUE)
comp_seg <- trx %>%
left_join(redeemer_flag, by = "household_id") %>%
mutate(redeemer = coalesce(redeemer, FALSE)) %>%
group_by(income_band, redeemer, household_id) %>%
summarise(health_rev = sum(if_else(is_health, sales_value, 0), na.rm=TRUE), total_rev = sum(sales_value, na.rm=TRUE), .groups = "drop") %>%
mutate(health_share = if_else(total_rev>0, health_rev/total_rev, NA_real_)) %>%
group_by(income_band, redeemer) %>%
summarise(avg_health_share = mean(health_share, na.rm=TRUE), n_hh = n_distinct(household_id), .groups = "drop")
gt(comp_seg) %>% fmt_percent(columns = avg_health_share, decimals = 2) %>% tab_header(title = "Redeemers vs non-redeemers by income band")| Redeemers vs non-redeemers by income band | |||
| income_band | redeemer | avg_health_share | n_hh |
|---|---|---|---|
| Low | FALSE | 7.89% | 176 |
| Low | TRUE | 7.57% | 36 |
| Mid | FALSE | 8.14% | 264 |
| Mid | TRUE | 8.58% | 100 |
| High | FALSE | 9.72% | 120 |
| High | TRUE | 9.89% | 48 |
| Unknown | FALSE | 11.92% | 38 |
| Unknown | TRUE | 12.64% | 19 |
| NA | FALSE | 8.40% | 1604 |
| NA | TRUE | 8.29% | 64 |
library(rlang)
# figure out the basket (trip) id column name
basket_col <- dplyr::case_when(
"basket_id" %in% names(trx) ~ "basket_id",
"transaction_id" %in% names(trx) ~ "transaction_id",
TRUE ~ NA_character_
)
if (is.na(basket_col)) {
stop("No basket-level identifier found. Looked for 'basket_id' or 'transaction_id' in trx.")
}
# make sure is_health exists and is logical
trx <- trx %>% mutate(is_health = coalesce(is_health, FALSE))
# items at basket level
basket_items <- trx %>%
dplyr::select(!!sym(basket_col), item = level, is_health) %>%
distinct()
# mark baskets that contain at least one health item
health_baskets <- basket_items %>%
group_by(!!sym(basket_col)) %>%
summarise(has_health = any(is_health), .groups = "drop")
# attach-rate by category when basket has any health item vs not
attach_tbl <- basket_items %>%
left_join(health_baskets, by = basket_col) %>%
group_by(item, has_health) %>%
summarise(baskets = n_distinct(!!sym(basket_col)), .groups = "drop") %>%
group_by(item) %>%
mutate(attach_rate = baskets / sum(baskets)) %>%
tidyr::pivot_wider(
names_from = has_health,
values_from = attach_rate,
names_prefix = "has_health_",
values_fill = 0
) %>%
mutate(delta = has_health_TRUE - has_health_FALSE) %>%
arrange(desc(delta))
gt(head(attach_tbl, 15)) %>%
tab_header(title = "Categories that over-index when health items are in the basket")| Categories that over-index when health items are in the basket | |||
| baskets | has_health_FALSE | has_health_TRUE | delta |
|---|---|---|---|
| PROD-WHS SALES | |||
| 3 | 0 | 1.0000000 | 1.0000000 |
| PRODUCE | |||
| 50756 | 0 | 1.0000000 | 1.0000000 |
| FROZEN GROCERY | |||
| 141 | 0 | 0.9929577 | 0.9929577 |
| TRAVEL & LEISURE | |||
| 413 | 0 | 0.8787234 | 0.8787234 |
| CHEF SHOPPE | |||
| 511 | 0 | 0.8720137 | 0.8720137 |
| SEAFOOD | |||
| 1498 | 0 | 0.7520080 | 0.7520080 |
| MEAT | |||
| 22411 | 0 | 0.7452447 | 0.7452447 |
| NUTRITION | |||
| 7211 | 0 | 0.7321555 | 0.7321555 |
| SEAFOOD-PCKGD | |||
| 3688 | 0 | 0.7242734 | 0.7242734 |
| GM MERCH EXP | |||
| 28 | 0 | 0.7179487 | 0.7179487 |
| MEAT-PCKGD | |||
| 21320 | 0 | 0.6471785 | 0.6471785 |
| NA | |||
| 2803 | 0 | 0.6231659 | 0.6231659 |
| DELI | |||
| 12351 | 0 | 0.6175809 | 0.6175809 |
| POSTAL CENTER | |||
| 3 | 0 | 0.6000000 | 0.6000000 |
| PHOTO & VIDEO | |||
| 10 | 0 | 0.5882353 | 0.5882353 |
items_df <- trx %>%
dplyr::select(basket_id_var = !!sym(basket_col), item = level) %>%
filter(!is.na(basket_id_var), !is.na(item)) %>%
distinct()
n_baskets <- n_distinct(items_df$basket_id_var)
item_ct <- items_df %>% count(item, name = "n_item")
basket_pairs <- items_df %>%
group_by(basket_id_var) %>%
summarise(items = list(sort(unique(item))), .groups = "drop") %>%
mutate(pairs = purrr::map(items, function(x) {
if (length(x) < 2) return(tibble(lhs = character(), rhs = character()))
m <- t(combn(x, 2))
tibble(lhs = m[, 1], rhs = m[, 2])
})) %>%
select(-items) %>%
tidyr::unnest(pairs)
pair_ct <- basket_pairs %>% count(lhs, rhs, name = "n_pair")
# directional rules both ways
rules_tbl <- pair_ct %>%
left_join(item_ct %>% rename(lhs = item, n_lhs = n_item), by = "lhs") %>%
left_join(item_ct %>% rename(rhs = item, n_rhs = n_item), by = "rhs") %>%
mutate(
support = n_pair / n_baskets,
conf_lhs_rhs = n_pair / n_lhs,
base_rhs = n_rhs / n_baskets,
lift_lhs_rhs = conf_lhs_rhs / base_rhs
) %>%
transmute(lhs, rhs, n_pair, support, confidence = conf_lhs_rhs, lift = lift_lhs_rhs) %>%
bind_rows(
pair_ct %>% rename(lhs = rhs, rhs = lhs) %>%
left_join(item_ct %>% rename(lhs = item, n_lhs = n_item), by = "lhs") %>%
left_join(item_ct %>% rename(rhs = item, n_rhs = n_item), by = "rhs") %>%
mutate(
support = n_pair / n_baskets,
conf_lhs_rhs = n_pair / n_lhs,
base_rhs = n_rhs / n_baskets,
lift_lhs_rhs = conf_lhs_rhs / base_rhs
) %>%
transmute(lhs, rhs, n_pair, support, confidence = conf_lhs_rhs, lift = lift_lhs_rhs)
) %>%
arrange(desc(lift))
# screen by your params
opp <- rules_tbl %>%
filter(support >= params$min_support, lift >= params$min_lift) %>%
distinct(lhs, rhs, .keep_all = TRUE)
gt(head(opp, 20)) %>%
fmt_percent(columns = c(support, confidence), decimals = 1) %>%
tab_header(title = "Top co-occurring pairs by lift (no arules)")| Top co-occurring pairs by lift (no arules) | |||||
| lhs | rhs | n_pair | support | confidence | lift |
|---|---|---|---|---|---|
| SEAFOOD | SEAFOOD-PCKGD | 472 | 0.3% | 23.7% | 7.244823 |
| SEAFOOD-PCKGD | SEAFOOD | 472 | 0.3% | 9.3% | 7.244823 |
| CHEF SHOPPE | MEAT | 352 | 0.2% | 60.1% | 3.109899 |
| MEAT | CHEF SHOPPE | 352 | 0.2% | 1.2% | 3.109899 |
| NUTRITION | SEAFOOD | 389 | 0.2% | 3.9% | 3.086965 |
| SEAFOOD | NUTRITION | 389 | 0.2% | 19.5% | 3.086965 |
| PRODUCE | TRAVEL & LEISURE | 405 | 0.3% | 0.8% | 2.643220 |
| TRAVEL & LEISURE | PRODUCE | 405 | 0.3% | 86.2% | 2.643220 |
| PRODUCE | CHEF SHOPPE | 498 | 0.3% | 1.0% | 2.606801 |
| CHEF SHOPPE | PRODUCE | 498 | 0.3% | 85.0% | 2.606801 |
| MEAT-PCKGD | SEAFOOD-PCKGD | 2799 | 1.8% | 8.5% | 2.597855 |
| SEAFOOD-PCKGD | MEAT-PCKGD | 2799 | 1.8% | 55.0% | 2.597855 |
| SEAFOOD-PCKGD | MEAT | 2531 | 1.6% | 49.7% | 2.573386 |
| MEAT | SEAFOOD-PCKGD | 2531 | 1.6% | 8.4% | 2.573386 |
| SEAFOOD | MEAT | 938 | 0.6% | 47.1% | 2.437893 |
| MEAT | SEAFOOD | 938 | 0.6% | 3.1% | 2.437893 |
| NUTRITION | SEAFOOD-PCKGD | 764 | 0.5% | 7.8% | 2.371791 |
| SEAFOOD-PCKGD | NUTRITION | 764 | 0.5% | 15.0% | 2.371791 |
| MEAT | MEAT-PCKGD | 14603 | 9.4% | 48.6% | 2.294986 |
| MEAT-PCKGD | MEAT | 14603 | 9.4% | 44.3% | 2.294986 |
fmt_pct <- function(x, d = 2) ifelse(is.na(x), "—", percent(x, accuracy = 1/10^d))
fmt_pp <- function(x, d = 2) ifelse(is.na(x), "—", paste0(round(100*x, d), " pp"))
# --- Top 2–3 segments (from trend_tbl) ---
trend_ok <- exists("trend_tbl") && is.data.frame(trend_tbl) && nrow(trend_tbl) > 0
top_txt <- if (trend_ok) {
top3 <- trend_tbl %>%
mutate(Segment = paste(income_band, age_band, hh_size_band, has_kids, sep = " / ")) %>%
arrange(desc(slope_per_period)) %>%
slice_head(n = 3)
paste(glue("{top3$Segment} (slope ≈ {round(100*top3$slope_per_period, 2)}%-pts/period)"), collapse = "; ")
} else {
"See the Fastest-growing segments table above."
}
# --- Promo effect (prefer promo_effect; fallback to promo_seg) ---
promo_line <- {
if (exists("promo_effect") && nrow(promo_effect) > 0) {
pe <- promo_effect %>%
group_by(on_promo, is_health) %>%
summarise(revenue = sum(revenue, na.rm = TRUE), .groups = "drop") %>%
group_by(on_promo) %>%
mutate(share = revenue / sum(revenue)) %>%
ungroup() %>%
filter(is_health) %>%
mutate(on_promo = if_else(on_promo, "Promo", "No promo")) %>%
tidyr::pivot_wider(names_from = on_promo, values_from = share)
hs_promo <- pe$Promo %||% NA_real_
hs_nop <- pe$`No promo` %||% NA_real_
if (is.na(hs_promo) || is.na(hs_nop)) {
"- **What moves them?** Use the weekday/weekend + segment facets in *Promotions* to time offers (no valid on/off promo comparison in aggregate)."
} else {
glue("- **What moves them?** Health share **on promo** {fmt_pct(hs_promo)} vs **no promo** {fmt_pct(hs_nop)} (Δ {fmt_pp(hs_promo - hs_nop)}). ",
"Use the weekday/weekend + segment facets in *Promotions* to time offers.")
}
} else if (exists("promo_seg") && nrow(promo_seg) > 0) {
# fallback: summarize weekend vs weekday lift among on_promo only
ps <- promo_seg %>%
filter(on_promo %in% TRUE) %>%
summarise(weekend = mean(health_share[dplyr::coalesce(is_weekend, FALSE)], na.rm = TRUE),
weekday = mean(health_share[!dplyr::coalesce(is_weekend, FALSE)], na.rm = TRUE))
if (is.finite(ps$weekend) && is.finite(ps$weekday)) {
glue("- **What moves them?** On-promo **weekend** health share {fmt_pct(ps$weekend)} vs **weekday** {fmt_pct(ps$weekday)} (Δ {fmt_pp(ps$weekend - ps$weekday)}). ",
"Prioritize Fri–Sun windows if weekend lift persists.")
} else {
"- **What moves them?** Use the *Promotions* section to identify timing/segment lift patterns."
}
} else {
"- **What moves them?** Use the *Promotions* section to identify timing/segment lift patterns."
}
}
# --- Coupons pre/post (from prepost_summary) ---
coupon_line <- {
if (exists("prepost_summary") && nrow(prepost_summary) > 0) {
post <- prepost_summary %>% filter(str_detect(window, "Post")) %>% pull(avg_health_share) %>% {.[1]}
pre <- prepost_summary %>% filter(str_detect(window, "Pre")) %>% pull(avg_health_share) %>% {.[1]}
if (is.na(post) || is.na(pre)) {
"- **Coupons.** See the pre/post table in *Coupons*; if missing, re-run that chunk."
} else {
glue("- **Coupons.** Health-coupon redeemers average {fmt_pct(post)} **post** vs {fmt_pct(pre)} **pre** (±4w; Δ {fmt_pp(post - pre)}). ",
"Broad couponing shows limited mix shift—favor targeting/bundles.")
}
} else {
"- **Coupons.** See the pre/post table in *Coupons*; if missing, re-run that chunk."
}
}
# --- Scope (context) ---
scope_line <- tryCatch({
date_min <- suppressWarnings(min(trans$date, na.rm = TRUE)); date_max <- suppressWarnings(max(trans$date, na.rm = TRUE))
n_txn <- tryCatch(nrow(trans), error = function(e) NA_integer_)
n_prod <- tryCatch(nrow(prods), error = function(e) NA_integer_)
n_hh <- tryCatch(nrow(house), error = function(e) NA_integer_)
glue("- **Scope.** {format(date_min, '%Y-%m-%d')} to {format(date_max, '%Y-%m-%d')}; ",
"transactions: {comma(n_txn)}, products: {comma(n_prod)}, households: {comma(n_hh)}.")
}, error = function(e) NULL)
# --- Compose markdown and emit as asis output ---
md <- paste0(
"**Executive summary & recommendations**\n\n",
glue("- **Who’s growing the fastest?** {top_txt}."), "\n\n",
promo_line, "\n\n",
coupon_line, "\n\n",
"- **What to do (targeted plan):** 1) Target the top-growth segments with **Fri–Sun** promos on health staples; ",
"2) Replace broad coupons with **bundled offers** (e.g., healthy breakfast basket) and track **basket health-share**; ",
"3) Stagger tests by segment and daypart; scale winners.\n\n",
"- **Limitations.** Text tagging & taxonomy granularity may misclassify edge items; trend ≠ causation; ",
"pre/post can mask heterogeneity. Run randomized A/B or geo tests (household × week) for incremental lift.\n\n",
if (!is.null(scope_line)) paste0(scope_line, "\n") else ""
)
knitr::asis_output(md)Executive summary & recommendations
Who’s growing the fastest? High / <35 / 1-2 / Kids in HH (slope ≈ 0.42%-pts/period); Low / <35 / 1-2 / No kids (slope ≈ 0.1%-pts/period); Low / 55+ / 3-4 / Kids in HH (slope ≈ 0.08%-pts/period).
What moves them? Use the weekday/weekend + segment facets in Promotions to time offers (no valid on/off promo comparison in aggregate).
Coupons. Health-coupon redeemers average 9.04% post vs 9.09% pre (±4w; Δ -0.06 pp). Broad couponing shows limited mix shift—favor targeting/bundles.
What to do (targeted plan): 1) Target the top-growth segments with Fri–Sun promos on health staples; 2) Replace broad coupons with bundled offers (e.g., healthy breakfast basket) and track basket health-share; 3) Stagger tests by segment and daypart; scale winners.
Limitations. Text tagging & taxonomy granularity may misclassify edge items; trend ≠ causation; pre/post can mask heterogeneity. Run randomized A/B or geo tests (household × week) for incremental lift.
Scope. 2017-01-01 to 2018-01-01; transactions: 1,469,307, products: 92,331, households: 801.
if (exists("trend_tbl") && is.data.frame(trend_tbl) && nrow(trend_tbl) > 0) {
top3_tbl <- trend_tbl %>%
mutate(Segment = paste(income_band, age_band, hh_size_band, has_kids, sep = " / ")) %>%
arrange(desc(slope_per_period)) %>%
slice_head(n = 3) %>%
transmute(
`Segment (income / age / HH size / kids)` = Segment,
`Avg health share` = scales::percent(avg_share, accuracy = 0.01),
`Trend slope (per period)` = paste0(round(100*slope_per_period, 2), "%-pts"),
`R²` = round(r2, 2),
Periods = n_periods
)
# Use kable for a minimal, readable table
knitr::kable(top3_tbl, caption = "Top segments by health-share growth", align = c("l","r","r","r","r"))
} else {
cat("_Top segments table unavailable (run the growth-metrics chunk to build `trend_tbl`)._\n")
}| Segment (income / age / HH size / kids) | Avg health share | Trend slope (per period) | R² | Periods |
|---|---|---|---|---|
| High / <35 / 1-2 / Kids in HH | 4.31% | 0.42%-pts | 0.10 | 13 |
| Low / <35 / 1-2 / No kids | 8.50% | 0.1%-pts | 0.06 | 54 |
| Low / 55+ / 3-4 / Kids in HH | 13.04% | 0.08%-pts | 0.04 | 53 |