1 Introduction

Business question. Are health‑oriented products (e.g., organic, low‑fat, gluten‑free) growing faster among specific demographic segments, and do promotions/coupons amplify this growth—suggesting segments and timings for targeted expansion?

Why it matters. If we can identify who is shifting into health products—and when offers move the needle—we can target promotions, optimize assortment, and increase basket share of high‑margin categories.

Datasets used. Transactions, products, demographics, promotions, coupons, coupon redemptions, campaigns.


2 Libraries

library(tidyverse)      
library(lubridate)    
library(janitor)      
library(scales)       
library(glue)          
library(gt)             
library(patchwork)      

library(completejourney) 

3 Import & audit

trans <- get_transactions()
prods <- products
house <- demographics
promos <- get_promotions()
cpn <- coupons
redeem <- coupon_redemptions
camps <- campaigns
camp_desc <- campaign_descriptions

# Clean names
trans <- clean_names(trans)
prods <- clean_names(prods)
house <- clean_names(house)
promos <- clean_names(promos)
cpn <- clean_names(cpn)
redeem <- clean_names(redeem)
camps <- clean_names(camps)
camp_desc <- clean_names(camp_desc)

# Timestamp + calendar fields
trans <- trans %>%
  mutate(
    transaction_timestamp = as.POSIXct(transaction_timestamp, tz = "UTC"),
    date  = as.Date(transaction_timestamp),
    week  = lubridate::floor_date(date, "week", week_start = 1),
    month = lubridate::floor_date(date, "month"),
    dow   = wday(date, label = TRUE, abbr = TRUE, week_start = 1)
  )

by_time <- if (identical(params$time_granularity, "week")) "week" else "month"

# Quick ranges & sizes
list(
  rows = list(
    transactions = nrow(trans), products = nrow(prods), households = nrow(house),
    promotions = nrow(promos), coupons = nrow(cpn), redemptions = nrow(redeem)
  ),
  date_range = trans %>% summarise(min = min(date), max = max(date))
)
## $rows
## $rows$transactions
## [1] 1469307
## 
## $rows$products
## [1] 92331
## 
## $rows$households
## [1] 801
## 
## $rows$promotions
## [1] 20940529
## 
## $rows$coupons
## [1] 116204
## 
## $rows$redemptions
## [1] 2102
## 
## 
## $date_range
## # A tibble: 1 × 2
##   min        max       
##   <date>     <date>    
## 1 2017-01-01 2018-01-01

4 Define health-oriented items

# Consolidate candidate text fields
prods2 <- prods %>%
  mutate(
    across(everything(), ~.x),
    text = paste(
      coalesce(brand, ""),
      coalesce(product_category, ""),
      coalesce(product_type, ""),
      coalesce(department, ""),
      sep = " "
    ) %>% str_to_lower()
  )

is_health_regex <- params$health_regex %>% tolower()

# Health flag via regex + optional taxonomy nudge
prods2 <- prods2 %>%
  mutate(
    is_health = str_detect(text, is_health_regex) |
      department %in% c("PRODUCE", "NATURAL/ORGANIC", "HEALTH & WELLNESS")
  )

# Keep a dictionary
health_dict <- prods2 %>%
  dplyr::select(dplyr::any_of(c(
    "product_id", "department", "brand",
    "product_category", "product_type",
    "is_health"
  )))

Reproducibility. Our is_health definition is transparent and editable via params$health_regex and the department whitelist.


5 Core join & enrich

# Join transactions to products & households
trx <- trans %>%
  left_join(health_dict, by = "product_id") %>%
  left_join(house, by = "household_id") %>%
  mutate(
    spend = (quantity * (retail_disc + coupon_disc + coupon_match_disc) + sales_value) %>% as.numeric(),
    spend = if_else(is.na(spend) | spend == 0, sales_value, spend),
    price_paid = if_else(quantity > 0, spend/quantity, NA_real_),
    level = dplyr::case_when(
  params$basket_level == "department" ~ department,
  params$basket_level %in% c("commodity", "product_category") ~ product_category,
  params$basket_level %in% c("sub_commodity", "product_type") ~ product_type,
  TRUE ~ product_category
)
)

# Segment features (coarse bands)
trx <- trx %>%
  mutate(
    income_band = fct_collapse(as.factor(income),
      Low = c("Under 15K","15-24K","25-34K"),
      Mid = c("35-49K","50-74K"),
      High = c("75-99K","100-124K","125-149K","150K+"),
      other_level = "Unknown"
    ),
    age_band = fct_collapse(as.factor(age),
      "<35" = c("19-24","25-34"),
      "35-54" = c("35-44","45-54"),
      "55+" = c("55-64","65+"),
      other_level = "Unknown"
    ),
    hh_size_band = case_when(
      is.na(household_size) ~ "Unknown",
      household_size <= 2 ~ "1-2",
      household_size <= 4 ~ "3-4",
      TRUE ~ "5+"
    ),
    has_kids = if_else(!is.na(kids_count) & kids_count > 0, "Kids in HH", "No kids")
  )

6 Health share by segment over time

# step 1: sum at is_health level
agg_core <- trx %>%
  group_by(.data[[by_time]], income_band, age_band, hh_size_band, has_kids, is_health) %>%
  summarise(revenue = sum(sales_value, na.rm = TRUE), .groups = "drop")

# step 2: pivot to get health/nonhealth side by side
health_ts <- agg_core %>%
  tidyr::pivot_wider(
    names_from = is_health, values_from = revenue,
    names_prefix = "is_health_", values_fill = 0
  ) %>%
  mutate(
    health_rev    = is_health_TRUE,
    nonhealth_rev = is_health_FALSE,
    total_rev     = health_rev + nonhealth_rev,
    health_share  = if_else(total_rev > 0, health_rev / total_rev, NA_real_)
  ) %>%
  select(.data[[by_time]], income_band, age_band, hh_size_band, has_kids,
         health_rev, nonhealth_rev, total_rev, health_share)

p_income <- health_ts %>%
  group_by(.data[[by_time]], income_band) %>%
  summarise(health_share = mean(health_share, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = .data[[by_time]], y = health_share, color = income_band)) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(title = "Health-oriented share over time by income band", x = NULL, y = "Health share of revenue") +
  theme(legend.position = "bottom")

p_kids <- health_ts %>%
  group_by(.data[[by_time]], has_kids) %>%
  summarise(health_share = mean(health_share, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = .data[[by_time]], y = health_share, color = has_kids)) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(title = "Health-oriented share over time by household kids", x = NULL, y = "Health share of revenue") +
  theme(legend.position = "bottom")

p_income / p_kids

6.1 Rank segments by growth

# Simple linear trend slope of health_share ~ time index per segment
seg_ts <- health_ts %>%
  drop_na(health_share) %>%
  group_by(income_band, age_band, hh_size_band, has_kids) %>%
  arrange(.data[[by_time]]) %>%
  mutate(t = row_number())

trend_tbl <- seg_ts %>%
  do({
    m <- lm(health_share ~ t, data = .)
    tibble(
      n_periods = nrow(.),
      avg_share = mean(.$health_share, na.rm = TRUE),
      slope_per_period = coef(m)[["t"]],
      r2 = summary(m)$r.squared
    )
  }) %>%
  ungroup() %>%
  arrange(desc(slope_per_period))

gt(head(trend_tbl, 12)) %>%
  fmt_percent(columns = c(avg_share, slope_per_period), decimals = 2) %>%
  tab_header(title = "Fastest-growing segments by health share trend")
Fastest-growing segments by health share trend
income_band age_band hh_size_band has_kids n_periods avg_share slope_per_period r2
High <35 1-2 Kids in HH 13 4.31% 0.42% 0.0977656794
Low <35 1-2 No kids 54 8.50% 0.10% 0.0608576191
Low 55+ 3-4 Kids in HH 53 13.04% 0.08% 0.0393267742
Low 55+ 1-2 Kids in HH 53 5.47% 0.07% 0.0701190631
High 55+ 3-4 Kids in HH 52 9.09% 0.04% 0.0136796745
Low <35 1-2 Kids in HH 52 6.12% 0.03% 0.0276530882
Mid <35 1-2 No kids 54 8.12% 0.02% 0.0773661887
Unknown 35-54 1-2 Kids in HH 52 15.21% 0.02% 0.0028603619
Low 35-54 5+ Kids in HH 53 6.97% 0.02% 0.0097252381
Unknown <35 1-2 No kids 53 10.36% 0.02% 0.0115194863
Unknown <35 3-4 Kids in HH 53 10.32% 0.01% 0.0010856287
Mid <35 1-2 Kids in HH 52 6.20% 0.01% 0.0005427847

7 Promotions

# Helper: check columns + coerce flags safely
has_cols <- function(df, cols) all(cols %in% names(df))
flagify <- function(x) { if (!is.null(x)) (!is.na(x) & x != "") else rep(FALSE, nrow(promos)) }

# Build trx_prom depending on promos schema
if (has_cols(promos, c("start_date", "end_date"))) {
  promos2 <- promos %>%
    mutate(
      start_date = as.Date(start_date),
      end_date   = as.Date(end_date),
      # handle string/NA indicators for display/mailer
      display = if ("display" %in% names(.)) flagify(display) else FALSE,
      mailer  = if ("mailer"  %in% names(.)) flagify(mailer)  else FALSE
    ) %>%
    select(product_id, store_id, start_date, end_date, display, mailer)

  if (isTRUE(params$promo_join_by_store)) {
    trx_prom <- trx %>%
      left_join(promos2, by = c("product_id","store_id")) %>%
      mutate(on_promo = !is.na(start_date) & date >= start_date & date <= end_date & (display | mailer))
  } else {
    promos3 <- promos2 %>% select(product_id, start_date, end_date, display, mailer) %>% distinct()
    trx_prom <- trx %>%
      left_join(promos3, by = "product_id") %>%
      mutate(on_promo = !is.na(start_date) & date >= start_date & date <= end_date & (display | mailer))
  }

} else if (has_cols(promos, c("display_location","mailer_location","week"))) {
  promos2 <- promos %>%
    mutate(
      week    = as.Date(week),
      display = flagify(display_location),
      mailer  = flagify(mailer_location)
    ) %>%
    select(product_id, store_id, week, display, mailer)

  if (isTRUE(params$promo_join_by_store)) {
    trx_prom <- trx %>%
      left_join(promos2, by = c("product_id","store_id","week")) %>%
      mutate(on_promo = coalesce(display | mailer, FALSE))
  } else {
    promos3 <- promos2 %>% select(product_id, week, display, mailer) %>% distinct()
    trx_prom <- trx %>%
      left_join(promos3, by = c("product_id","week")) %>%
      mutate(on_promo = coalesce(display | mailer, FALSE))
  }

} else {
  stop("Unknown promotions schema. Columns found: ", paste(names(promos), collapse = ", "))
}

# --- Summaries & viz (works for either schema) ------------------------------
promo_effect <- trx_prom %>%
  group_by(on_promo, is_health) %>%
  summarise(
    revenue = sum(sales_value, na.rm = TRUE),
    qty     = sum(quantity, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  group_by(on_promo) %>%
  mutate(share = revenue / sum(revenue))

gt(promo_effect) %>% tab_header(title = "Promo vs. non-promo revenue & share (overall)")
Promo vs. non-promo revenue & share (overall)
is_health revenue qty share
FALSE
FALSE 4207551.64 152718359 9.154733e-01
TRUE 388315.37 204708 8.448913e-02
NA 172.57 41 3.754754e-05
# Segment-specific: weekend emphasis
promo_seg <- trx_prom %>%
  mutate(is_weekend = dow %in% c("Sat","Sun")) %>%
  group_by(is_weekend, income_band, on_promo) %>%
  summarise(
    health_rev = sum(if_else(is_health, sales_value, 0), na.rm = TRUE),
    total_rev  = sum(sales_value, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  mutate(health_share = if_else(total_rev > 0, health_rev / total_rev, NA_real_))

p_promo <- promo_seg %>%
  ggplot(aes(x = income_band, y = health_share, fill = on_promo)) +
  geom_col(position = position_dodge()) +
  facet_wrap(~is_weekend) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(title = "Health share by income band: promo vs. non-promo (weekday/weekend)",
       x = NULL, y = "Health share") +
  theme(legend.position = "bottom")

p_promo


8 Coupons: redeemers, pre/post change, and segment effects

# Identify coupons tied to health products
cpn2 <- cpn %>% left_join(health_dict, by = "product_id") %>% mutate(is_health_coupon = is_health %in% TRUE)

# First redemption date per household
first_red <- redeem %>%
  left_join(cpn2 %>% select(coupon_upc, is_health_coupon), by = "coupon_upc") %>%
  filter(is_health_coupon %in% TRUE) %>%
  mutate(redemption_date = as.Date(redemption_date)) %>%
  group_by(household_id) %>%
  summarise(first_health_coupon = min(redemption_date), .groups = "drop")

# Compute household-level health share before/after first redemption (4-week windows)
trx_hh <- trx %>% select(household_id, date, sales_value, is_health)

prepost <- first_red %>%
  left_join(trx_hh, by = "household_id") %>%
  mutate(
    rel = as.integer(date - first_health_coupon),
    window = case_when(rel >= -28 & rel < 0 ~ "Pre (−4w)", rel >= 0 & rel <= 28 ~ "Post (+4w)", TRUE ~ NA_character_)
  ) %>%
  filter(!is.na(window)) %>%
  group_by(household_id, window) %>%
  summarise(health_rev = sum(if_else(is_health, sales_value, 0), na.rm=TRUE),
            total_rev = sum(sales_value, na.rm=TRUE), .groups = "drop") %>%
  mutate(health_share = if_else(total_rev>0, health_rev/total_rev, NA_real_))

prepost_summary <- prepost %>%
  group_by(window) %>%
  summarise(avg_health_share = mean(health_share, na.rm = TRUE), n = n_distinct(household_id), .groups = "drop")

gt(prepost_summary) %>%
  fmt_percent(columns = avg_health_share, decimals = 2) %>%
  tab_header(title = "Health coupon redeemers: avg health share pre vs post (4-week windows)")
Health coupon redeemers: avg health share pre vs post (4-week windows)
window avg_health_share n
Post (+4w) 9.04% 267
Pre (−4w) 9.09% 265
# Compare redeemers vs non-redeemers within income segments (overall period)
redeemer_flag <- first_red %>% mutate(redeemer = TRUE)

comp_seg <- trx %>%
  left_join(redeemer_flag, by = "household_id") %>%
  mutate(redeemer = coalesce(redeemer, FALSE)) %>%
  group_by(income_band, redeemer, household_id) %>%
  summarise(health_rev = sum(if_else(is_health, sales_value, 0), na.rm=TRUE), total_rev = sum(sales_value, na.rm=TRUE), .groups = "drop") %>%
  mutate(health_share = if_else(total_rev>0, health_rev/total_rev, NA_real_)) %>%
  group_by(income_band, redeemer) %>%
  summarise(avg_health_share = mean(health_share, na.rm=TRUE), n_hh = n_distinct(household_id), .groups = "drop")

gt(comp_seg) %>% fmt_percent(columns = avg_health_share, decimals = 2) %>% tab_header(title = "Redeemers vs non-redeemers by income band")
Redeemers vs non-redeemers by income band
income_band redeemer avg_health_share n_hh
Low FALSE 7.89% 176
Low TRUE 7.57% 36
Mid FALSE 8.14% 264
Mid TRUE 8.58% 100
High FALSE 9.72% 120
High TRUE 9.89% 48
Unknown FALSE 11.92% 38
Unknown TRUE 12.64% 19
NA FALSE 8.40% 1604
NA TRUE 8.29% 64

9 Basket composition when a health anchor is present

library(rlang)

# figure out the basket (trip) id column name
basket_col <- dplyr::case_when(
  "basket_id"       %in% names(trx) ~ "basket_id",
  "transaction_id"  %in% names(trx) ~ "transaction_id",
  TRUE ~ NA_character_
)

if (is.na(basket_col)) {
  stop("No basket-level identifier found. Looked for 'basket_id' or 'transaction_id' in trx.")
}

# make sure is_health exists and is logical
trx <- trx %>% mutate(is_health = coalesce(is_health, FALSE))

# items at basket level
basket_items <- trx %>%
  dplyr::select(!!sym(basket_col), item = level, is_health) %>%
  distinct()

# mark baskets that contain at least one health item
health_baskets <- basket_items %>%
  group_by(!!sym(basket_col)) %>%
  summarise(has_health = any(is_health), .groups = "drop")

# attach-rate by category when basket has any health item vs not
attach_tbl <- basket_items %>%
  left_join(health_baskets, by = basket_col) %>%
  group_by(item, has_health) %>%
  summarise(baskets = n_distinct(!!sym(basket_col)), .groups = "drop") %>%
  group_by(item) %>%
  mutate(attach_rate = baskets / sum(baskets)) %>%
  tidyr::pivot_wider(
    names_from = has_health,
    values_from = attach_rate,
    names_prefix = "has_health_",
    values_fill = 0
  ) %>%
  mutate(delta = has_health_TRUE - has_health_FALSE) %>%
  arrange(desc(delta))

gt(head(attach_tbl, 15)) %>%
  tab_header(title = "Categories that over-index when health items are in the basket")
Categories that over-index when health items are in the basket
baskets has_health_FALSE has_health_TRUE delta
PROD-WHS SALES
3 0 1.0000000 1.0000000
PRODUCE
50756 0 1.0000000 1.0000000
FROZEN GROCERY
141 0 0.9929577 0.9929577
TRAVEL & LEISURE
413 0 0.8787234 0.8787234
CHEF SHOPPE
511 0 0.8720137 0.8720137
SEAFOOD
1498 0 0.7520080 0.7520080
MEAT
22411 0 0.7452447 0.7452447
NUTRITION
7211 0 0.7321555 0.7321555
SEAFOOD-PCKGD
3688 0 0.7242734 0.7242734
GM MERCH EXP
28 0 0.7179487 0.7179487
MEAT-PCKGD
21320 0 0.6471785 0.6471785
NA
2803 0 0.6231659 0.6231659
DELI
12351 0 0.6175809 0.6175809
POSTAL CENTER
3 0 0.6000000 0.6000000
PHOTO & VIDEO
10 0 0.5882353 0.5882353

10 Pairwise co-occurrence

items_df <- trx %>%
  dplyr::select(basket_id_var = !!sym(basket_col), item = level) %>%
  filter(!is.na(basket_id_var), !is.na(item)) %>%
  distinct()

n_baskets <- n_distinct(items_df$basket_id_var)

item_ct <- items_df %>% count(item, name = "n_item")

basket_pairs <- items_df %>%
  group_by(basket_id_var) %>%
  summarise(items = list(sort(unique(item))), .groups = "drop") %>%
  mutate(pairs = purrr::map(items, function(x) {
    if (length(x) < 2) return(tibble(lhs = character(), rhs = character()))
    m <- t(combn(x, 2))
    tibble(lhs = m[, 1], rhs = m[, 2])
  })) %>%
  select(-items) %>%
  tidyr::unnest(pairs)

pair_ct <- basket_pairs %>% count(lhs, rhs, name = "n_pair")

# directional rules both ways
rules_tbl <- pair_ct %>%
  left_join(item_ct %>% rename(lhs = item, n_lhs = n_item), by = "lhs") %>%
  left_join(item_ct %>% rename(rhs = item, n_rhs = n_item), by = "rhs") %>%
  mutate(
    support       = n_pair / n_baskets,
    conf_lhs_rhs  = n_pair / n_lhs,
    base_rhs      = n_rhs / n_baskets,
    lift_lhs_rhs  = conf_lhs_rhs / base_rhs
  ) %>%
  transmute(lhs, rhs, n_pair, support, confidence = conf_lhs_rhs, lift = lift_lhs_rhs) %>%
  bind_rows(
    pair_ct %>% rename(lhs = rhs, rhs = lhs) %>%
      left_join(item_ct %>% rename(lhs = item, n_lhs = n_item), by = "lhs") %>%
      left_join(item_ct %>% rename(rhs = item, n_rhs = n_item), by = "rhs") %>%
      mutate(
        support       = n_pair / n_baskets,
        conf_lhs_rhs  = n_pair / n_lhs,
        base_rhs      = n_rhs / n_baskets,
        lift_lhs_rhs  = conf_lhs_rhs / base_rhs
      ) %>%
      transmute(lhs, rhs, n_pair, support, confidence = conf_lhs_rhs, lift = lift_lhs_rhs)
  ) %>%
  arrange(desc(lift))

# screen by your params
opp <- rules_tbl %>%
  filter(support >= params$min_support, lift >= params$min_lift) %>%
  distinct(lhs, rhs, .keep_all = TRUE)

gt(head(opp, 20)) %>%
  fmt_percent(columns = c(support, confidence), decimals = 1) %>%
  tab_header(title = "Top co-occurring pairs by lift (no arules)")
Top co-occurring pairs by lift (no arules)
lhs rhs n_pair support confidence lift
SEAFOOD SEAFOOD-PCKGD 472 0.3% 23.7% 7.244823
SEAFOOD-PCKGD SEAFOOD 472 0.3% 9.3% 7.244823
CHEF SHOPPE MEAT 352 0.2% 60.1% 3.109899
MEAT CHEF SHOPPE 352 0.2% 1.2% 3.109899
NUTRITION SEAFOOD 389 0.2% 3.9% 3.086965
SEAFOOD NUTRITION 389 0.2% 19.5% 3.086965
PRODUCE TRAVEL & LEISURE 405 0.3% 0.8% 2.643220
TRAVEL & LEISURE PRODUCE 405 0.3% 86.2% 2.643220
PRODUCE CHEF SHOPPE 498 0.3% 1.0% 2.606801
CHEF SHOPPE PRODUCE 498 0.3% 85.0% 2.606801
MEAT-PCKGD SEAFOOD-PCKGD 2799 1.8% 8.5% 2.597855
SEAFOOD-PCKGD MEAT-PCKGD 2799 1.8% 55.0% 2.597855
SEAFOOD-PCKGD MEAT 2531 1.6% 49.7% 2.573386
MEAT SEAFOOD-PCKGD 2531 1.6% 8.4% 2.573386
SEAFOOD MEAT 938 0.6% 47.1% 2.437893
MEAT SEAFOOD 938 0.6% 3.1% 2.437893
NUTRITION SEAFOOD-PCKGD 764 0.5% 7.8% 2.371791
SEAFOOD-PCKGD NUTRITION 764 0.5% 15.0% 2.371791
MEAT MEAT-PCKGD 14603 9.4% 48.6% 2.294986
MEAT-PCKGD MEAT 14603 9.4% 44.3% 2.294986

11 Executive summary & recommendations

fmt_pct <- function(x, d = 2) ifelse(is.na(x), "—", percent(x, accuracy = 1/10^d))
fmt_pp  <- function(x, d = 2) ifelse(is.na(x), "—", paste0(round(100*x, d), " pp"))

# --- Top 2–3 segments (from trend_tbl) ---
trend_ok <- exists("trend_tbl") && is.data.frame(trend_tbl) && nrow(trend_tbl) > 0
top_txt <- if (trend_ok) {
  top3 <- trend_tbl %>%
    mutate(Segment = paste(income_band, age_band, hh_size_band, has_kids, sep = " / ")) %>%
    arrange(desc(slope_per_period)) %>%
    slice_head(n = 3)
  paste(glue("{top3$Segment} (slope ≈ {round(100*top3$slope_per_period, 2)}%-pts/period)"), collapse = "; ")
} else {
  "See the Fastest-growing segments table above."
}

# --- Promo effect (prefer promo_effect; fallback to promo_seg) ---
promo_line <- {
  if (exists("promo_effect") && nrow(promo_effect) > 0) {
    pe <- promo_effect %>%
      group_by(on_promo, is_health) %>%
      summarise(revenue = sum(revenue, na.rm = TRUE), .groups = "drop") %>%
      group_by(on_promo) %>%
      mutate(share = revenue / sum(revenue)) %>%
      ungroup() %>%
      filter(is_health) %>%
      mutate(on_promo = if_else(on_promo, "Promo", "No promo")) %>%
      tidyr::pivot_wider(names_from = on_promo, values_from = share)
    hs_promo <- pe$Promo %||% NA_real_
    hs_nop   <- pe$`No promo` %||% NA_real_
    if (is.na(hs_promo) || is.na(hs_nop)) {
      "- **What moves them?** Use the weekday/weekend + segment facets in *Promotions* to time offers (no valid on/off promo comparison in aggregate)."
    } else {
      glue("- **What moves them?** Health share **on promo** {fmt_pct(hs_promo)} vs **no promo** {fmt_pct(hs_nop)} (Δ {fmt_pp(hs_promo - hs_nop)}). ",
           "Use the weekday/weekend + segment facets in *Promotions* to time offers.")
    }
  } else if (exists("promo_seg") && nrow(promo_seg) > 0) {
    # fallback: summarize weekend vs weekday lift among on_promo only
    ps <- promo_seg %>%
      filter(on_promo %in% TRUE) %>%
      summarise(weekend = mean(health_share[dplyr::coalesce(is_weekend, FALSE)], na.rm = TRUE),
                weekday = mean(health_share[!dplyr::coalesce(is_weekend, FALSE)], na.rm = TRUE))
    if (is.finite(ps$weekend) && is.finite(ps$weekday)) {
      glue("- **What moves them?** On-promo **weekend** health share {fmt_pct(ps$weekend)} vs **weekday** {fmt_pct(ps$weekday)} (Δ {fmt_pp(ps$weekend - ps$weekday)}). ",
           "Prioritize Fri–Sun windows if weekend lift persists.")
    } else {
      "- **What moves them?** Use the *Promotions* section to identify timing/segment lift patterns."
    }
  } else {
    "- **What moves them?** Use the *Promotions* section to identify timing/segment lift patterns."
  }
}

# --- Coupons pre/post (from prepost_summary) ---
coupon_line <- {
  if (exists("prepost_summary") && nrow(prepost_summary) > 0) {
    post <- prepost_summary %>% filter(str_detect(window, "Post")) %>% pull(avg_health_share) %>% {.[1]}
    pre  <- prepost_summary %>% filter(str_detect(window, "Pre"))  %>% pull(avg_health_share) %>% {.[1]}
    if (is.na(post) || is.na(pre)) {
      "- **Coupons.** See the pre/post table in *Coupons*; if missing, re-run that chunk."
    } else {
      glue("- **Coupons.** Health-coupon redeemers average {fmt_pct(post)} **post** vs {fmt_pct(pre)} **pre** (±4w; Δ {fmt_pp(post - pre)}). ",
           "Broad couponing shows limited mix shift—favor targeting/bundles.")
    }
  } else {
    "- **Coupons.** See the pre/post table in *Coupons*; if missing, re-run that chunk."
  }
}

# --- Scope (context) ---
scope_line <- tryCatch({
  date_min <- suppressWarnings(min(trans$date, na.rm = TRUE)); date_max <- suppressWarnings(max(trans$date, na.rm = TRUE))
  n_txn <- tryCatch(nrow(trans), error = function(e) NA_integer_)
  n_prod <- tryCatch(nrow(prods), error = function(e) NA_integer_)
  n_hh   <- tryCatch(nrow(house), error = function(e) NA_integer_)
  glue("- **Scope.** {format(date_min, '%Y-%m-%d')} to {format(date_max, '%Y-%m-%d')}; ",
       "transactions: {comma(n_txn)}, products: {comma(n_prod)}, households: {comma(n_hh)}.")
}, error = function(e) NULL)

# --- Compose markdown and emit as asis output ---
md <- paste0(
  "**Executive summary & recommendations**\n\n",
  glue("- **Who’s growing the fastest?** {top_txt}."), "\n\n",
  promo_line, "\n\n",
  coupon_line, "\n\n",
  "- **What to do (targeted plan):** 1) Target the top-growth segments with **Fri–Sun** promos on health staples; ",
  "2) Replace broad coupons with **bundled offers** (e.g., healthy breakfast basket) and track **basket health-share**; ",
  "3) Stagger tests by segment and daypart; scale winners.\n\n",
  "- **Limitations.** Text tagging & taxonomy granularity may misclassify edge items; trend ≠ causation; ",
  "pre/post can mask heterogeneity. Run randomized A/B or geo tests (household × week) for incremental lift.\n\n",
  if (!is.null(scope_line)) paste0(scope_line, "\n") else ""
)

knitr::asis_output(md)

Executive summary & recommendations

  • Who’s growing the fastest? High / <35 / 1-2 / Kids in HH (slope ≈ 0.42%-pts/period); Low / <35 / 1-2 / No kids (slope ≈ 0.1%-pts/period); Low / 55+ / 3-4 / Kids in HH (slope ≈ 0.08%-pts/period).

  • What moves them? Use the weekday/weekend + segment facets in Promotions to time offers (no valid on/off promo comparison in aggregate).

  • Coupons. Health-coupon redeemers average 9.04% post vs 9.09% pre (±4w; Δ -0.06 pp). Broad couponing shows limited mix shift—favor targeting/bundles.

  • What to do (targeted plan): 1) Target the top-growth segments with Fri–Sun promos on health staples; 2) Replace broad coupons with bundled offers (e.g., healthy breakfast basket) and track basket health-share; 3) Stagger tests by segment and daypart; scale winners.

  • Limitations. Text tagging & taxonomy granularity may misclassify edge items; trend ≠ causation; pre/post can mask heterogeneity. Run randomized A/B or geo tests (household × week) for incremental lift.

  • Scope. 2017-01-01 to 2018-01-01; transactions: 1,469,307, products: 92,331, households: 801.

if (exists("trend_tbl") && is.data.frame(trend_tbl) && nrow(trend_tbl) > 0) {
  top3_tbl <- trend_tbl %>%
    mutate(Segment = paste(income_band, age_band, hh_size_band, has_kids, sep = " / ")) %>%
    arrange(desc(slope_per_period)) %>%
    slice_head(n = 3) %>%
    transmute(
      `Segment (income / age / HH size / kids)` = Segment,
      `Avg health share` = scales::percent(avg_share, accuracy = 0.01),
      `Trend slope (per period)` = paste0(round(100*slope_per_period, 2), "%-pts"),
      `` = round(r2, 2),
      Periods = n_periods
    )

  # Use kable for a minimal, readable table
  knitr::kable(top3_tbl, caption = "Top segments by health-share growth", align = c("l","r","r","r","r"))
} else {
  cat("_Top segments table unavailable (run the growth-metrics chunk to build `trend_tbl`)._\n")
}
Top segments by health-share growth
Segment (income / age / HH size / kids) Avg health share Trend slope (per period) Periods
High / <35 / 1-2 / Kids in HH 4.31% 0.42%-pts 0.10 13
Low / <35 / 1-2 / No kids 8.50% 0.1%-pts 0.06 54
Low / 55+ / 3-4 / Kids in HH 13.04% 0.08%-pts 0.04 53

12 Appendix: helpers

fmt_pct <- function(x, d=1) scales::percent(x, accuracy = 1/10^d)
fmt_dol <- function(x) scales::dollar(x, accuracy = 1)