Implementation of Estimand Strategies

This document illustrates five estimand strategies aligned with the ICH E9(R1) framework using simulated data in R. Each strategy addresses a distinct clinical question by handling intercurrent events—such as rescue medication use or treatment discontinuation—in different ways. Specifically, we demonstrate the treatment policy strategy, which reflects real-world outcomes regardless of post-randomization events; the hypothetical strategy, which estimates outcomes under a counterfactual scenario (e.g., no rescue therapy) using weighting methods; the composite strategy, which integrates intercurrent events into the endpoint definition; the while-on-treatment strategy, which focuses on outcomes observed prior to treatment discontinuation; and the principal stratum strategy, which targets a latent subgroup defined by post-treatment behavior.

Through a unified simulation framework, this document highlights how different estimand choices lead to different interpretations of treatment effect, even when applied to the same underlying data-generating process. The accompanying analyses provide practical R implementations for each strategy, offering a conceptual and computational reference for applying estimand thinking in real-world evidence and clinical research settings.

# ============================================================
# Estimand 5 strategies in R
# ------------------------------------------------------------
# 1) Treatment policy
# 2) Hypothetical
# 3) Composite
# 4) While-on-treatment
# 5) Principal stratum
#
# ============================================================

set.seed(20260407)

# ---------- functions ----------
get_lm_est <- function(fit, term = "trt") {
  s <- summary(fit)
  if (!term %in% rownames(s$coefficients)) stop("term not found in model")
  est <- s$coefficients[term, "Estimate"]
  se  <- s$coefficients[term, "Std. Error"]
  p   <- s$coefficients[term, "Pr(>|t|)"]
  ci  <- est + c(-1, 1) * 1.96 * se
  data.frame(
    estimate = est,
    lower = ci[1],
    upper = ci[2],
    p_value = p,
    stringsAsFactors = FALSE
  )
}

get_glm_or <- function(fit, term = "trt") {
  s <- summary(fit)
  if (!term %in% rownames(s$coefficients)) stop("term not found in model")
  est <- s$coefficients[term, "Estimate"]
  se  <- s$coefficients[term, "Std. Error"]
  p   <- s$coefficients[term, "Pr(>|z|)"]
  ci  <- est + c(-1, 1) * 1.96 * se
  data.frame(
    estimate_log_odds = est,
    or = exp(est),
    lower_or = exp(ci[1]),
    upper_or = exp(ci[2]),
    p_value = p,
    stringsAsFactors = FALSE
  )
}

make_row_lm <- function(strategy, fit) {
  x <- get_lm_est(fit)
  data.frame(
    strategy = strategy,
    model = "LM",
    estimate = x$estimate,
    effect_scale = "difference",
    lower = x$lower,
    upper = x$upper,
    p_value = x$p_value,
    stringsAsFactors = FALSE
  )
}

make_row_glm <- function(strategy, fit) {
  x <- get_glm_or(fit)
  data.frame(
    strategy = strategy,
    model = "GLM",
    estimate = x$estimate_log_odds,
    effect_scale = "log_odds",
    lower = x$lower_or,
    upper = x$upper_or,
    p_value = x$p_value,
    stringsAsFactors = FALSE
  )
}

# ============================================================
# 1) Treatment policy strategy
# Question: 
### regardless of rescue/stop/add-on therapy,
# what is the overall 24-week HbA1c change?
# ============================================================

n <- 300
base_hba1c <- rnorm(n, mean = 8.5, sd = 1.0)
trt <- rbinom(n, 1, 0.5)

# Intercurrent event: rescue therapy / add-on medication
pr_rescue <- plogis(0.8 - 0.9 * trt + 0.2 * (base_hba1c - 8.5))
rescue <- rbinom(n, 1, pr_rescue)

# Observed endpoint keeps the real-world effect of rescue
hba1c_24w <- base_hba1c - 0.7 * trt + 0.15 * (base_hba1c - 8.5) + 0.25 * rescue + rnorm(n, 0, 0.8)

dat_tp <- data.frame(
  id = 1:n,
  trt = trt,
  base_hba1c = base_hba1c,
  rescue = rescue,
  hba1c_24w = hba1c_24w
)

fit_tp <- lm(hba1c_24w ~ trt + base_hba1c, data = dat_tp)

cat("\n=== 1) Treatment policy ===\n")
## 
## === 1) Treatment policy ===
print(head(dat_tp, 6))
##   id trt base_hba1c rescue hba1c_24w
## 1  1   0   9.340284      1 11.238315
## 2  2   0   8.449053      1  9.100139
## 3  3   1   9.271691      1  8.338624
## 4  4   1   9.018938      0  8.194812
## 5  5   1   8.711004      0  8.075136
## 6  6   1   8.611760      1  7.383633
print(get_lm_est(fit_tp))
##     estimate      lower      upper      p_value
## 1 -0.7309119 -0.9194757 -0.5423482 3.968443e-13
# ============================================================
# 2) Hypothetical strategy
# Question: what if no one had rescue therapy?
# Simple IPCW-style illustration:
# model probability of remaining uncensored (no rescue),
# then weight observed subjects.
### counterfactual effect 
# ============================================================

set.seed(20260407 + 1)
base_hba1c2 <- rnorm(n, mean = 8.5, sd = 1.0)
trt2 <- rbinom(n, 1, 0.5)
pr_rescue2 <- plogis(0.8 - 0.9 * trt2 + 0.2 * (base_hba1c2 - 8.5))
rescue2 <- rbinom(n, 1, pr_rescue2)

# Latent outcome under no rescue
hba1c_24w_norescue <- base_hba1c2 - 0.9 * trt2 + 0.15 * (base_hba1c2 - 8.5) + rnorm(n, 0, 0.8)

# Observed only if no rescue; otherwise missing
hba1c_obs <- ifelse(rescue2 == 0, hba1c_24w_norescue, NA)

dat_hyp <- data.frame(
  id = 1:n,
  trt = trt2,
  base_hba1c = base_hba1c2,
  rescue = rescue2,
  hba1c_obs = hba1c_obs
)

# IPCW using logistic regression
# Model probability of no rescue
fit_cens <- glm(I(rescue == 0) ~ trt + base_hba1c, data = dat_hyp, family = binomial())
p_no_rescue <- predict(fit_cens, type = "response")

# Keep only observed rows and align weights with those rows
dat_hyp_obs <- dat_hyp[dat_hyp$rescue == 0, ]
dat_hyp_obs$ipcw <- 1 / p_no_rescue[dat_hyp$rescue == 0]

fit_hyp <- lm(hba1c_obs ~ trt + base_hba1c, data = dat_hyp_obs, weights = ipcw)

cat("\n=== 2) Hypothetical (IPCW example) ===\n")
## 
## === 2) Hypothetical (IPCW example) ===
print(head(dat_hyp, 6))
##   id trt base_hba1c rescue hba1c_obs
## 1  1   1   9.280415      1        NA
## 2  2   1   9.281150      1        NA
## 3  3   1   8.973996      0  8.696098
## 4  4   0   8.174212      0  9.552224
## 5  5   1   7.364028      0  7.375509
## 6  6   0   8.041308      1        NA
print(get_lm_est(fit_hyp))
##     estimate     lower      upper      p_value
## 1 -0.7438963 -1.024993 -0.4627996 8.703742e-07
# ============================================================
# 3) Composite strategy
# Question: how many patients achieve response WITHOUT rescue?
# Define response as 
### composite outcome= HbA1c drop >= 0.5 AND no rescue.
# ============================================================

set.seed(20260407 + 2)
base_hba1c3 <- rnorm(n, mean = 8.5, sd = 1.0)
trt3 <- rbinom(n, 1, 0.5)
pr_rescue3 <- plogis(0.9 - 0.9 * trt3 + 0.15 * (base_hba1c3 - 8.5))
rescue3 <- rbinom(n, 1, pr_rescue3)

hba1c_24w3 <- base_hba1c3 - 0.8 * trt3 + 0.1 * (base_hba1c3 - 8.5) + 0.2 * rescue3 + rnorm(n, 0, 0.8)
delta3 <- base_hba1c3 - hba1c_24w3
response_composite <- as.integer(delta3 >= 0.5 & rescue3 == 0)

dat_comp <- data.frame(
  id = 1:n,
  trt = trt3,
  base_hba1c = base_hba1c3,
  rescue = rescue3,
  delta = delta3,
  response = response_composite
)

fit_comp <- glm(response ~ trt + base_hba1c, data = dat_comp, family = binomial())

cat("\n=== 3) Composite ===\n")
## 
## === 3) Composite ===
print(head(dat_comp, 6))
##   id trt base_hba1c rescue       delta response
## 1  1   1  10.018124      1 -0.00941693        0
## 2  2   1   8.578286      1  0.34962116        0
## 3  3   0   8.032871      1 -0.13812605        0
## 4  4   1   9.574570      1  0.04254592        0
## 5  5   0   8.306904      1 -0.37587651        0
## 6  6   1   8.903998      0  0.93214149        1
print(get_glm_or(fit_comp))
##   estimate_log_odds      or lower_or upper_or      p_value
## 1          1.521701 4.58001 2.426609 8.644367 2.661154e-06
# ============================================================
# 4) While-on-treatment strategy
# Question: 
### among patients still on treatment at week 12,
# what is the pain score while they are still exposed?
# ============================================================

set.seed(20260407 + 3)
base_pain <- rnorm(n, mean = 6, sd = 1.5)
trt4 <- rbinom(n, 1, 0.5)
pr_stop <- plogis(0.6 - 0.8 * trt4 + 0.15 * (base_pain - 6))
stop12 <- rbinom(n, 1, pr_stop)

# Pain at week 12 while still on treatment
pain12_full <- base_pain - 1.2 * trt4 + 0.25 * (base_pain - 6) + rnorm(n, 0, 1.0)
pain12_obs <- ifelse(stop12 == 0, pain12_full, NA)

dat_wot <- data.frame(
  id = 1:n,
  trt = trt4,
  base_pain = base_pain,
  stop12 = stop12,
  pain12 = pain12_obs
)

fit_wot <- lm(pain12 ~ trt + base_pain, data = dat_wot, subset = stop12 == 0)

cat("\n=== 4) While-on-treatment ===\n")
## 
## === 4) While-on-treatment ===
print(head(dat_wot, 6))
##   id trt base_pain stop12   pain12
## 1  1   0  4.988381      1       NA
## 2  2   1  5.154328      0 2.763700
## 3  3   1  7.294061      0 7.643973
## 4  4   0  6.835051      1       NA
## 5  5   0  4.827163      1       NA
## 6  6   0  7.215711      1       NA
print(get_lm_est(fit_wot))
##    estimate     lower     upper      p_value
## 1 -1.561359 -1.908637 -1.214081 3.466376e-15
# ============================================================
# 5) Principal stratum strategy
# Question: what is the effect among the latent subgroup who
# would stay adherent regardless of treatment assignment?
# This is generally not identifiable in practice.
# ============================================================

set.seed(20260407 + 4)
base_ps <- rnorm(n, mean = 8.5, sd = 1.0)
trt5 <- rbinom(n, 1, 0.5)

# Latent principal stratum:
# 1 = would stay adherent under either arm
# 0 = would tend to switch / use rescue under either arm
ps <- rbinom(n, 1, plogis(0.2 - 0.2 * (base_ps - 8.5)))

# Outcome
hba1c_ps <- base_ps - 1.0 * trt5 + 0.1 * (base_ps - 8.5) + rnorm(n, 0, 0.8)
hba1c_ps[ps == 0] <- base_ps[ps == 0] - 0.4 * trt5[ps == 0] +
  0.15 * (base_ps[ps == 0] - 8.5) + rnorm(sum(ps == 0), 0, 1.1)

dat_ps <- data.frame(
  id = 1:n,
  trt = trt5,
  base_hba1c = base_ps,
  principal_stratum = ps,
  hba1c = hba1c_ps
)

fit_ps <- lm(hba1c ~ trt + base_hba1c, data = dat_ps, subset = principal_stratum == 1)

cat("\n=== 5) Principal stratum (latent, simulation-only) ===\n")
## 
## === 5) Principal stratum (latent, simulation-only) ===
print(head(dat_ps, 6))
##   id trt base_hba1c principal_stratum     hba1c
## 1  1   1  10.156436                 0 10.359183
## 2  2   1   9.340545                 1  9.625588
## 3  3   0   7.041015                 1  6.581665
## 4  4   1   9.081340                 0 10.140717
## 5  5   0   8.686656                 1  8.666886
## 6  6   1   8.596408                 1  8.696773
print(get_lm_est(fit_ps))
##    estimate     lower     upper      p_value
## 1 -1.250809 -1.484216 -1.017403 2.418539e-20
# ============================================================
# Unified summary table
# ============================================================

summary_table <- rbind(
  make_row_lm("Treatment policy", fit_tp),
  make_row_lm("Hypothetical (IPCW)", fit_hyp),
  make_row_glm("Composite", fit_comp),
  make_row_lm("While-on-treatment", fit_wot),
  make_row_lm("Principal stratum", fit_ps)
)

cat("\n=== Summary table ===\n")
## 
## === Summary table ===
print(summary_table)
##              strategy model   estimate effect_scale      lower      upper
## 1    Treatment policy    LM -0.7309119   difference -0.9194757 -0.5423482
## 2 Hypothetical (IPCW)    LM -0.7438963   difference -1.0249929 -0.4627996
## 3           Composite   GLM  1.5217013     log_odds  2.4266087  8.6443667
## 4  While-on-treatment    LM -1.5613586   difference -1.9086366 -1.2140807
## 5   Principal stratum    LM -1.2508093   difference -1.4842159 -1.0174027
##        p_value
## 1 3.968443e-13
## 2 8.703742e-07
## 3 2.661154e-06
## 4 3.466376e-15
## 5 2.418539e-20
# Optional: save datasets
# write.csv(dat_tp, "dat_treatment_policy.csv", row.names = FALSE)
# write.csv(dat_hyp, "dat_hypothetical.csv", row.names = FALSE)
# write.csv(dat_comp, "dat_composite.csv", row.names = FALSE)
# write.csv(dat_wot, "dat_while_on_treatment.csv", row.names = FALSE)
# write.csv(dat_ps, "dat_principal_stratum.csv", row.names = FALSE)