Heterogeneous treatment effects in CGM RCT

Treatment group was given CGM for six months, Control group was not. Then both groups had CGM for the following 6 months. Primary outcome: Change in HbA1c from study enrollment to end of phase 1 (six months later).

Study design: https://pubmed.ncbi.nlm.nih.gov/18828243/

More info: https://clinicaltrials.gov/ct2/show/NCT00406133

Paper with results: https://pubmed.ncbi.nlm.nih.gov/18779236/

Data: http://publicfiles.jaeb.org/jdrfapp/dataset/RT_CGM_Randomized_Clinical_Trial.zip

Patients in the control and treatment (CGM) groups

raw_patients[,.(n = .N), by = 'TxGroup'] %>% kable() %>% kable_styling(c("striped", "hover", "condensed"))

TxGroup	n
Control	219
RT-CGM	232

Calculate outcome: a1c change from beginning to end of phase 1

deltas_phase_1 = merge(
  raw_hba1c[Visit == "Randomization", .(PtID, a1c_baseline = LabA1cResult)],
  raw_hba1c[Visit == "Phase 1: 26 week", .(PtID, a1c_end_phase1 = LabA1cResult)],
  by = "PtID"
)
deltas_phase_1[, delta := a1c_end_phase1 - a1c_baseline]
deltas_phase_1[, summary(delta)]

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -2.700  -0.500  -0.100  -0.131   0.200   2.500       1

# Join with patient groups to estimate treatment effect
deltas_phase_1_t = merge(deltas_phase_1, raw_patients[,.(PtID, treated = ifelse(TxGroup == "RT-CGM", 1,0))], by = 'PtID')
deltas_phase_1_t[, .(avg_outcome = mean(delta, na.rm=T), .N), by = 'treated']  %>% kable() %>%  kable_styling(c("striped", "hover", "condensed"))

treated	avg_outcome	N
1	-0.2393013	229
0	-0.0145540	214

T test

t.test(deltas_phase_1_t[treated==1, delta], deltas_phase_1_t[treated==0, delta])

## 
##  Welch Two Sample t-test
## 
## data:  deltas_phase_1_t[treated == 1, delta] and deltas_phase_1_t[treated == 0, delta]
## t = -3.7944, df = 433.24, p-value = 0.000169
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.3411625 -0.1083322
## sample estimates:
##   mean of x   mean of y 
## -0.23930131 -0.01455399

Estimate Conditional Average Treatment Effects (CATEs)

Patient Features:

pump: 1 if the patient has an insuling pump, 0 if they are injecting themselves.
female: 1 if patient is female
family_caregiver: 1 if the patient’s primary caregiver is a familymember, 0 if the patient is their own primary caregiver
highly_educated_caregiver: 1 if the primary caregiver has a Bachelors degree or higher.

# Encode features
raw_patients[, pump := ifelse(InsulinModality == "Pump", 1, 0)]
raw_patients[, female := ifelse(Gender == "F", 1, 0)]
raw_patients[, family_caregiver := ifelse(EduCareGvrP == "Subject", 0, 1)]
raw_patients[, highly_educated_caregiver := ifelse(EduCareGvrPEdu %in% c("Bachelors", "Masters", "Professional"), 1, 0)]

# Join patient features and outcomes
hte_feats_outcomes_phase1 = merge(
  deltas_phase_1[!is.na(delta)], 
  raw_patients[,.(
    PtID, treated = ifelse(TxGroup == "RT-CGM", 1,0),
    pump, female, family_caregiver, highly_educated_caregiver)], 
  by = 'PtID')

hte_feats_outcomes_phase1[, .(
  n = .N,
  mean_outcome = mean(delta),
  sd_outcome = sd(delta),
  pump = mean(pump),
  female = mean(female),
  family_caregiver = mean(family_caregiver),
  highly_educated_caregiver = mean(highly_educated_caregiver)
  ), by = 'treated'] %>% kable() %>% kable_styling(c("striped", "hover", "condensed"))

treated	n	mean_outcome	sd_outcome	pump	female	family_caregiver	highly_educated_caregiver
1	229	-0.2393013	0.6829576	0.8253275	0.5327511	0.5196507	0.7248908
0	213	-0.0145540	0.5598386	0.8028169	0.5680751	0.5352113	0.7511737

X = hte_feats_outcomes_phase1[, .(pump, female, family_caregiver, highly_educated_caregiver)]
Y = hte_feats_outcomes_phase1[, delta]
W = hte_feats_outcomes_phase1[, treated]

get_all_the_cates = function(X, W, Y) {
  
  # S-learner (OLS)
  dt = copy(X)
  dt[, treated := W]
  dt[, delta := Y]
  sf = lm(delta ~ treated + female + pump + family_caregiver + highly_educated_caregiver, data=dt)
  pred.sf.0 = predict(sf, dt[, .(treated=0, female, pump, family_caregiver, highly_educated_caregiver)])
  pred.sf.1 = predict(sf, dt[, .(treated=1, female, pump, family_caregiver, highly_educated_caregiver)])
  sl.cates = pred.sf.1 - pred.sf.0
  
  # T-learner (OLS)
  tf0 = lm(delta ~ female + pump + family_caregiver + highly_educated_caregiver, data=dt[treated==0])
  tf1 = lm(delta ~ female + pump + family_caregiver + highly_educated_caregiver, data=dt[treated==1])
  tf.preds.0 = predict(tf0, dt)
  tf.preds.1 = predict(tf1, dt)
  tl.cates = tf.preds.1 - tf.preds.0
  
  # Causal Forest
  cf = causal_forest(X, Y, W, tune.parameters = "all")
  cf.cates = predict(cf)$predictions
  
  # Return combined results
  return(list(
    dt = rbind(
    data.table(method = "S-learner", cate = sl.cates),
    data.table(method = "T-learner", cate = tl.cates),
    data.table(method = "Causal Forest", cate = cf.cates)),
    cf = cf))
}

cates = get_all_the_cates(X, W, Y)

Summary statistics of CATEs by method

cates$dt[, .(min = min(cate), mean = mean(cate), sd = sd(cate), max = max(cate)), by = 'method']  %>% kable() %>% kable_styling(c("striped", "hover", "condensed"))

method	min	mean	sd	max
S-learner	-0.2227113	-0.2227113	0.0000000	-0.2227113
T-learner	-0.6831823	-0.2234573	0.1990053	0.0523166
Causal Forest	-0.3324368	-0.2221452	0.0702671	-0.1127619

Ordered predictions by CATE from T-learner

hte_feats_outcomes_phase1[, cate_cf := cates$cf$predictions]
hte_feats_outcomes_phase1[, cate_tl := cates$dt[method == "T-learner", cate]]

hte_feats_outcomes_phase1[, .(cate = mean(cate_tl)), by = .(pump, female, family_caregiver, highly_educated_caregiver)][order(cate)] %>%
    kable(digits=2) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"))

pump	female	family_caregiver	highly_educated_caregiver	cate
0	0	0	1	-0.68
0	0	0	0	-0.67
0	1	0	1	-0.53
0	1	0	0	-0.52
1	0	0	1	-0.43
1	0	0	0	-0.42
0	0	1	1	-0.36
0	0	1	0	-0.35
1	1	0	1	-0.28
1	1	0	0	-0.27
0	1	1	1	-0.21
0	1	1	0	-0.20
1	0	1	1	-0.11
1	0	1	0	-0.10
1	1	1	1	0.04
1	1	1	0	0.05

It’s interesting that the highest CATE is for those that inject insulin and are their own caregiver. Lowest CATEs are for those with a pump and a less educated family caregiver.

ATEs by CATE quartile from T-learner (Causal Forest performed poorly so omitting results)

hte_feats_outcomes_phase1[, tl_cate_qrt := findInterval(
  cate_tl, 
  hte_feats_outcomes_phase1[, quantile(cate_tl, seq(0,1,length.out=5), na.rm=T)],
  all.inside = T)]

rbindlist(lapply(hte_feats_outcomes_phase1[,unique(tl_cate_qrt)], function(x) {
  subset_dt = hte_feats_outcomes_phase1[tl_cate_qrt == x]
  m = lm(delta ~ treated, data = subset_dt)
  return(data.table(
    tl_cate_qrt = x,
    mean_cate = round(subset_dt[, mean(cate_tl, na.rm=TRUE)],4),
    n = subset_dt[, .N],
    p_treated = subset_dt[, mean(treated)],
    control_mean = round(subset_dt[, mean(delta, na.rm=TRUE)],4),
    cate = round(summary(m)$coefficients["treated","Estimate"],4),
    se = round(summary(m)$coefficients["treated","Std. Error"],4),
    t = round(summary(m)$coefficients["treated","t value"],4),
    p = round(summary(m)$coefficients["treated","Pr(>|t|)"],4)
  ))
}))[order(tl_cate_qrt)] %>% kable() %>% kable_styling(c("striped", "hover", "condensed"))

tl_cate_qrt	mean_cate	n	p_treated	control_mean	cate	se	t	p
1	-0.4896	106	0.5471698	-0.0915	-0.5251	0.0935	-5.6165	0.0000
2	-0.2991	103	0.5339806	-0.1553	-0.2909	0.0957	-3.0413	0.0030
3	-0.1639	112	0.4732143	-0.0902	-0.1189	0.1181	-1.0074	0.3159
4	0.0190	121	0.5206612	-0.1826	-0.0031	0.1468	-0.0211	0.9832

Two-way split ITTs for each variable

itts_by_var = function(dt, var) {
  rbindlist(lapply(dt[,unique(get(var))], function(x) {
    subset_dt = dt[get(var) == x]
    m = lm(delta ~ treated, data = subset_dt)
    return(data.table(
      var = var,
      value = x,
      n = subset_dt[, .N],
      control_mean = round(subset_dt[, mean(delta, na.rm=TRUE)],4),
      cate = round(summary(m)$coefficients["treated","Estimate"],4),
      se = round(summary(m)$coefficients["treated","Std. Error"],4),
      t = round(summary(m)$coefficients["treated","t value"],4),
      p = round(summary(m)$coefficients["treated","Pr(>|t|)"],4)
    ))
  }))
}


rbindlist(lapply(colnames(X), function(x) itts_by_var(hte_feats_outcomes_phase1, x))) %>% kable() %>% kable_styling(c("striped", "hover", "condensed"))

var	value	n	control_mean	cate	se	t	p
pump	1	360	-0.1531	-0.1768	0.0656	-2.6938	0.0074
pump	0	82	-0.0341	-0.4214	0.1409	-2.9916	0.0037
female	1	243	-0.1399	-0.1635	0.0868	-1.8834	0.0609
female	0	199	-0.1201	-0.3022	0.0797	-3.7918	0.0002
family_caregiver	0	209	-0.1301	-0.3874	0.0665	-5.8225	0.0000
family_caregiver	1	233	-0.1318	-0.0794	0.0954	-0.8318	0.4064
highly_educated_caregiver	1	326	-0.1267	-0.2242	0.0669	-3.3500	0.0009
highly_educated_caregiver	0	116	-0.1431	-0.2253	0.1289	-1.7480	0.0832

Try single causal tree

#remotes::install_github('susanathey/causalTree')
library(causalTree)

set.seed(1814)
dt = copy(hte_feats_outcomes_phase1)
eval_ind = sample.int(nrow(dt), round(0.5*nrow(dt)))

train_dt = dt[-eval_ind,]
eval_dt = dt[eval_ind,]

fmla_ct = "delta ~ female + pump + family_caregiver + highly_educated_caregiver"
ct_unpruned <- honest.causalTree(
  formula=fmla_ct,                # Define the model
  data=train_dt,                  # Subset used to create tree structure
  est_data=eval_dt,               # Which data set to use to estimate effects
  treatment=train_dt$treated,     # Splitting sample treatment variable
  est_treatment=eval_dt$treated,  # Estimation sample treatment variable
  split.Rule="CT",                # Define the splitting option
  cv.option="TOT",                # Cross validation options
  cp=0,                           # Complexity parameter
  split.Honest=TRUE,              # Use honesty when splitting
  cv.Honest=TRUE,                 # Use honesty when performing cross-validation
  minsize=10,                     # Min. number of treatment and control cases in each leaf
  HonestSampleSize=nrow(eval_dt)) # Num obs used in estimation after building the tree

## [1] 2
## [1] "CT"

# Table of cross-validated values by tuning parameter.
ct_cptable <- as.data.frame(ct_unpruned$cptable)

# Obtain optimal complexity parameter to prune tree.
selected_cp <- which.min(ct_cptable$xerror)
optim_cp_ct <- ct_cptable[selected_cp, "CP"]

# Prune the tree at optimal complexity parameter.
ct_pruned <- prune(tree=ct_unpruned, cp=optim_cp_ct)

library(rpart.plot)
rpart.plot(
  x=ct_pruned,        # Pruned tree
  type=3,             # Draw separate split labels for the left and right directions
  fallen=TRUE,        # Position the leaf nodes at the bottom of the graph
  leaf.round=1,       # Rounding of the corners of the leaf node boxes
  extra=100,          # Display the percentage of observations in the node
  box.palette="RdBu") # Palette for coloring the node