library(data.table)
library(ggplot2)
library(gridExtra)
library(boot)
library(knitr)
library(printr)
R = 500 # number of bootstrap replicates
Implementing five off-policy evaluation estimators from Thomas and Brunskill (ICML 2016): importance sampling (IS), weighted importance sampling (WIS), model-based (MB), doubly robust (DR), and weighted doubly robust (WDR). The single-step setup (the estimators are written out below):
Action: whether the patient is shown to CDEs for review (eligible_for_review)
Reward: change in TIR (dTIR_2w)
State: CGM summary statistics (e.g., TIR_lag, p_worn_lag)
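For reference, a sketch of the single-decision (bandit) forms these estimators reduce to here, with importance ratio $\rho_i = \pi_e(a_i \mid x_i)/\pi_b(a_i \mid x_i)$ (behavior probabilities estimated from the logged treatment rate, as in behavior_policy below), reward $r_i$ the observed dTIR_2w, and $n$ logged decisions:

$$
\hat{V}_{\mathrm{IS}} = \frac{1}{n}\sum_{i=1}^{n} \rho_i r_i,
\qquad
\hat{V}_{\mathrm{WIS}} = \frac{\sum_{i=1}^{n} \rho_i r_i}{\sum_{i=1}^{n} \rho_i}
$$

Each policy's value is reported below as a delta relative to the null (never-treat) policy.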
sim_dt = fread('~/Google Drive/My Drive/Stanford/CGM/sim_dt.csv')
eval_dt = sim_dt[in_first_year_since_onset==0 & !is.na(dTIR_2w) & !is.na(TIR_lag)]
eval_dt[, .(patients = uniqueN(pat_id), rows = .N, p_eligible = mean(eligible_for_review), messages_sent = sum(tide_message_sent))]
| patients | rows | p_eligible | messages_sent |
|---|---|---|---|
| 78 | 2740 | 0.2481752 | 236 |
# Each policy function takes state rows x and an action a (0/1) and returns the
# probability that the policy takes action a in that state.
behavior_policy = function(x, a) {
# logging policy: treat at the observed marginal treatment rate
ifelse(a==1, mean(eval_dt$eligible_for_review), (1-mean(eval_dt$eligible_for_review)))
}
null_policy = function(x, a) {
# never treat
ifelse(a==1, 0, 1)
}
tir_above_65_policy = function(x, a) {
# treat iff lagged TIR >= 65%
ifelse((x$TIR_lag>=0.65) == a, 1, 0)
}
tir_below_65_policy = function(x, a) {
# treat iff lagged TIR < 65%
ifelse((x$TIR_lag<0.65) == a, 1, 0)
}
tir_below_65_and_pw_above_50_policy = function(x, a) {
# treat iff lagged TIR < 65% and lagged percent-worn > 50%
ifelse((x$TIR_lag<0.65 & x$p_worn_lag>0.5) == a, 1, 0)
}
always_treat_policy = function(x, a) {
# always treat
ifelse(a==1, 1, 0)
}
# list of policy functions to evaluate later
policy_fns = list(
"null" = null_policy,
"behavioral" = behavior_policy,
"tir>=65%" = tir_above_65_policy,
"tir<65%" = tir_below_65_policy,
"tir<65% & pw>50%" = tir_below_65_and_pw_above_50_policy,
"always treat" = always_treat_policy
)
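As a quick sanity check on this convention (hypothetical toy rows, not drawn from sim_dt), a deterministic policy returns 1 for the action it would take in a state and 0 otherwise:
# toy states: one row below the 65% TIR threshold, one above
toy = data.table(TIR_lag = c(0.50, 0.80), p_worn_lag = c(0.9, 0.9))
tir_below_65_policy(toy, 1) # 1 0 -> would treat only the low-TIR row
tir_below_65_policy(toy, 0) # 0 1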
is_eval = function(d, p) {
# probability each policy assigns to the observed action
d[, behavior_policy_prob := behavior_policy(d, d$eligible_for_review)]
d[, eval_policy_prob := p(d, d$eligible_for_review)]
# returns the IS estimate of mean reward, plus a diagnostic: the (expected) number of logged treatments the eval policy would also take
c(d[, mean(dTIR_2w*(eval_policy_prob/behavior_policy_prob))], sum(d$eval_policy_prob*d$eligible_for_review))
}
# test
# for (pn in names(policy_fns)) {
# print(pn)
# print(is_eval(eval_dt, policy_fns[[pn]]))
# }
# bootstrap statistic: IS value of the eval policy minus the null (never-treat) policy
bootstrap_is_eval = function(d, i, p) {
boot_d = d[i]
null_policy_reward = is_eval(boot_d, null_policy)
eval_policy_reward = is_eval(boot_d, p)
return((eval_policy_reward[1] - null_policy_reward[1]))
}
# test
# for (pn in names(policy_fns)) {
# print(pn)
# print(bootstrap_is_eval(eval_dt, seq(1,nrow(eval_dt)), policy_fns[[pn]]))
# }
# Bootstrapped results
is_bs_results = rbindlist(lapply(names(policy_fns), function(pn) {
data.table(
policy = pn,
delta = boot(eval_dt, bootstrap_is_eval, R=R, parallel="multicore", ncpus=10, p = policy_fns[[pn]])$t[,1]
)
}))
# plot
invisible(is_bs_results[, policy := reorder(is_bs_results$policy, is_bs_results$delta, FUN = mean)])
ggplot(is_bs_results, aes(x=policy, y=delta, color=policy)) + theme_bw() +
geom_hline(yintercept=0, linetype="dashed") +
stat_summary(
fun = median,
fun.min = function(z) quantile(z,0.05),
fun.max = function(z) quantile(z,0.95),
geom = "pointrange") + ggtitle("IS results")
wis_eval = function(d, p) {
d[, behavior_policy_prob := behavior_policy(d, d$eligible_for_review)]
d[, eval_policy_prob := p(d, d$eligible_for_review)]
c(d[, weighted.mean(dTIR_2w, (eval_policy_prob/behavior_policy_prob))], sum(d$eval_policy_prob*d$eligible_for_review))
}
bootstrap_wis_eval = function(d, i, p) {
boot_d = d[i]
null_policy_reward = wis_eval(boot_d, null_policy)
eval_policy_reward = wis_eval(boot_d, p)
return((eval_policy_reward[1] - null_policy_reward[1]))
}
# Bootstrapped results
wis_bs_results = rbindlist(lapply(names(policy_fns), function(pn) {
data.table(
policy = pn,
delta = boot(eval_dt, bootstrap_wis_eval, R=R, parallel="multicore", ncpus=10, p = policy_fns[[pn]])$t[,1]
)
}))
# plot
invisible(wis_bs_results[, policy := reorder(wis_bs_results$policy, wis_bs_results$delta, FUN = mean)])
ggplot(wis_bs_results, aes(x=policy, y=delta, color=policy)) + theme_bw() +
geom_hline(yintercept=0, linetype="dashed") +
stat_summary(
fun = median,
fun.min = function(z) quantile(z,0.05),
fun.max = function(z) quantile(z,0.95),
geom = "pointrange") + ggtitle("WIS results")
merge(wis_bs_results[,.(mean_d=mean(delta), sd=sd(delta)), by='policy'],
is_bs_results[,.(mean_d=mean(delta), sd=sd(delta)), by='policy'],
by='policy', suffixes=c(".wis",".is"))
| policy | mean_d.wis | sd.wis | mean_d.is | sd.is |
|---|---|---|---|---|
| null | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000000 |
| tir>=65% | 0.0019151 | 0.0029331 | 0.0017792 | 0.0032145 |
| behavioral | 0.0022759 | 0.0012396 | 0.0023940 | 0.0013092 |
| tir<65% | 0.0072975 | 0.0040717 | 0.0072922 | 0.0040847 |
| tir<65% & pw>50% | 0.0085750 | 0.0036644 | 0.0081592 | 0.0035622 |
| always treat | 0.0090921 | 0.0050805 | 0.0092650 | 0.0052109 |
The model-based (MB), doubly robust (DR), and weighted doubly robust (WDR) estimators need an outcome model, so fit a simple linear regression of dTIR_2w on the action and two binary state features (with all interactions).
# simple predictors for model
invisible(eval_dt[, TIR_lag_below_65 := ifelse(TIR_lag<0.65,1,0)])
invisible(eval_dt[, p_worn_lag_above_50 := ifelse(p_worn_lag>0.50,1,0)])
lm_all = lm(dTIR_2w ~ eligible_for_review*(TIR_lag_below_65*p_worn_lag_above_50), eval_dt)
summary(lm_all)
##
## Call:
## lm(formula = dTIR_2w ~ eligible_for_review * (TIR_lag_below_65 *
## p_worn_lag_above_50), data = eval_dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59256 -0.06053 0.00627 0.06908 0.69575
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -0.064174 0.018022
## eligible_for_review 0.009082 0.041490
## TIR_lag_below_65 0.120291 0.021551
## p_worn_lag_above_50 0.036445 0.018375
## TIR_lag_below_65:p_worn_lag_above_50 -0.068949 0.022229
## eligible_for_review:TIR_lag_below_65 -0.021990 0.048401
## eligible_for_review:p_worn_lag_above_50 -0.006658 0.042105
## eligible_for_review:TIR_lag_below_65:p_worn_lag_above_50 0.039599 0.049608
## t value Pr(>|t|)
## (Intercept) -3.561 0.000376 ***
## eligible_for_review 0.219 0.826747
## TIR_lag_below_65 5.582 2.62e-08 ***
## p_worn_lag_above_50 1.983 0.047416 *
## TIR_lag_below_65:p_worn_lag_above_50 -3.102 0.001943 **
## eligible_for_review:TIR_lag_below_65 -0.454 0.649628
## eligible_for_review:p_worn_lag_above_50 -0.158 0.874375
## eligible_for_review:TIR_lag_below_65:p_worn_lag_above_50 0.798 0.424798
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1182 on 2732 degrees of freedom
## Multiple R-squared: 0.06454, Adjusted R-squared: 0.06215
## F-statistic: 26.93 on 7 and 2732 DF, p-value: < 2.2e-16
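model_based_eval below is the direct (model-based) estimate: with $\hat{Q}(x,a)$ the regression fit above, a policy's value is the average model prediction under the actions the policy would take (a sketch, assuming one decision per row; actions are sampled when $\pi_e$ is stochastic):

$$
\hat{V}_{\mathrm{MB}} = \frac{1}{n}\sum_{i=1}^{n} \hat{Q}\big(x_i, \pi_e(x_i)\big)
$$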
model_based_eval = function(d, p) {
# probability the eval policy treats each row
d[, eval_policy_a := p(d, 1)]
# sample actions from the eval policy (a no-op for the deterministic policies; only the behavioral policy is stochastic)
d$eval_policy_a = rbinom(nrow(d), 1, d$eval_policy_a)
# Add preds
d_a0 = copy(d)
d_a0[,eligible_for_review:=0]
d_a1 = copy(d)
d_a1[,eligible_for_review:=1]
d[, pred_a0 := predict(lm_all, d_a0)]
d[, pred_a1 := predict(lm_all, d_a1)]
# Pick which pred to use based on eval policy
d[, pred_based_on_policy := ifelse(eval_policy_a==1, pred_a1, pred_a0)]
# Model-predicted outcomes
mean(d$pred_based_on_policy)
}
for (pn in names(policy_fns)) {
print(pn)
print(model_based_eval(eval_dt, policy_fns[[pn]]))
}
## [1] "null"
## [1] -0.003724238
## [1] "behavioral"
## [1] -0.00141676
## [1] "tir>=65%"
## [1] -0.002265588
## [1] "tir<65%"
## [1] 0.00376931
## [1] "tir<65% & pw>50%"
## [1] 0.004377022
## [1] "always treat"
## [1] 0.005227959
bootstrap_mb_eval = function(d, i, p) {
boot_d = d[i]
null_policy_reward = model_based_eval(boot_d, null_policy)
eval_policy_reward = model_based_eval(boot_d, p)
return((eval_policy_reward - null_policy_reward))
}
# Bootstrapped results
mb_bs_results = rbindlist(lapply(names(policy_fns), function(pn) {
data.table(
policy = pn,
delta = boot(eval_dt, bootstrap_mb_eval, R=R, parallel="multicore", ncpus=10, p = policy_fns[[pn]])$t[,1]
)
}))
# plot
invisible(mb_bs_results[, policy := reorder(mb_bs_results$policy, mb_bs_results$delta, FUN = mean)])
ggplot(mb_bs_results, aes(x=policy, y=delta, color=policy)) + theme_bw() +
geom_hline(yintercept=0, linetype="dashed") +
stat_summary(
fun = median,
fun.min = function(z) quantile(z,0.05),
fun.max = function(z) quantile(z,0.95),
geom = "pointrange") + ggtitle("MB results")
dr_eval = function(d, p) {
is = is_eval(d, p)[1]
d[, behavior_policy_prob := behavior_policy(d, d$eligible_for_review)]
d[, eval_policy_prob := p(d, d$eligible_for_review)]
d[, weight := (eval_policy_prob/behavior_policy_prob)]
# probability the eval policy treats each row
d[, eval_policy_a := p(d, 1)]
# sample actions from the eval policy (a no-op for the deterministic policies; only the behavioral policy is stochastic)
d$eval_policy_a = rbinom(nrow(d), 1, d$eval_policy_a)
# Add preds
d_a0 = copy(d)
d_a0[,eligible_for_review:=0]
d_a1 = copy(d)
d_a1[,eligible_for_review:=1]
d[, pred_a0 := predict(lm_all, d_a0)]
d[, pred_a1 := predict(lm_all, d_a1)]
# Pick which pred to use based on eval policy
d[, pred_based_on_policy := ifelse(eval_policy_a==1, pred_a1, pred_a0)]
# Model-based adjustment
d[, model_adj := weight*pred_based_on_policy - pred_based_on_policy]
c(is - mean(d$model_adj), is, mean(d$model_adj))
}
for (pn in names(policy_fns)) {
print(pn)
print(dr_eval(eval_dt, policy_fns[[pn]]))
}
## [1] "null"
## [1] -3.724238e-03 -3.757565e-03 -3.332691e-05
## [1] "behavioral"
## [1] -0.001467469 -0.001467469 0.000000000
## [1] "tir>=65%"
## [1] -0.0022655880 -0.0020539535 0.0002116345
## [1] "tir<65%"
## [1] 3.769310e-03 3.766561e-03 -2.749015e-06
## [1] "tir<65% & pw>50%"
## [1] 0.004377022 0.004647951 0.000270929
## [1] "always treat"
## [1] 0.0052279593 0.0054701718 0.0002422124
bootstrap_dr_eval = function(d, i, p) {
boot_d = d[i]
null_policy_reward = dr_eval(boot_d, null_policy)[1]
eval_policy_reward = dr_eval(boot_d, p)[1]
return((eval_policy_reward - null_policy_reward))
}
# Bootstrapped results
dr_bs_results = rbindlist(lapply(names(policy_fns), function(pn) {
data.table(
policy = pn,
delta = boot(eval_dt, bootstrap_dr_eval, R=R, parallel="multicore", ncpus=10, p = policy_fns[[pn]])$t[,1]
)
}))
# plot
invisible(dr_bs_results[, policy := reorder(dr_bs_results$policy, dr_bs_results$delta, FUN = mean)])
ggplot(dr_bs_results, aes(x=policy, y=delta, color=policy)) + theme_bw() +
geom_hline(yintercept=0, linetype="dashed") +
stat_summary(
fun = median,
fun.min = function(z) quantile(z,0.05),
fun.max = function(z) quantile(z,0.95),
geom = "pointrange") + ggtitle("DR results")
wdr_eval = function(d, p) {
wis = wis_eval(d, p)[1]
d[, behavior_policy_prob := behavior_policy(d, d$eligible_for_review)]
d[, eval_policy_prob := p(d, d$eligible_for_review)]
# self-normalized importance weights (sum to 1 across rows)
d[, weight := (eval_policy_prob/behavior_policy_prob) / sum(d$eval_policy_prob/d$behavior_policy_prob)]
d[, eval_policy_a := p(d, 1)]
# sample actions from the eval policy (a no-op for the deterministic policies; only the behavioral policy is stochastic)
d$eval_policy_a = rbinom(nrow(d), 1, d$eval_policy_a)
# Add preds
d_a0 = copy(d)
d_a0[,eligible_for_review:=0]
d_a1 = copy(d)
d_a1[,eligible_for_review:=1]
d[, pred_a0 := predict(lm_all, d_a0)]
d[, pred_a1 := predict(lm_all, d_a1)]
# Pick which pred to use based on eval policy
d[, pred_based_on_policy := ifelse(eval_policy_a==1, pred_a1, pred_a0)]
# Model-based adjustment
d[, model_adj := weight*pred_based_on_policy - pred_based_on_policy]
c(wis - mean(d$model_adj), wis, mean(d$model_adj))
}
for (pn in names(policy_fns)) {
print(pn)
print(wdr_eval(eval_dt, policy_fns[[pn]]))
}
## [1] "null"
## [1] -0.007480431 -0.003757565 0.003722866
## [1] "behavioral"
## [1] -0.002890040 -0.001467469 0.001422570
## [1] "tir>=65%"
## [1] -0.004322845 -0.002058008 0.002264837
## [1] "tir<65%"
## [1] 0.007527092 0.003759154 -0.003767938
## [1] "tir<65% & pw>50%"
## [1] 0.008987010 0.004611671 -0.004375339
## [1] "always treat"
## [1] 0.010696135 0.005470172 -0.005225963
bootstrap_wdr_eval = function(d, i, p) {
boot_d = d[i]
null_policy_reward = wdr_eval(boot_d, null_policy)[1]
eval_policy_reward = wdr_eval(boot_d, p)[1]
return((eval_policy_reward - null_policy_reward))
}
# Bootstrapped results
wdr_bs_results = rbindlist(lapply(names(policy_fns), function(pn) {
data.table(
policy = pn,
delta = boot(eval_dt, bootstrap_wdr_eval, R=R, parallel="multicore", ncpus=10, p = policy_fns[[pn]])$t[,1]
)
}))
# plot
invisible(wdr_bs_results[, policy := reorder(wdr_bs_results$policy, wdr_bs_results$delta, FUN = mean)])
ggplot(wdr_bs_results, aes(x=policy, y=delta, color=policy)) + theme_bw() +
geom_hline(yintercept=0, linetype="dashed") +
stat_summary(
fun = median,
fun.min = function(z) quantile(z,0.05),
fun.max = function(z) quantile(z,0.95),
geom = "pointrange") + ggtitle("WDR results")
# Combine results
combo_bs_results = rbind(
is_bs_results[,method:="IS"],
wis_bs_results[,method:="WIS"],
mb_bs_results[,method:="MB"],
dr_bs_results[,method:="DR"],
wdr_bs_results[,method:="WDR"]
)
combo_bs_results$method = factor(combo_bs_results$method, levels=c("MB","IS","WIS","DR","WDR"))
combo_bs_results$policy = factor(combo_bs_results$policy, levels=c("null","tir>=65%","behavioral","tir<65%", "tir<65% & pw>50%", "always treat"))
ggplot(combo_bs_results, aes(x=method, y=delta, color=method)) + theme_bw() +
geom_hline(yintercept=0, linetype="dashed") + facet_wrap(~policy) +
stat_summary(
fun = median,
fun.min = function(z) quantile(z,0.025),
fun.max = function(z) quantile(z,0.975),
geom = "pointrange") + ggtitle("All results")
combo_bs_results[, .(m=mean(delta), sd=sd(delta)), by=c('policy','method')][order(policy,method)]
| policy | method | m | sd |
|---|---|---|---|
| null | MB | 0.0000000 | 0.0000000 |
| null | IS | 0.0000000 | 0.0000000 |
| null | WIS | 0.0000000 | 0.0000000 |
| null | DR | 0.0000000 | 0.0000000 |
| null | WDR | 0.0000000 | 0.0000000 |
| tir>=65% | MB | 0.0014584 | 0.0000308 |
| tir>=65% | IS | 0.0017792 | 0.0032145 |
| tir>=65% | WIS | 0.0019151 | 0.0029331 |
| tir>=65% | DR | 0.0011580 | 0.0029931 |
| tir>=65% | WDR | 0.0031738 | 0.0031455 |
| behavioral | MB | 0.0022281 | 0.0001160 |
| behavioral | IS | 0.0023940 | 0.0013092 |
| behavioral | WIS | 0.0022759 | 0.0012396 |
| behavioral | DR | 0.0023889 | 0.0012333 |
| behavioral | WDR | 0.0045855 | 0.0012545 |
| tir<65% | MB | 0.0074890 | 0.0002074 |
| tir<65% | IS | 0.0072922 | 0.0040847 |
| tir<65% | WIS | 0.0072975 | 0.0040717 |
| tir<65% | DR | 0.0073906 | 0.0037251 |
| tir<65% | WDR | 0.0152271 | 0.0040793 |
| tir<65% & pw>50% | MB | 0.0081119 | 0.0001821 |
| tir<65% & pw>50% | IS | 0.0081592 | 0.0035622 |
| tir<65% & pw>50% | WIS | 0.0085750 | 0.0036644 |
| tir<65% & pw>50% | DR | 0.0081148 | 0.0035909 |
| tir<65% & pw>50% | WDR | 0.0164977 | 0.0036536 |
| always treat | MB | 0.0089463 | 0.0001872 |
| always treat | IS | 0.0092650 | 0.0052109 |
| always treat | WIS | 0.0090921 | 0.0050805 |
| always treat | DR | 0.0088771 | 0.0048392 |
| always treat | WDR | 0.0178158 | 0.0049360 |