library(data.table)
library(ggplot2)
library(gridExtra)
library(grf)
library(ggsci)
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(parallel)
library(foreach)
library(rlang)
##
## Attaching package: 'rlang'
## The following object is masked from 'package:data.table':
##
## :=
Load data
weekly_dt = fread('~/Google Drive/My Drive/Stanford/CGM/DE/20221111/output/deid_patient_weeks.csv')
daily_dt = fread('~/Google Drive/My Drive/Stanford/CGM/DE/20221111/output/deid_patient_days.csv')
Add any missing observations
cross_join = function(d1,d2) {
o = merge(d1[,i:=1], d2[,i:=1], by='i', allow.cartesian=T)
o[,i:=NULL]
return(o)
}
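As an illustration (the toy tables toy_a and toy_b are ours, not study data), cross_join simply pairs every row of its first argument with every row of its second:
toy_a = data.table(a = 1:2)
toy_b = data.table(b = c("x", "y"))
cross_join(toy_a, toy_b) # 4 rows: every value of a paired with every value of b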
weekly_dt[,max_week_id_for_pat := max(week_id), by='pat_id']
daily_dt[,max_day_id_for_pat := max(day_id), by='pat_id']
unique_patient_weeks = cross_join(unique(weekly_dt[,.(pat_id,max_week_id_for_pat)]), data.table(week_id=seq(min(weekly_dt$week_id), max(weekly_dt$week_id))))
unique_patient_weeks = unique_patient_weeks[week_id <= max_week_id_for_pat]
unique_patient_days = cross_join(unique(daily_dt[,.(pat_id,max_day_id_for_pat)]), data.table(day_id=seq(min(daily_dt$day_id), max(daily_dt$day_id))))
unique_patient_days = unique_patient_days[day_id <= max_day_id_for_pat]
full_weekly_dt = merge(unique_patient_weeks, weekly_dt, by=c('pat_id','week_id'), all.x=T)
full_daily_dt = merge(unique_patient_days, daily_dt, by=c('pat_id','day_id'), all.x=T)
Add rolling means and differences to daily data
setorder(full_weekly_dt, pat_id, week_id)
setorder(full_daily_dt, pat_id, day_id)
# Fill missing values of time_worn and in_range with zeros
full_daily_dt[is.na(time_worn), time_worn := 0]
full_weekly_dt[is.na(time_worn), time_worn := 0]
full_daily_dt[is.na(in_range), in_range := 0]
full_weekly_dt[is.na(in_range), in_range := 0]
# 7 day rolling mean TIR weighted by time worn
full_daily_dt[,`:=`(
TIR_7d_weighted_sum = frollsum(in_range*time_worn, 7, hasNA=FALSE),
TW_7d_rolling_sum = frollsum(time_worn, 7, hasNA=FALSE)
), by=c('pat_id')]
full_daily_dt[TW_7d_rolling_sum>0, TIR_7dr := TIR_7d_weighted_sum/TW_7d_rolling_sum]
full_daily_dt[, TW_7dr := TW_7d_rolling_sum/7]
# Forward 7-day difference in TIR_7dr
full_daily_dt[, dTIR_7dr := shift(TIR_7dr, n = 7, type = "lead") - TIR_7dr, by='pat_id']
full_daily_dt[, TIR_7dr_lead := shift(TIR_7dr, n = 7, type = "lead"), by='pat_id']
full_daily_dt[, TW_7dr_lead := shift(TW_7dr, n = 7, type = "lead"), by='pat_id']
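A quick toy check of the weighted rolling TIR above (the toy table is ours, for illustration only): days with time_worn = 0 should not dilute the 7-day estimate.
toy = data.table(in_range = c(rep(0.6, 3), rep(0.2, 4), rep(0.6, 3)),
                 time_worn = c(rep(1, 3), rep(0, 4), rep(1, 3)))
toy[, tir_7dr := frollsum(in_range * time_worn, 7) / frollsum(time_worn, 7)]
toy$tir_7dr # NA for the first 6 days, then 0.6 throughout: the unworn days (TIR 0.2) never enter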
Add a running count of contacts “to go” so patient-weeks can be filtered to those still eligible for contacts
full_weekly_dt[is.na(contact), contact:=0]
setorder(full_weekly_dt, pat_id, -week_id)
full_weekly_dt[, contacts_to_go := cumsum(contact), by='pat_id']
setorder(full_weekly_dt, pat_id, week_id)
Identify patient population for simulations
# Inclusion criteria:
# The simulation population includes all study participants who received remote monitoring in their second year after diabetes diagnosis and who, in their first year, had at least 26 weeks of CGM data and received at least one remote-monitoring message.
fy_data = full_daily_dt[in_first_year==1, .(n_weeks = sum(time_worn>0)/7, n_contacts = sum(contact)), by='pat_id']
incl_pats = fy_data[n_weeks>=26 & n_contacts>=1]
full_daily_dt_incl = merge(full_daily_dt[in_first_year==1], incl_pats, by=c('pat_id'))
full_daily_dt_incl[, .(uniqueN(pat_id), .N, sum(contact))]
## V1 N V3
## 1: 200 75912 3328
Create sim_dt of weekly data from second year, with dTIR
# Forward 1-week difference in TIR (weekly data)
full_weekly_dt[, dTIR := shift(in_range, n = 1, type = "lead") - in_range, by='pat_id']
# Centered 2-week difference: next week minus previous week
full_weekly_dt[, dTIR_2w := shift(in_range, n = 1, type = "lead") - shift(in_range, n = 1, type = "lag"), by='pat_id']
# Forward 2-week difference
full_weekly_dt[, dTIR_2wf := shift(in_range, n = 2, type = "lead") - in_range, by='pat_id']
full_weekly_dt[, lag_time_worn := shift(time_worn), by='pat_id']
full_weekly_dt[, lag_in_range := shift(in_range), by='pat_id']
full_weekly_dt[, lead_time_worn := shift(time_worn, type="lead"), by='pat_id']
full_weekly_dt[, lead_eligible_for_review := shift(eligible_for_review, type="lead"), by='pat_id']
full_weekly_dt[time_worn>0.5, mean(dTIR, na.rm=T), by='contact']
## contact V1
## 1: 0 -0.007695477
## 2: 1 0.014140587
full_weekly_dt[lag_time_worn>0.5 & lag_in_range<0.65 & in_first_year==0, mean(dTIR_2w, na.rm=T), by='eligible_for_review']
## eligible_for_review V1
## 1: 1 0.02581935
## 2: 0 0.01768385
# Filter to second-year observations for patients meeting first-year inclusion criteria above
sim_dt = merge(full_weekly_dt[in_first_year==0 & time_worn>0 & (contacts_to_go-contact)>0], incl_pats, by=c('pat_id'))
# Filter to patients who were eligible at least once and ineligible at least once
sim_dt[, pat_ever_eligible := max(eligible_for_review), by='pat_id']
sim_dt[, pat_ever_ineligible := max(1-eligible_for_review), by='pat_id']
sim_dt = sim_dt[pat_ever_eligible==1 & pat_ever_ineligible==1]
Winsorize dTIR outcomes at ±0.2
sim_dt[!is.na(dTIR), dTIR_wins := pmin(pmax(-0.2, dTIR), 0.2)]
sim_dt[, .(sd(dTIR, na.rm=T), sd(dTIR_wins, na.rm=T))]
## V1 V2
## 1: 0.1172603 0.09731258
full_daily_dt_incl[!is.na(dTIR_7dr), dTIR_7dr_wins := pmin(pmax(-0.2, dTIR_7dr), 0.2)]
Summarize first-year data by patient
stepwise_selection_dt = full_daily_dt_incl[TW_7dr>0.5 & TW_7dr_lead > 0.5 & TIR_7dr<0.7]
pats_dt = stepwise_selection_dt[,.(
n_clinic_remote_contact = sum(contact, na.rm=T),
contact_m_dTIR = mean(ifelse(contact==1, dTIR_7dr_wins, NA), na.rm=T),
contact_sd_dTIR = sd(ifelse(contact==1, dTIR_7dr_wins, NA), na.rm=T)
), by = 'pat_id']
# Shrink contact_m_dTIR
m_contact_m_dTIR_shrunk = pats_dt[,weighted.mean(contact_m_dTIR, n_clinic_remote_contact)]
var_contact_m_dTIR = pats_dt[, var(contact_m_dTIR, na.rm=T)]
pats_dt[, pat_var_contact_m_dTIR := ifelse(n_clinic_remote_contact>=10, contact_sd_dTIR^2, var_contact_m_dTIR)]
pats_dt[, pat_se2_contact_m_dTIR := pat_var_contact_m_dTIR/n_clinic_remote_contact]
pats_dt[, wt := var_contact_m_dTIR/(var_contact_m_dTIR+pat_se2_contact_m_dTIR)]
pats_dt[, contact_m_dTIR_shrunk := wt*contact_m_dTIR + (1-wt)*m_contact_m_dTIR_shrunk]
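In effect, the shrinkage above follows a standard empirical-Bayes form (notation ours): each patient's mean per-contact dTIR is pulled toward the contact-weighted grand mean with weight w_i = tau^2 / (tau^2 + sigma_i^2 / n_i), where tau^2 is the between-patient variance of contact_m_dTIR, sigma_i^2 is the patient's own variance when they have at least 10 contacts (tau^2 otherwise), and n_i is the patient's contact count. The shrunken estimate is w_i * contact_m_dTIR_i + (1 - w_i) * grand mean, so noisy per-patient means (few contacts, large sigma_i^2 / n_i) are pulled hardest toward the overall mean.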
Fit P(contact | eligibility, X) using first-year data
rf_dt = full_weekly_dt[in_first_year==1 & time_worn>0]
rf_X = model.matrix( ~ time_worn + in_range + hypo + extreme_hypo + hyper + extreme_hyper + gri + low_tir_flag + high_hypo_flag + high_extreme_hypo_flag + num_flags, rf_dt)
set.seed(1814)
rf = regression_forest(
X = rf_X,
Y = rf_dt[, contact],
tune.parameters = "all"
)
rf_varimp_dt = data.table(
var = colnames(rf_X),
imp = variable_importance(rf)[,1])
rf_varimp_dt[order(-imp)]
## var imp
## 1: num_flags 0.6000134307
## 2: gri 0.1361812611
## 3: time_worn 0.0915668878
## 4: in_range 0.0570125056
## 5: hypo 0.0428485298
## 6: hyper 0.0271907043
## 7: extreme_hypo 0.0212578310
## 8: extreme_hyper 0.0181721510
## 9: low_tir_flag 0.0050342284
## 10: high_hypo_flag 0.0005117192
## 11: high_extreme_hypo_flag 0.0002107511
## 12: (Intercept) 0.0000000000
# grf stores out-of-bag predictions on the training data in rf$predictions
rf_dt[, rf_pred_contact := rf$predictions]
# Rough fit check: 1 - SD(residuals)/SD(outcome)
rf_dt[, 1-sd(rf_pred_contact-contact)/sd(contact)]
## [1] 0.04425741
rf_dt[, cor(rf_pred_contact, contact)]
## [1] 0.2942437
ggplot(rf_dt, aes(x=num_flags, y=rf_pred_contact)) + stat_summary(fun.data="mean_cl_normal", geom = 'pointrange')
ggplot(rf_dt, aes(x=in_range, y=rf_pred_contact)) + geom_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Add predicted contact probabilities to sim_dt
sim_dt_X = model.matrix( ~ time_worn + in_range + hypo + extreme_hypo + hyper + extreme_hyper + gri + low_tir_flag + high_hypo_flag + high_extreme_hypo_flag + num_flags, sim_dt)
sim_dt[, pred_contact := predict(rf, sim_dt_X)$predictions]
sim_dt[eligible_for_review==1 & contacts_to_go>0, cor(pred_contact, contact)]
## [1] 0.249747
ggplot(sim_dt[eligible_for_review==1 & contacts_to_go>0], aes(x=pred_contact, y=contact)) + geom_smooth() + geom_abline(linetype="dashed") + theme_bw()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Data sizes
fsim_dt = sim_dt[time_worn>0 & lead_eligible_for_review==0 & (contacts_to_go-contact)>0]
# N patients
fsim_dt[, uniqueN(pat_id)]
## [1] 94
# Year 1
full_weekly_dt[in_first_year==1 & time_worn>0 & (contacts_to_go-contact)>0, .(
patient_weeks = .N,
contacts = sum(contact)
)]
## patient_weeks contacts
## 1: 9934 3222
# Year 2
full_weekly_dt[in_first_year==0 & time_worn>0 & (contacts_to_go-contact)>0, .(
patient_weeks = .N,
reviewed = sum(eligible_for_review),
contacts = sum(contact)
)]
## patient_weeks reviewed contacts
## 1: 4648 1106 648
# Total
full_weekly_dt[time_worn>0 & (contacts_to_go-contact)>0, .(
patient_weeks = .N,
reviewed = sum(eligible_for_review),
contacts = sum(contact)
)]
## patient_weeks reviewed contacts
## 1: 14582 11040 3870
Define functions used to target patient-weeks in simulations
low_tir_inclusion = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
setorder(patient_weeks_dt, in_range, randnum)
return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}
random_inclusion = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
setorder(patient_weeks_dt, randnum)
return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}
pcontactonly = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
setorder(patient_weeks_dt, -pred_contact, randnum)
return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}
p_contact_exp_dTIR = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
patient_weeks_dt = merge(patient_weeks_dt, pats_dt[,.(pat_id, contact_m_dTIR_shrunk)], by='pat_id', all.x=T)
patient_weeks_dt[is.na(contact_m_dTIR_shrunk), contact_m_dTIR_shrunk := 0]
patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
patient_weeks_dt[, comp := pred_contact*contact_m_dTIR_shrunk]
setorder(patient_weeks_dt, -comp, randnum)
return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}
Function to run one simulation of an MRT
run_simulation = function(seed, sim_dt, inclusion_fn, n_weeks, n_patients_pop_per_week, n_patients_included_per_week) {
eligible_dt = sim_dt[eligible_for_review==1]
ineligible_dt = sim_dt[eligible_for_review==0]
# Create output by iterating over n_weeks
output_dt = rbindlist(lapply(1:n_weeks, function(s) {
# Sample population available for experiment in this week
# Sample eligible and ineligible pops
set.seed(seed*1E6 + s)
eligible_pop_dt = eligible_dt[sample.int(nrow(eligible_dt), floor(n_patients_pop_per_week/2), replace = TRUE)]
ineligible_pop_dt = ineligible_dt[sample.int(nrow(ineligible_dt), ceiling(n_patients_pop_per_week/2), replace = TRUE)]
# Select experiment population from each pop
eligible_exp_dt = inclusion_fn(eligible_pop_dt, data.table(), floor(n_patients_included_per_week/2))
ineligible_exp_dt = inclusion_fn(ineligible_pop_dt, data.table(), ceiling(n_patients_included_per_week/2))
exp_dt = rbind(eligible_exp_dt, ineligible_exp_dt)
exp_dt[, sim_week_id := s]
return(exp_dt)
}))
return(output_dt)
}
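For intuition, a single simulated trial and its effect estimate can be produced directly (illustrative only; example_run and the parameter values are ours and not part of the pipeline below):
example_run = run_simulation(seed = 1, sim_dt = fsim_dt, inclusion_fn = random_inclusion,
                             n_weeks = 26, n_patients_pop_per_week = 200,
                             n_patients_included_per_week = 50)
summary(lm(dTIR ~ eligible_for_review, data = example_run))$coefficients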
Run many simulations for each targeting method, varying the inclusion function, n_patients_included_per_week, and n_weeks.
sim_grid = expand.grid(
n_pats = c(15, 20, 25, seq(30,200, 5)),
n_weeks = c(26, 52)
)
run_many_sims_calc_power = function(n_sims, sim_grid, sim_dt, inclusion_fn, n_patients_pop_per_week) {
# Detect the number of available cores and create cluster
cl <- parallel::makeCluster(detectCores())
clusterExport(cl, varlist=c('run_simulation','data.table', 'pats_dt'))
clusterExport(cl, c('sim_dt','sim_grid','inclusion_fn','n_patients_pop_per_week','run_simulation'), current_env())
# Activate cluster for foreach library
doParallel::registerDoParallel(cl)
# Outer loop across sim_grid
out = rbindlist(lapply(1:nrow(sim_grid), function(i) {
# Inner loop across n_sims
out_dt = foreach::foreach(s = 1:n_sims, .combine = rbind) %dopar% {
library(data.table)
out_dt = run_simulation(
seed=s,
sim_dt,
inclusion_fn,
n_weeks = sim_grid$n_weeks[i],
n_patients_pop_per_week = n_patients_pop_per_week,
n_patients_included_per_week = sim_grid$n_pats[i])
m = lm(dTIR ~ eligible_for_review, data = out_dt)
coef = summary(m)$coefficients["eligible_for_review", "Estimate"]
se = summary(m)$coefficients["eligible_for_review", "Std. Error"]
pval = summary(m)$coefficients["eligible_for_review", "Pr(>|t|)"]
result_dt = data.table(
n_weeks = sim_grid$n_weeks[i],
n_patients_pop_per_week = n_patients_pop_per_week,
n_patients_included_per_week = sim_grid$n_pats[i],
n_unique_pats_included_from_input_data = out_dt[,uniqueN(pat_id)],
permuted=FALSE,
run_idx = s,
pval = pval,
coef = coef,
se = se)
# Add permuted version as baseline (in supplement)
set.seed(1814)
out_dt$eligible_for_review = out_dt$eligible_for_review[sample.int(length(out_dt$eligible_for_review))]
m = lm(dTIR ~ eligible_for_review, data = out_dt)
coef = summary(m)$coefficients["eligible_for_review", "Estimate"]
se = summary(m)$coefficients["eligible_for_review", "Std. Error"]
pval = summary(m)$coefficients["eligible_for_review", "Pr(>|t|)"]
permuted_result_dt = data.table(
n_weeks = sim_grid$n_weeks[i],
n_patients_pop_per_week = n_patients_pop_per_week,
n_patients_included_per_week = sim_grid$n_pats[i],
n_unique_pats_included_from_input_data = out_dt[,uniqueN(pat_id)],
permuted=TRUE,
run_idx = s,
pval = pval,
coef = coef,
se = se)
return(rbind(result_dt, permuted_result_dt))
}
out_dt
}))
parallel::stopCluster(cl)
out
}
N_SIMS = 500
N_PATIENTS_IN_POP = 200
fsim_dt = sim_dt[time_worn>0 & lead_eligible_for_review==0 & (contacts_to_go-contact)>0]
many_sims_random = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, random_inclusion, n_patients_pop_per_week=N_PATIENTS_IN_POP)
many_sims_low_tir = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, low_tir_inclusion, n_patients_pop_per_week=N_PATIENTS_IN_POP)
many_sims_pcontactonly = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, pcontactonly, n_patients_pop_per_week=N_PATIENTS_IN_POP)
many_sims_p_contact_exp_dTIR = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, p_contact_exp_dTIR, n_patients_pop_per_week=N_PATIENTS_IN_POP)
Make plots
ggplot_opts = function(y_colname, hline_yval, yaxis_max) {
return(list(
geom_line(aes_string(x='n_patients_included_per_week', color='factor(n_weeks)', linetype='permuted', y=y_colname)),
geom_point(aes_string(x='n_patients_included_per_week', color='factor(n_weeks)', shape='permuted', y=y_colname)),
theme_bw(),
geom_hline(yintercept = hline_yval, linetype="dashed"),
coord_cartesian(ylim=c(0,yaxis_max)),
xlab("N patients targeted weekly (of 200 total)"),
scale_color_npg(),
scale_shape_manual(values = c(16, 1)),
ylab(NULL),
theme(legend.position = "none")
))}
agg_output = function(out) {
agg_out = out[,.(
# Power: share of runs with a statistically significant, positive estimate
power = mean(pval < 0.05 & coef>0),
m_se = mean(se),
m_coef = mean(coef),
m_coef_stat_sig = mean(ifelse(pval<0.05, coef, NA), na.rm=T),
m_n_unique_pats_included_from_input_data = mean(n_unique_pats_included_from_input_data)
), by=c('n_weeks', 'n_patients_pop_per_week', 'n_patients_included_per_week', 'permuted')]
agg_out[, coef_bias := m_coef_stat_sig - m_coef]
return(agg_out)
}
make_grid_of_plots = function(y_colname, ylabel, hline_yval, yaxis_max) {
p1 = ggplot(agg_output(many_sims_random)) + ggtitle("Random") + ggplot_opts(y_colname, hline_yval, yaxis_max)
p2 = ggplot(agg_output(many_sims_low_tir)) + ggtitle("Ascending TIR") + ggplot_opts(y_colname, hline_yval, yaxis_max)
p4 = ggplot(agg_output(many_sims_pcontactonly)) + ggtitle("Descending P(contact)") + ggplot_opts(y_colname, hline_yval, yaxis_max)
p6 = ggplot(agg_output(many_sims_p_contact_exp_dTIR)) + ggtitle("P(contact) * E[dTIR | contact]") + ggplot_opts(y_colname, hline_yval, yaxis_max)
if(y_colname == 'm_coef') {
ribbon = list(geom_ribbon(aes_string(ymin = paste(y_colname,'- 2*m_se'), ymax = paste(y_colname,'+ 2*m_se'), x='n_patients_included_per_week'),
alpha=0.1, data = function(x){x[permuted == FALSE & n_weeks==52]}))
p1 = p1 + ribbon
p2 = p2 + ribbon
p4 = p4 + ribbon
p6 = p6 + ribbon
}
grid.arrange(p1 + ylab(ylabel), p2, p4 + ylab(ylabel), p6, nrow = 2)
}
# Power
make_grid_of_plots('power', 'Estimated Power', 0.9,1)
# ATE
make_grid_of_plots('m_coef', 'Estimated ATE', 0,0.03)
# Standard error of the ATE estimate
make_grid_of_plots('m_se','SE(ATE)', 0,0.02)
# N pats from historical data
make_grid_of_plots('m_n_unique_pats_included_from_input_data','N patients', 0,100)
Figures to export
make_four_grid_of_plots = function(y_colname, ylabel, hline_yval, yaxis_max) {
# Restrict to the 52-week trial when plotting the ATE
if(y_colname == 'm_coef') {
f_many_sims_random = many_sims_random[permuted==0 & n_weeks==52]
f_many_sims_low_tir = many_sims_low_tir[permuted==0 & n_weeks==52]
f_many_sims_pcontactonly = many_sims_pcontactonly[permuted==0 & n_weeks==52]
f_many_sims_p_contact_exp_dTIR = many_sims_p_contact_exp_dTIR[permuted==0 & n_weeks==52]
} else {
f_many_sims_random = many_sims_random[permuted==0]
f_many_sims_low_tir = many_sims_low_tir[permuted==0]
f_many_sims_pcontactonly = many_sims_pcontactonly[permuted==0]
f_many_sims_p_contact_exp_dTIR = many_sims_p_contact_exp_dTIR[permuted==0]
}
p1 = ggplot(agg_output(f_many_sims_random)) + ggtitle("Random") + ggplot_opts(y_colname, hline_yval, yaxis_max)
p2 = ggplot(agg_output(f_many_sims_low_tir)) + ggtitle("Ascending TIR") + ggplot_opts(y_colname, hline_yval, yaxis_max)
p4 = ggplot(agg_output(f_many_sims_pcontactonly)) + ggtitle("Descending P(contact)") + ggplot_opts(y_colname, hline_yval, yaxis_max)
p6 = ggplot(agg_output(f_many_sims_p_contact_exp_dTIR)) + ggtitle("P(contact) * E[dTIR | contact]") + ggplot_opts(y_colname, hline_yval, yaxis_max)
if(y_colname == 'm_coef') {
ribbon = list(geom_ribbon(aes_string(ymin = paste(y_colname,'- 2*m_se'), ymax = paste(y_colname,'+ 2*m_se'), x='n_patients_included_per_week'),
alpha=0.1, data = function(x){x[permuted == FALSE & n_weeks==52]}))
p1 = p1 + ribbon
p2 = p2 + ribbon
p4 = p4 + ribbon
p6 = p6 + ribbon
}
grid.arrange(p1 + ylab(ylabel), p2, p4 + ylab(ylabel), p6, nrow = 2)
}
# Power
make_four_grid_of_plots('power', 'Estimated PPOS', 0.9,1)
make_four_grid_of_plots('m_coef', 'Estimated ATE', 0,0.03)
# Save SVGs
ggsave(file="~/Downloads/ates.svg", plot=make_four_grid_of_plots('m_coef', 'Estimated ATE', 0,0.03), width=8, height=4)
ggsave(file="~/Downloads/ppos.svg", plot=make_four_grid_of_plots('power', 'Estimated PPOS', 0.9,1), width=8, height=4)
# Save one with legend
ggsave(file="~/Downloads/legend.svg",
plot=ggplot(agg_output(many_sims_random)) + ggplot_opts('m_coef', 0, 0.05) + scale_linetype(guide = 'none') + scale_shape(guide = 'none') +
guides(color=guide_legend(title="N weeks")) + theme(legend.position = "top"),
width=8, height=4)
## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.
Table of minimum sample sizes
# N patients needed for 90% PPOS for each duration and targeting policy
rbind(
agg_output(many_sims_random[permuted==0])[, policy:="Random"],
agg_output(many_sims_low_tir[permuted==0])[, policy:="ascTIR"],
agg_output(many_sims_pcontactonly[permuted==0])[, policy:="descPcontact"],
agg_output(many_sims_p_contact_exp_dTIR[permuted==0])[, policy:="descPcontactEdTIR"])[
power>=0.9, .(min_n = min(n_patients_included_per_week)), by=c('policy','n_weeks')]
## policy n_weeks min_n
## 1: Random 26 190
## 2: Random 52 105
## 3: ascTIR 26 115
## 4: ascTIR 52 90
## 5: descPcontact 26 135
## 6: descPcontact 52 90
## 7: descPcontactEdTIR 26 90
## 8: descPcontactEdTIR 52 15
RCT power when reviewing 1/4 of patients (50 of 200) each week
z_alpha = qnorm(0.975) # z(1 - alpha/2)
z_beta = qnorm(0.9) # z(1 - beta)
sd = full_weekly_dt[time_worn>0 & week_id<=26, .(m_tir=sd(in_range, na.rm=T)), by='pat_id'][,sd(m_tir, na.rm=T)]
print(sd)
## [1] 0.06102978
# Assume the effect of a review persists for roughly 8 weeks
num_weeks_effect_lingers = 8
# In the hypothetical RCT, 1/4 of patients are reviewed each week
frac_reviewed = 1/4
# Scale the per-review weekly effect up to a steady-state RCT ATE
multiplier_to_rct_ate = num_weeks_effect_lingers*frac_reviewed
print(multiplier_to_rct_ate)
## [1] 2
ates = rbind(
agg_output(many_sims_random[permuted==0])[, policy:="Random"],
agg_output(many_sims_low_tir[permuted==0])[, policy:="ascTIR"],
agg_output(many_sims_pcontactonly[permuted==0])[, policy:="descPcontact"],
agg_output(many_sims_p_contact_exp_dTIR[permuted==0])[, policy:="descPcontactEdTIR"])[
n_patients_included_per_week==50 & n_weeks==52, .(m_ate = mean(m_coef)), by=c('policy')]
ates[, rct_ate := multiplier_to_rct_ate*m_ate]
# Total N across both arms for 90% power at two-sided alpha = 0.05 (see note below)
get_ss = function(e) ceiling(2*2*((z_alpha+z_beta)/(e/sd))^2)
ates[, pats_needed_in_rct_for_90power := get_ss(rct_ate)]
ates
## policy m_ate rct_ate pats_needed_in_rct_for_90power
## 1: Random 0.009924593 0.019849185 398
## 2: ascTIR 0.003219785 0.006439569 3776
## 3: descPcontact 0.012519879 0.025039758 250
## 4: descPcontactEdTIR 0.014078952 0.028157903 198
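For reference (notation ours), get_ss above is the standard two-arm total sample size N = 2 * [ 2 * sigma^2 * (z_{1-alpha/2} + z_{1-beta})^2 / delta^2 ], with sigma the SD printed above and delta the projected RCT effect rct_ate; plugging in the Random-policy rct_ate reproduces the 398 in the table.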
Save workspace
save.image("~/Downloads/powerenv.RData")