library(data.table)
library(ggplot2)
library(gridExtra)
library(grf)
library(ggsci)
library(doParallel)

## Loading required package: foreach

## Loading required package: iterators

## Loading required package: parallel

library(parallel)
library(foreach)
library(rlang)

## 
## Attaching package: 'rlang'

## The following object is masked from 'package:data.table':
## 
##     :=

Load data

weekly_dt = fread('~/Google Drive/My Drive/Stanford/CGM/DE/20221111/output/deid_patient_weeks.csv')
daily_dt = fread('~/Google Drive/My Drive/Stanford/CGM/DE/20221111/output/deid_patient_days.csv')

Add any missing observations

cross_join = function(d1,d2) {
  o = merge(d1[,i:=1], d2[,i:=1], by='i', allow.cartesian=T)
  o[,i:=NULL]
  return(o)
}

weekly_dt[,max_week_id_for_pat := max(week_id), by='pat_id']
daily_dt[,max_day_id_for_pat := max(day_id), by='pat_id']

unique_patient_weeks = cross_join(unique(weekly_dt[,.(pat_id,max_week_id_for_pat)]), data.table(week_id=seq(min(weekly_dt$week_id), max(weekly_dt$week_id))))
unique_patient_weeks = unique_patient_weeks[week_id <= max_week_id_for_pat]

unique_patient_days = cross_join(unique(daily_dt[,.(pat_id,max_day_id_for_pat)]), data.table(day_id=seq(min(daily_dt$day_id), max(daily_dt$day_id))))
unique_patient_days = unique_patient_days[day_id <= max_day_id_for_pat]

full_weekly_dt = merge(unique_patient_weeks, weekly_dt, by=c('pat_id','week_id'), all.x=T)
full_daily_dt = merge(unique_patient_days, daily_dt, by=c('pat_id','day_id'), all.x=T)

Add rolling means and differences to daily data

setorder(full_weekly_dt, pat_id, week_id)
setorder(full_daily_dt, pat_id, day_id)

# Fill missing values of time_worn and in_range with zeros
full_daily_dt[is.na(time_worn), time_worn := 0]
full_weekly_dt[is.na(time_worn), time_worn := 0]
full_daily_dt[is.na(in_range), in_range := 0]
full_weekly_dt[is.na(in_range), in_range := 0]

# 7 day rolling mean TIR weighted by time worn
full_daily_dt[,`:=`(
  TIR_7d_weighted_sum = frollsum(in_range*time_worn, 7, hasNA=FALSE),
  TW_7d_rolling_sum = frollsum(time_worn, 7, hasNA=FALSE)
  ), by=c('pat_id')]
full_daily_dt[TW_7d_rolling_sum>0, TIR_7dr := TIR_7d_weighted_sum/TW_7d_rolling_sum]
full_daily_dt[, TW_7dr := TW_7d_rolling_sum/7]

# Forward 7-day difference in TIR_7dr
full_daily_dt[, dTIR_7dr := shift(TIR_7dr, n = 7, type = "lead") - TIR_7dr, by='pat_id']
full_daily_dt[, TIR_7dr_lead := shift(TIR_7dr, n = 7, type = "lead"), by='pat_id']
full_daily_dt[, TW_7dr_lead := shift(TW_7dr, n = 7, type = "lead"), by='pat_id']

Add contacts “to-go” to enable filtering of patient-weeks to those eligible for contacts

full_weekly_dt[is.na(contact), contact:=0]

setorder(full_weekly_dt, pat_id, -week_id)
full_weekly_dt[, contacts_to_go := cumsum(contact), by='pat_id']
setorder(full_weekly_dt, pat_id, week_id)

Identify patient population for simulations

# Inclusion criteria:
# The population chosen for our simulations included all study participants who received remote monitoring in their second year after diabetes diagnosis, and who had at least 26 weeks of CGM data and received a message from remote monitoring in their first year. 

fy_data = full_daily_dt[in_first_year==1, .(n_weeks = sum(time_worn>0)/7, n_contacts = sum(contact)), by='pat_id']
incl_pats = fy_data[n_weeks>=26 & n_contacts>=1]

full_daily_dt_incl = merge(full_daily_dt[in_first_year==1], incl_pats, by=c('pat_id'))
full_daily_dt_incl[, .(uniqueN(pat_id), .N, sum(contact))]

##     V1     N   V3
## 1: 200 75912 3328

Create sim_dt of weekly data from second year, with dTIR

# Forward 7-day difference in TIR
full_weekly_dt[, dTIR := shift(in_range, n = 1, type = "lead") - in_range, by='pat_id']
full_weekly_dt[, dTIR_2w := shift(in_range, n = 1, type = "lead") - shift(in_range, n = 1, type = "lag"), by='pat_id']
full_weekly_dt[, dTIR_2wf := shift(in_range, n = 2, type = "lead") - in_range, by='pat_id']
full_weekly_dt[, lag_time_worn := shift(time_worn), by='pat_id']
full_weekly_dt[, lag_in_range := shift(in_range), by='pat_id']
full_weekly_dt[, lead_time_worn := shift(time_worn, type="lead"), by='pat_id']
full_weekly_dt[, lead_eligible_for_review := shift(eligible_for_review, type="lead"), by='pat_id']

full_weekly_dt[time_worn>0.5, mean(dTIR, na.rm=T), by='contact']

##    contact           V1
## 1:       0 -0.007695477
## 2:       1  0.014140587

full_weekly_dt[lag_time_worn>0.5 & lag_in_range<0.65 & in_first_year==0, mean(dTIR_2w, na.rm=T), by='eligible_for_review']

##    eligible_for_review         V1
## 1:                   1 0.02581935
## 2:                   0 0.01768385

# Filter to second-year observations for patients meeting first-year inclusion criteria above
sim_dt = merge(full_weekly_dt[in_first_year==0 & time_worn>0 & (contacts_to_go-contact)>0], incl_pats, by=c('pat_id'))

# Filter to pats eligible and eligible at least once
sim_dt[, pat_ever_eligible := max(eligible_for_review), by='pat_id']
sim_dt[, pat_ever_ineligible := max(1-eligible_for_review), by='pat_id']
sim_dt = sim_dt[pat_ever_eligible==1 & pat_ever_ineligible==1]

Winsorize

sim_dt[!is.na(dTIR), dTIR_wins := pmin(pmax(-0.2, dTIR), 0.2)]
sim_dt[, .(sd(dTIR, na.rm=T), sd(dTIR_wins, na.rm=T))]

##           V1         V2
## 1: 0.1172603 0.09731258

full_daily_dt_incl[!is.na(dTIR_7dr), dTIR_7dr_wins := pmin(pmax(-0.2, dTIR_7dr), 0.2)]

Summarize first-year data by patient

stepwise_selection_dt = full_daily_dt_incl[TW_7dr>0.5 & TW_7dr_lead > 0.5 & TIR_7dr<0.7]

pats_dt = stepwise_selection_dt[,.(
  n_clinic_remote_contact = sum(contact, na.rm=T),
  contact_m_dTIR = mean(ifelse(contact==1, dTIR_7dr_wins, NA), na.rm=T),
  contact_sd_dTIR = sd(ifelse(contact==1, dTIR_7dr_wins, NA), na.rm=T)
  ), by = 'pat_id']

# Shrink contact_m_dTIR
m_contact_m_dTIR_shrunk = pats_dt[,weighted.mean(contact_m_dTIR, n_clinic_remote_contact)]
var_contact_m_dTIR = pats_dt[, var(contact_m_dTIR, na.rm=T)]
pats_dt[, pat_var_contact_m_dTIR := ifelse(n_clinic_remote_contact>=10, contact_sd_dTIR^2, var_contact_m_dTIR)]
pats_dt[, pat_se2_contact_m_dTIR := pat_var_contact_m_dTIR/n_clinic_remote_contact]
pats_dt[, wt := var_contact_m_dTIR/(var_contact_m_dTIR+pat_se2_contact_m_dTIR)]
pats_dt[, contact_m_dTIR_shrunk := wt*contact_m_dTIR + (1-wt)*m_contact_m_dTIR_shrunk]

Fit P(contact | eligibility, X) using first-year data

rf_dt = full_weekly_dt[in_first_year==1 & time_worn>0]

rf_X = model.matrix( ~ time_worn + in_range + hypo + extreme_hypo + hyper + extreme_hyper + gri + low_tir_flag + high_hypo_flag + high_extreme_hypo_flag + num_flags, rf_dt)

set.seed(1814)
rf = regression_forest(
  X = rf_X,
  Y = rf_dt[, contact],
  tune.parameters = "all"
)

rf_varimp_dt = data.table(
  var = colnames(rf_X),
  imp = variable_importance(rf)[,1])
rf_varimp_dt[order(-imp)]

##                        var          imp
##  1:              num_flags 0.6000134307
##  2:                    gri 0.1361812611
##  3:              time_worn 0.0915668878
##  4:               in_range 0.0570125056
##  5:                   hypo 0.0428485298
##  6:                  hyper 0.0271907043
##  7:           extreme_hypo 0.0212578310
##  8:          extreme_hyper 0.0181721510
##  9:           low_tir_flag 0.0050342284
## 10:         high_hypo_flag 0.0005117192
## 11: high_extreme_hypo_flag 0.0002107511
## 12:            (Intercept) 0.0000000000

rf_dt[, rf_pred_contact := rf$predictions]
rf_dt[, 1-sd(rf_pred_contact-contact)/sd(contact)]

## [1] 0.04425741

rf_dt[, cor(rf_pred_contact, contact)]

## [1] 0.2942437

ggplot(rf_dt, aes(x=num_flags, y=rf_pred_contact)) + stat_summary(fun.data="mean_cl_normal", geom = 'pointrange')

ggplot(rf_dt, aes(x=in_range, y=rf_pred_contact)) + geom_smooth()

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# add preds to sim_dt
sim_dt_X = model.matrix( ~ time_worn + in_range + hypo + extreme_hypo + hyper + extreme_hyper + gri + low_tir_flag + high_hypo_flag + high_extreme_hypo_flag + num_flags, sim_dt)

sim_dt[, pred_contact := predict(rf, sim_dt_X)$predictions]
sim_dt[eligible_for_review==1 & contacts_to_go>0, cor(pred_contact, contact)]

## [1] 0.249747

ggplot(sim_dt[eligible_for_review==1 & contacts_to_go>0], aes(x=pred_contact, y=contact)) + geom_smooth() + geom_abline(linetype="dashed") + theme_bw()

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Data sizes

fsim_dt = sim_dt[time_worn>0 & lead_eligible_for_review==0 & (contacts_to_go-contact)>0]

# N patients
fsim_dt[, uniqueN(pat_id)]

## [1] 94

# Year 1
full_weekly_dt[in_first_year==1 & time_worn>0 & (contacts_to_go-contact)>0, .(
  patient_weeks = .N,
  contacts = sum(contact)
)]

##    patient_weeks contacts
## 1:          9934     3222

# Year 2
full_weekly_dt[in_first_year==0 & time_worn>0 & (contacts_to_go-contact)>0, .(
  patient_weeks = .N,
  reviewed = sum(eligible_for_review),
  contacts = sum(contact)
)]

##    patient_weeks reviewed contacts
## 1:          4648     1106      648

# Total
full_weekly_dt[time_worn>0 & (contacts_to_go-contact)>0, .(
  patient_weeks = .N,
  reviewed = sum(eligible_for_review),
  contacts = sum(contact)
)]

##    patient_weeks reviewed contacts
## 1:         14582    11040     3870

Simulations

Define functions used to target patient-weeks in simulations

low_tir_inclusion = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
  patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
  setorder(patient_weeks_dt, in_range, randnum)
  return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}

random_inclusion = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
  patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
  setorder(patient_weeks_dt, randnum)
  return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}

pcontactonly = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
  patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
  setorder(patient_weeks_dt, -pred_contact, randnum)
  return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}

p_contact_exp_dTIR = function(patient_weeks_dt, first_year_dt, n_patient_weeks_to_sample) {
  patient_weeks_dt = merge(patient_weeks_dt, pats_dt[,.(pat_id, contact_m_dTIR_shrunk)], by='pat_id', all.x=T)
  patient_weeks_dt[is.na(contact_m_dTIR_shrunk), contact_m_dTIR_shrunk := 0]
  patient_weeks_dt[, randnum := rnorm(nrow(patient_weeks_dt))]
  patient_weeks_dt[, comp := pred_contact*contact_m_dTIR_shrunk]
  setorder(patient_weeks_dt, -comp, randnum)
  return(patient_weeks_dt[1:n_patient_weeks_to_sample])
}

Function to run one simulation of an MRT

run_simulation = function(seed, sim_dt, inclusion_fn, n_weeks, n_patients_pop_per_week, n_patients_included_per_week) {
  
  eligible_dt = sim_dt[eligible_for_review==1]
  ineligible_dt = sim_dt[eligible_for_review==0]
  
  # Create output by iterating over n_weeks
  output_dt = rbindlist(lapply(1:n_weeks, function(s) {
    
    # Sample population available for experiment in this week
    # Sample eligible and ineligible pops
    set.seed(seed*1E6 + s)
    eligible_pop_dt = eligible_dt[sample.int(nrow(eligible_dt), floor(n_patients_pop_per_week/2), replace = TRUE)]
    ineligible_pop_dt = ineligible_dt[sample.int(nrow(ineligible_dt), ceiling(n_patients_pop_per_week/2), replace = TRUE)]
    
    # Select experiment population from each pop 
    eligible_exp_dt = inclusion_fn(eligible_pop_dt, data.table(), floor(n_patients_included_per_week/2))
    ineligible_exp_dt = inclusion_fn(ineligible_pop_dt, data.table(), ceiling(n_patients_included_per_week/2))
    
    exp_dt = rbind(eligible_exp_dt, ineligible_exp_dt)
    exp_dt[, sim_week_id := s]
    return(exp_dt)
  }))
  
  return(output_dt)
}

Run a bunch of simulations for each method with different inclusion FNs, n_patients_included_per_week, n_weeks.

sim_grid = expand.grid(
  n_pats = c(15, 20, 25, seq(30,200, 5)),
  n_weeks = c(26, 52)
)

run_many_sims_calc_power = function(n_sims, sim_grid, sim_dt, inclusion_fn, n_patients_pop_per_week) {
  
  # Detect the number of available cores and create cluster
  cl <- parallel::makeCluster(detectCores())
  
  clusterExport(cl, varlist=c('run_simulation','data.table', 'pats_dt'))
  clusterExport(cl, c('sim_dt','sim_grid','inclusion_fn','n_patients_pop_per_week','run_simulation'), current_env())
    
  # Activate cluster for foreach library
  doParallel::registerDoParallel(cl)

  # Outer loop across sim_grid
  out = rbindlist(lapply(1:nrow(sim_grid), function(i) {

    # Inner loop across n_sims
    out_dt = foreach::foreach(s = 1:n_sims, .combine = rbind) %dopar% {

      library(data.table)

      out_dt = run_simulation(
        seed=s,
        sim_dt,
        inclusion_fn,
        n_weeks = sim_grid$n_weeks[i],
        n_patients_pop_per_week = n_patients_pop_per_week,
        n_patients_included_per_week = sim_grid$n_pats[i])

      m = lm(dTIR ~ eligible_for_review, data = out_dt)
      
      coef = summary(m)$coefficients["eligible_for_review", "Estimate"]
      se = summary(m)$coefficients["eligible_for_review", "Std. Error"]
      pval = summary(m)$coefficients["eligible_for_review", "Pr(>|t|)"]

      result_dt = data.table(
        n_weeks = sim_grid$n_weeks[i],
        n_patients_pop_per_week = n_patients_pop_per_week,
        n_patients_included_per_week = sim_grid$n_pats[i],
        n_unique_pats_included_from_input_data = out_dt[,uniqueN(pat_id)],
        permuted=FALSE,
        run_idx = s,
        pval = pval,
        coef = coef,
        se = se)
      
      # Add permuted version as baseline (in supplement)
      set.seed(1814)
      out_dt$eligible_for_review = out_dt$eligible_for_review[sample.int(length(out_dt$eligible_for_review))]
      m = lm(dTIR ~ eligible_for_review, data = out_dt)
      coef = summary(m)$coefficients["eligible_for_review", "Estimate"]
      se = summary(m)$coefficients["eligible_for_review", "Std. Error"]
      pval = summary(m)$coefficients["eligible_for_review", "Pr(>|t|)"]
      
      permuted_result_dt = data.table(
        n_weeks = sim_grid$n_weeks[i],
        n_patients_pop_per_week = n_patients_pop_per_week,
        n_patients_included_per_week = sim_grid$n_pats[i],
        n_unique_pats_included_from_input_data = out_dt[,uniqueN(pat_id)],
        permuted=TRUE,
        run_idx = s,
        pval = pval,
        coef = coef,
        se = se)
      
      return(rbind(result_dt, permuted_result_dt))
      
    }
    
    out_dt
  }))
  
  parallel::stopCluster(cl)
  
  out
}

N_SIMS = 500
N_PATIENTS_IN_POP = 200

fsim_dt = sim_dt[time_worn>0 & lead_eligible_for_review==0 & (contacts_to_go-contact)>0]

many_sims_random = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, random_inclusion, n_patients_pop_per_week=N_PATIENTS_IN_POP)
many_sims_low_tir = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, low_tir_inclusion, n_patients_pop_per_week=N_PATIENTS_IN_POP)
many_sims_pcontactonly = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, pcontactonly, n_patients_pop_per_week=N_PATIENTS_IN_POP)
many_sims_p_contact_exp_dTIR = run_many_sims_calc_power(N_SIMS, sim_grid, fsim_dt, p_contact_exp_dTIR, n_patients_pop_per_week=N_PATIENTS_IN_POP)

Make plots

ggplot_opts = function(y_colname, hline_yval, yaxis_max) {
  return(list(
     geom_line(aes_string(x='n_patients_included_per_week', color='factor(n_weeks)', linetype='permuted', y=y_colname)),
     geom_point(aes_string(x='n_patients_included_per_week', color='factor(n_weeks)', shape='permuted', y=y_colname)),
     theme_bw(),
     geom_hline(yintercept = hline_yval, linetype="dashed"),
     coord_cartesian(ylim=c(0,yaxis_max)),
     xlab("N patients targeted weekly (of 200 total)"),
     scale_color_npg(),
     scale_shape_manual(values = c(16, 1)),
     ylab(NULL),
     theme(legend.position = "none")
    ))} 

agg_output = function(out) {
  agg_out = out[,.(
    power = mean(pval < 0.05 & coef>0),
    m_se = mean(se),
    m_coef = mean(coef),
    m_coef_stat_sig = mean(ifelse(pval<0.05, coef, NA), na.rm=T),
    m_n_unique_pats_included_from_input_data = mean(n_unique_pats_included_from_input_data)
  ), by=c('n_weeks', 'n_patients_pop_per_week', 'n_patients_included_per_week', 'permuted')]
  agg_out[, coef_bias := m_coef_stat_sig - m_coef]
  return(agg_out)
}

make_grid_of_plots = function(y_colname, ylabel, hline_yval, yaxis_max) {
  p1 = ggplot(agg_output(many_sims_random)) + ggtitle("Random") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  p2 = ggplot(agg_output(many_sims_low_tir)) + ggtitle("Ascending TIR") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  p4 = ggplot(agg_output(many_sims_pcontactonly)) + ggtitle("Descending P(contact)") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  p6 = ggplot(agg_output(many_sims_p_contact_exp_dTIR)) + ggtitle("P(contact) * E[dTIR | contact]") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  
  if(y_colname == 'm_coef') {
    
    ribbon = list(geom_ribbon(aes_string(ymin = paste(y_colname,'- 2*m_se'), ymax = paste(y_colname,'+ 2*m_se'), x='n_patients_included_per_week'),
                          alpha=0.1, data = function(x){x[permuted == FALSE & n_weeks==52]}))
    p1 = p1 + ribbon
    p2 = p2 + ribbon
    p4 = p4 + ribbon
    p6 = p6 + ribbon
  }
  
  grid.arrange(p1 + ylab(ylabel), p2, p4 + ylab(ylabel), p6, nrow = 2)
}

# Power
make_grid_of_plots('power', 'Estimated Power', 0.9,1)

# ATE
make_grid_of_plots('m_coef', 'Estimated ATE', 0,0.03)

# Variance of ATE Estimate (SE)
make_grid_of_plots('m_se','SE(ATE)', 0,0.02)

# N pats from historical data
make_grid_of_plots('m_n_unique_pats_included_from_input_data','N patients', 0,100)

Figures to export

make_four_grid_of_plots = function(y_colname, ylabel, hline_yval, yaxis_max) {
  
  # Only 52 week trial if ate
  if(y_colname == 'm_coef') {
    f_many_sims_random = many_sims_random[permuted==0 & n_weeks==52]
    f_many_sims_low_tir = many_sims_low_tir[permuted==0 & n_weeks==52]
    f_many_sims_pcontactonly = many_sims_pcontactonly[permuted==0 & n_weeks==52]
    f_many_sims_p_contact_exp_dTIR = many_sims_p_contact_exp_dTIR[permuted==0 & n_weeks==52]
    
  } else {
    f_many_sims_random = many_sims_random[permuted==0]
    f_many_sims_low_tir = many_sims_low_tir[permuted==0]
    f_many_sims_pcontactonly = many_sims_pcontactonly[permuted==0]
    f_many_sims_p_contact_exp_dTIR = many_sims_p_contact_exp_dTIR[permuted==0]
    
  }
  
  p1 = ggplot(agg_output(f_many_sims_random)) + ggtitle("Random") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  p2 = ggplot(agg_output(f_many_sims_low_tir)) + ggtitle("Ascending TIR") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  p4 = ggplot(agg_output(f_many_sims_pcontactonly)) + ggtitle("Descending P(contact)") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  p6 = ggplot(agg_output(f_many_sims_p_contact_exp_dTIR)) + ggtitle("P(contact) * E[dTIR | contact]") + ggplot_opts(y_colname, hline_yval, yaxis_max)
  
  if(y_colname == 'm_coef') {
    
    ribbon = list(geom_ribbon(aes_string(ymin = paste(y_colname,'- 2*m_se'), ymax = paste(y_colname,'+ 2*m_se'), x='n_patients_included_per_week'),
                          alpha=0.1, data = function(x){x[permuted == FALSE & n_weeks==52]}))
    p1 = p1 + ribbon
    p2 = p2 + ribbon
    p4 = p4 + ribbon
    p6 = p6 + ribbon
  }
  
  grid.arrange(p1 + ylab(ylabel), p2, p4 + ylab(ylabel), p6, nrow = 2)
}

# Power
make_four_grid_of_plots('power', 'Estimated PPOS', 0.9,1)

make_four_grid_of_plots('m_coef', 'Estimated ATE', 0,0.03)

# Save SVGs
ggsave(file="~/Downloads/ates.svg", plot=make_four_grid_of_plots('m_coef', 'Estimated ATE', 0,0.03), width=8, height=4)

ggsave(file="~/Downloads/ppos.svg", plot=make_four_grid_of_plots('power', 'Estimated PPOS', 0.9,1), width=8, height=4)

# Save one with legend
ggsave(file="~/Downloads/legend.svg", 
       plot=ggplot(agg_output(many_sims_random)) + ggplot_opts('m_coef', 0, 0.05) + scale_linetype(guide = 'none') + scale_shape(guide = 'none') +
  guides(color=guide_legend(title="N weeks")) + theme(legend.position = "top"), 
  width=8, height=4)

## Scale for 'shape' is already present. Adding another scale for 'shape', which
## will replace the existing scale.

Table

# N patients needed for 90% PPOS for each duration and targeting policy
rbind(
  agg_output(many_sims_random[permuted==0])[, policy:="Random"],
  agg_output(many_sims_low_tir[permuted==0])[, policy:="ascTIR"],
  agg_output(many_sims_pcontactonly[permuted==0])[, policy:="descPcontact"],
  agg_output(many_sims_p_contact_exp_dTIR[permuted==0])[, policy:="descPcontactEdTIR"])[
    power>=0.9, .(min_n = min(n_patients_included_per_week)), by=c('policy','n_weeks')]

##               policy n_weeks min_n
## 1:            Random      26   190
## 2:            Random      52   105
## 3:            ascTIR      26   115
## 4:            ascTIR      52    90
## 5:      descPcontact      26   135
## 6:      descPcontact      52    90
## 7: descPcontactEdTIR      26    90
## 8: descPcontactEdTIR      52    15

RCT Power when reviewing 1/4 of patients each week (50)

z_alpha = qnorm(0.975) # z(1 - alpha/2)
z_beta = qnorm(0.9) # z(1 - beta)

sd = full_weekly_dt[time_worn>0 & week_id<=26, .(m_tir=sd(in_range, na.rm=T)), by='pat_id'][,sd(m_tir, na.rm=T)]
print(sd)

## [1] 0.06102978

num_weeks_effect_lingers = 8
frac_reviewed = 1/4
multiplier_to_rct_ate = num_weeks_effect_lingers*frac_reviewed
print(multiplier_to_rct_ate)

## [1] 2

ates = rbind(
  agg_output(many_sims_random[permuted==0])[, policy:="Random"],
  agg_output(many_sims_low_tir[permuted==0])[, policy:="ascTIR"],
  agg_output(many_sims_pcontactonly[permuted==0])[, policy:="descPcontact"],
  agg_output(many_sims_p_contact_exp_dTIR[permuted==0])[, policy:="descPcontactEdTIR"])[
    n_patients_included_per_week==50 & n_weeks==52, .(m_ate = mean(m_coef)), by=c('policy')]
ates[, rct_ate := multiplier_to_rct_ate*m_ate]

get_ss = function(e) ceiling(2*2*((z_alpha+z_beta)/(e/sd))^2)
ates[, pats_needed_in_rct_for_90power := get_ss(rct_ate)]
ates

##               policy       m_ate     rct_ate pats_needed_in_rct_for_90power
## 1:            Random 0.009924593 0.019849185                            398
## 2:            ascTIR 0.003219785 0.006439569                           3776
## 3:      descPcontact 0.012519879 0.025039758                            250
## 4: descPcontactEdTIR 0.014078952 0.028157903                            198

Save workspace

save.image("~/Downloads/powerenv.RData")