library(ggplot2)
library(data.table)

Estimate TIR Impact of Different Ks (“Shadow Prices” of CDE Capacity)

Assumptions:

  • Effect of contacts on patients’ TIR peaks at 10pp, and each contact increases TIR by 2pp, i.e.  \[\mathtt{Effect\ of\ Contacts\ on\ TIR} = min(0.10,\ 0.02\ * \mathtt{Num\ of\ Contacts\ in\ the\ last\ year})\] and \[\mathtt{TIR} = \mathtt{Baseline\ TIR} + \mathtt{Effect\ of\ Contacts\ on\ TIR}\]
  • CDEs can review up to K patients each week with TIR below 70%, ranked by ascending TIR (after I apply effect of previous contacts).
  • I simulate 1000 patients using trajectories from real CGM data and compare for Ks between 0 and 600 the population mean TIR vs the baseline mean TIR without contacts to estimate the “TIR utility” of increasing K.

Load Redcap data and patient population with remotely monitored indicator

setwd("~/Downloads/CGM/")
redcap = fread("CGMPilotStudyCGMInNe_DATA_2021-03-03_2025.csv")

# Format column types
redcap[,`:=`(
  dob = as.Date(dob, "%m/%d/%y"),
  diab_onset = as.Date(diab_onset, "%Y-%m-%d"),
  cgm_start_date = as.Date(cgm_start_date, "%m/%d/%y"),
  redcap_event_name = as.character(redcap_event_name)
)]

baseline_info = unique(redcap[redcap_event_name=="baseline_data_arm_1", .(record_id, dob, diab_onset, cgm_start_date)])

# Add a variable that determines whether a patient is remotely monitored
contact_type = redcap[, .(remotely_monitored = max(ifelse(redcap_event_name == "remote_monitoring_arm_1", 1, 0))), by='record_id']
baseline_info = merge(baseline_info, contact_type)

rm(contact_type)

Get Remote contacts

contact_information = redcap[redcap_event_name != "baseline_data_arm_1",
                             type := ifelse(substr(redcap_event_name, 1, 4) == "visi", "visit", "remote")]

remote_data = contact_information[type == "remote" & contact_week == 1, 
                                  .(record_id, contact_week,  contact_method___1,
                                     initiator, contact_reason___1, adjustments,
                                     date_contact, type)]

# Want to differentiate between initiator and adjustments made
remote_data[(initiator == 2 | initiator == 1) & adjustments == 0, type_detail := "remote_patient_noadj"]
remote_data[(initiator == 2 | initiator == 1) & adjustments == 1, type_detail := "remote_patient_adj"]
remote_data[initiator == 3 & adjustments == 0, type_detail := "remote_careteam_noadj"]
remote_data[initiator == 3 & adjustments == 1, type_detail := "remote_careteam_adj"]
remote_data[is.na(type_detail), type_detail := "remote_unknown"]

Combine contacts data

all_contacts = remote_data[,.(record_id, date_contact, type, type_detail)]
all_contacts[, `:=`(
  date_contact = as.Date(date_contact, "%m/%d/%y"),
  week_contact = cut(as.Date(date_contact, "%m/%d/%y"), "week")
)]

# WEEKLY Contacts
weekly_contacts = copy(all_contacts)
setorder(weekly_contacts, record_id, week_contact, -type, -type_detail)
weekly_contacts[, row_rank := seq_len(.N), by=c('record_id','week_contact')]
weekly_contacts = weekly_contacts[row_rank == 1]
weekly_contacts[, rows_by_record_date := .N, by=c('record_id','week_contact')]

# make sure no duplicate record-dates
stopifnot(nrow(weekly_contacts) - uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)

# make sure no record-dates lost
stopifnot(uniqueN(weekly_contacts[,.(record_id, week_contact)]) - 
  uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)

# drop helper columns
weekly_contacts[, row_rank := NULL]
weekly_contacts[, rows_by_record_date := NULL]
# Remote contacts by month
#weekly_contacts[, month_cpmtact]

remote_careteam_contacts_by_week = weekly_contacts[
  type_detail %in% c("remote_careteam_adj"),.(
    n = .N
  ), by=c('week_contact', 'type_detail')]
remote_careteam_contacts_by_week[, week_contact := as.Date(week_contact)]

# ggplot(remote_careteam_contacts_by_week, aes(x=week_contact, y=n, color=type_detail)) + geom_line() +
#   scale_x_date(date_breaks = "1 month", 
#                labels=scales::date_format("%m-%y"), 
#                limits = as.Date(c('2019-07-01','2021-01-31'))) +
#   geom_vline(xintercept = as.Date('2020-05-13'), linetype = "dashed")

Load CGM data for every patient

# setwd("~/Downloads/CGM/Trace Data_0710/Clean/")
# cgm_files = grep("dexcom_clean_record_id_", list.files("."), value = T)
# cgm_data = rbindlist(lapply(cgm_files, fread))[not_filled == 0, .(record_id, time_edit, glucose, dexcom_date)]

setwd("~/Downloads/CGM/dexcom_20210118/")
cgm_files = grep("dexcom_clean_record_id_", list.files("."), value = T)
cgm_data = rbindlist(lapply(cgm_files, fread))
cgm_data = cgm_data[, .(record_id, time_edit = ts, glucose = bg, dexcom_date = as.Date(ts))]

For each patient-day and patient-week, generate aggregate CGM metrics

cgm_data[, `:=`(
  inrange = ifelse(glucose >= 70 & glucose <= 180, 1, 0),
  hyper = ifelse(glucose > 180, 1, 0),
  hypo1 = ifelse(glucose <= 70, 1, 0),
  hypo2 = ifelse(glucose <= 54, 1, 0),
  dexcom_date = as.Date(dexcom_date),
  dexcom_week = cut(as.Date(dexcom_date), 'week')
)]

patient_weeks = cgm_data[, .(
  TIR = mean(inrange),
  hyper = mean(hyper),
  hypo1 = mean(hypo1),
  hypo2 = mean(hypo2)
), by=c('record_id', 'dexcom_week')]

Combine CGM-based states and contact data (visit or remote contact with adjustments)

patient_week_contacts = merge(patient_weeks, 
                              weekly_contacts[type == "visit" | type_detail %in% c("remote_careteam_adj"),
                                              num_date := as.numeric(as.Date(week_contact))], 
                              all.x = TRUE,
                              by.x=c('record_id','dexcom_week'),
                              by.y=c('record_id','week_contact'))

Add cumulative number of contacts in last year

setorder(patient_week_contacts, record_id, dexcom_week)
patient_week_contacts[, dexcom_week := as.Date(as.character(dexcom_week))]

patient_week_contacts[, contact := 0]
patient_week_contacts[!is.na(date_contact) & TIR<0.7, contact := 1]

# contact in last year, inefficient method
patient_week_contacts$rolling_contacts_last_year = sapply(1:nrow(patient_week_contacts), function(i) {
  id = patient_week_contacts[i,record_id]
  oneyearago = patient_week_contacts[i,dexcom_week - 365]
  patient_week_contacts[record_id==id & dexcom_week > oneyearago, sum(contact, na.rm = T)]})

patient_week_contacts[, summary(rolling_contacts_last_year)]
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   1.000   7.055  12.000  50.000

Encode delta TIR as min(0.10, rolling_contacts_last_year*0.02)

patient_week_contacts[,effect_of_obs_contacts := pmin(0.1, 0.02*rolling_contacts_last_year)]
#ggplot(patient_week_contacts, aes(x=effect_of_obs_contacts)) + geom_density()

Back out “baseline” TIR without contacts

patient_week_contacts[, baseline_TIR := pmax(0,TIR - effect_of_obs_contacts)]
#ggplot(patient_week_contacts, aes(x=baseline_TIR)) + geom_density()

Filter to patients that are present in at least 39 weeks in 2020

f_patient_week_contacts = patient_week_contacts[dexcom_week>=as.Date("2020-01-01") & dexcom_week<as.Date("2020-12-31")]
f_patient_week_contacts_pagg = f_patient_week_contacts[,.(n_weeks=.N),by='record_id']
ptokeep = f_patient_week_contacts_pagg[n_weeks>=39,record_id]
f_patient_week_contacts = f_patient_week_contacts[record_id %in% ptokeep]
f_patient_week_contacts = f_patient_week_contacts[,.(record_id,dexcom_week,baseline_TIR)]
f_patient_week_contacts[,uniqueN(record_id)]
## [1] 90

Bootstrap Sample to large patient population

set.seed(1814)
N_PATIENTS = 1000
sampled_record_ids = sample(f_patient_week_contacts[,unique(record_id)], 1000, TRUE)
bs_patient_week_contacts = merge(f_patient_week_contacts, data.table(record_id=sampled_record_ids), allow.cartesian=TRUE)
# add new record_ids
bs_patient_week_contacts[, new_record_id := paste0(as.character(record_id), "-", seq_len(.N)), by=c('record_id','dexcom_week')]
bs_patient_week_contacts[, record_id := NULL]

Now, simulate contacts with a given capacity each week, contacting patients with TIR < 0.7 in order of TIR (ascending)

sim_contacts = function(dt, K) {
  out_dt = copy(dt)
  out_dt[,cf_TIR := 0]
  out_dt[,contact := 0]
  
  for(wk in out_dt[,unique(dexcom_week)]) {
  
    setorder(out_dt, dexcom_week, cf_TIR, new_record_id)
    
    out_dt[, cum_contacts := cumsum(contact), by='new_record_id']
    out_dt[, effect_of_contacts := pmin(0.1, 0.02*cum_contacts)]
    out_dt[, cf_TIR := pmin(1, baseline_TIR + effect_of_contacts)]
    
    out_dt[dexcom_week == wk, rnk := seq_len(.N)]
    out_dt[dexcom_week == wk, contact := ifelse(cf_TIR<0.7 & rnk <= K, 1, 0)]
  }
  
  return(out_dt)
}



test_dt = sim_contacts(bs_patient_week_contacts, 10)

plot_dt = melt(test_dt[,.(
  mean_cf_TIR = mean(cf_TIR),
  p10_cf_TIR = quantile(cf_TIR, 0.1),
  mean_baseline_TIR = mean(baseline_TIR),
  n_contacts = sum(contact)
  ), by='dexcom_week'], id.vars = c('dexcom_week'))


ggplot(plot_dt, aes(x=dexcom_week, y=value)) + geom_line() +
  scale_x_date(date_breaks = "1 month", 
               labels=scales::date_format("%m"), 
               limits = as.Date(c('2020-01-01','2020-12-31'))) +
  facet_wrap(~variable, scales="free") + theme_bw() + ggtitle("Single simulation with K=10")

Run a bunch of K values and get delta in mean TIR bw cf and baseline

Ks = seq(from=0, to=600, by=10)

all_sims_dt = rbindlist(lapply(Ks, function(k){
  sim_dt = sim_contacts(bs_patient_week_contacts, k)
  delta_tir = sim_dt[dexcom_week == max(sim_dt$dexcom_week),mean(cf_TIR) - mean(baseline_TIR)]
  return(data.table(
    K = k,
    delta_mean_tir = delta_tir
  ))
}))

# Marginal utility of each increase in K
setorder(all_sims_dt, K)
all_sims_dt[, marginal_delta_mean_tir := delta_mean_tir - shift(delta_mean_tir, fill=0)]

ggplot(
  melt(all_sims_dt, id.vars = 'K'), aes(x=K, y=value)) + geom_line() + theme_bw() +
  ggtitle("Delta mean TIR / marginal delta vs K") + facet_wrap(~variable, scales = "free_y", ncol = 1)