library(ggplot2)
library(data.table)
Load Redcap data and patient population with remotely monitored indicator
setwd("~/Downloads/CGM/")
redcap = fread("CGMPilotStudyCGMInNe_DATA_2021-03-03_2025.csv")
# Format column types
redcap[,`:=`(
dob = as.Date(dob, "%m/%d/%y"),
diab_onset = as.Date(diab_onset, "%Y-%m-%d"),
cgm_start_date = as.Date(cgm_start_date, "%m/%d/%y"),
redcap_event_name = as.character(redcap_event_name)
)]
baseline_info = unique(redcap[redcap_event_name=="baseline_data_arm_1", .(record_id, dob, diab_onset, cgm_start_date)])
# Add a variable that determines whether a patient is remotely monitored
contact_type = redcap[, .(remotely_monitored = max(ifelse(redcap_event_name == "remote_monitoring_arm_1", 1, 0))), by='record_id']
baseline_info = merge(baseline_info, contact_type)
rm(contact_type)
Get Remote contacts
contact_information = redcap[redcap_event_name != "baseline_data_arm_1",
type := ifelse(substr(redcap_event_name, 1, 4) == "visi", "visit", "remote")]
remote_data = contact_information[type == "remote" & contact_week == 1,
.(record_id, contact_week, contact_method___1,
initiator, contact_reason___1, adjustments,
date_contact, type)]
# Want to differentiate between initiator and adjustments made
remote_data[(initiator == 2 | initiator == 1) & adjustments == 0, type_detail := "remote_patient_noadj"]
remote_data[(initiator == 2 | initiator == 1) & adjustments == 1, type_detail := "remote_patient_adj"]
remote_data[initiator == 3 & adjustments == 0, type_detail := "remote_careteam_noadj"]
remote_data[initiator == 3 & adjustments == 1, type_detail := "remote_careteam_adj"]
remote_data[is.na(type_detail), type_detail := "remote_unknown"]
Combine contacts data
all_contacts = remote_data[,.(record_id, date_contact, type, type_detail)]
all_contacts[, `:=`(
date_contact = as.Date(date_contact, "%m/%d/%y"),
week_contact = cut(as.Date(date_contact, "%m/%d/%y"), "week")
)]
# WEEKLY Contacts
weekly_contacts = copy(all_contacts)
setorder(weekly_contacts, record_id, week_contact, -type, -type_detail)
weekly_contacts[, row_rank := seq_len(.N), by=c('record_id','week_contact')]
weekly_contacts = weekly_contacts[row_rank == 1]
weekly_contacts[, rows_by_record_date := .N, by=c('record_id','week_contact')]
# make sure no duplicate record-dates
stopifnot(nrow(weekly_contacts) - uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)
# make sure no record-dates lost
stopifnot(uniqueN(weekly_contacts[,.(record_id, week_contact)]) -
uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)
# drop helper columns
weekly_contacts[, row_rank := NULL]
weekly_contacts[, rows_by_record_date := NULL]
# Remote contacts by month
#weekly_contacts[, month_cpmtact]
remote_careteam_contacts_by_week = weekly_contacts[
type_detail %in% c("remote_careteam_adj"),.(
n = .N
), by=c('week_contact', 'type_detail')]
remote_careteam_contacts_by_week[, week_contact := as.Date(week_contact)]
# ggplot(remote_careteam_contacts_by_week, aes(x=week_contact, y=n, color=type_detail)) + geom_line() +
# scale_x_date(date_breaks = "1 month",
# labels=scales::date_format("%m-%y"),
# limits = as.Date(c('2019-07-01','2021-01-31'))) +
# geom_vline(xintercept = as.Date('2020-05-13'), linetype = "dashed")
Load CGM data for every patient
# setwd("~/Downloads/CGM/Trace Data_0710/Clean/")
# cgm_files = grep("dexcom_clean_record_id_", list.files("."), value = T)
# cgm_data = rbindlist(lapply(cgm_files, fread))[not_filled == 0, .(record_id, time_edit, glucose, dexcom_date)]
setwd("~/Downloads/CGM/dexcom_20210118/")
cgm_files = grep("dexcom_clean_record_id_", list.files("."), value = T)
cgm_data = rbindlist(lapply(cgm_files, fread))
cgm_data = cgm_data[, .(record_id, time_edit = ts, glucose = bg, dexcom_date = as.Date(ts))]
For each patient-day and patient-week, generate aggregate CGM metrics
cgm_data[, `:=`(
inrange = ifelse(glucose >= 70 & glucose <= 180, 1, 0),
hyper = ifelse(glucose > 180, 1, 0),
hypo1 = ifelse(glucose <= 70, 1, 0),
hypo2 = ifelse(glucose <= 54, 1, 0),
dexcom_date = as.Date(dexcom_date),
dexcom_week = cut(as.Date(dexcom_date), 'week')
)]
patient_weeks = cgm_data[, .(
TIR = mean(inrange),
hyper = mean(hyper),
hypo1 = mean(hypo1),
hypo2 = mean(hypo2)
), by=c('record_id', 'dexcom_week')]
Combine CGM-based states and contact data (visit or remote contact with adjustments)
patient_week_contacts = merge(patient_weeks,
weekly_contacts[type == "visit" | type_detail %in% c("remote_careteam_adj"),
num_date := as.numeric(as.Date(week_contact))],
all.x = TRUE,
by.x=c('record_id','dexcom_week'),
by.y=c('record_id','week_contact'))
Add cumulative number of contacts in last year
setorder(patient_week_contacts, record_id, dexcom_week)
patient_week_contacts[, dexcom_week := as.Date(as.character(dexcom_week))]
patient_week_contacts[, contact := 0]
patient_week_contacts[!is.na(date_contact) & TIR<0.7, contact := 1]
# contact in last year, inefficient method
patient_week_contacts$rolling_contacts_last_year = sapply(1:nrow(patient_week_contacts), function(i) {
id = patient_week_contacts[i,record_id]
oneyearago = patient_week_contacts[i,dexcom_week - 365]
patient_week_contacts[record_id==id & dexcom_week > oneyearago, sum(contact, na.rm = T)]})
patient_week_contacts[, summary(rolling_contacts_last_year)]
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 0.000 1.000 7.055 12.000 50.000
Encode delta TIR as min(0.10, rolling_contacts_last_year*0.02)
patient_week_contacts[,effect_of_obs_contacts := pmin(0.1, 0.02*rolling_contacts_last_year)]
#ggplot(patient_week_contacts, aes(x=effect_of_obs_contacts)) + geom_density()
Back out “baseline” TIR without contacts
patient_week_contacts[, baseline_TIR := pmax(0,TIR - effect_of_obs_contacts)]
#ggplot(patient_week_contacts, aes(x=baseline_TIR)) + geom_density()
Filter to patients that are present in at least 39 weeks in 2020
f_patient_week_contacts = patient_week_contacts[dexcom_week>=as.Date("2020-01-01") & dexcom_week<as.Date("2020-12-31")]
f_patient_week_contacts_pagg = f_patient_week_contacts[,.(n_weeks=.N),by='record_id']
ptokeep = f_patient_week_contacts_pagg[n_weeks>=39,record_id]
f_patient_week_contacts = f_patient_week_contacts[record_id %in% ptokeep]
f_patient_week_contacts = f_patient_week_contacts[,.(record_id,dexcom_week,baseline_TIR)]
f_patient_week_contacts[,uniqueN(record_id)]
## [1] 90
Bootstrap Sample to large patient population
set.seed(1814)
N_PATIENTS = 1000
sampled_record_ids = sample(f_patient_week_contacts[,unique(record_id)], 1000, TRUE)
bs_patient_week_contacts = merge(f_patient_week_contacts, data.table(record_id=sampled_record_ids), allow.cartesian=TRUE)
# add new record_ids
bs_patient_week_contacts[, new_record_id := paste0(as.character(record_id), "-", seq_len(.N)), by=c('record_id','dexcom_week')]
bs_patient_week_contacts[, record_id := NULL]
Now, simulate contacts with a given capacity each week, contacting patients with TIR < 0.7 in order of TIR (ascending)
sim_contacts = function(dt, K) {
out_dt = copy(dt)
out_dt[,cf_TIR := 0]
out_dt[,contact := 0]
for(wk in out_dt[,unique(dexcom_week)]) {
setorder(out_dt, dexcom_week, cf_TIR, new_record_id)
out_dt[, cum_contacts := cumsum(contact), by='new_record_id']
out_dt[, effect_of_contacts := pmin(0.1, 0.02*cum_contacts)]
out_dt[, cf_TIR := pmin(1, baseline_TIR + effect_of_contacts)]
out_dt[dexcom_week == wk, rnk := seq_len(.N)]
out_dt[dexcom_week == wk, contact := ifelse(cf_TIR<0.7 & rnk <= K, 1, 0)]
}
return(out_dt)
}
test_dt = sim_contacts(bs_patient_week_contacts, 10)
plot_dt = melt(test_dt[,.(
mean_cf_TIR = mean(cf_TIR),
p10_cf_TIR = quantile(cf_TIR, 0.1),
mean_baseline_TIR = mean(baseline_TIR),
n_contacts = sum(contact)
), by='dexcom_week'], id.vars = c('dexcom_week'))
ggplot(plot_dt, aes(x=dexcom_week, y=value)) + geom_line() +
scale_x_date(date_breaks = "1 month",
labels=scales::date_format("%m"),
limits = as.Date(c('2020-01-01','2020-12-31'))) +
facet_wrap(~variable, scales="free") + theme_bw() + ggtitle("Single simulation with K=10")
Run a bunch of K values and get delta in mean TIR bw cf and baseline
Ks = seq(from=0, to=600, by=10)
all_sims_dt = rbindlist(lapply(Ks, function(k){
sim_dt = sim_contacts(bs_patient_week_contacts, k)
delta_tir = sim_dt[dexcom_week == max(sim_dt$dexcom_week),mean(cf_TIR) - mean(baseline_TIR)]
return(data.table(
K = k,
delta_mean_tir = delta_tir
))
}))
# Marginal utility of each increase in K
setorder(all_sims_dt, K)
all_sims_dt[, marginal_delta_mean_tir := delta_mean_tir - shift(delta_mean_tir, fill=0)]
ggplot(
melt(all_sims_dt, id.vars = 'K'), aes(x=K, y=value)) + geom_line() + theme_bw() +
ggtitle("Delta mean TIR / marginal delta vs K") + facet_wrap(~variable, scales = "free_y", ncol = 1)