Data loading and massaging

Load Redcap data and patient population with remotely monitored indicator

setwd("~/Downloads/CGM Data/")
redcap = fread("CGMInNewOnsetsFinal_DATA_2020-05-19.csv")

# Format column types
redcap[,`:=`(
  dob = as.Date(dob, "%m/%d/%y"),
  diab_onset = as.Date(diab_onset, "%m/%d/%y"),
  cgm_start_date = as.Date(cgm_start_date, "%m/%d/%y"),
  redcap_event_name = as.character(redcap_event_name)
)]

baseline_info = unique(redcap[redcap_event_name=="baseline_data_arm_1", .(record_id, dob, diab_onset, cgm_start_date)])

# Add a variable that determines whether a patient is remotely monitored
contact_type = redcap[, .(remotely_monitored = max(ifelse(redcap_event_name == "remote_monitoring_arm_1", 1, 0))), by='record_id']
baseline_info = merge(baseline_info, contact_type)

rm(contact_type)

Get contacts (dates and types) for each patient

contact_information = redcap[redcap_event_name != "baseline_data_arm_1",
                             type := ifelse(substr(redcap_event_name, 1, 4) == "visi", "visit", "remote")]

# Visits -- in person and telehealth
visit_data = contact_information[type == "visit" & !is.na(visit_date), 
                                 .(record_id, visit_type, type, visit_date)]
visit_data[, type_detail := ifelse(visit_type == 1, "visit_inperson", ifelse(visit_type == 2, "visit_telehealth", "visit_unknown"))]
visit_data[, num_visits := .N, by='record_id']
visit_data = visit_data[, .(record_id, date_contact = visit_date, type, type_detail, num_visits)]

ggplot(visit_data, aes(x=num_visits)) + geom_bar(stat="count") + scale_x_continuous(breaks=seq(1,max(visit_data$num_visits)))

# Remote Contact
remote_data = contact_information[type == "remote" & contact_week == 1, 
                                  .(record_id, contact_week,  contact_method___1,
                                     initiator, contact_reason___1, adjustments,
                                     date_contact, type)]

# Want to differentiate between initiator and adjustments made
remote_data[(initiator == 2 | initiator == 1) & adjustments == 0, type_detail := "remote_patient_noadj"]
remote_data[(initiator == 2 | initiator == 1) & adjustments == 1, type_detail := "remote_patient_adj"]
remote_data[initiator == 3 & adjustments == 0, type_detail := "remote_careteam_noadj"]
remote_data[initiator == 3 & adjustments == 1, type_detail := "remote_careteam_adj"]
remote_data[is.na(type_detail), type_detail := "remote_unknown"]

table(remote_data$type_detail)

## 
##   remote_careteam_adj remote_careteam_noadj    remote_patient_adj 
##                   491                   563                    74 
##  remote_patient_noadj        remote_unknown 
##                    32                    13

remote_data[, num_remote_contacts := .N, by='record_id']

remote_data = remote_data[,.(record_id, date_contact, type, type_detail, num_remote_contacts)]

ggplot(remote_data, aes(x=num_remote_contacts)) + geom_bar(stat="count") + scale_x_continuous(breaks=seq(1,max(remote_data$num_remote_contacts), 2))

Combine contacts data

all_contacts = rbind(
  remote_data[,.(record_id, date_contact, type, type_detail)], 
  visit_data[,.(record_id, date_contact, type, type_detail)])
all_contacts[, `:=`(
  date_contact = as.Date(date_contact, "%m/%d/%y"),
  week_contact = cut(as.Date(date_contact, "%m/%d/%y"), "week")
)]

# DAILY Contacts
# if two contacts on same date, just keep the visit
daily_contacts = copy(all_contacts)
setorder(daily_contacts, record_id, date_contact, -type, -type_detail)
daily_contacts[, row_rank := seq_len(.N), by=c('record_id','date_contact')]
daily_contacts = daily_contacts[row_rank == 1]
daily_contacts[, rows_by_record_date := .N, by=c('record_id','date_contact')]

# WEEKLY Contacts
weekly_contacts = copy(all_contacts)
setorder(weekly_contacts, record_id, week_contact, -type, -type_detail)
weekly_contacts[, row_rank := seq_len(.N), by=c('record_id','week_contact')]
weekly_contacts = weekly_contacts[row_rank == 1]
weekly_contacts[, rows_by_record_date := .N, by=c('record_id','week_contact')]

# make sure no duplicate record-dates
stopifnot(nrow(daily_contacts) - uniqueN(all_contacts[,.(record_id, date_contact)]) == 0)
stopifnot(nrow(weekly_contacts) - uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)

# make sure no record-dates lost
stopifnot(uniqueN(daily_contacts[,.(record_id, date_contact)]) - 
  uniqueN(all_contacts[,.(record_id, date_contact)]) == 0)
stopifnot(uniqueN(weekly_contacts[,.(record_id, week_contact)]) - 
  uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)

# drop helper columns
daily_contacts[, row_rank := NULL]
daily_contacts[, rows_by_record_date := NULL]
weekly_contacts[, row_rank := NULL]
weekly_contacts[, rows_by_record_date := NULL]

Load CGM data for every patient

setwd("~/Downloads/CGM Data/Trace Data")
cgm_files = grep("dexcom_clean_record_id_", list.files("."), value = T)
cgm_data = rbindlist(lapply(cgm_files, fread))[not_filled == 0, .(record_id, time_edit, glucose, dexcom_date)]

For each patient-day and patient-week, generate aggregate CGM metrics

cgm_data[, `:=`(
  inrange = ifelse(glucose >= 70 & glucose <= 180, 1, 0),
  hyper = ifelse(glucose > 180, 1, 0),
  hypo1 = ifelse(glucose <= 70, 1, 0),
  hypo2 = ifelse(glucose <= 54, 1, 0),
  dexcom_date = as.Date(dexcom_date),
  dexcom_week = cut(as.Date(dexcom_date), 'week')
)]

patient_days = cgm_data[, .(
  TIR = mean(inrange),
  hyper = mean(hyper),
  hypo1 = mean(hypo1),
  hypo2 = mean(hypo2)
), by=c('record_id', 'dexcom_date')]

patient_weeks = cgm_data[, .(
  TIR = mean(inrange),
  hyper = mean(hyper),
  hypo1 = mean(hypo1),
  hypo2 = mean(hypo2)
), by=c('record_id', 'dexcom_week')]


# Add a definition of state into three categories -- currently rather arbitrary cut-offs
patient_days[TIR > 0.7 & hypo1 < 0.04 & hypo2 < 0.01, state := "green"]
patient_days[TIR <= 0.7 & TIR > 0.5 & hypo1 < 0.04 & hypo2 < 0.01, state := "yellow"]
patient_days[(TIR <= 0.5) | hypo1 >= 0.04 | hypo2 >= 0.01, state := "red"]

patient_weeks[TIR > 0.7 & hypo1 < 0.04 & hypo2 < 0.01, state := "green"]
patient_weeks[TIR <= 0.7 & TIR > 0.5 & hypo1 < 0.04 & hypo2 < 0.01, state := "yellow"]
patient_weeks[(TIR <= 0.5) | hypo1 >= 0.04 | hypo2 >= 0.01, state := "red"]

table(patient_weeks$state)

## 
##  green    red yellow 
##   1863   1714    972

Combine CGM-based states and contact data (visits and remote contacts with adjustments)

patient_days_contacts = merge(patient_days, 
                              daily_contacts[type == "visit" | 
                                               type_detail %in% c("remote_careteam_adj", "remote_patient_adj"),
                                             num_date := as.numeric(date_contact)], 
                              all.x = TRUE,
                              by.x=c('record_id','dexcom_date'),
                              by.y=c('record_id','date_contact'))

patient_week_contacts = merge(patient_weeks, 
                              weekly_contacts[type == "visit" | 
                                               type_detail %in% c("remote_careteam_adj", "remote_patient_adj"),
                                              num_date := as.numeric(as.Date(week_contact))], 
                              all.x = TRUE,
                              by.x=c('record_id','dexcom_week'),
                              by.y=c('record_id','week_contact'))

Fill in last and next contact

patient_days_contacts[, num_last_contact_date := num_date]
patient_days_contacts[, num_next_contact_date := num_date]
patient_week_contacts[, num_last_contact_date := num_date]
patient_week_contacts[, num_next_contact_date := num_date]

setorder(patient_days_contacts, record_id, dexcom_date)
setorder(patient_week_contacts, record_id, dexcom_week)

patient_days_contacts[, num_last_contact_date := nafill(num_last_contact_date, "locf"), by = 'record_id']
patient_days_contacts[, num_next_contact_date := nafill(num_next_contact_date, "nocb"), by = 'record_id']
patient_days_contacts[, days_since_last_contact := as.numeric(dexcom_date) - num_last_contact_date]
patient_days_contacts[, days_to_next_contact := num_next_contact_date - as.numeric(dexcom_date)]

patient_week_contacts[, num_last_contact_date := nafill(num_last_contact_date, "locf"), by = 'record_id']
patient_week_contacts[, num_next_contact_date := nafill(num_next_contact_date, "nocb"), by = 'record_id']
patient_week_contacts[, days_since_last_contact := as.numeric(as.Date(dexcom_week)) - num_last_contact_date]
patient_week_contacts[, days_to_next_contact := num_next_contact_date - as.numeric(as.Date(dexcom_week))]

ggplot(melt(patient_week_contacts[,.(days_since_last_contact, days_to_next_contact)]),
            aes(x=value, fill=variable)) + geom_histogram() + facet_wrap(~variable, ncol=1)

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Predicting Transition Probabilities based on Days Since Last Contact (0-35)

Using logistic regression models to fit transition probabilities as fn of days since last contact, on data when days since last contact is between 0 and 35.

I will use the average predicted transition probabilities on days 29-35 since contact as baseline transition probability when it has been more than four weeks since a contact. We can improve on this later.

focal_patient_days_contacts = patient_days_contacts[days_since_last_contact<=35]
focal_patient_week_contacts = patient_week_contacts[days_since_last_contact<=35]
setorder(focal_patient_days_contacts, record_id, dexcom_date)
setorder(focal_patient_week_contacts, record_id, dexcom_week)

focal_patient_days_contacts[, next_state := shift(state, type="lead"), by='record_id']
focal_patient_week_contacts[, next_state := shift(state, type="lead"), by='record_id']

get_state_state_prob_predictions = function(from_state, to_state, data) {
  dt = copy(data)[state == from_state]
  xs_to_predict = unique(dt$days_since_last_contact)
  dt[, outcome := ifelse(next_state == to_state, 1, 0)]
  m = glm(outcome ~ as.factor(days_since_last_contact) - 1, family = binomial(), data = dt)
  pred_data = data.table(days_since_last_contact=xs_to_predict)
  preds = predict(m, newdata=pred_data, se=TRUE, type="response")
  pred_data[,fit_prob := preds$fit]
  baseline_prob = pred_data[days_since_last_contact >= 29 & days_since_last_contact <= 35, mean(fit_prob)]
  return(data.table(
    from_state = from_state,
    to_state = to_state,
    days_since_last_contact=xs_to_predict,
    pred_prob = preds$fit,
    pred_se = preds$se.fit,
    baseline_prob = baseline_prob
  ))
}

states = c("green","yellow","red")
state_state_grid = expand.grid(states, states)

daily_state_state_preds = rbindlist(
  lapply(seq(1,nrow(state_state_grid)), 
         function(x) get_state_state_prob_predictions(state_state_grid[x,1],
                                                      state_state_grid[x,2],
                                                      focal_patient_days_contacts))) 

weekly_state_state_preds = rbindlist(
  lapply(seq(1,nrow(state_state_grid)), 
         function(x) get_state_state_prob_predictions(state_state_grid[x,1],
                                                      state_state_grid[x,2],
                                                      focal_patient_week_contacts)))

Plot predicted daily transition probs

outline <- data.frame( 
  from_state = states, 
  outline_color = c('green', 'orange', 'red') 
) 
square <- data.frame( 
  x = c(-Inf, Inf, Inf, -Inf), 
  y = c(-Inf, -Inf, Inf, Inf)
)

ggplot(daily_state_state_preds, 
       aes(x=days_since_last_contact, 
           y=pred_prob, ymin=pred_prob-2*pred_se, ymax=pred_prob+2*pred_se, color=to_state, group=to_state)) +
  geom_line() + geom_point() + geom_errorbar(width=0.1) +
  facet_wrap(from_state~to_state, scales="free_y", shrink=TRUE) + 
  scale_color_manual(breaks = states, values = c('green','orange','red', 'orange')) +
  theme_minimal() +
  geom_polygon(inherit.aes = FALSE, aes(x = x, y = y, color = outline_color, fill = NA), data = merge(outline, square)) +
  scale_fill_identity() +
  theme(legend.position = "none") + 
  geom_line(inherit.aes = FALSE, aes(x=days_since_last_contact, y=baseline_prob, group=to_state),
            linetype="dashed")

Plot predicted weekly transition probs

ggplot(weekly_state_state_preds, 
       aes(x=days_since_last_contact, 
           y=pred_prob, ymin=pred_prob-2*pred_se, ymax=pred_prob+2*pred_se, color=to_state, group=to_state)) +
  geom_line() + geom_point() + geom_errorbar(width=0.1) +
  facet_wrap(from_state~to_state, scales="free_y", shrink=TRUE) + 
  scale_color_manual(breaks = states, values = c('green','orange','red', 'orange')) +
  theme_minimal() +
  geom_polygon(inherit.aes = FALSE, aes(x = x, y = y, color = outline_color, fill = NA), data = merge(outline, square)) +
  scale_fill_identity() + geom_vline(xintercept = 0, linetype="dashed") +
  theme(legend.position = "none") + 
  geom_line(inherit.aes = FALSE, aes(x=days_since_last_contact, y=baseline_prob, group=to_state),
            linetype="dashed")

The weekly probabilities are less noisy look more like what we’d expect, with higher transition probabilities relative to baseline from non-green to green in the weeks following a contact.

Writing out the transition probabilities for simulations

setwd("~/Downloads/CGM Data/")
fwrite(daily_state_state_preds, 'daily_pred_probs.csv')
fwrite(weekly_state_state_preds, 'weekly_pred_probs.csv')