library(ggplot2)
library(data.table)
Load Redcap data and patient population with remotely monitored indicator
setwd("~/Downloads/CGM Data/")
redcap = fread("CGMInNewOnsetsFinal_DATA_2020-05-19.csv")
# Format column types
redcap[,`:=`(
dob = as.Date(dob, "%m/%d/%y"),
diab_onset = as.Date(diab_onset, "%m/%d/%y"),
cgm_start_date = as.Date(cgm_start_date, "%m/%d/%y"),
redcap_event_name = as.character(redcap_event_name)
)]
baseline_info = unique(redcap[redcap_event_name=="baseline_data_arm_1", .(record_id, dob, diab_onset, cgm_start_date)])
# Add a variable that determines whether a patient is remotely monitored
contact_type = redcap[, .(remotely_monitored = max(ifelse(redcap_event_name == "remote_monitoring_arm_1", 1, 0))), by='record_id']
baseline_info = merge(baseline_info, contact_type)
rm(contact_type)
Get contacts (dates and types) for each patient
contact_information = redcap[redcap_event_name != "baseline_data_arm_1",
type := ifelse(substr(redcap_event_name, 1, 4) == "visi", "visit", "remote")]
# Visits -- in person and telehealth
visit_data = contact_information[type == "visit" & !is.na(visit_date),
.(record_id, visit_type, type, visit_date)]
visit_data[, type_detail := ifelse(visit_type == 1, "visit_inperson", ifelse(visit_type == 2, "visit_telehealth", "visit_unknown"))]
visit_data[, num_visits := .N, by='record_id']
visit_data = visit_data[, .(record_id, date_contact = visit_date, type, type_detail, num_visits)]
ggplot(visit_data, aes(x=num_visits)) + geom_bar(stat="count") + scale_x_continuous(breaks=seq(1,max(visit_data$num_visits)))
# Remote Contact
remote_data = contact_information[type == "remote" & contact_week == 1,
.(record_id, contact_week, contact_method___1,
initiator, contact_reason___1, adjustments,
date_contact, type)]
# Want to differentiate between initiator and adjustments made
remote_data[(initiator == 2 | initiator == 1) & adjustments == 0, type_detail := "remote_patient_noadj"]
remote_data[(initiator == 2 | initiator == 1) & adjustments == 1, type_detail := "remote_patient_adj"]
remote_data[initiator == 3 & adjustments == 0, type_detail := "remote_careteam_noadj"]
remote_data[initiator == 3 & adjustments == 1, type_detail := "remote_careteam_adj"]
remote_data[is.na(type_detail), type_detail := "remote_unknown"]
table(remote_data$type_detail)
##
## remote_careteam_adj remote_careteam_noadj remote_patient_adj
## 491 563 74
## remote_patient_noadj remote_unknown
## 32 13
remote_data[, num_remote_contacts := .N, by='record_id']
remote_data = remote_data[,.(record_id, date_contact, type, type_detail, num_remote_contacts)]
ggplot(remote_data, aes(x=num_remote_contacts)) + geom_bar(stat="count") + scale_x_continuous(breaks=seq(1,max(remote_data$num_remote_contacts), 2))
Combine contacts data
all_contacts = rbind(
remote_data[,.(record_id, date_contact, type, type_detail)],
visit_data[,.(record_id, date_contact, type, type_detail)])
all_contacts[, `:=`(
date_contact = as.Date(date_contact, "%m/%d/%y"),
week_contact = cut(as.Date(date_contact, "%m/%d/%y"), "week")
)]
# DAILY Contacts
# if two contacts on same date, just keep the visit
daily_contacts = copy(all_contacts)
setorder(daily_contacts, record_id, date_contact, -type, -type_detail)
daily_contacts[, row_rank := seq_len(.N), by=c('record_id','date_contact')]
daily_contacts = daily_contacts[row_rank == 1]
daily_contacts[, rows_by_record_date := .N, by=c('record_id','date_contact')]
# WEEKLY Contacts
weekly_contacts = copy(all_contacts)
setorder(weekly_contacts, record_id, week_contact, -type, -type_detail)
weekly_contacts[, row_rank := seq_len(.N), by=c('record_id','week_contact')]
weekly_contacts = weekly_contacts[row_rank == 1]
weekly_contacts[, rows_by_record_date := .N, by=c('record_id','week_contact')]
# make sure no duplicate record-dates
stopifnot(nrow(daily_contacts) - uniqueN(all_contacts[,.(record_id, date_contact)]) == 0)
stopifnot(nrow(weekly_contacts) - uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)
# make sure no record-dates lost
stopifnot(uniqueN(daily_contacts[,.(record_id, date_contact)]) -
uniqueN(all_contacts[,.(record_id, date_contact)]) == 0)
stopifnot(uniqueN(weekly_contacts[,.(record_id, week_contact)]) -
uniqueN(all_contacts[,.(record_id, week_contact)]) == 0)
# drop helper columns
daily_contacts[, row_rank := NULL]
daily_contacts[, rows_by_record_date := NULL]
weekly_contacts[, row_rank := NULL]
weekly_contacts[, rows_by_record_date := NULL]
Load CGM data for every patient
setwd("~/Downloads/CGM Data/Trace Data")
cgm_files = grep("dexcom_clean_record_id_", list.files("."), value = T)
cgm_data = rbindlist(lapply(cgm_files, fread))[not_filled == 0, .(record_id, time_edit, glucose, dexcom_date)]
For each patient-day and patient-week, generate aggregate CGM metrics
cgm_data[, `:=`(
inrange = ifelse(glucose >= 70 & glucose <= 180, 1, 0),
hyper = ifelse(glucose > 180, 1, 0),
hypo1 = ifelse(glucose <= 70, 1, 0),
hypo2 = ifelse(glucose <= 54, 1, 0),
dexcom_date = as.Date(dexcom_date),
dexcom_week = cut(as.Date(dexcom_date), 'week')
)]
patient_days = cgm_data[, .(
TIR = mean(inrange),
hyper = mean(hyper),
hypo1 = mean(hypo1),
hypo2 = mean(hypo2)
), by=c('record_id', 'dexcom_date')]
patient_weeks = cgm_data[, .(
TIR = mean(inrange),
hyper = mean(hyper),
hypo1 = mean(hypo1),
hypo2 = mean(hypo2)
), by=c('record_id', 'dexcom_week')]
# Add a definition of state into three categories -- currently rather arbitrary cut-offs
patient_days[TIR > 0.7 & hypo1 < 0.04 & hypo2 < 0.01, state := "green"]
patient_days[TIR <= 0.7 & TIR > 0.5 & hypo1 < 0.04 & hypo2 < 0.01, state := "yellow"]
patient_days[(TIR <= 0.5) | hypo1 >= 0.04 | hypo2 >= 0.01, state := "red"]
patient_weeks[TIR > 0.7 & hypo1 < 0.04 & hypo2 < 0.01, state := "green"]
patient_weeks[TIR <= 0.7 & TIR > 0.5 & hypo1 < 0.04 & hypo2 < 0.01, state := "yellow"]
patient_weeks[(TIR <= 0.5) | hypo1 >= 0.04 | hypo2 >= 0.01, state := "red"]
table(patient_weeks$state)
##
## green red yellow
## 1863 1714 972
Combine CGM-based states and contact data (visits and remote contacts with adjustments)
patient_days_contacts = merge(patient_days,
daily_contacts[type == "visit" |
type_detail %in% c("remote_careteam_adj", "remote_patient_adj"),
num_date := as.numeric(date_contact)],
all.x = TRUE,
by.x=c('record_id','dexcom_date'),
by.y=c('record_id','date_contact'))
patient_week_contacts = merge(patient_weeks,
weekly_contacts[type == "visit" |
type_detail %in% c("remote_careteam_adj", "remote_patient_adj"),
num_date := as.numeric(as.Date(week_contact))],
all.x = TRUE,
by.x=c('record_id','dexcom_week'),
by.y=c('record_id','week_contact'))
Fill in last and next contact
patient_days_contacts[, num_last_contact_date := num_date]
patient_days_contacts[, num_next_contact_date := num_date]
patient_week_contacts[, num_last_contact_date := num_date]
patient_week_contacts[, num_next_contact_date := num_date]
setorder(patient_days_contacts, record_id, dexcom_date)
setorder(patient_week_contacts, record_id, dexcom_week)
patient_days_contacts[, num_last_contact_date := nafill(num_last_contact_date, "locf"), by = 'record_id']
patient_days_contacts[, num_next_contact_date := nafill(num_next_contact_date, "nocb"), by = 'record_id']
patient_days_contacts[, days_since_last_contact := as.numeric(dexcom_date) - num_last_contact_date]
patient_days_contacts[, days_to_next_contact := num_next_contact_date - as.numeric(dexcom_date)]
patient_week_contacts[, num_last_contact_date := nafill(num_last_contact_date, "locf"), by = 'record_id']
patient_week_contacts[, num_next_contact_date := nafill(num_next_contact_date, "nocb"), by = 'record_id']
patient_week_contacts[, days_since_last_contact := as.numeric(as.Date(dexcom_week)) - num_last_contact_date]
patient_week_contacts[, days_to_next_contact := num_next_contact_date - as.numeric(as.Date(dexcom_week))]
ggplot(melt(patient_week_contacts[,.(days_since_last_contact, days_to_next_contact)]),
aes(x=value, fill=variable)) + geom_histogram() + facet_wrap(~variable, ncol=1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Using logistic regression models to fit transition probabilities as fn of days since last contact, on data when days since last contact is between 0 and 35.
I will use the average predicted transition probabilities on days 29-35 since contact as baseline transition probability when it has been more than four weeks since a contact. We can improve on this later.
focal_patient_days_contacts = patient_days_contacts[days_since_last_contact<=35]
focal_patient_week_contacts = patient_week_contacts[days_since_last_contact<=35]
setorder(focal_patient_days_contacts, record_id, dexcom_date)
setorder(focal_patient_week_contacts, record_id, dexcom_week)
focal_patient_days_contacts[, next_state := shift(state, type="lead"), by='record_id']
focal_patient_week_contacts[, next_state := shift(state, type="lead"), by='record_id']
get_state_state_prob_predictions = function(from_state, to_state, data) {
dt = copy(data)[state == from_state]
xs_to_predict = unique(dt$days_since_last_contact)
dt[, outcome := ifelse(next_state == to_state, 1, 0)]
m = glm(outcome ~ as.factor(days_since_last_contact) - 1, family = binomial(), data = dt)
pred_data = data.table(days_since_last_contact=xs_to_predict)
preds = predict(m, newdata=pred_data, se=TRUE, type="response")
pred_data[,fit_prob := preds$fit]
baseline_prob = pred_data[days_since_last_contact >= 29 & days_since_last_contact <= 35, mean(fit_prob)]
return(data.table(
from_state = from_state,
to_state = to_state,
days_since_last_contact=xs_to_predict,
pred_prob = preds$fit,
pred_se = preds$se.fit,
baseline_prob = baseline_prob
))
}
states = c("green","yellow","red")
state_state_grid = expand.grid(states, states)
daily_state_state_preds = rbindlist(
lapply(seq(1,nrow(state_state_grid)),
function(x) get_state_state_prob_predictions(state_state_grid[x,1],
state_state_grid[x,2],
focal_patient_days_contacts)))
weekly_state_state_preds = rbindlist(
lapply(seq(1,nrow(state_state_grid)),
function(x) get_state_state_prob_predictions(state_state_grid[x,1],
state_state_grid[x,2],
focal_patient_week_contacts)))
Plot predicted daily transition probs
outline <- data.frame(
from_state = states,
outline_color = c('green', 'orange', 'red')
)
square <- data.frame(
x = c(-Inf, Inf, Inf, -Inf),
y = c(-Inf, -Inf, Inf, Inf)
)
ggplot(daily_state_state_preds,
aes(x=days_since_last_contact,
y=pred_prob, ymin=pred_prob-2*pred_se, ymax=pred_prob+2*pred_se, color=to_state, group=to_state)) +
geom_line() + geom_point() + geom_errorbar(width=0.1) +
facet_wrap(from_state~to_state, scales="free_y", shrink=TRUE) +
scale_color_manual(breaks = states, values = c('green','orange','red', 'orange')) +
theme_minimal() +
geom_polygon(inherit.aes = FALSE, aes(x = x, y = y, color = outline_color, fill = NA), data = merge(outline, square)) +
scale_fill_identity() +
theme(legend.position = "none") +
geom_line(inherit.aes = FALSE, aes(x=days_since_last_contact, y=baseline_prob, group=to_state),
linetype="dashed")
Plot predicted weekly transition probs
ggplot(weekly_state_state_preds,
aes(x=days_since_last_contact,
y=pred_prob, ymin=pred_prob-2*pred_se, ymax=pred_prob+2*pred_se, color=to_state, group=to_state)) +
geom_line() + geom_point() + geom_errorbar(width=0.1) +
facet_wrap(from_state~to_state, scales="free_y", shrink=TRUE) +
scale_color_manual(breaks = states, values = c('green','orange','red', 'orange')) +
theme_minimal() +
geom_polygon(inherit.aes = FALSE, aes(x = x, y = y, color = outline_color, fill = NA), data = merge(outline, square)) +
scale_fill_identity() + geom_vline(xintercept = 0, linetype="dashed") +
theme(legend.position = "none") +
geom_line(inherit.aes = FALSE, aes(x=days_since_last_contact, y=baseline_prob, group=to_state),
linetype="dashed")
The weekly probabilities are less noisy look more like what we’d expect, with higher transition probabilities relative to baseline from non-green to green in the weeks following a contact.
Writing out the transition probabilities for simulations
setwd("~/Downloads/CGM Data/")
fwrite(daily_state_state_preds, 'daily_pred_probs.csv')
fwrite(weekly_state_state_preds, 'weekly_pred_probs.csv')