Preprocessing: Load CGM data

Only running this once to create aggregate dataset. Can be ignored. Just included for reference.

Click to see code used to generate datasets for simulations

library(data.table)
setwd("~/Downloads/CGM/dexcom_20210628/")
cgm_files = grep(".csv", list.files("."), value = T)

clean_cgm_file = function(f) {
  dt = fread(f)
  dt[,patient_id := as.character(patient_id)]
  dt[,ts := as.POSIXct(ts)]
  dt[,ds := as.Date(ts)]
  dt[,bg := as.numeric(bg)]
  dt[,population := as.character(population)]
  dt = dt[bg>=0]
  dt = dt[!is.na(ts)]
  return(dt[,.(patient_id, ts, bg, population)])
}

combined_cgm_data = rbindlist(lapply(cgm_files, clean_cgm_file))


# For each patient-week, generate aggregate CGM metrics.
# Get % time worn by inferring recording frequency using median time between glucose readings
# (Some patients have readings every 5 minutes, others every 2.5 minutes)

setorder(combined_cgm_data, patient_id, ts)
combined_cgm_data[, lag_ts := shift(ts), by='patient_id']

combined_cgm_data[, `:=`(
  ts_delta = as.numeric(ts - lag_ts),
  inrange = ifelse(bg >= 70 & bg <= 180, 1, 0),
  hyper = ifelse(bg > 180, 1, 0),
  hypo1 = ifelse(bg <= 70, 1, 0),
  hypo2 = ifelse(bg <= 54, 1, 0),
  dexcom_date = as.Date(ts),
  dexcom_week = cut(as.Date(ts), 'week')
)]

patient_weeks = combined_cgm_data[, .(
  TIR = mean(inrange),
  hyper = mean(hyper),
  hypo1 = mean(hypo1),
  hypo2 = mean(hypo2),
  n_obs = .N,
  median_ts_delta = quantile(ts_delta, 0.5, na.rm=T)
), by=c('patient_id', 'population', 'dexcom_week')]

patient_weeks[,expected_n_obs := ifelse(
  median_ts_delta >= 60 & median_ts_delta<=600, # If median ts delta is between 1 and 10 minutes, use it to estimate expected N obs
  7*24*60*60 / median_ts_delta,
  7*24*60*60 / 300 # Otherwise, assume 5 minutes between readings (default)
  )]

patient_weeks[,p_worn := n_obs/expected_n_obs]
stopifnot(patient_weeks[,max(p_worn)] < 1.05)
patient_weeks[p_worn>1, p_worn := 1]
patient_weeks[, summary(p_worn)]

# Add the study population of each patient
study_pop = readxl::read_excel("~/TIDE-LPCH/dexcom_numbers.xls")
study_pop = as.data.table(study_pop)
study_pop = study_pop[,.(study_pop = Population, patient_id = gsub("u","",Dexcom_Number))]

patient_weeks_study_pop = merge(patient_weeks, study_pop, by='patient_id')
patient_weeks_study_pop = patient_weeks_study_pop[,.(patient_id,reviewer=population,review_week=dexcom_week,TIR,hyper,hypo1,hypo2,p_worn,study_pop)]

# Save
fwrite(patient_weeks_study_pop, "~/Downloads/CGM/patient_weeks_study_pop.csv")

Actual Simulation Code

Load data

patient_weeks_study_pop = fread("~/Downloads/CGM files for Teng/patient_weeks_study_pop.csv",)
patient_weeks_study_pop[,patient_id := as.character(patient_id)]

# Filter to 2021 weeks to end of complete data
patient_weeks_study_pop = patient_weeks_study_pop[review_week>="2021-01-04" & review_week <= "2021-06-21"]

# Summary stats
patient_weeks_study_pop[,.(
  n_patient_weeks = .N,
  n_patients = uniqueN(patient_id),
  n_cdes = uniqueN(reviewer),
  n_weeks = uniqueN(review_week)
)]

##    n_patient_weeks n_patients n_cdes n_weeks
## 1:            4088        209      3      25

Load patient list that determines patient review eligibility by week

patient_list = as.data.table(readxl::read_excel("~/Downloads/CGM files for Teng/dexcom_numbers.xls"))
patient_list = patient_list[,.(patient_id = gsub('u','',Dexcom_Number), current_eligible_week_numbers=WeeksToReview)]

patient_weeks_study_pop = merge(patient_weeks_study_pop, patient_list, by = 'patient_id')
colnames(patient_weeks_study_pop)

##  [1] "patient_id"                    "reviewer"                     
##  [3] "review_week"                   "TIR"                          
##  [5] "hyper"                         "hypo1"                        
##  [7] "hypo2"                         "p_worn"                       
##  [9] "study_pop"                     "current_eligible_week_numbers"

Function using current eligiblity and simulating which patients will currently be reviewed by CDEs each week

baseline_review = function(dt, filter_to_eligible_weeks = TRUE) {
  
  out_dt = copy(dt)
  
  # Filter to those currently eligible for review in each week
  out_dt[, week_idx := 1 + as.numeric(format(review_week, "%V")) %% 4]
  out_dt[, eligible_for_review := current_eligible_week_numbers == 0 | current_eligible_week_numbers == week_idx]
  print(table(out_dt$eligible_for_review))
  
  if(filter_to_eligible_weeks) {
    print(sprintf("Dropping %i not eligible for review in current week.", out_dt[eligible_for_review==0,.N]))
    out_dt = out_dt[eligible_for_review==TRUE]
  }
  
  # Rank patients and get top K each week in addition to those violating targets in both of the last two  weeks
  K = 60
  out_dt[p_worn >= 0.75, num_flags := (TIR<0.65) + (hypo1>0.04) + (hypo2>0.01)]
  print(table(out_dt$num_flags))
  
  setorder(out_dt, patient_id, review_week)
  out_dt[,lag_num_flags := shift(num_flags), by='patient_id']
  
  setorder(out_dt, review_week, -num_flags, TIR)
  out_dt[p_worn >= 0.75, rnk := seq_len(.N), by = c('review_week')]
  out_dt[p_worn >= 0.75, review := ifelse((num_flags>0 & rnk <= K) | (lag_num_flags>0 & num_flags>0), "Review", "Don't review")]
  out_dt[p_worn < 0.75, review := "Insufficient data"]
  print(table(out_dt$review))
  
  # Output table with review flag
  return(out_dt)
}

# Test it 
baseline_review(patient_weeks_study_pop)

## 
## FALSE  TRUE 
##  1974  2114 
## [1] "Dropping 1974 not eligible for review in current week."
## 
##   0   1   2   3 
## 862 760 144  60 
## 
##      Don't review Insufficient data            Review 
##               862               288               964

Use review function to generate summary stats

review_output = baseline_review(patient_weeks_study_pop)

## 
## FALSE  TRUE 
##  1974  2114 
## [1] "Dropping 1974 not eligible for review in current week."
## 
##   0   1   2   3 
## 862 760 144  60 
## 
##      Don't review Insufficient data            Review 
##               862               288               964

ggplot(review_output[,.(n_patients = .N), by=c('review_week','review')], 
       aes(x=review_week,y=n_patients,color=review)) + geom_line()  + theme_bw()

ggplot(review_output[,.(n_patients = .N), by=c('reviewer','review_week','review')], 
       aes(x=review_week,y=n_patients,color=review)) + geom_line() + facet_wrap(~reviewer, ncol=1) + theme_bw()

Distribution of reviews per patient

ggplot(review_output[,.(n_reviews = sum(review=="Review")), by=c('patient_id','study_pop')], 
       aes(x=n_reviews)) + facet_wrap(~study_pop, ncol=2) + geom_bar(stat="count") +
  theme_bw() + xlab("Reviews per patient (over 26 week simulation)") + ylab("N patients")

Simulating changing review criteria so TIR flag is (TIR < 65% + a decrease in TIR of 5pp or more), and all patients are eligible for review every week

test_review = function(dt) {
  
  out_dt = copy(dt)
  
  # Rank patients and get top K each week in addition to those violating targets in both of the last two  weeks
  K = 60
  
  setorder(out_dt, patient_id, review_week)
  out_dt[, lag_TIR := shift(TIR), by = 'patient_id']
  out_dt[is.na(lag_TIR), lag_TIR := TIR]
  
  out_dt[p_worn >= 0.75, num_flags := (TIR<0.65 & TIR<lag_TIR-0.05) + (hypo1>0.04) + (hypo2>0.01)]
  print(table(out_dt$num_flags))
  
  setorder(out_dt, patient_id, review_week)
  out_dt[,lag_num_flags := shift(num_flags), by='patient_id']
  
  setorder(out_dt, review_week, -num_flags, TIR)
  out_dt[p_worn >= 0.75, rnk := seq_len(.N), by = c('review_week')]
  out_dt[p_worn >= 0.75, review := ifelse((num_flags>0 & rnk <= K) | (lag_num_flags>0 & num_flags>0), "Review", "Don't review")]
  out_dt[p_worn < 0.75, review := "Insufficient data"]
  print(table(out_dt$review))
  
  # Output table with review flag
  return(out_dt)
}

# Test it 
test_review(patient_weeks_study_pop)

## 
##    0    1    2    3 
## 2078  897  341   50 
## 
##      Don't review Insufficient data            Review 
##              2081               722              1285

test_review_output = test_review(patient_weeks_study_pop)

## 
##    0    1    2    3 
## 2078  897  341   50 
## 
##      Don't review Insufficient data            Review 
##              2081               722              1285

ggplot(test_review_output[,.(n_patients = .N), by=c('review_week','review')], 
       aes(x=review_week,y=n_patients,color=review)) + geom_line()  + theme_bw()

Distribution of reviews per patient

ggplot(test_review_output[,.(n_reviews = sum(review=="Review")), by=c('patient_id','study_pop')], 
       aes(x=n_reviews)) + facet_wrap(~study_pop, ncol=2) + geom_bar(stat="count") +
  theme_bw() + xlab("Reviews per patient (over 26 week simulation)") + ylab("N patients")