library(ggplot2)
library(data.table)
Only running this once to create aggregate dataset. Can be ignored. Just included for reference.
library(data.table)
setwd("~/Downloads/CGM/dexcom_20210628/")
cgm_files = grep(".csv", list.files("."), value = T)
clean_cgm_file = function(f) {
dt = fread(f)
dt[,patient_id := as.character(patient_id)]
dt[,ts := as.POSIXct(ts)]
dt[,ds := as.Date(ts)]
dt[,bg := as.numeric(bg)]
dt[,population := as.character(population)]
dt = dt[bg>=0]
dt = dt[!is.na(ts)]
return(dt[,.(patient_id, ts, bg, population)])
}
combined_cgm_data = rbindlist(lapply(cgm_files, clean_cgm_file))
# For each patient-week, generate aggregate CGM metrics.
# Get % time worn by inferring recording frequency using median time between glucose readings
# (Some patients have readings every 5 minutes, others every 2.5 minutes)
setorder(combined_cgm_data, patient_id, ts)
combined_cgm_data[, lag_ts := shift(ts), by='patient_id']
combined_cgm_data[, `:=`(
ts_delta = as.numeric(ts - lag_ts),
inrange = ifelse(bg >= 70 & bg <= 180, 1, 0),
hyper = ifelse(bg > 180, 1, 0),
hypo1 = ifelse(bg <= 70, 1, 0),
hypo2 = ifelse(bg <= 54, 1, 0),
dexcom_date = as.Date(ts),
dexcom_week = cut(as.Date(ts), 'week')
)]
patient_weeks = combined_cgm_data[, .(
TIR = mean(inrange),
hyper = mean(hyper),
hypo1 = mean(hypo1),
hypo2 = mean(hypo2),
n_obs = .N,
median_ts_delta = quantile(ts_delta, 0.5, na.rm=T)
), by=c('patient_id', 'population', 'dexcom_week')]
patient_weeks[,expected_n_obs := ifelse(
median_ts_delta >= 60 & median_ts_delta<=600, # If median ts delta is between 1 and 10 minutes, use it to estimate expected N obs
7*24*60*60 / median_ts_delta,
7*24*60*60 / 300 # Otherwise, assume 5 minutes between readings (default)
)]
patient_weeks[,p_worn := n_obs/expected_n_obs]
stopifnot(patient_weeks[,max(p_worn)] < 1.05)
patient_weeks[p_worn>1, p_worn := 1]
patient_weeks[, summary(p_worn)]
# Add the study population of each patient
study_pop = readxl::read_excel("~/TIDE-LPCH/dexcom_numbers.xls")
study_pop = as.data.table(study_pop)
study_pop = study_pop[,.(study_pop = Population, patient_id = gsub("u","",Dexcom_Number))]
patient_weeks_study_pop = merge(patient_weeks, study_pop, by='patient_id')
patient_weeks_study_pop = patient_weeks_study_pop[,.(patient_id,reviewer=population,review_week=dexcom_week,TIR,hyper,hypo1,hypo2,p_worn,study_pop)]
# Save
fwrite(patient_weeks_study_pop, "~/Downloads/CGM/patient_weeks_study_pop.csv")
Load data
patient_weeks_study_pop = fread("~/Downloads/CGM/patient_weeks_study_pop.csv",)
patient_weeks_study_pop[,patient_id := as.character(patient_id)]
# Filter to 2021 weeks to end of complete data
patient_weeks_study_pop = patient_weeks_study_pop[review_week>="2021-01-04" & review_week <= "2021-06-21"]
# Summary stats
patient_weeks_study_pop[,.(
n_patient_weeks = .N,
n_patients = uniqueN(patient_id),
n_cdes = uniqueN(reviewer),
n_weeks = uniqueN(review_week)
)]
## n_patient_weeks n_patients n_cdes n_weeks
## 1: 4088 209 3 25
Load patient list that determines patient review eligibility by week
patient_list = as.data.table(readxl::read_excel("~/TIDE-LPCH/dexcom_numbers.xls"))
patient_list = patient_list[,.(patient_id = gsub('u','',Dexcom_Number), current_eligible_week_numbers=WeeksToReview)]
patient_weeks_study_pop = merge(patient_weeks_study_pop, patient_list, by = 'patient_id')
colnames(patient_weeks_study_pop)
## [1] "patient_id" "reviewer"
## [3] "review_week" "TIR"
## [5] "hyper" "hypo1"
## [7] "hypo2" "p_worn"
## [9] "study_pop" "current_eligible_week_numbers"
baseline_review = function(dt, filter_to_eligible_weeks = TRUE) {
out_dt = copy(dt)
# Filter to those currently eligible for review in each week
out_dt[, week_idx := 1 + as.numeric(format(review_week, "%V")) %% 4]
out_dt[, eligible_for_review := current_eligible_week_numbers == 0 | current_eligible_week_numbers == week_idx]
print(table(out_dt$eligible_for_review))
if(filter_to_eligible_weeks) {
print(sprintf("Dropping %i not eligible for review in current week.", out_dt[eligible_for_review==0,.N]))
out_dt = out_dt[eligible_for_review==TRUE]
}
# Rank patients and get top K each week in addition to those violating targets in both of the last two weeks
K = 60
out_dt[p_worn >= 0.75, num_flags := (TIR<0.65) + (hypo1>0.04) + (hypo2>0.01)]
print(table(out_dt$num_flags))
setorder(out_dt, patient_id, review_week)
out_dt[,lag_num_flags := shift(num_flags), by='patient_id']
setorder(out_dt, review_week, -num_flags, TIR)
out_dt[p_worn >= 0.75, rnk := seq_len(.N), by = c('review_week')]
out_dt[p_worn >= 0.75, review := ifelse((num_flags>0 & rnk <= K) | (lag_num_flags>0 & num_flags>0), "Review", "Don't review")]
out_dt[p_worn < 0.75, review := "Insufficient data"]
print(table(out_dt$review))
# Output table with review flag
return(out_dt)
}
# Test it
baseline_review(patient_weeks_study_pop)
##
## FALSE TRUE
## 1974 2114
## [1] "Dropping 1974 not eligible for review in current week."
##
## 0 1 2 3
## 862 760 144 60
##
## Don't review Insufficient data Review
## 862 288 964
review_output = baseline_review(patient_weeks_study_pop)
##
## FALSE TRUE
## 1974 2114
## [1] "Dropping 1974 not eligible for review in current week."
##
## 0 1 2 3
## 862 760 144 60
##
## Don't review Insufficient data Review
## 862 288 964
ggplot(review_output[,.(n_patients = .N), by=c('review_week','review')],
aes(x=review_week,y=n_patients,color=review)) + geom_line() + theme_bw()
ggplot(review_output[,.(n_patients = .N), by=c('reviewer','review_week','review')],
aes(x=review_week,y=n_patients,color=review)) + geom_line() + facet_wrap(~reviewer, ncol=1) + theme_bw()
Distribution of reviews per patient
ggplot(review_output[,.(n_reviews = sum(review=="Review")), by=c('patient_id','study_pop')],
aes(x=n_reviews)) + facet_wrap(~study_pop, ncol=2) + geom_bar(stat="count") +
theme_bw() + xlab("Reviews per patient (over 26 week simulation)") + ylab("N patients")
test_review = function(dt) {
out_dt = copy(dt)
# Rank patients and get top K each week in addition to those violating targets in both of the last two weeks
K = 60
setorder(out_dt, patient_id, review_week)
out_dt[, lag_TIR := shift(TIR), by = 'patient_id']
out_dt[is.na(lag_TIR), lag_TIR := TIR]
out_dt[p_worn >= 0.75, num_flags := (TIR<0.65 & TIR<lag_TIR-0.05) + (hypo1>0.04) + (hypo2>0.01)]
print(table(out_dt$num_flags))
setorder(out_dt, patient_id, review_week)
out_dt[,lag_num_flags := shift(num_flags), by='patient_id']
setorder(out_dt, review_week, -num_flags, TIR)
out_dt[p_worn >= 0.75, rnk := seq_len(.N), by = c('review_week')]
out_dt[p_worn >= 0.75, review := ifelse((num_flags>0 & rnk <= K) | (lag_num_flags>0 & num_flags>0), "Review", "Don't review")]
out_dt[p_worn < 0.75, review := "Insufficient data"]
print(table(out_dt$review))
# Output table with review flag
return(out_dt)
}
# Test it
test_review(patient_weeks_study_pop)
##
## 0 1 2 3
## 2078 897 341 50
##
## Don't review Insufficient data Review
## 2081 722 1285
test_review_output = test_review(patient_weeks_study_pop)
##
## 0 1 2 3
## 2078 897 341 50
##
## Don't review Insufficient data Review
## 2081 722 1285
ggplot(test_review_output[,.(n_patients = .N), by=c('review_week','review')],
aes(x=review_week,y=n_patients,color=review)) + geom_line() + theme_bw()
Distribution of reviews per patient
ggplot(test_review_output[,.(n_reviews = sum(review=="Review")), by=c('patient_id','study_pop')],
aes(x=n_reviews)) + facet_wrap(~study_pop, ncol=2) + geom_bar(stat="count") +
theme_bw() + xlab("Reviews per patient (over 26 week simulation)") + ylab("N patients")