Load data
library(data.table)
library(ggplot2)
library(ggpubr)
cgm_ts = fread("~/Downloads/CGM/deid_hourly_timeseries.csv")
cgm_ts[, pat_id := paste0(pop,"-",pat_id)]
str(cgm_ts)
## Classes 'data.table' and 'data.frame': 1503146 obs. of 13 variables:
## $ day_id : int 636 636 636 636 636 636 636 636 636 636 ...
## $ hour : int 12 13 14 15 16 17 18 19 20 21 ...
## $ mean_glucose : num 107.8 90.8 117.5 112.9 154.5 ...
## $ TIR : num 1 1 1 1 0.75 ...
## $ in_person_visits : int 1 1 1 1 1 1 1 1 1 1 ...
## $ telehealth_visits: int 0 0 0 0 0 0 0 0 0 0 ...
## $ remote_contacts : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pat_id : chr "4T-35" "4T-35" "4T-35" "4T-35" ...
## $ weekday_id : int 3 3 3 3 3 3 3 3 3 3 ...
## $ pop : chr "4T" "4T" "4T" "4T" ...
## $ age : int 12 12 12 12 12 12 12 12 12 12 ...
## $ sexF : int 0 0 0 0 0 0 0 0 0 0 ...
## $ public_insurance : int 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
all_pats = cgm_ts[,.(
TIR = mean(TIR),
remote_contacts = max(remote_contacts),
in_person_visits = max(in_person_visits),
telehealth_visits = max(telehealth_visits)
), by = c('pat_id','pop','day_id')]
#all_pats[,day_id := day_id - min(day_id), by='pat_id']
all_pats[,day_id := seq_len(.N), by='pat_id']
all_pats[day_id < 180, remote_contacts := 0]
all_pats[TIR > 0.7, remote_contacts := 0]
# Only keep patients with >0 remote contacts
all_pats = all_pats[pat_id %in% all_pats[remote_contacts==1, unique(pat_id)]]
# Get TIR of last contact
setorder(all_pats, pat_id, day_id)
all_pats[, last_contact_TIR := remote_contacts*TIR]
all_pats[last_contact_TIR == 0, last_contact_TIR := NA]
all_pats[, last_contact_TIR := nafill(last_contact_TIR, "locf"), by='pat_id']
#all_pats[pat_id == "4T-22", last_contact_TIR][1:300]
one_pat = copy(all_pats[pat_id=="Pilot-14"])
one_pat[,.(
.N,
mean(TIR),
sd(TIR),
sum(remote_contacts)
)]
## N V2 V3 V4
## 1: 582 0.6497213 0.1923579 12
First just try to learn mean and variance
mean_var_nloglik = function(theta) -sum(dnorm(one_pat$TIR, theta[1], max(1E-9,theta[2]), log=TRUE))
# MLE
fit0 <- optim(c(0,1), mean_var_nloglik, method = "BFGS", control=list(maxit=10000))
fit0
## $par
## [1] 0.6497191 0.1921935
##
## $value
## [1] -134.0455
##
## $counts
## function gradient
## 72 11
##
## $convergence
## [1] 0
##
## $message
## NULL
Now try learning time trend as well
pred_tir = function(mu_i, d_i, t_i) {
return(mu_i+d_i*t_i)
}
mean_var_nloglik = function(theta) -sum(dnorm(one_pat$TIR,
pred_tir(theta["mu_i"], theta["d_i"], one_pat$day_id), max(1E-9,theta["sigma_i"]),
log=TRUE))
# MLE
init_params = unlist(list(
"mu_i" = mean(one_pat$TIR),
"d_i" = 0.001,
"sigma_i" = sd(one_pat$TIR)
))
fit0 <- optim(init_params, mean_var_nloglik, method = "BFGS", control=list(maxit=10000))
fit0
## $par
## mu_i d_i sigma_i
## 0.7339981570 -0.0002891144 0.1859575139
##
## $value
## [1] -153.2533
##
## $counts
## function gradient
## 91 14
##
## $convergence
## [1] 0
##
## $message
## NULL
Fit model with cumulative effect of contacts and exponential decay
pred_tir = function(mu_i, d_i, t_i, g_i, h_i) {
one_pat$discounted_contacts = one_pat$remote_contacts
one_pat$discounted_contacts = stats::filter(one_pat$discounted_contacts, 2^(-1/max(1E-9,h_i)), method = "recursive")
# print(sum(one_pat$discounted_contacts))
contact_effect = one_pat$discounted_contacts*g_i
mu_i = fcoalesce(one_pat$last_contact_TIR, mu_i)
return(mu_i + d_i*t_i + contact_effect)
}
mean_var_nloglik = function(theta) -sum(dnorm(one_pat$TIR,
pred_tir(theta["mu_i"], theta["d_i"], one_pat$day_id, theta["g_i"], theta["h_i"]),
max(1E-9,theta["sigma_i"]), log=TRUE))
# MLE
init_params = unlist(list(
"mu_i" = mean(one_pat$TIR),
"d_i" = 0.001,
"sigma_i" = sd(one_pat$TIR),
"g_i" = 0.01,
"h_i" = 14
))
fit0 <- optim(init_params, mean_var_nloglik, control=list(maxit=10000), method = "BFGS")
fit0
## $par
## mu_i d_i sigma_i g_i h_i
## 7.111705e-01 -3.654684e-06 1.863615e-01 3.782521e-02 1.400500e+01
##
## $value
## [1] -151.9871
##
## $counts
## function gradient
## 99 23
##
## $convergence
## [1] 0
##
## $message
## NULL
Let’s try doing it for every patient
pred_tir = function(dt, mu_i, d_i, t_i, g_i, h_i) {
dt$discounted_contacts = dt$remote_contacts
dt$discounted_contacts = stats::filter(dt$discounted_contacts, 2^(-1/max(1E-9,h_i)), method = "recursive")
contact_effect = dt$discounted_contacts*g_i
mu_i = fcoalesce(dt$last_contact_TIR, mu_i)
return(mu_i + d_i*t_i + contact_effect)
}
get_params = function(pid) {
dt = copy(all_pats[pat_id == pid & day_id>=180])
dt[, day_id := day_id - min(day_id), by='pat_id']
mean_var_nloglik = function(theta) -sum(dnorm(dt$TIR,
pred_tir(dt, theta["mu_i"], theta["d_i"], dt$day_id, theta["g_i"], theta["h_i"]),
max(1E-9,theta["sigma_i"]), log=TRUE))
init_params = unlist(list(
"mu_i" = mean(dt$TIR),
"d_i" = -0.001,
"sigma_i" = sd(dt$TIR),
"g_i" = 0.02,
"h_i" = 24))
fit0 <- optim(init_params, mean_var_nloglik, control=list(maxit=10000), method = "BFGS")
out_dt = as.data.table(t(fit0$par))
out_dt[, pat_id := pid]
out_dt[, pop := dt$pop[1]]
out_dt[, n_contacts := sum(dt$remote_contacts)]
return(out_dt)
}
testfit = get_params("Pilot-1")
fits = rbindlist(lapply(all_pats[,unique(pat_id)], get_params))
#head(fits[n_contacts>=4 & g_i>0])
# ggdensity(melt(fits, id.vars = c('pat_id','pop')), x = "value",
# add = "mean", rug = TRUE,
# color = "pop", facet.by = "variable", scales="free")
Winsorize
fits_long = melt(fits, id.vars = c('pat_id','pop'))
fits_long[, wins_min := quantile(value, 0.05), by='variable']
fits_long[, wins_max := quantile(value, 0.95), by='variable']
fits_long[, value_wins := pmax(wins_min, pmin(wins_max, value))]
#unique(fits_long[,.(variable,wins_min,wins_max)])
ggdensity(fits_long[variable!='n_contacts'], x = "value_wins",
add = "median", rug = TRUE,
color = "pop", facet.by = "variable", scales="free")
Try doing just one patient at different time steps and see how parameters vary over time
get_params = function(ts) {
dt = one_pat[day_id <= ts]
mean_var_nloglik = function(theta) -sum(dnorm(dt$TIR,
pred_tir(dt, theta["mu_i"], theta["d_i"], dt$day_id, theta["g_i"], theta["h_i"]),
max(1E-9,theta["sigma_i"]), log=TRUE))
init_params = unlist(list(
"mu_i" = mean(dt$TIR),
"d_i" = -0.001,
"sigma_i" = sd(dt$TIR),
"g_i" = 0.02,
"h_i" = 24))
fit0 <- optim(init_params, mean_var_nloglik, control=list(maxit=10000), method = "BFGS")
out_dt = as.data.table(t(fit0$par))
out_dt[, pat_id := dt$pat_id[1]]
out_dt[, contact := dt[day_id==ts, remote_contacts][1]]
out_dt[, pop := dt$pop[1]]
out_dt[, ts := ts]
out_dt[, TIR := dt[day_id>=ts-7 & day_id <= ts-1, mean(TIR)]]
return(out_dt)
}
ts_set = one_pat[day_id >= one_pat[remote_contacts==1, min(day_id)], unique(day_id)]
testfit = get_params(max(ts_set))
ts_fits = rbindlist(lapply(ts_set, get_params))
#head(ts_fits)
ts_fits_long = melt(ts_fits, id.vars = c('pat_id','pop','contact','ts'))
ggplot(ts_fits_long, aes(x=ts, y=value)) + geom_vline(xintercept = ts_fits_long[contact==1, unique(ts)], linetype="dashed") + facet_wrap(~variable, scales="free", ncol = 1) + geom_line() + theme_bw() + coord_cartesian(xlim=c(min(ts_fits$ts),400)) + ggtitle("Single patient MLE parameters over time")