library(data.table)
library(ggplot2)
dt = fread('~/Downloads/CGM/SURF-CGM/clean_data/feats_and_interventions.csv')
str(dt)
## Classes 'data.table' and 'data.frame': 525 obs. of 8 variables:
## $ patient_id : int 1 1 1 1 1 1 1 1 1 1 ...
## $ week : int 10 11 12 13 14 15 16 17 18 19 ...
## $ intervention: int 0 0 0 0 0 NA 0 0 NA NA ...
## $ date : IDate, format: "2019-12-30" "2020-01-10" ...
## $ PW : int 14 115 84 100 40 132 87 68 100 67 ...
## $ TIR : int 48 48 48 72 69 65 50 51 42 57 ...
## $ TBR54 : num 0 0 0 0 0 0.11 0.17 0 0 0 ...
## $ TBR70 : int 0 1 0 2 0 1 1 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
Filter
# Filter to those we know whether had contact or not (called intervention here).
# Also filter to those with PW > 75% (could change this).
eval_dt = dt[intervention>=0 & PW > 75]
print(eval_dt[,.(.N, sum(intervention, na.rm=T))])
## N V2
## 1: 331 106
Add number of flags each week
eval_dt[,
num_flags :=
ifelse(TIR < 65, 1, 0) +
ifelse(TBR54 > 1, 1, 0) +
ifelse(TBR70 > 4, 1, 0)]
# P(intervention) by num_flags and TIR
ggplot(eval_dt,
aes(x=TIR, y=intervention, color=factor(num_flags))) +
geom_smooth(method = "glm", method.args = list(family = "binomial"), se=F) +
theme_minimal() + ggtitle("Prob of contact vs TIR and # of flags")
## `geom_smooth()` using formula 'y ~ x'
Make a simple table. Drop patients without flags.
# TIR buckets
eval_dt[,TIR_bucket := cut(TIR, c(0,25,50,65,100), include.lowest=TRUE)]
agg_dt = eval_dt[, .(p_contact = mean(intervention)), by=c('num_flags', 'TIR_bucket')]
dcast(agg_dt[num_flags>0], TIR_bucket ~ num_flags)
## Using 'p_contact' as value column. Use 'value.var' to override
## TIR_bucket 1 2 3
## 1: [0,25] 1.0000000 NA NA
## 2: (25,50] 0.8750000 1.0000000 1
## 3: (50,65] 0.6153846 1.0000000 1
## 4: (65,100] 0.3888889 0.8888889 NA