simple_p_contact_model.utf8

library(data.table)
library(ggplot2)
# Read the features-and-interventions table from disk, then dump its
# structure (column names, types, leading values) as a sanity check.
dt <- fread("~/Downloads/CGM/SURF-CGM/clean_data/feats_and_interventions.csv")
str(dt)

## Classes 'data.table' and 'data.frame':   525 obs. of  8 variables:
##  $ patient_id  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ week        : int  10 11 12 13 14 15 16 17 18 19 ...
##  $ intervention: int  0 0 0 0 0 NA 0 0 NA NA ...
##  $ date        : IDate, format: "2019-12-30" "2020-01-10" ...
##  $ PW          : int  14 115 84 100 40 132 87 68 100 67 ...
##  $ TIR         : int  48 48 48 72 69 65 50 51 42 57 ...
##  $ TBR54       : num  0 0 0 0 0 0.11 0.17 0 0 0 ...
##  $ TBR70       : int  0 1 0 2 0 1 1 0 0 0 ...
##  - attr(*, ".internal.selfref")=<externalptr>

Filter

# Keep only rows where the contact status (the `intervention` column) is
# known: `intervention >= 0` evaluates to NA for missing values, and
# data.table drops rows whose `i` condition is NA.
# Additionally require PW above 75 (this cutoff could be changed).
eval_dt <- dt[PW > 75 & intervention >= 0]

# Report the retained row count next to the total of `intervention`
# (the number of contacts, if the column is coded 0/1).
print(eval_dt[, .(.N, sum(intervention, na.rm = TRUE))])

##      N  V2
## 1: 331 106

Add number of flags each week

# Count, per row, how many of the three criteria are met:
#   TIR below 65, TBR54 above 1, TBR70 above 4.
# Each comparison is converted to a 0/1 double before summing, so the new
# column is numeric and is NA wherever any of the inputs is NA.
eval_dt[, num_flags := as.numeric(TIR < 65) +
  as.numeric(TBR54 > 1) +
  as.numeric(TBR70 > 4)]

# Fitted probability of `intervention` as a function of TIR, with one
# binomial-GLM curve per distinct num_flags value and no confidence band.
ggplot(
  data = eval_dt,
  mapping = aes(x = TIR, y = intervention, color = factor(num_flags))
) +
  ggtitle("Prob of contact vs TIR and # of flags") +
  theme_minimal() +
  geom_smooth(
    se = FALSE,
    method = "glm",
    method.args = list(family = "binomial")
  )

## `geom_smooth()` using formula 'y ~ x'

Make a simple table of contact probability by TIR bucket and number of flags. Drop rows (patient-weeks) without any flags.

# Bin TIR into four ranges with edges at 0, 25, 50, 65 and 100;
# include.lowest = TRUE makes the first bin closed at 0.
eval_dt[, TIR_bucket := cut(
  TIR,
  breaks = c(0, 25, 50, 65, 100),
  include.lowest = TRUE
)]

# Mean of `intervention` within each (num_flags, TIR_bucket) combination.
agg_dt <- eval_dt[, .(p_contact = mean(intervention)),
  by = c("num_flags", "TIR_bucket")
]

# Reshape to wide form: one row per TIR bucket, one column per flag count,
# leaving out the zero-flag groups.
dcast(agg_dt[num_flags > 0], TIR_bucket ~ num_flags)

## Using 'p_contact' as value column. Use 'value.var' to override

##    TIR_bucket         1         2  3
## 1:     [0,25] 1.0000000        NA NA
## 2:    (25,50] 0.8750000 1.0000000  1
## 3:    (50,65] 0.6153846 1.0000000  1
## 4:   (65,100] 0.3888889 0.8888889 NA