library(data.table)
library(ggplot2)
library(estimatr)
library(plm)
## 
## Attaching package: 'plm'
## The following object is masked from 'package:data.table':
## 
##     between
dt = fread("~/Google Drive/Stanford/CGM/deid_eligibility_dt.csv")
str(dt)
## Classes 'data.table' and 'data.frame':   2984 obs. of  15 variables:
##  $ pat_id             : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ obs_week_id        : int  60 61 62 63 64 65 66 67 68 69 ...
##  $ eligible_for_review: int  0 0 0 1 0 0 0 1 0 0 ...
##  $ TIR                : num  1 1 0.988 0.987 0.997 ...
##  $ TIR_lead           : num  1 0.988 0.987 0.997 1 ...
##  $ TIR_lag            : num  0.982 1 1 0.988 0.987 ...
##  $ dTIR_2w            : num  0.01759 -0.01215 -0.0132 0.00928 0.0132 ...
##  $ dTIR_1w            : num  0 -0.01215 -0.00106 0.01033 0.00287 ...
##  $ p_worn             : num  0.446 0.61 0.858 0.301 0.519 ...
##  $ p_worn_lag         : num  0.818 0.446 0.61 0.858 0.301 ...
##  $ pump_use           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ age_at_onset       : int  14 14 14 14 14 14 14 14 14 14 ...
##  $ public_insurance   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ sexF               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ tide_message_sent  : int  0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, ".internal.selfref")=<externalptr>

Regression tests

“First stage”, P(message | eligible)

# All
summary(lm(tide_message_sent ~ eligible_for_review, data=dt))
## 
## Call:
## lm(formula = tide_message_sent ~ eligible_for_review, data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.30214 -0.00805 -0.00805 -0.00805  0.99195 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.008050   0.005131   1.569    0.117    
## eligible_for_review 0.294089   0.010249  28.694   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2426 on 2982 degrees of freedom
## Multiple R-squared:  0.2164, Adjusted R-squared:  0.2161 
## F-statistic: 823.4 on 1 and 2982 DF,  p-value: < 2.2e-16
# Filtered to TIR_lag < 65% and p_worn_lag > 50% [criteria used in dashboard]
summary(lm(tide_message_sent ~ eligible_for_review, data=dt[TIR_lag<0.65 & p_worn_lag>0.5]))
## 
## Call:
## lm(formula = tide_message_sent ~ eligible_for_review, data = dt[TIR_lag < 
##     0.65 & p_worn_lag > 0.5])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38699 -0.01036 -0.01036 -0.01036  0.98964 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.01036    0.00881   1.176     0.24    
## eligible_for_review  0.37663    0.01757  21.439   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2597 on 1159 degrees of freedom
## Multiple R-squared:  0.284,  Adjusted R-squared:  0.2833 
## F-statistic: 459.6 on 1 and 1159 DF,  p-value: < 2.2e-16

“Reduced form”, E[dTIR | eligible]

dTIR = TIR_lead - TIR_lag

# All with patient FEs
summary(plm(dTIR_2w ~ eligible_for_review, 
         data = dt,
         effect = "individual",
         model = "within",
         index = "pat_id"))$coefficients
##                        Estimate  Std. Error  t-value   Pr(>|t|)
## eligible_for_review 0.009501993 0.005447072 1.744422 0.08120097
# Filtered to TIR_lag < 65% and p_worn_lag > 50% [criteria used in dashboard]
summary(plm(dTIR_2w ~ eligible_for_review, 
         data = dt[TIR_lag<0.65 & p_worn_lag>0.5],
         effect = "individual",
         model = "within",
         index = "pat_id"))$coefficients
##                       Estimate  Std. Error  t-value   Pr(>|t|)
## eligible_for_review 0.02088126 0.009045574 2.308451 0.02116911

IVs, dTIR ~ tide_message_sent | eligible

# All with clustered errors by patient ID
summary(iv_robust(dTIR_2w ~ tide_message_sent | eligible_for_review, 
                  data = dt, diagnostics=T,
                  clusters = pat_id))
## 
## Call:
## iv_robust(formula = dTIR_2w ~ tide_message_sent | eligible_for_review, 
##     data = dt, clusters = pat_id, diagnostics = T)
## 
## Standard error type:  CR2 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  CI Lower  CI Upper
## (Intercept)       -0.004016   0.002032  -1.976  0.05337 -0.008093 6.052e-05
## tide_message_sent  0.029590   0.019319   1.532  0.13160 -0.009166 6.835e-02
##                      DF
## (Intercept)       52.86
## tide_message_sent 52.56
## 
## Multiple R-squared:  0.001125 ,  Adjusted R-squared:  0.0007603 
## F-statistic: 2.346 on 1 and 77 DF,  p-value: 0.1297
## 
## Diagnostics:
##                  numdf dendf  value p.value    
## Weak instruments     1    77 75.391 4.9e-13 ***
## Wu-Hausman           1    77  0.487   0.487    
## Overidentifying      0    NA     NA      NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Filtered to TIR_lag < 65% and p_worn_lag > 50% [criteria used in dashboard]
summary(iv_robust(dTIR_2w ~ tide_message_sent | eligible_for_review, 
                  data = dt[p_worn_lag>0.5 & TIR_lag<0.65], diagnostics=T,
                  clusters = pat_id))
## 
## Call:
## iv_robust(formula = dTIR_2w ~ tide_message_sent | eligible_for_review, 
##     data = dt[p_worn_lag > 0.5 & TIR_lag < 0.65], clusters = pat_id, 
##     diagnostics = T)
## 
## Standard error type:  CR2 
## 
## Coefficients:
##                   Estimate Std. Error t value  Pr(>|t|) CI Lower CI Upper    DF
## (Intercept)        0.02306   0.005247   4.394 0.0001199 0.012357  0.03376 31.14
## tide_message_sent  0.05129   0.021974   2.334 0.0260923 0.006515  0.09607 31.67
## 
## Multiple R-squared:  -0.003975 , Adjusted R-squared:  -0.004883 
## F-statistic: 5.449 on 1 and 64 DF,  p-value: 0.02273
## 
## Diagnostics:
##                  numdf dendf  value  p.value    
## Weak instruments     1    64 47.959 2.52e-09 ***
## Wu-Hausman           1    64  3.093   0.0834 .  
## Overidentifying      0    NA     NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1