Libraries used:

# Data wrangling: supplies %>%, mutate() and select() used below.
library(tidyverse)
# Targeted maximum likelihood estimation: tmle().
library(tmle)
library(ggdag) # for nicer DAG image
# Make theme_dag() the session-wide default ggplot2 theme.
theme_set(theme_dag())
library(SuperLearner) # learner wrappers named in sl_libs (SL.glmnet, SL.ranger, SL.glm)
# Regression visualisation: visreg().
library(visreg)
# Seed the RNG once, up front -- presumably so the randomised fitting steps
# are reproducible when the script is run top to bottom.
set.seed(54321)

Import data

# Restore the saved analysis objects into the workspace. The code below uses
# `data.impute` and `synth_MIMIC`, so these files are expected to supply them
# (presumably one object per file -- confirm).
load("data_for_ML.impute.RData")
load("data_for_ML.synth.RData")

Create Albumin cutoff in both datasets

# Flag low albumin in each dataset: low_alb is 1 when min_Alb < 3 and 0
# otherwise (NA where min_Alb is missing). The same cutoff is used for both.
data.impute <- mutate(data.impute, low_alb = ifelse(min_Alb < 3, 1, 0))

synth_MIMIC <- mutate(synth_MIMIC, low_alb = ifelse(min_Alb < 3, 1, 0))

Crude ORs (continuous Alb)

MIMIC

# Unadjusted logistic regression of PrI on continuous minimum albumin,
# fitted to the imputed MIMIC data.
crude_OR_MIMIC <- glm(
  formula = pri ~ min_Alb,
  family = binomial(),
  data = data.impute
)
summary(crude_OR_MIMIC)
## 
## Call:
## glm(formula = pri ~ min_Alb, family = binomial(), data = data.impute)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9486  -0.4479  -0.3513  -0.2861   2.6919  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.19251    0.27739   0.694    0.488    
## min_Alb     -0.84194    0.09062  -9.291   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1917.5  on 3472  degrees of freedom
## Residual deviance: 1828.8  on 3471  degrees of freedom
## AIC: 1832.8
## 
## Number of Fisher Scoring iterations: 5
# Odds ratio per one-unit increase in min_Alb: the exponentiated slope.
exp(coef(crude_OR_MIMIC)["min_Alb"])
##   min_Alb 
## 0.4308749
# Plot the fitted log odds of PrI against min_Alb.
visreg(
  crude_OR_MIMIC, "min_Alb",
  main = "Crude effect of Alb on PrI (MIMIC)",
  ylab = "Log odds (PrI)",
  xlab = "Alb",
  ylim = c(-5, 1)
)

SYNTH

# Unadjusted logistic regression of PrI on continuous minimum albumin,
# fitted to the synthetic data.
crude_OR_synth <- glm(
  formula = pri ~ min_Alb,
  family = binomial(),
  data = synth_MIMIC
)
summary(crude_OR_synth)
## 
## Call:
## glm(formula = pri ~ min_Alb, family = binomial(), data = synth_MIMIC)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6491  -0.4523  -0.4046  -0.3684   2.4567  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.10035    0.28188  -3.904 9.48e-05 ***
## min_Alb     -0.38900    0.08691  -4.476 7.61e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2038  on 3472  degrees of freedom
## Residual deviance: 2018  on 3471  degrees of freedom
## AIC: 2022
## 
## Number of Fisher Scoring iterations: 5
# Odds ratio per one-unit increase in min_Alb: the exponentiated slope.
exp(coef(crude_OR_synth)["min_Alb"])
##   min_Alb 
## 0.6777347
# Plot the fitted log odds of PrI against min_Alb.
visreg(
  crude_OR_synth, "min_Alb",
  main = "Crude effect of Alb on PrI (Synth)",
  ylab = "Log odds (PrI)",
  xlab = "Alb",
  ylim = c(-5, 1)
)

Crude ORs (Binary - Low Alb)

MIMIC

# Unadjusted logistic regression of PrI on the binary low-albumin indicator,
# fitted to the imputed MIMIC data.
crude_OR_MIMIC2 <- glm(
  formula = pri ~ low_alb,
  family = binomial(),
  data = data.impute
)
summary(crude_OR_MIMIC2)
## 
## Call:
## glm(formula = pri ~ low_alb, family = binomial(), data = data.impute)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.5468  -0.5468  -0.3312  -0.3312   2.4210  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.87574    0.09014  -31.90   <2e-16 ***
## low_alb      1.05096    0.12724    8.26   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1917.5  on 3472  degrees of freedom
## Residual deviance: 1850.5  on 3471  degrees of freedom
## AIC: 1854.5
## 
## Number of Fisher Scoring iterations: 5
# Odds ratio for low_alb = 1 versus low_alb = 0: the exponentiated coefficient.
exp(coef(crude_OR_MIMIC2)["low_alb"])
##  low_alb 
## 2.860401
# Plot the fitted log odds of PrI against the low-albumin indicator.
# The title and x-axis label name the binary indicator (low_alb, i.e.
# min_Alb < 3) rather than "Alb", so this figure is not mistaken for the
# continuous-albumin plot.
visreg(
  crude_OR_MIMIC2, "low_alb",
  main = "Crude effect of low Alb on PrI (MIMIC)",
  ylab = "Log odds (PrI)",
  xlab = "Low Alb (min_Alb < 3)",
  ylim = c(-4, 1)
)

SYNTH

# Unadjusted logistic regression of PrI on the binary low-albumin indicator,
# fitted to the synthetic data.
crude_OR_synth2 <- glm(
  formula = pri ~ low_alb,
  family = binomial(),
  data = synth_MIMIC
)
summary(crude_OR_synth2)
## 
## Call:
## glm(formula = pri ~ low_alb, family = binomial(), data = synth_MIMIC)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.4698  -0.4698  -0.4041  -0.4041   2.2565  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.46430    0.07518 -32.780   <2e-16 ***
## low_alb      0.31602    0.12684   2.492   0.0127 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2038.0  on 3472  degrees of freedom
## Residual deviance: 2031.9  on 3471  degrees of freedom
## AIC: 2035.9
## 
## Number of Fisher Scoring iterations: 5
# Odds ratio for low_alb = 1 versus low_alb = 0: the exponentiated coefficient.
exp(coef(crude_OR_synth2)["low_alb"])
##  low_alb 
## 1.371655
# Plot the fitted log odds of PrI against the low-albumin indicator.
# The title and x-axis label name the binary indicator (low_alb, i.e.
# min_Alb < 3) rather than "Alb", so this figure is not mistaken for the
# continuous-albumin plot.
visreg(
  crude_OR_synth2, "low_alb",
  main = "Crude effect of low Alb on PrI (Synth)",
  ylab = "Log odds (PrI)",
  xlab = "Low Alb (min_Alb < 3)",
  ylim = c(-4, 1)
)

DAG

# Assumed causal structure: covariates W point into both the exposure A and
# the outcome Y, and A points into Y.
D_AW_DAG <- dagify(
  Y ~ A + W,
  A ~ W,
  exposure = "A",
  outcome = "Y",
  labels = c(
    Y = "PrI development",
    A = "Low Albumin",
    W = "Covariates"
  )
)
# Draw the DAG with node names and the descriptive labels defined above.
ggdag(D_AW_DAG, use_labels = "label", text = TRUE)

W: {age, min_Hgb, max_HR, min_Braden_Nutrition}

TMLE

Candidate learners for SL:

# SuperLearner wrapper names passed to tmle() for both the outcome and the
# exposure models.
sl_libs <- c("SL.glmnet", "SL.ranger", "SL.glm")

Format data

# Outcome as 0/1. Subtracting 1 from as.numeric() assumes `pri` is a
# two-level factor with integer codes 1 and 2 -- TODO confirm.
Y.mimic <- as.numeric(data.impute[["pri"]]) - 1
table(Y.mimic)
## Y.mimic
##    0    1 
## 3199  274
# Exposure: the low-albumin indicator.
A.mimic <- data.impute[["low_alb"]]
# Adjustment covariates W.
W.mimic <- select(data.impute, age, min_Hgb, max_HR, min_Braden_Nutrition)
  
# Outcome as 0/1. Subtracting 1 from as.numeric() assumes `pri` is a
# two-level factor with integer codes 1 and 2 -- TODO confirm.
Y.synth <- as.numeric(synth_MIMIC[["pri"]]) - 1
table(Y.synth)
## Y.synth
##    0    1 
## 3174  299
# Exposure: the low-albumin indicator.
A.synth <- synth_MIMIC[["low_alb"]]
# Adjustment covariates W.
W.synth <- select(synth_MIMIC, age, min_Hgb, max_HR, min_Braden_Nutrition)

TMLE 1: MIMIC

# TMLE for the effect of low albumin (A) on PrI (Y), adjusting for W, in the
# MIMIC data. The outcome (Q) and exposure (g) models use the same
# SuperLearner library.
tmle_1 <- tmle(
  Y = Y.mimic,
  A = A.mimic,
  W = W.mimic,
  Q.SL.library = sl_libs,
  g.SL.library = sl_libs,
  family = "binomial"
)

tmle_1
##  Additive Effect
##    Parameter Estimate:  0.062305
##    Estimated Variance:  6.9979e-05
##               p-value:  9.4755e-14
##     95% Conf Interval: (0.045909, 0.078701) 
## 
##  Additive Effect among the Treated
##    Parameter Estimate:  0.069761
##    Estimated Variance:  0.00014812
##               p-value:  9.9261e-09
##     95% Conf Interval: (0.045907, 0.093615) 
## 
##  Additive Effect among the Controls
##    Parameter Estimate:  0.058439
##    Estimated Variance:  5.4971e-05
##               p-value:  3.2218e-15
##     95% Conf Interval: (0.043907, 0.072971) 
## 
##  Relative Risk
##    Parameter Estimate:  2.0749
##               p-value:  1.1662e-13
##     95% Conf Interval: (1.711, 2.516) 
## 
##               log(RR):  0.72989
##     variance(log(RR)):  0.0096749 
## 
##  Odds Ratio
##    Parameter Estimate:  2.2218
##               p-value:  8.7899e-14
##     95% Conf Interval: (1.8013, 2.7405) 
## 
##               log(OR):  0.79832
##     variance(log(OR)):  0.011458

TMLE 2: Synth

# TMLE for the effect of low albumin (A) on PrI (Y), adjusting for W, in the
# synthetic data. The outcome (Q) and exposure (g) models use the same
# SuperLearner library.
tmle_2 <- tmle(
  Y = Y.synth,
  A = A.synth,
  W = W.synth,
  Q.SL.library = sl_libs,
  g.SL.library = sl_libs,
  family = "binomial"
)

tmle_2
##  Additive Effect
##    Parameter Estimate:  0.0067901
##    Estimated Variance:  7.3483e-05
##               p-value:  0.4283
##     95% Conf Interval: (-0.010011, 0.023592) 
## 
##  Additive Effect among the Treated
##    Parameter Estimate:  0.0047641
##    Estimated Variance:  0.00014155
##               p-value:  0.68884
##     95% Conf Interval: (-0.018555, 0.028083) 
## 
##  Additive Effect among the Controls
##    Parameter Estimate:  0.016705
##    Estimated Variance:  6.0763e-05
##               p-value:  0.032111
##     95% Conf Interval: (0.0014267, 0.031983) 
## 
##  Relative Risk
##    Parameter Estimate:  1.0802
##               p-value:  0.42638
##     95% Conf Interval: (0.89325, 1.3062) 
## 
##               log(RR):  0.077103
##     variance(log(RR)):  0.0093965 
## 
##  Odds Ratio
##    Parameter Estimate:  1.0882
##               p-value:  0.42654
##     95% Conf Interval: (0.8835, 1.3404) 
## 
##               log(OR):  0.084549
##     variance(log(OR)):  0.011307