library(tidyverse)
library(tmle)
library(ggdag) # for nicer DAG image
# Make ggdag's DAG theme the default ggplot2 theme for the rest of the session.
ggplot2::theme_set(ggdag::theme_dag())
library(SuperLearner)
library(visreg)
# Fix the RNG state so that later randomised steps are repeatable.
set.seed(54321)

# Restore the saved workspaces; they presumably supply the `data.impute` and
# `synth_MIMIC` data frames used below -- confirm against the .RData files.
load("data_for_ML.impute.RData")
load("data_for_ML.synth.RData")

# Add the binary exposure flag to both data sets: 1 when min_Alb is below 3,
# 0 otherwise (a missing min_Alb stays NA).
data.impute <- mutate(data.impute, low_alb = as.numeric(min_Alb < 3))
synth_MIMIC <- mutate(synth_MIMIC, low_alb = as.numeric(min_Alb < 3))
# Unadjusted logistic regression of pri on continuous min_Alb in data.impute.
crude_OR_MIMIC <- glm(
  formula = pri ~ min_Alb,
  family = binomial(),
  data = data.impute
)
summary(crude_OR_MIMIC)
##
## Call:
## glm(formula = pri ~ min_Alb, family = binomial(), data = data.impute)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9486 -0.4479 -0.3513 -0.2861 2.6919
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.19251 0.27739 0.694 0.488
## min_Alb -0.84194 0.09062 -9.291 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1917.5 on 3472 degrees of freedom
## Residual deviance: 1828.8 on 3471 degrees of freedom
## AIC: 1832.8
##
## Number of Fisher Scoring iterations: 5
# Exponentiate the min_Alb slope to express it as an odds ratio.
exp(coef(crude_OR_MIMIC)["min_Alb"])
## min_Alb
## 0.4308749
# Plot the fitted min_Alb effect; the y-axis is labelled as log odds.
visreg(
  crude_OR_MIMIC, "min_Alb",
  xlab = "Alb",
  ylab = "Log odds (PrI)",
  ylim = c(-5, 1),
  main = "Crude effect of Alb on PrI (MIMIC)"
)
# Unadjusted logistic regression of pri on continuous min_Alb in synth_MIMIC.
crude_OR_synth <- glm(
  formula = pri ~ min_Alb,
  family = binomial(),
  data = synth_MIMIC
)
summary(crude_OR_synth)
##
## Call:
## glm(formula = pri ~ min_Alb, family = binomial(), data = synth_MIMIC)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6491 -0.4523 -0.4046 -0.3684 2.4567
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.10035 0.28188 -3.904 9.48e-05 ***
## min_Alb -0.38900 0.08691 -4.476 7.61e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2038 on 3472 degrees of freedom
## Residual deviance: 2018 on 3471 degrees of freedom
## AIC: 2022
##
## Number of Fisher Scoring iterations: 5
# Exponentiate the min_Alb slope to express it as an odds ratio.
exp(coef(crude_OR_synth)["min_Alb"])
## min_Alb
## 0.6777347
# Plot the fitted min_Alb effect; the y-axis is labelled as log odds.
visreg(
  crude_OR_synth, "min_Alb",
  xlab = "Alb",
  ylab = "Log odds (PrI)",
  ylim = c(-5, 1),
  main = "Crude effect of Alb on PrI (Synth)"
)
# Unadjusted logistic regression of pri on the binary low_alb flag in
# data.impute.
crude_OR_MIMIC2 <- glm(
  formula = pri ~ low_alb,
  family = binomial(),
  data = data.impute
)
summary(crude_OR_MIMIC2)
##
## Call:
## glm(formula = pri ~ low_alb, family = binomial(), data = data.impute)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.5468 -0.5468 -0.3312 -0.3312 2.4210
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.87574 0.09014 -31.90 <2e-16 ***
## low_alb 1.05096 0.12724 8.26 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1917.5 on 3472 degrees of freedom
## Residual deviance: 1850.5 on 3471 degrees of freedom
## AIC: 1854.5
##
## Number of Fisher Scoring iterations: 5
# Odds ratio for low_alb = 1 versus low_alb = 0.
exp(coef(crude_OR_MIMIC2)["low_alb"])
## low_alb
## 2.860401
# The predictor plotted here is the 0/1 low_alb indicator (min_Alb < 3), not
# the albumin value itself. The title and x-axis label say so explicitly so
# this figure cannot be mistaken for the continuous-albumin plot.
visreg(
  crude_OR_MIMIC2, "low_alb",
  main = "Crude effect of low Alb on PrI (MIMIC)",
  xlab = "Low Alb (min_Alb < 3)",
  ylab = "Log odds (PrI)",
  ylim = c(-4, 1)
)
# Unadjusted logistic regression of pri on the binary low_alb flag in
# synth_MIMIC.
crude_OR_synth2 <- glm(
  formula = pri ~ low_alb,
  family = binomial(),
  data = synth_MIMIC
)
summary(crude_OR_synth2)
##
## Call:
## glm(formula = pri ~ low_alb, family = binomial(), data = synth_MIMIC)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4698 -0.4698 -0.4041 -0.4041 2.2565
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.46430 0.07518 -32.780 <2e-16 ***
## low_alb 0.31602 0.12684 2.492 0.0127 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2038.0 on 3472 degrees of freedom
## Residual deviance: 2031.9 on 3471 degrees of freedom
## AIC: 2035.9
##
## Number of Fisher Scoring iterations: 5
# Odds ratio for low_alb = 1 versus low_alb = 0.
exp(coef(crude_OR_synth2)["low_alb"])
## low_alb
## 1.371655
# The predictor plotted here is the 0/1 low_alb indicator (min_Alb < 3), not
# the albumin value itself. The title and x-axis label say so explicitly so
# this figure cannot be mistaken for the continuous-albumin plot.
visreg(
  crude_OR_synth2, "low_alb",
  main = "Crude effect of low Alb on PrI (Synth)",
  xlab = "Low Alb (min_Alb < 3)",
  ylab = "Log odds (PrI)",
  ylim = c(-4, 1)
)
# Causal structure encoded below: W points to both A and Y, and A points to Y.
D_AW_DAG <- dagify(
  Y ~ A + W,
  A ~ W,
  exposure = "A",
  outcome = "Y",
  labels = c(
    Y = "PrI development",
    A = "Low Albumin",
    W = "Covariates"
  )
)
# Draw the DAG with node names as text and the descriptive labels attached.
ggdag(D_AW_DAG, use_labels = "label", text = TRUE)
# W: {age, min_Hgb, max_HR, min_Braden_Nutrition}
# Candidate learners for SuperLearner (SL):
# SuperLearner wrapper names that make up the candidate learner library.
sl_libs <- c("SL.glmnet", "SL.ranger", "SL.glm")
# Outcome recoded to 0/1 by subtracting 1 from the numeric codes of pri.
# NOTE(review): this presumes pri is stored with codes 1 and 2 (e.g. a
# two-level factor) -- confirm against the data.
Y.mimic <- as.numeric(data.impute[["pri"]]) - 1
table(Y.mimic)
## Y.mimic
## 0 1
## 3199 274
# Exposure indicator and adjustment covariates taken from data.impute.
A.mimic <- data.impute[["low_alb"]]
W.mimic <- select(data.impute, age, min_Hgb, max_HR, min_Braden_Nutrition)
# Outcome recoded to 0/1 by subtracting 1 from the numeric codes of pri.
# NOTE(review): this presumes pri is stored with codes 1 and 2 (e.g. a
# two-level factor) -- confirm against the data.
Y.synth <- as.numeric(synth_MIMIC[["pri"]]) - 1
table(Y.synth)
## Y.synth
## 0 1
## 3174 299
# Exposure indicator and adjustment covariates taken from synth_MIMIC.
A.synth <- synth_MIMIC[["low_alb"]]
W.synth <- select(synth_MIMIC, age, min_Hgb, max_HR, min_Braden_Nutrition)
# TMLE fit on the data.impute inputs: binary outcome Y.mimic, exposure A.mimic,
# covariates W.mimic. The same learner library is passed for both the
# Q.SL.library and g.SL.library arguments.
tmle_1 <- tmle(
  Y = Y.mimic,
  A = A.mimic,
  W = W.mimic,
  Q.SL.library = sl_libs,
  g.SL.library = sl_libs,
  family = "binomial"
)
tmle_1
## Additive Effect
## Parameter Estimate: 0.062305
## Estimated Variance: 6.9979e-05
## p-value: 9.4755e-14
## 95% Conf Interval: (0.045909, 0.078701)
##
## Additive Effect among the Treated
## Parameter Estimate: 0.069761
## Estimated Variance: 0.00014812
## p-value: 9.9261e-09
## 95% Conf Interval: (0.045907, 0.093615)
##
## Additive Effect among the Controls
## Parameter Estimate: 0.058439
## Estimated Variance: 5.4971e-05
## p-value: 3.2218e-15
## 95% Conf Interval: (0.043907, 0.072971)
##
## Relative Risk
## Parameter Estimate: 2.0749
## p-value: 1.1662e-13
## 95% Conf Interval: (1.711, 2.516)
##
## log(RR): 0.72989
## variance(log(RR)): 0.0096749
##
## Odds Ratio
## Parameter Estimate: 2.2218
## p-value: 8.7899e-14
## 95% Conf Interval: (1.8013, 2.7405)
##
## log(OR): 0.79832
## variance(log(OR)): 0.011458
# TMLE fit on the synth_MIMIC inputs: binary outcome Y.synth, exposure A.synth,
# covariates W.synth. The same learner library is passed for both the
# Q.SL.library and g.SL.library arguments.
tmle_2 <- tmle(
  Y = Y.synth,
  A = A.synth,
  W = W.synth,
  Q.SL.library = sl_libs,
  g.SL.library = sl_libs,
  family = "binomial"
)
tmle_2
## Additive Effect
## Parameter Estimate: 0.0067901
## Estimated Variance: 7.3483e-05
## p-value: 0.4283
## 95% Conf Interval: (-0.010011, 0.023592)
##
## Additive Effect among the Treated
## Parameter Estimate: 0.0047641
## Estimated Variance: 0.00014155
## p-value: 0.68884
## 95% Conf Interval: (-0.018555, 0.028083)
##
## Additive Effect among the Controls
## Parameter Estimate: 0.016705
## Estimated Variance: 6.0763e-05
## p-value: 0.032111
## 95% Conf Interval: (0.0014267, 0.031983)
##
## Relative Risk
## Parameter Estimate: 1.0802
## p-value: 0.42638
## 95% Conf Interval: (0.89325, 1.3062)
##
## log(RR): 0.077103
## variance(log(RR)): 0.0093965
##
## Odds Ratio
## Parameter Estimate: 1.0882
## p-value: 0.42654
## 95% Conf Interval: (0.8835, 1.3404)
##
## log(OR): 0.084549
## variance(log(OR)): 0.011307