Libraries used:

library(tidyverse)  # many useful data manipulation packages, such as dplyr, ggplot2, and tidyr
library(synthpop)   # create synthetic data 
library(h2o)
# Fix the state of R's random number generator so that R-side random draws
# made later in this script are repeatable between runs.
set.seed(seed = 12345)

Import and format data for H2O

# Restore the objects saved in the .RData file into the workspace.
# load() returns (invisibly) the names of the objects it created, so verify
# that the `viable` data set used by the rest of the script really came from
# this file; otherwise the script would either fail later with an
# "object not found" error or silently use a stale `viable` already present
# in the session.
loaded_objects <- load(file = "viable.sans_Braden.RData")
stopifnot("viable" %in% loaded_objects)

Generate synthetic data for dashboards

# Build a synthetic version of `viable` using synthpop's default settings.
# The returned object keeps the synthesised data in its `syn` element.
synth.viable <- synthpop::syn(data = viable)
## Warning: In your synthesis there are numeric variables with 5 or fewer levels: pri, Vasopressors, lab_count_observed, rx_count_observed, BMI_observed, min_pH_observed, min_Hgb_observed, min_Alb_observed, max_Lactate_observed, min_hgb_A1C_observed, min_O2_sat_observed, min_mbp_observed, min_Abpm_observed, min_Abps_observed, min_Abpd_observed, max_WBC_observed, max_temp_observed, min_pO2_observed, min_BE_observed, max_pCO2_observed.
## Consider changing them to factors. You can do it using parameter 'minnumlevels'.
## 
## Variable(s): min_dbp_observed, max_HR_observed, min_HR_observed, min_sbp_observed numeric but with only 1 or fewer distinct values turned into factor(s) for synthesis.
## 
## Variable min_dbp_observed has only one value so its method has been changed to "constant".
## Variable min_dbp_observed removed as predictor because only one value.
## Variable max_HR_observed has only one value so its method has been changed to "constant".
## Variable max_HR_observed removed as predictor because only one value.
## Variable min_HR_observed has only one value so its method has been changed to "constant".
## Variable min_HR_observed removed as predictor because only one value.
## Variable min_sbp_observed has only one value so its method has been changed to "constant".
## Variable min_sbp_observed removed as predictor because only one value.
## 
## Synthesis
## -----------
##  pri lab_count rx_count BMI min_pH age min_Hgb min_Alb max_Lactate min_dbp
##  previous_visits min_hgb_A1C max_HR min_HR min_O2_sat min_sbp min_mbp min_Abpm min_Abps min_Abpd
##  max_WBC max_temp max_PEEP min_pO2 min_BE max_pCO2 Vasopressors lab_count_observed rx_count_observed BMI_observed
##  min_pH_observed min_Hgb_observed min_Alb_observed max_Lactate_observed min_dbp_observed min_hgb_A1C_observed max_HR_observed min_HR_observed min_O2_sat_observed min_sbp_observed
##  min_mbp_observed min_Abpm_observed min_Abps_observed min_Abpd_observed max_WBC_observed max_temp_observed min_pO2_observed min_BE_observed max_pCO2_observed
# Pull the synthesised data out of the syn() result ...
synth_viable <- synth.viable[["syn"]]
# ... and export it as CSV, leaving out the row-name column.
write.csv(
  x = synth_viable,
  file = "synth_viable.csv",
  row.names = FALSE
)

Format the pri variable and convert the data to an H2O frame

# Start a local H2O cluster, or attach to one that is already running.
# nthreads = -1 lets H2O use every available core.
h2o::h2o.init(nthreads = -1)
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         1 days 21 hours 
##     H2O cluster timezone:       America/Denver 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.40.0.4 
##     H2O cluster version age:    4 months 
##     H2O cluster name:           H2O_started_from_R_andywilson1_iiy192 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   7.81 GB 
##     H2O cluster total cores:    10 
##     H2O cluster allowed cores:  10 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.3.1 (2023-06-16)
# Store the outcome column as a factor before uploading, so that H2O treats
# it as a categorical (classification) target.
viable[["pri"]] <- as.factor(viable[["pri"]])
# Copy the R data set into the H2O cluster as an H2O frame.
viable.h2o <- as.h2o(x = viable)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Name of the outcome column.
y <- "pri"
# Every remaining column of `viable` is offered to the models as a predictor.
x <- setdiff(x = names(viable), y = y)
# Show the outcome name.
print(y)
## [1] "pri"
# Show the predictor column names.
print(x)
##  [1] "lab_count"            "rx_count"             "BMI"                 
##  [4] "min_pH"               "age"                  "min_Hgb"             
##  [7] "min_Alb"              "max_Lactate"          "min_dbp"             
## [10] "previous_visits"      "min_hgb_A1C"          "max_HR"              
## [13] "min_HR"               "min_O2_sat"           "min_sbp"             
## [16] "min_mbp"              "min_Abpm"             "min_Abps"            
## [19] "min_Abpd"             "max_WBC"              "max_temp"            
## [22] "max_PEEP"             "min_pO2"              "min_BE"              
## [25] "max_pCO2"             "Vasopressors"         "lab_count_observed"  
## [28] "rx_count_observed"    "BMI_observed"         "min_pH_observed"     
## [31] "min_Hgb_observed"     "min_Alb_observed"     "max_Lactate_observed"
## [34] "min_dbp_observed"     "min_hgb_A1C_observed" "max_HR_observed"     
## [37] "min_HR_observed"      "min_O2_sat_observed"  "min_sbp_observed"    
## [40] "min_mbp_observed"     "min_Abpm_observed"    "min_Abps_observed"   
## [43] "min_Abpd_observed"    "max_WBC_observed"     "max_temp_observed"   
## [46] "min_pO2_observed"     "min_BE_observed"      "max_pCO2_observed"

Ensemble super learner

# Run H2O AutoML to predict `y` from the predictor columns in `x`, training on
# the uploaded H2O frame.
#
# balance_classes = TRUE asks H2O to balance the class counts of the training
# data by over/under-sampling.
#
# seed: set.seed() only seeds R's own random number generator and is not
# forwarded to the H2O cluster, so the seed has to be passed explicitly for the
# run to be repeatable.
# NOTE(review): per the H2O AutoML documentation, a seed alone does not make
# the whole run reproducible. That also needs a `max_models` limit instead of
# a time budget, and DeepLearning excluded via `exclude_algos`. Neither is
# changed here, so the set of candidate models stays the same. Confirm
# whether strict reproducibility is needed.
# No max_models / max_runtime_secs is given, so H2O's default budget applies.
my_automl <- h2o.automl(x = x,
                        y = y,
                        training_frame = viable.h2o,
                        balance_classes = TRUE,
                        seed = 12345)
## 
  |                                                                            
  |                                                                      |   0%
## 09:08:38.616: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
## 09:09:19.294: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
## 09:09:23.100: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
## 09:09:33.832: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 09:09:34.500: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   8%
## 09:10:14.332: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
## 09:10:37.859: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |=======                                                               |  11%
## 09:10:44.498: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  12%
## 09:10:51.365: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
## 09:10:58.963: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 09:10:59.515: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |============                                                          |  17%
## 09:11:00.358: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |=============                                                         |  18%
## 09:12:34.148: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
## 09:12:58.606: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  22%
## 09:13:04.562: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |================                                                      |  24%
  |                                                                            
  |==================                                                    |  26%
## 09:13:14.215: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 09:13:15.68: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |==============================                                        |  44%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |===================================                                   |  51%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |==========================================================            |  84%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
## 10:01:24.59: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 10:01:26.412: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |=================================================================     |  94%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
## 10:07:19.77: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 10:07:20.775: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |===================================================================== |  98%
## 10:07:38.554: _train param, Dropping bad and constant columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 10:07:44.142: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |===================================================================== |  99%
## 10:07:49.818: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
## 10:08:06.66: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
## 10:08:23.556: _train param, Dropping unused columns: [max_HR_observed, min_HR_observed, min_sbp_observed, min_dbp_observed]

Save the leader model

# Write the AutoML leader model to disk; `force = TRUE` is passed so an
# existing file at the destination does not block the save.
# NOTE(review): the destination is an absolute, user-specific path -- confirm
# it exists before re-running this on another machine.
h2o.saveModel(
  object = my_automl@leader,
  path = "/Users/andywilson1/Library/CloudStorage/Box-Box/Active/MIMIC4 v.1.0",
  force = TRUE
)
## [1] "/Users/andywilson1/Library/CloudStorage/Box-Box/Active/MIMIC4 v.1.0/StackedEnsemble_BestOfFamily_5_AutoML_2_20230829_90838"

Summary of the stacked ensemble

# Pull the leader model out of the AutoML result and keep it under a short
# name, then auto-print it to show the model summary and metrics.
my_aml <- slot(my_automl, "leader")
my_aml
## Model Details:
## ==============
## 
## H2OBinomialModel: stackedensemble
## Model ID:  StackedEnsemble_BestOfFamily_5_AutoML_2_20230829_90838 
## Model Summary for Stacked Ensemble: 
##                                          key            value
## 1                          Stacking strategy cross_validation
## 2       Number of base models (used / total)              5/6
## 3           # GBM base models (used / total)              1/1
## 4       # XGBoost base models (used / total)              1/1
## 5  # DeepLearning base models (used / total)              1/1
## 6           # GLM base models (used / total)              1/1
## 7           # DRF base models (used / total)              1/2
## 8                      Metalearner algorithm              GLM
## 9         Metalearner fold assignment scheme           Random
## 10                        Metalearner nfolds                5
## 11                   Metalearner fold_column               NA
## 12        Custom metalearner hyperparameters             None
## 
## 
## H2OBinomialMetrics: stackedensemble
## ** Reported on training data. **
## 
## MSE:  0.03784811
## RMSE:  0.1945459
## LogLoss:  0.1434172
## Mean Per-Class Error:  0.3277215
## AUC:  0.8747589
## AUCPR:  0.3653137
## Gini:  0.7495179
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##           0   1    Error        Rate
## 0      9350 234 0.024416   =234/9584
## 1       301 176 0.631027    =301/477
## Totals 9651 410 0.053176  =535/10061
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.206464    0.396843 100
## 2                       max f2  0.098854    0.461561 195
## 3                 max f0point5  0.234175    0.447898  82
## 4                 max accuracy  0.287058    0.956366  54
## 5                max precision  0.572051    1.000000   0
## 6                   max recall  0.006593    1.000000 384
## 7              max specificity  0.572051    1.000000   0
## 8             max absolute_mcc  0.232268    0.372074  83
## 9   max min_per_class_accuracy  0.069838    0.798623 235
## 10 max mean_per_class_accuracy  0.062766    0.800481 246
## 11                     max tns  0.572051 9584.000000   0
## 12                     max fns  0.572051  476.000000   0
## 13                     max fps  0.002180 9584.000000 399
## 14                     max tps  0.006593  477.000000 384
## 15                     max tnr  0.572051    1.000000   0
## 16                     max fnr  0.572051    0.997904   0
## 17                     max fpr  0.002180    1.000000 399
## 18                     max tpr  0.006593    1.000000 384
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## 
## H2OBinomialMetrics: stackedensemble
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  0.04345336
## RMSE:  0.2084547
## LogLoss:  0.1678189
## Mean Per-Class Error:  0.3506697
## AUC:  0.7949661
## AUCPR:  0.1739178
## Gini:  0.5899321
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##            0    1    Error         Rate
## 0      24709 2291 0.084852  =2291/27000
## 1        860  535 0.616487    =860/1395
## Totals 25569 2826 0.110970  =3151/28395
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold        value idx
## 1                       max f1  0.122098     0.253494 173
## 2                       max f2  0.075192     0.367367 233
## 3                 max f0point5  0.177291     0.235789 122
## 4                 max accuracy  0.432071     0.950977   8
## 5                max precision  0.607924     1.000000   0
## 6                   max recall  0.003141     1.000000 395
## 7              max specificity  0.607924     1.000000   0
## 8             max absolute_mcc  0.088601     0.223842 214
## 9   max min_per_class_accuracy  0.052760     0.720430 269
## 10 max mean_per_class_accuracy  0.048055     0.726700 278
## 11                     max tns  0.607924 27000.000000   0
## 12                     max fns  0.607924  1394.000000   0
## 13                     max fps  0.001531 27000.000000 399
## 14                     max tps  0.003141  1395.000000 395
## 15                     max tnr  0.607924     1.000000   0
## 16                     max fnr  0.607924     0.999283   0
## 17                     max fpr  0.001531     1.000000 399
## 18                     max tpr  0.003141     1.000000 395
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary: 
##                 mean         sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid
## accuracy    0.891255   0.031951   0.891084   0.905212   0.914703   0.836268
## auc         0.794948   0.010558   0.808681   0.803620   0.786388   0.790573
## err         0.108745   0.031951   0.108916   0.094788   0.085297   0.163732
## err_count 617.600000 182.118910 623.000000 542.000000 496.000000 930.000000
## f0point5    0.223991   0.026914   0.238187   0.237000   0.235394   0.175953
##           cv_5_valid
## accuracy    0.909008
## auc         0.785479
## err         0.090992
## err_count 497.000000
## f0point5    0.233422
## 
## ---
##                          mean        sd  cv_1_valid  cv_2_valid  cv_3_valid
## precision            0.205309  0.031028    0.215412    0.216842    0.226158
## r2                   0.069315  0.011807    0.083996    0.076416    0.068726
## recall               0.390476  0.104018    0.412752    0.377289    0.281356
## residual_deviance 1906.087500 79.260020 1966.611700 1857.442500 2014.034400
## rmse                 0.208411  0.004117    0.212687    0.204915    0.211772
## specificity          0.917314  0.039417    0.917374    0.931680    0.948551
##                    cv_4_valid  cv_5_valid
## precision            0.150313    0.217822
## r2                   0.052944    0.064493
## recall               0.553846    0.327138
## residual_deviance 1834.611100 1857.737900
## rmse                 0.203388    0.209294
## specificity          0.849815    0.939149