data <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2020/2020-09-22/members.csv')
## Rows: 76519 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): expedition_id, member_id, peak_id, peak_name, season, sex, citizen...
## dbl  (5): year, age, highpoint_metres, death_height_metres, injury_height_me...
## lgl  (6): hired, success, solo, oxygen_used, died, injured
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove variables with too many missing values
data_clean <- data %>%
  select(-c( highpoint_metres, death_height_metres, death_cause, injured, injury_type, injury_height_metres, age)) %>%
    
# Remove Irrelevant Variables
    select(-oxygen_used, -solo, -hired) %>%
    
# Remove Redundant Variables
    select(-c(peak_id)) %>%

# Remove Duplicates in Member_id
    distinct(member_id, .keep_all = TRUE) %>%
    
    select(-expedition_role, -peak_name, -citizenship, -sex) %>%
    na.omit() %>%

    mutate(across(where(is.logical), as.factor)) %>%
    mutate(across(where(is.character), as.factor)) %>%
    mutate(died = if_else(died == TRUE, "died", "not_died")) %>% mutate(died = as.factor(died))

Split Data

set.seed(1234)

members_split <- initial_split( data_clean, strata = "died")
members_train <- training(members_split)
members_test <- testing(members_split)

Recipes

recipe_obj <- recipe(died ~ ., data = members_train) %>%
    
    # Remove zero variance variables
    step_zv(all_predictors()) 

Model

# Initialize h20
h2o.init()
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         8 hours 2 minutes 
##     H2O cluster timezone:       America/New_York 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.44.0.3 
##     H2O cluster version age:    1 year, 4 months and 5 days 
##     H2O cluster name:           H2O_started_from_R_sheac_zyd177 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.91 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.4.2 (2024-10-31 ucrt)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (1 year, 4 months and 5 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
split.h2o <- h2o.splitFrame(as.h2o(members_train), ratios = c(0.85), seed = 2345)
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%
train_h2o <- split.h2o[[1]]
valid_h2o <- split.h2o[[2]]
test_h20 <- as.h2o(members_test)
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%
y <- "died"
x <- setdiff(names(members_train), y)

models_h2o <- h2o.automl(
    x = x,
    y = y, 
    training_frame    = train_h2o,
    validation_frame  = valid_h2o,
    leaderboard_frame = test_h20, 
    max_runtime_secs  = 30,
    nfolds            = 5,
    seed              = 3456
)
##   |                                                                              |                                                                      |   0%  |                                                                              |===                                                                   |   4%
## 23:59:24.922: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.
## 23:59:24.927: AutoML: XGBoost is not available; skipping it.  |                                                                              |=========                                                             |  13%  |                                                                              |================                                                      |  22%  |                                                                              |======================                                                |  31%  |                                                                              |============================                                          |  40%  |                                                                              |==================================                                    |  49%  |                                                                              |=========================================                             |  59%  |                                                                              |================================================                      |  68%  |                                                                              |======================================================                |  77%  |                                                                              |============================================================          |  86%  |                                                                              |===================================================================   |  96%  |                                                                              |======================================================================| 100%

Examine output of h2o.automl

models_h2o %>% typeof()
## [1] "S4"
models_h2o %>% slotNames()
## [1] "project_name"   "leader"         "leaderboard"    "event_log"     
## [5] "modeling_steps" "training_info"
models_h2o@leaderboard
##                                                   model_id       auc    logloss
## 1 StackedEnsemble_BestOfFamily_1_AutoML_11_20250425_235924 0.7628755 0.06530714
## 2                          GBM_1_AutoML_11_20250425_235924 0.7566792 0.06643769
## 3                          GLM_1_AutoML_11_20250425_235924 0.7064148 0.06919456
## 4                          DRF_1_AutoML_11_20250425_235924 0.5813538 0.81028190
##       aucpr mean_per_class_error      rmse        mse
## 1 0.9946560            0.4905568 0.1150300 0.01323191
## 2 0.9943798            0.4962386 0.1151975 0.01327047
## 3 0.9936840            0.5000000 0.1160812 0.01347484
## 4 0.9884582            0.5000000 0.1638095 0.02683354
## 
## [4 rows x 7 columns]
models_h2o@leader
## Model Details:
## ==============
## 
## H2OBinomialModel: stackedensemble
## Model ID:  StackedEnsemble_BestOfFamily_1_AutoML_11_20250425_235924 
## Model Summary for Stacked Ensemble: 
##                                    key            value
## 1                    Stacking strategy cross_validation
## 2 Number of base models (used / total)              2/2
## 3     # GBM base models (used / total)              1/1
## 4     # GLM base models (used / total)              1/1
## 5                Metalearner algorithm              GLM
## 6   Metalearner fold assignment scheme           Random
## 7                   Metalearner nfolds                5
## 8              Metalearner fold_column               NA
## 9   Custom metalearner hyperparameters             None
## 
## 
## H2OBinomialMetrics: stackedensemble
## ** Reported on training data. **
## 
## MSE:  0.01036999
## RMSE:  0.1018332
## LogLoss:  0.04091755
## Mean Per-Class Error:  0.2883362
## AUC:  0.9893431
## AUCPR:  0.9998391
## Gini:  0.9786862
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##          died not_died    Error      Rate
## died       62       84 0.575342   =84/146
## not_died   13     9762 0.001330  =13/9775
## Totals     75     9846 0.009777  =97/9921
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.816620    0.995056 341
## 2                       max f2  0.781544    0.997509 352
## 3                 max f0point5  0.901792    0.994437 276
## 4                 max accuracy  0.816620    0.990223 341
## 5                max precision  0.997718    1.000000   0
## 6                   max recall  0.572985    1.000000 388
## 7              max specificity  0.997718    1.000000   0
## 8             max absolute_mcc  0.816620    0.588506 341
## 9   max min_per_class_accuracy  0.953109    0.957545 203
## 10 max mean_per_class_accuracy  0.967640    0.969463 176
## 11                     max tns  0.997718  146.000000   0
## 12                     max fns  0.997718 9749.000000   0
## 13                     max fps  0.346818  146.000000 399
## 14                     max tps  0.572985 9775.000000 388
## 15                     max tnr  0.997718    1.000000   0
## 16                     max fnr  0.997718    0.997340   0
## 17                     max fpr  0.346818    1.000000 399
## 18                     max tpr  0.572985    1.000000 388
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: stackedensemble
## ** Reported on validation data. **
## 
## MSE:  0.01360119
## RMSE:  0.1166242
## LogLoss:  0.06382294
## Mean Per-Class Error:  0.4923664
## AUC:  0.8076697
## AUCPR:  0.9949519
## Gini:  0.6153394
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##          died not_died    Error       Rate
## died        2      129 0.984733   =129/131
## not_died    0     8542 0.000000    =0/8542
## Totals      2     8671 0.014874  =129/8673
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.572485    0.992506 398
## 2                       max f2  0.572485    0.996989 398
## 3                 max f0point5  0.856587    0.989236 355
## 4                 max accuracy  0.644122    0.985126 395
## 5                max precision  0.997794    1.000000   0
## 6                   max recall  0.572485    1.000000 398
## 7              max specificity  0.997794    1.000000   0
## 8             max absolute_mcc  0.914028    0.272880 305
## 9   max min_per_class_accuracy  0.989263    0.734723 110
## 10 max mean_per_class_accuracy  0.987612    0.751473 126
## 11                     max tns  0.997794  131.000000   0
## 12                     max fns  0.997794 8513.000000   0
## 13                     max fps  0.555953  131.000000 399
## 14                     max tps  0.572485 8542.000000 398
## 15                     max tnr  0.997794    1.000000   0
## 16                     max fnr  0.997794    0.996605   0
## 17                     max fpr  0.555953    1.000000 399
## 18                     max tpr  0.572485    1.000000 398
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: stackedensemble
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  0.01339676
## RMSE:  0.1157444
## LogLoss:  0.06563967
## Mean Per-Class Error:  0.4838673
## AUC:  0.7563866
## AUCPR:  0.9937829
## Gini:  0.5127732
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##          died not_died    Error        Rate
## died       23      688 0.967651    =688/711
## not_died    4    48000 0.000083    =4/48004
## Totals     27    48688 0.014205  =692/48715
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold        value idx
## 1                       max f1  0.600009     0.992843 378
## 2                       max f2  0.559648     0.997113 385
## 3                 max f0point5  0.778740     0.989027 331
## 4                 max accuracy  0.623293     0.985795 373
## 5                max precision  0.996568     0.996726  10
## 6                   max recall  0.559648     1.000000 385
## 7              max specificity  0.998300     0.998594   0
## 8             max absolute_mcc  0.896214     0.205835 248
## 9   max min_per_class_accuracy  0.989414     0.677360  65
## 10 max mean_per_class_accuracy  0.986104     0.695576  85
## 11                     max tns  0.998300   710.000000   0
## 12                     max fns  0.998300 47918.000000   0
## 13                     max fps  0.269136   711.000000 399
## 14                     max tps  0.559648 48004.000000 385
## 15                     max tnr  0.998300     0.998594   0
## 16                     max fnr  0.998300     0.998208   0
## 17                     max fpr  0.269136     1.000000 399
## 18                     max tpr  0.559648     1.000000 385
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary: 
##                 mean        sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid
## accuracy    0.985901  0.001253   0.986795   0.987397   0.985062   0.985943
## auc         0.754224  0.032457   0.770760   0.698636   0.782340   0.759724
## err         0.014099  0.001253   0.013205   0.012603   0.014938   0.014057
## err_count 137.400000 12.700394 129.000000 122.000000 147.000000 136.000000
## f0point5    0.988731  0.001009   0.989404   0.989947   0.988125   0.988779
##           cv_5_valid
## accuracy    0.984308
## auc         0.759659
## err         0.015692
## err_count 153.000000
## f0point5    0.987400
## 
## ---
##                          mean        sd  cv_1_valid  cv_2_valid  cv_3_valid
## precision            0.985974  0.001255    0.986790    0.987491    0.985251
## r2                   0.067863  0.011998    0.057531    0.052297    0.076715
## recall               0.999917  0.000087    1.000000    0.999895    0.999794
## residual_deviance 1278.190700 75.516750 1221.464500 1212.383000 1329.822800
## rmse                 0.115653  0.004428    0.112501    0.110773    0.118875
## specificity          0.039393  0.009853    0.030075    0.047244    0.052288
##                    cv_4_valid  cv_5_valid
## precision            0.986038    0.984300
## r2                   0.077640    0.075132
## recall               0.999895    1.000000
## residual_deviance 1242.637100 1384.645800
## rmse                 0.114689    0.121428
## specificity          0.035714    0.031646

Load and Save

best.model <- h2o.loadModel("h2o_models/GBM_1_AutoML_10_20250425_235231")

Make Predictions

predictions <- h2o.predict(best.model, newdata = test_h20)
##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'expedition_id' has levels not trained on: ["AMAD00308",
## "AMAD00311", "AMAD00320", "AMAD03317", "AMAD03319", "AMAD04103", "AMAD04318",
## "AMAD04324", "AMAD05307", "AMAD06105", ...304 not listed..., "RAMT98301",
## "RATC19101", "SAIP98302", "SHEY07201", "SNOW79301", "TENR12301", "TILI86301",
## "TKPO08101", "TUKU97103", "YANS10301"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'member_id' has levels not trained on: ["ACHN18301-03",
## "ACHN18301-09", "AMAD00102-05", "AMAD00103-01", "AMAD00103-02", "AMAD00104-03",
## "AMAD00104-06", "AMAD00106-03", "AMAD00109-02", "AMAD00110-06", ...19110 not
## listed..., "YALU89401-04", "YALU91301-06", "YANS03301-01", "YANS03301-04",
## "YANS10301-01", "YANS10301-02", "YARA18301-01", "YARA18301-03", "YAUP17101-02",
## "YAUP89301-04"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'season' has levels not trained on: ["Unknown"]
predictions_tbl <- predictions %>%
    as.tibble()
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
predictions_tbl
## # A tibble: 19,130 × 3
##    predict     died not_died
##    <fct>      <dbl>    <dbl>
##  1 not_died 0.00780    0.992
##  2 not_died 0.0102     0.990
##  3 not_died 0.0279     0.972
##  4 not_died 0.0279     0.972
##  5 not_died 0.0279     0.972
##  6 not_died 0.0102     0.990
##  7 not_died 0.0102     0.990
##  8 not_died 0.0102     0.990
##  9 not_died 0.0279     0.972
## 10 not_died 0.00434    0.996
## # ℹ 19,120 more rows

Evaluate Model

performance_h2o <- h2o.performance(best.model, newdata = test_h20)
typeof(performance_h2o)
## [1] "S4"
slotNames(performance_h2o)
## [1] "algorithm" "on_train"  "on_valid"  "on_xval"   "metrics"
performance_h2o@metrics
## $model
## $model$`__meta`
## $model$`__meta`$schema_version
## [1] 3
## 
## $model$`__meta`$schema_name
## [1] "ModelKeyV3"
## 
## $model$`__meta`$schema_type
## [1] "Key<Model>"
## 
## 
## $model$name
## [1] "GBM_1_AutoML_10_20250425_235231"
## 
## $model$type
## [1] "Key<Model>"
## 
## $model$URL
## [1] "/3/Models/GBM_1_AutoML_10_20250425_235231"
## 
## 
## $model_checksum
## [1] "5529523217639880610"
## 
## $frame
## $frame$name
## [1] "members_test_sid_a62c_3"
## 
## 
## $frame_checksum
## [1] "-4563503279189474734"
## 
## $description
## NULL
## 
## $scoring_time
## [1] 1.74564e+12
## 
## $predictions
## NULL
## 
## $MSE
## [1] 0.01327047
## 
## $RMSE
## [1] 0.1151975
## 
## $nobs
## [1] 19130
## 
## $custom_metric_name
## NULL
## 
## $custom_metric_value
## [1] 0
## 
## $r2
## [1] 0.02493755
## 
## $logloss
## [1] 0.06643769
## 
## $AUC
## [1] 0.7566792
## 
## $pr_auc
## [1] 0.9943798
## 
## $Gini
## [1] 0.5133584
## 
## $mean_per_class_error
## [1] 0.4962386
## 
## $domain
## [1] "died"     "not_died"
## 
## $cm
## $cm$`__meta`
## $cm$`__meta`$schema_version
## [1] 3
## 
## $cm$`__meta`$schema_name
## [1] "ConfusionMatrixV3"
## 
## $cm$`__meta`$schema_type
## [1] "ConfusionMatrix"
## 
## 
## $cm$table
## Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
##          died not_died  Error           Rate
## died        2      262 0.9924 =    262 / 264
## not_died    1    18865 0.0001 =   1 / 18,866
## Totals      3    19127 0.0137 = 263 / 19,130
## 
## 
## $thresholds_and_metric_scores
## Metrics for Thresholds: Binomial metrics as a function of classification thresholds
##   threshold       f1       f2 f0point5 accuracy precision   recall specificity
## 1  0.998676 0.003704 0.002318 0.009208 0.015630  1.000000 0.001855    1.000000
## 2  0.998604 0.006972 0.004369 0.017250 0.017250  1.000000 0.003498    1.000000
## 3  0.998490 0.010021 0.006286 0.024680 0.018766  1.000000 0.005036    1.000000
## 4  0.998401 0.014941 0.009391 0.036534 0.021223  1.000000 0.007527    1.000000
## 5  0.998349 0.022639 0.014271 0.054739 0.025091  1.000000 0.011449    1.000000
##   absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns   fns fps tps
## 1     0.005064               0.001855                0.500928 264 18831   0  35
## 2     0.006960               0.003498                0.501749 264 18800   0  66
## 3     0.008357               0.005036                0.502518 264 18771   0  95
## 4     0.010230               0.007527                0.503763 264 18724   0 142
## 5     0.012641               0.011449                0.505725 264 18650   0 216
##        tnr      fnr      fpr      tpr idx
## 1 1.000000 0.998145 0.000000 0.001855   0
## 2 1.000000 0.996502 0.000000 0.003498   1
## 3 1.000000 0.994964 0.000000 0.005036   2
## 4 1.000000 0.992473 0.000000 0.007527   3
## 5 1.000000 0.988551 0.000000 0.011449   4
## 
## ---
##     threshold       f1       f2 f0point5 accuracy precision   recall
## 395  0.884100 0.992732 0.996574 0.988920 0.985572  0.986395 0.999152
## 396  0.876299 0.992918 0.996870 0.988997 0.985938  0.986400 0.999523
## 397  0.875719 0.993025 0.997103 0.988979 0.986147  0.986301 0.999841
## 398  0.867347 0.993051 0.997146 0.988990 0.986200  0.986301 0.999894
## 399  0.865394 0.993078 0.997188 0.989001 0.986252  0.986302 0.999947
## 400  0.773507 0.993052 0.997209 0.988929 0.986200  0.986200 1.000000
##     specificity absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns
## 395    0.015152     0.051634               0.015152                0.507152   4
## 396    0.015152     0.065694               0.015152                0.507337   4
## 397    0.007576     0.053527               0.007576                0.503708   2
## 398    0.007576     0.060271               0.007576                0.503735   2
## 399    0.007576     0.070087               0.007576                0.503761   2
## 400    0.000000     0.000000               0.000000                0.500000   0
##     fns fps   tps      tnr      fnr      fpr      tpr idx
## 395  16 260 18850 0.015152 0.000848 0.984848 0.999152 394
## 396   9 260 18857 0.015152 0.000477 0.984848 0.999523 395
## 397   3 262 18863 0.007576 0.000159 0.992424 0.999841 396
## 398   2 262 18864 0.007576 0.000106 0.992424 0.999894 397
## 399   1 262 18865 0.007576 0.000053 0.992424 0.999947 398
## 400   0 264 18866 0.000000 0.000000 1.000000 1.000000 399
## 
## $max_criteria_and_metric_scores
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold        value idx
## 1                       max f1  0.865394     0.993078 398
## 2                       max f2  0.773507     0.997209 399
## 3                 max f0point5  0.901782     0.989272 389
## 4                 max accuracy  0.865394     0.986252 398
## 5                max precision  0.998676     1.000000   0
## 6                   max recall  0.773507     1.000000 399
## 7              max specificity  0.998676     1.000000   0
## 8             max absolute_mcc  0.976167     0.170105 270
## 9   max min_per_class_accuracy  0.992878     0.676720 123
## 10 max mean_per_class_accuracy  0.989598     0.707249 176
## 11                     max tns  0.998676   264.000000   0
## 12                     max fns  0.998676 18831.000000   0
## 13                     max fps  0.773507   264.000000 399
## 14                     max tps  0.773507 18866.000000 399
## 15                     max tnr  0.998676     1.000000   0
## 16                     max fnr  0.998676     0.998145   0
## 17                     max fpr  0.773507     1.000000 399
## 18                     max tpr  0.773507     1.000000 399
## 
## $gains_lift_table
## Gains/Lift Table: Avg response rate: 98.62 %, avg score: 99.22 %
##    group cumulative_data_fraction lower_threshold     lift cumulative_lift
## 1      1               0.01024569        0.998349 1.013993        1.013993
## 2      2               0.02033455        0.998176 1.013993        1.013993
## 3      3               0.03000523        0.998065 1.008512        1.012227
## 4      4               0.04009409        0.998002 1.013993        1.012671
## 5      5               0.05023523        0.997935 1.008767        1.011883
## 6      6               0.10026137        0.997473 1.007636        1.009764
## 7      7               0.15039205        0.997095 1.010821        1.010116
## 8      8               0.20000000        0.996846 1.010788        1.010283
## 9      9               0.30020910        0.995951 1.007646        1.009403
## 10    10               0.40020910        0.995162 1.006043        1.008563
## 11    11               0.50015682        0.994427 1.007099        1.008271
## 12    12               0.60010455        0.993518 1.004447        1.007634
## 13    13               0.70000000        0.992562 1.004442        1.007178
## 14    14               0.80005227        0.990924 1.004987        1.006904
## 15    15               0.90005227        0.987233 0.999682        1.006102
## 16    16               1.00000000        0.773507 0.945050        1.000000
##    response_rate    score cumulative_response_rate cumulative_score
## 1       1.000000 0.998485                 1.000000         0.998485
## 2       1.000000 0.998274                 1.000000         0.998380
## 3       0.994595 0.998107                 0.998258         0.998292
## 4       1.000000 0.998039                 0.998696         0.998228
## 5       0.994845 0.997962                 0.997919         0.998175
## 6       0.993730 0.997729                 0.995829         0.997952
## 7       0.996872 0.997277                 0.996177         0.997727
## 8       0.996839 0.996968                 0.996341         0.997539
## 9       0.993740 0.996378                 0.995473         0.997151
## 10      0.992159 0.995550                 0.994645         0.996751
## 11      0.993201 0.994789                 0.994356         0.996359
## 12      0.990586 0.994000                 0.993728         0.995966
## 13      0.990581 0.993051                 0.993279         0.995550
## 14      0.991118 0.991825                 0.993009         0.995084
## 15      0.985886 0.989415                 0.992217         0.994454
## 16      0.932008 0.972071                 0.986200         0.992217
##    capture_rate cumulative_capture_rate      gain cumulative_gain
## 1      0.010389                0.010389  1.399343        1.399343
## 2      0.010230                0.020619  1.399343        1.399343
## 3      0.009753                0.030372  0.851238        1.222689
## 4      0.010230                0.040602  1.399343        1.267140
## 5      0.010230                0.050832  0.876666        1.188314
## 6      0.050408                0.101240  0.763610        0.976405
## 7      0.050673                0.151913  1.082139        1.011650
## 8      0.050143                0.202057  1.078797        1.028305
## 9      0.100975                0.303032  0.764605        0.940283
## 10     0.100604                0.403636  0.604262        0.856321
## 11     0.100657                0.504293  0.709912        0.827064
## 12     0.100392                0.604686  0.444746        0.763389
## 13     0.100339                0.705025  0.444247        0.717845
## 14     0.100551                0.805576  0.498722        0.690442
## 15     0.099968                0.905544 -0.031803        0.610197
## 16     0.094456                1.000000 -5.494964        0.000000
##    kolmogorov_smirnov
## 1            0.010389
## 2            0.020619
## 3            0.026584
## 4            0.036814
## 5            0.043256
## 6            0.070937
## 7            0.110247
## 8            0.149026
## 9            0.204547
## 10           0.248333
## 11           0.299748
## 12           0.331958
## 13           0.364116
## 14           0.400273
## 15           0.397969
## 16           0.000000
h2o.auc(performance_h2o)
## [1] 0.7566792
h2o.confusionMatrix(performance_h2o)
## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.865393765774585:
##          died not_died    Error        Rate
## died        2      262 0.992424    =262/264
## not_died    1    18865 0.000053    =1/18866
## Totals      3    19127 0.013748  =263/19130