The goal is to automate building and tuning a classification model to predict employee attrition, using h2o::h2o.automl().
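The packages assumed to be attached for this module (a minimal sketch inferred from the functions called below, not the original setup chunk):
library(tidyverse)   # read_csv(), mutate(), across(), bind_cols(), as_tibble()
library(rsample)     # initial_split(), training(), testing()
library(recipes)     # recipe(), step_zv(), all_predictors()
library(h2o)         # h2o.init(), h2o.automl(), h2o.predict(), h2o.performance()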
Import the cleaned data from Module 7.
data <- read_csv("../00_data/data_wrangled/data_clean.csv") %>%
# h2o requires all variables to be either numeric or factors
mutate(across(where(is.character), factor))
## Rows: 1470 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (24): Age, DailyRate, DistanceFromHome, Education, EmployeeNumber, Envir...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
set.seed(1234)
data_split <- initial_split(data, strata = "Attrition")
train_tbl <- training(data_split)
test_tbl <- testing(data_split)
recipe_obj <- recipe(Attrition ~ ., data = train_tbl) %>%
# Remove zero variance variables
step_zv(all_predictors())
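Note that the recipe is defined but never applied before the data are passed to h2o below; if the zero-variance filter should actually take effect, one option is to prep and bake it first (a sketch, not part of the original workflow):
recipe_prepped <- prep(recipe_obj, training = train_tbl)
train_baked_tbl <- bake(recipe_prepped, new_data = train_tbl)
test_baked_tbl  <- bake(recipe_prepped, new_data = test_tbl)
# The baked tibbles would then replace train_tbl / test_tbl in the as.h2o() calls.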
# Initialize h2o
h2o.init()
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## C:\Users\User\AppData\Local\Temp\Rtmpa2WzSN\file66c6f0c5f48/h2o_User_started_from_r.out
## C:\Users\User\AppData\Local\Temp\Rtmpa2WzSN\file66c211a3981/h2o_User_started_from_r.err
##
##
## Starting H2O JVM and connecting: Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 3 seconds 769 milliseconds
## H2O cluster timezone: America/New_York
## H2O data parsing timezone: UTC
## H2O cluster version: 3.44.0.3
## H2O cluster version age: 1 year, 4 months and 11 days
## H2O cluster name: H2O_started_from_R_User_fhp551
## H2O cluster total nodes: 1
## H2O cluster total memory: 1.90 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 4.4.1 (2024-06-14 ucrt)
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (1 year, 4 months and 11 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
split.h2o <- h2o.splitFrame(as.h2o(train_tbl), ratios = c(0.85), seed = 2345)
train_h2o <- split.h2o[[1]]
valid_h2o <- split.h2o[[2]]
test_h2o <- as.h2o(test_tbl)
y <- "Attrition"
x <- setdiff(names(train_tbl), y)
models_h2o <- h2o.automl(
x = x,
y = y,
training_frame = train_h2o,
validation_frame = valid_h2o,
leaderboard_frame = test_h2o,
# max_runtime_secs = 30,
max_models = 10,
exclude_algos = "DeepLearning",
nfolds = 5,
seed = 3456
)
## 16:40:16.302: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.
## 16:40:16.319: AutoML: XGBoost is not available; skipping it.
Examine the output of h2o.automl(). It is an S4 object whose slots include the leaderboard and the leading model.
models_h2o %>% typeof()
## [1] "S4"
models_h2o %>% slotNames()
## [1] "project_name" "leader" "leaderboard" "event_log"
## [5] "modeling_steps" "training_info"
models_h2o@leaderboard
## model_id auc logloss
## 1 GBM_grid_1_AutoML_1_20250501_164016_model_1 0.8589536 0.3269150
## 2 StackedEnsemble_BestOfFamily_1_AutoML_1_20250501_164016 0.8489213 0.3120121
## 3 GBM_1_AutoML_1_20250501_164016 0.8485976 0.3241789
## 4 StackedEnsemble_AllModels_1_AutoML_1_20250501_164016 0.8459547 0.3086959
## 5 GLM_1_AutoML_1_20250501_164016 0.8400216 0.3186222
## 6 GBM_4_AutoML_1_20250501_164016 0.8123517 0.3439750
## aucpr mean_per_class_error rmse mse
## 1 0.6193157 0.2031553 0.3141580 0.09869523
## 2 0.6388749 0.2508900 0.3021263 0.09128029
## 3 0.6066085 0.2404531 0.3118557 0.09725395
## 4 0.6272661 0.2393204 0.3000865 0.09005189
## 5 0.6515822 0.2369741 0.3064601 0.09391777
## 6 0.5489556 0.2654531 0.3191061 0.10182871
##
## [12 rows x 7 columns]
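Any model on the leaderboard, not just the leader, can be retrieved by its model_id. A minimal sketch, assuming the leaderboard converts with as_tibble() just as the predictions do later:
leaderboard_tbl <- models_h2o@leaderboard %>% as_tibble()
# Fetch, for example, the second-ranked model by its id
model_2nd <- h2o.getModel(leaderboard_tbl$model_id[2])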
models_h2o@leader
## Model Details:
## ==============
##
## H2OBinomialModel: gbm
## Model ID: GBM_grid_1_AutoML_1_20250501_164016_model_1
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1 52 52 6249 2
## max_depth mean_depth min_leaves max_leaves mean_leaves
## 1 4 3.51923 4 6 4.90385
##
##
## H2OBinomialMetrics: gbm
## ** Reported on training data. **
##
## MSE: 0.08569147
## RMSE: 0.2927311
## LogLoss: 0.2912412
## Mean Per-Class Error: 0.166489
## AUC: 0.9062733
## AUCPR: 0.7620909
## Gini: 0.8125465
## R^2: 0.3621394
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 741 47 0.059645 =47/788
## Yes 41 109 0.273333 =41/150
## Totals 782 156 0.093817 =88/938
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.274743 0.712418 116
## 2 max f2 0.196306 0.734430 173
## 3 max f0point5 0.355201 0.743728 79
## 4 max accuracy 0.355201 0.908316 79
## 5 max precision 0.758149 1.000000 0
## 6 max recall 0.037873 1.000000 363
## 7 max specificity 0.758149 1.000000 0
## 8 max absolute_mcc 0.274743 0.656573 116
## 9 max min_per_class_accuracy 0.196306 0.833333 173
## 10 max mean_per_class_accuracy 0.233417 0.839239 143
## 11 max tns 0.758149 788.000000 0
## 12 max fns 0.758149 149.000000 0
## 13 max fps 0.009906 788.000000 399
## 14 max tps 0.037873 150.000000 363
## 15 max tnr 0.758149 1.000000 0
## 16 max fnr 0.758149 0.993333 0
## 17 max fpr 0.009906 1.000000 399
## 18 max tpr 0.037873 1.000000 363
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: gbm
## ** Reported on validation data. **
## ** Validation metrics **
##
## MSE: 0.1082276
## RMSE: 0.3289796
## LogLoss: 0.3473961
## Mean Per-Class Error: 0.296024
## AUC: 0.8292484
## AUCPR: 0.5344578
## Gini: 0.6584967
## R^2: 0.2169122
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 126 10 0.073529 =10/136
## Yes 14 13 0.518519 =14/27
## Totals 140 23 0.147239 =24/163
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.328997 0.520000 22
## 2 max f2 0.101237 0.673575 84
## 3 max f0point5 0.450399 0.593220 7
## 4 max accuracy 0.450399 0.871166 7
## 5 max precision 0.747865 1.000000 0
## 6 max recall 0.064601 1.000000 116
## 7 max specificity 0.747865 1.000000 0
## 8 max absolute_mcc 0.328997 0.435645 22
## 9 max min_per_class_accuracy 0.188543 0.740741 53
## 10 max mean_per_class_accuracy 0.123248 0.768110 77
## 11 max tns 0.747865 136.000000 0
## 12 max fns 0.747865 26.000000 0
## 13 max fps 0.015486 136.000000 162
## 14 max tps 0.064601 27.000000 116
## 15 max tnr 0.747865 1.000000 0
## 16 max fnr 0.747865 0.962963 0
## 17 max fpr 0.015486 1.000000 162
## 18 max tpr 0.064601 1.000000 116
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: gbm
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 0.104097
## RMSE: 0.3226406
## LogLoss: 0.3498197
## Mean Per-Class Error: 0.263934
## AUC: 0.8061591
## AUCPR: 0.5718686
## Gini: 0.6123181
## R^2: 0.2251345
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 703 85 0.107868 =85/788
## Yes 63 87 0.420000 =63/150
## Totals 766 172 0.157783 =148/938
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.248759 0.540373 124
## 2 max f2 0.167822 0.629067 192
## 3 max f0point5 0.382707 0.603015 50
## 4 max accuracy 0.386261 0.876333 48
## 5 max precision 0.707985 1.000000 0
## 6 max recall 0.014993 1.000000 396
## 7 max specificity 0.707985 1.000000 0
## 8 max absolute_mcc 0.248759 0.447192 124
## 9 max min_per_class_accuracy 0.171201 0.747462 188
## 10 max mean_per_class_accuracy 0.167822 0.755956 192
## 11 max tns 0.707985 788.000000 0
## 12 max fns 0.707985 148.000000 0
## 13 max fps 0.012802 788.000000 399
## 14 max tps 0.014993 150.000000 396
## 15 max tnr 0.707985 1.000000 0
## 16 max fnr 0.707985 0.986667 0
## 17 max fpr 0.012802 1.000000 399
## 18 max tpr 0.014993 1.000000 396
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid
## accuracy 0.845415 0.032004 0.877660 0.813830 0.845745
## auc 0.805102 0.047479 0.835443 0.764979 0.760970
## err 0.154585 0.032004 0.122340 0.186170 0.154255
## err_count 29.000000 6.000000 23.000000 35.000000 29.000000
## f0point5 0.535526 0.080613 0.617977 0.449438 0.529412
## f1 0.563634 0.075632 0.656716 0.477612 0.553846
## f2 0.595796 0.072671 0.700637 0.509554 0.580645
## lift_top_group 6.253334 0.018257 6.266667 6.266667 6.266667
## logloss 0.349918 0.024434 0.329397 0.375094 0.370469
## max_per_class_error 0.380000 0.073030 0.266667 0.466667 0.400000
## mcc 0.474396 0.093590 0.588018 0.368823 0.463268
## mean_per_class_accuracy 0.754163 0.046220 0.819198 0.700211 0.746202
## mean_per_class_error 0.245837 0.046220 0.180802 0.299789 0.253797
## mse 0.104179 0.008702 0.094910 0.113622 0.110688
## pr_auc 0.562756 0.091921 0.665415 0.469876 0.488546
## precision 0.518648 0.084327 0.594595 0.432432 0.514286
## r2 0.224483 0.065475 0.292303 0.152776 0.174647
## recall 0.620000 0.073030 0.733333 0.533333 0.600000
## rmse 0.322540 0.013535 0.308074 0.337078 0.332699
## specificity 0.888325 0.028295 0.905063 0.867089 0.892405
## cv_4_valid cv_5_valid
## accuracy 0.812834 0.877005
## auc 0.792781 0.871338
## err 0.187166 0.122995
## err_count 35.000000 23.000000
## f0point5 0.463918 0.616883
## f1 0.507042 0.622951
## f2 0.559006 0.629139
## lift_top_group 6.233333 6.233333
## logloss 0.354536 0.320093
## max_per_class_error 0.400000 0.366667
## mcc 0.402274 0.549597
## mean_per_class_accuracy 0.726752 0.778450
## mean_per_class_error 0.273248 0.221550
## mse 0.106455 0.095219
## pr_auc 0.535602 0.654341
## precision 0.439024 0.612903
## r2 0.209632 0.293058
## recall 0.600000 0.633333
## rmse 0.326275 0.308575
## specificity 0.853503 0.923567
?h2o.getModel
## starting httpd help server ... done
?h2o.saveModel
?h2o.loadModel
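Saving the leader to disk avoids re-running AutoML in a later session; a minimal sketch, where the output directory is an assumption:
# h2o.saveModel() returns the full path of the saved model; force = TRUE overwrites
model_path <- h2o.saveModel(models_h2o@leader, path = "../00_models/", force = TRUE)
# In a later session (after h2o.init()), reload it from that path
best_model_reloaded <- h2o.loadModel(model_path)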
best_model <- models_h2o@leader
predictions <- h2o.predict(best_model, newdata = test_h2o)
predictions_tbl <- predictions %>%
as_tibble()
predictions_tbl %>%
bind_cols(test_tbl)
## # A tibble: 369 × 35
## predict No Yes Age Attrition BusinessTravel DailyRate Department
## <fct> <dbl> <dbl> <dbl> <fct> <fct> <dbl> <fct>
## 1 No 0.704 0.296 59 No Travel_Rarely 1324 Research & …
## 2 No 0.802 0.198 35 No Travel_Rarely 809 Research & …
## 3 No 0.911 0.0891 34 No Travel_Rarely 1346 Research & …
## 4 Yes 0.666 0.334 22 No Non-Travel 1123 Research & …
## 5 No 0.925 0.0749 53 No Travel_Rarely 1219 Sales
## 6 No 0.944 0.0561 24 No Non-Travel 673 Research & …
## 7 Yes 0.669 0.331 21 No Travel_Rarely 391 Research & …
## 8 No 0.906 0.0940 34 Yes Travel_Rarely 699 Research & …
## 9 No 0.980 0.0196 53 No Travel_Rarely 1282 Research & …
## 10 Yes 0.570 0.430 32 Yes Travel_Frequently 1125 Research & …
## # ℹ 359 more rows
## # ℹ 27 more variables: DistanceFromHome <dbl>, Education <dbl>,
## # EducationField <fct>, EmployeeNumber <dbl>, EnvironmentSatisfaction <dbl>,
## # Gender <fct>, HourlyRate <dbl>, JobInvolvement <dbl>, JobLevel <dbl>,
## # JobRole <fct>, JobSatisfaction <dbl>, MaritalStatus <fct>,
## # MonthlyIncome <dbl>, MonthlyRate <dbl>, NumCompaniesWorked <dbl>,
## # OverTime <fct>, PercentSalaryHike <dbl>, PerformanceRating <dbl>, …
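As a quick sanity check, the class predictions can be compared with the actual Attrition values directly in the tibble (a sketch using the threshold h2o bakes into the predict column, max F1 by default):
predictions_tbl %>%
    bind_cols(test_tbl %>% select(Attrition)) %>%
    summarise(accuracy = mean(as.character(predict) == as.character(Attrition)))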
?h2o.performance
performance_h2o <- h2o.performance(best_model, newdata = test_h2o)
typeof(performance_h2o)
## [1] "S4"
slotNames(performance_h2o)
## [1] "algorithm" "on_train" "on_valid" "on_xval" "metrics"
performance_h2o@metrics
## $model
## $model$`__meta`
## $model$`__meta`$schema_version
## [1] 3
##
## $model$`__meta`$schema_name
## [1] "ModelKeyV3"
##
## $model$`__meta`$schema_type
## [1] "Key<Model>"
##
##
## $model$name
## [1] "GBM_grid_1_AutoML_1_20250501_164016_model_1"
##
## $model$type
## [1] "Key<Model>"
##
## $model$URL
## [1] "/3/Models/GBM_grid_1_AutoML_1_20250501_164016_model_1"
##
##
## $model_checksum
## [1] "2212417739849831560"
##
## $frame
## $frame$name
## [1] "test_tbl_sid_97bd_3"
##
##
## $frame_checksum
## [1] "-54413681510283746"
##
## $description
## NULL
##
## $scoring_time
## [1] 1.746132e+12
##
## $predictions
## NULL
##
## $MSE
## [1] 0.09869523
##
## $RMSE
## [1] 0.314158
##
## $nobs
## [1] 369
##
## $custom_metric_name
## NULL
##
## $custom_metric_value
## [1] 0
##
## $r2
## [1] 0.275165
##
## $logloss
## [1] 0.326915
##
## $AUC
## [1] 0.8589536
##
## $pr_auc
## [1] 0.6193157
##
## $Gini
## [1] 0.7179072
##
## $mean_per_class_error
## [1] 0.2031553
##
## $domain
## [1] "No" "Yes"
##
## $cm
## $cm$`__meta`
## $cm$`__meta`$schema_version
## [1] 3
##
## $cm$`__meta`$schema_name
## [1] "ConfusionMatrixV3"
##
## $cm$`__meta`$schema_type
## [1] "ConfusionMatrix"
##
##
## $cm$table
## Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
## No Yes Error Rate
## No 271 38 0.1230 = 38 / 309
## Yes 17 43 0.2833 = 17 / 60
## Totals 288 81 0.1491 = 55 / 369
##
##
## $thresholds_and_metric_scores
## Metrics for Thresholds: Binomial metrics as a function of classification thresholds
## threshold f1 f2 f0point5 accuracy precision recall specificity
## 1 0.731582 0.032787 0.020747 0.078125 0.840108 1.000000 0.016667 1.000000
## 2 0.716552 0.064516 0.041322 0.147059 0.842818 1.000000 0.033333 1.000000
## 3 0.640632 0.063492 0.041152 0.138889 0.840108 0.666667 0.033333 0.996764
## 4 0.634157 0.093750 0.061475 0.197368 0.842818 0.750000 0.050000 0.996764
## 5 0.612128 0.092308 0.061224 0.187500 0.840108 0.600000 0.050000 0.993528
## absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns fns fps tps
## 1 0.118299 0.016667 0.508333 309 59 0 1
## 2 0.167527 0.033333 0.516667 309 58 0 2
## 3 0.123674 0.033333 0.515049 308 58 1 2
## 4 0.166643 0.050000 0.523382 308 57 1 3
## 5 0.138926 0.050000 0.521764 307 57 2 3
## tnr fnr fpr tpr idx
## 1 1.000000 0.983333 0.000000 0.016667 0
## 2 1.000000 0.966667 0.000000 0.033333 1
## 3 0.996764 0.966667 0.003236 0.033333 2
## 4 0.996764 0.950000 0.003236 0.050000 3
## 5 0.993528 0.950000 0.006472 0.050000 4
##
## ---
## threshold f1 f2 f0point5 accuracy precision recall
## 364 0.015157 0.283019 0.496689 0.197889 0.176152 0.164835 1.000000
## 365 0.014460 0.282353 0.495868 0.197368 0.173442 0.164384 1.000000
## 366 0.013218 0.281690 0.495050 0.196850 0.170732 0.163934 1.000000
## 367 0.013080 0.281030 0.494234 0.196335 0.168022 0.163488 1.000000
## 368 0.010512 0.280374 0.493421 0.195822 0.165312 0.163043 1.000000
## 369 0.008764 0.279720 0.492611 0.195313 0.162602 0.162602 1.000000
## specificity absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns
## 364 0.016181 0.051645 0.016181 0.508091 5
## 365 0.012945 0.046130 0.012945 0.506472 4
## 366 0.009709 0.039895 0.009709 0.504854 3
## 367 0.006472 0.032530 0.006472 0.503236 2
## 368 0.003236 0.022971 0.003236 0.501618 1
## 369 0.000000 0.000000 0.000000 0.500000 0
## fns fps tps tnr fnr fpr tpr idx
## 364 0 304 60 0.016181 0.000000 0.983819 1.000000 363
## 365 0 305 60 0.012945 0.000000 0.987055 1.000000 364
## 366 0 306 60 0.009709 0.000000 0.990291 1.000000 365
## 367 0 307 60 0.006472 0.000000 0.993528 1.000000 366
## 368 0 308 60 0.003236 0.000000 0.996764 1.000000 367
## 369 0 309 60 0.000000 0.000000 1.000000 1.000000 368
##
## $max_criteria_and_metric_scores
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.249563 0.609929 80
## 2 max f2 0.198288 0.699708 102
## 3 max f0point5 0.390382 0.733696 30
## 4 max accuracy 0.390382 0.899729 30
## 5 max precision 0.731582 1.000000 0
## 6 max recall 0.015157 1.000000 363
## 7 max specificity 0.731582 1.000000 0
## 8 max absolute_mcc 0.390382 0.581369 30
## 9 max min_per_class_accuracy 0.198288 0.800000 102
## 10 max mean_per_class_accuracy 0.198288 0.811003 102
## 11 max tns 0.731582 309.000000 0
## 12 max fns 0.731582 59.000000 0
## 13 max fps 0.008764 309.000000 368
## 14 max tps 0.015157 60.000000 363
## 15 max tnr 0.731582 1.000000 0
## 16 max fnr 0.731582 0.983333 0
## 17 max fpr 0.008764 1.000000 368
## 18 max tpr 0.015157 1.000000 363
##
## $gains_lift_table
## Gains/Lift Table: Avg response rate: 16.26 %, avg score: 15.74 %
## group cumulative_data_fraction lower_threshold lift cumulative_lift
## 1 1 0.01084011 0.619177 4.612500 4.612500
## 2 2 0.02168022 0.546460 4.612500 4.612500
## 3 3 0.03252033 0.472238 6.150000 5.125000
## 4 4 0.04065041 0.448004 4.100000 4.920000
## 5 5 0.05149051 0.435214 6.150000 5.178947
## 6 6 0.10027100 0.361430 3.758333 4.487838
## 7 7 0.15176152 0.306958 1.942105 3.624107
## 8 8 0.20054201 0.255720 2.391667 3.324324
## 9 9 0.30081301 0.186250 1.329730 2.659459
## 10 10 0.40108401 0.145596 0.332432 2.077703
## 11 11 0.50135501 0.111820 0.498649 1.761892
## 12 12 0.59891599 0.083204 0.512500 1.558371
## 13 13 0.69918699 0.065160 0.332432 1.382558
## 14 14 0.79945799 0.045640 0.000000 1.209153
## 15 15 0.89972900 0.031109 0.166216 1.092922
## 16 16 1.00000000 0.008764 0.166216 1.000000
## response_rate score cumulative_response_rate cumulative_score
## 1 0.750000 0.680731 0.750000 0.680731
## 2 0.750000 0.573676 0.750000 0.627204
## 3 1.000000 0.497360 0.833333 0.583922
## 4 0.666667 0.454451 0.800000 0.558028
## 5 1.000000 0.442637 0.842105 0.533735
## 6 0.611111 0.398347 0.729730 0.467871
## 7 0.315789 0.329919 0.589286 0.421066
## 8 0.388889 0.284036 0.540541 0.387734
## 9 0.216216 0.222638 0.432432 0.332702
## 10 0.054054 0.163265 0.337838 0.290343
## 11 0.081081 0.125002 0.286486 0.257275
## 12 0.083333 0.098653 0.253394 0.231436
## 13 0.054054 0.073330 0.224806 0.208762
## 14 0.000000 0.053626 0.196610 0.189304
## 15 0.027027 0.036970 0.177711 0.172327
## 16 0.027027 0.023374 0.162602 0.157391
## capture_rate cumulative_capture_rate gain cumulative_gain
## 1 0.050000 0.050000 361.250000 361.250000
## 2 0.050000 0.100000 361.250000 361.250000
## 3 0.066667 0.166667 515.000000 412.500000
## 4 0.033333 0.200000 310.000000 392.000000
## 5 0.066667 0.266667 515.000000 417.894737
## 6 0.183333 0.450000 275.833333 348.783784
## 7 0.100000 0.550000 94.210526 262.410714
## 8 0.116667 0.666667 139.166667 232.432432
## 9 0.133333 0.800000 32.972973 165.945946
## 10 0.033333 0.833333 -66.756757 107.770270
## 11 0.050000 0.883333 -50.135135 76.189189
## 12 0.050000 0.933333 -48.750000 55.837104
## 13 0.033333 0.966667 -66.756757 38.255814
## 14 0.000000 0.966667 -100.000000 20.915254
## 15 0.016667 0.983333 -83.378378 9.292169
## 16 0.016667 1.000000 -83.378378 0.000000
## kolmogorov_smirnov
## 1 0.046764
## 2 0.093528
## 3 0.160194
## 4 0.190291
## 5 0.256958
## 6 0.417638
## 7 0.475566
## 8 0.556634
## 9 0.596117
## 10 0.516181
## 11 0.456149
## 12 0.399353
## 13 0.319417
## 14 0.199676
## 15 0.099838
## 16 0.000000
h2o.auc(performance_h2o)
## [1] 0.8589536
h2o.confusionMatrix(performance_h2o)
## Confusion Matrix (vertical: actual; across: predicted) for max f1 @ threshold = 0.249563495594922:
## No Yes Error Rate
## No 271 38 0.122977 =38/309
## Yes 17 43 0.283333 =17/60
## Totals 288 81 0.149051 =55/369
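The matrix above uses the max-F1 threshold. When missing a leaver is costlier than a false alarm, a lower cutoff may be preferable; a sketch, assuming the thresholds argument of h2o.confusionMatrix() for metrics objects (the 0.30 value is purely illustrative):
# Trade precision for recall by lowering the cutoff for predicting "Yes"
h2o.confusionMatrix(performance_h2o, thresholds = 0.30)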
h2o.metric(performance_h2o)
## Metrics for Thresholds: Binomial metrics as a function of classification thresholds
## threshold f1 f2 f0point5 accuracy precision recall specificity
## 1 0.731582 0.032787 0.020747 0.078125 0.840108 1.000000 0.016667 1.000000
## 2 0.716552 0.064516 0.041322 0.147059 0.842818 1.000000 0.033333 1.000000
## 3 0.640632 0.063492 0.041152 0.138889 0.840108 0.666667 0.033333 0.996764
## 4 0.634157 0.093750 0.061475 0.197368 0.842818 0.750000 0.050000 0.996764
## 5 0.612128 0.092308 0.061224 0.187500 0.840108 0.600000 0.050000 0.993528
## absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns fns fps tps
## 1 0.118299 0.016667 0.508333 309 59 0 1
## 2 0.167527 0.033333 0.516667 309 58 0 2
## 3 0.123674 0.033333 0.515049 308 58 1 2
## 4 0.166643 0.050000 0.523382 308 57 1 3
## 5 0.138926 0.050000 0.521764 307 57 2 3
## tnr fnr fpr tpr idx
## 1 1.000000 0.983333 0.000000 0.016667 0
## 2 1.000000 0.966667 0.000000 0.033333 1
## 3 0.996764 0.966667 0.003236 0.033333 2
## 4 0.996764 0.950000 0.003236 0.050000 3
## 5 0.993528 0.950000 0.006472 0.050000 4
##
## ---
## threshold f1 f2 f0point5 accuracy precision recall
## 364 0.015157 0.283019 0.496689 0.197889 0.176152 0.164835 1.000000
## 365 0.014460 0.282353 0.495868 0.197368 0.173442 0.164384 1.000000
## 366 0.013218 0.281690 0.495050 0.196850 0.170732 0.163934 1.000000
## 367 0.013080 0.281030 0.494234 0.196335 0.168022 0.163488 1.000000
## 368 0.010512 0.280374 0.493421 0.195822 0.165312 0.163043 1.000000
## 369 0.008764 0.279720 0.492611 0.195313 0.162602 0.162602 1.000000
## specificity absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns
## 364 0.016181 0.051645 0.016181 0.508091 5
## 365 0.012945 0.046130 0.012945 0.506472 4
## 366 0.009709 0.039895 0.009709 0.504854 3
## 367 0.006472 0.032530 0.006472 0.503236 2
## 368 0.003236 0.022971 0.003236 0.501618 1
## 369 0.000000 0.000000 0.000000 0.500000 0
## fns fps tps tnr fnr fpr tpr idx
## 364 0 304 60 0.016181 0.000000 0.983819 1.000000 363
## 365 0 305 60 0.012945 0.000000 0.987055 1.000000 364
## 366 0 306 60 0.009709 0.000000 0.990291 1.000000 365
## 367 0 307 60 0.006472 0.000000 0.993528 1.000000 366
## 368 0 308 60 0.003236 0.000000 0.996764 1.000000 367
## 369 0 309 60 0.000000 0.000000 1.000000 1.000000 368
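Once any models worth keeping have been saved to disk, the local cluster can be shut down; a short sketch:
# Stop the local H2O cluster without the interactive confirmation prompt
h2o.shutdown(prompt = FALSE)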