Set up

Import Data

Import data from Module 7

# Load the cleaned data, drop the free-text `notes` column, and turn every
# remaining character column into a factor (h2o requires all variables to be
# either numeric or factors).
# NOTE(review): the file has an unnamed first column that read_csv renames to
# `...1`; it looks like a saved row index -- confirm it should be kept.
data <- "../00_data/data_wrangled/data_clean2.csv" %>%
  read_csv() %>%
  select(!notes) %>%
  mutate(across(where(is.character), factor))

## New names:
## Rows: 7458 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): coname, exec_fullname, ceo_dismissal, notes dbl (5): ...1,
## dismissal_dataset_id, tenure_no_ceodb, max_tenure_ceodb, fyea...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Split Data

# Fix the RNG seed so the split is reproducible.
set.seed(1234)

# Split into training and testing sets, stratified on the outcome.
data_split <- initial_split(data, strata = "ceo_dismissal")

# Extract each partition and make sure the outcome is stored as a factor.
train_tbl <- data_split %>%
  training() %>%
  mutate(ceo_dismissal = as.factor(ceo_dismissal))

test_tbl <- data_split %>%
  testing() %>%
  mutate(ceo_dismissal = as.factor(ceo_dismissal))

Recipes

# Preprocessing recipe: model ceo_dismissal on all other columns and remove
# zero variance variables.
# NOTE(review): recipe_obj is not prepped or baked anywhere in the visible
# code; the h2o frames below are built from train_tbl / test_tbl directly.
recipe_obj <- step_zv(
  recipe(ceo_dismissal ~ ., data = train_tbl),
  all_predictors()
)

Model

# Initialize h2o before any frames are uploaded or models are trained.
# The rendered output below reports the cluster this session connected to.
h2o.init()

##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         6 minutes 19 seconds 
##     H2O cluster timezone:       America/New_York 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.44.0.3 
##     H2O cluster version age:    1 year, 4 months and 15 days 
##     H2O cluster name:           H2O_started_from_R_User_xzn825 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   1.71 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.4.1 (2024-06-14 ucrt)

## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (1 year, 4 months and 15 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html

# Convert the training tibble into an H2O frame for use by the h2o.* calls.
train_h2o <- as.h2o(train_tbl)

##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%

# Convert the testing tibble into an H2O frame for use by the h2o.* calls.
test_h2o <- as.h2o(test_tbl)

##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%

# Make sure the outcome column is a factor (enum) in both H2O frames.
train_h2o[["ceo_dismissal"]] <- as.factor(train_h2o[["ceo_dismissal"]])
test_h2o[["ceo_dismissal"]] <- as.factor(test_h2o[["ceo_dismissal"]])

# Print per-column type, missing counts, range, and cardinality for the
# training frame.
print(h2o.describe(train_h2o))

##                  Label Type Missing Zeros PosInf NegInf  Min    Max
## 1                 ...1  int       0     0      0      0    1   7475
## 2 dismissal_dataset_id  int       0     0      0      0    2 559044
## 3               coname enum       0     1      0      0    0   3003
## 4        exec_fullname enum       0     1      0      0    0   5296
## 5        ceo_dismissal enum       0  1111      0      0    0      1
## 6      tenure_no_ceodb  int       0     0      0      0    1      3
## 7     max_tenure_ceodb  int       0     0      0      0    1      4
## 8           fyear_gone  int       0     0      0      0 1980   2021
##           Mean        Sigma Cardinality
## 1 3747.5692830 2.144169e+03          NA
## 2 5660.7280529 2.682587e+04          NA
## 3           NA           NA        3004
## 4           NA           NA        5297
## 5    0.8013588 3.990129e-01           2
## 6    1.0262829 1.665610e-01          NA
## 7    1.0480958 2.300954e-01          NA
## 8 2006.4519936 7.514666e+00          NA

# Print the same per-column summary for the testing frame.
print(h2o.describe(test_h2o))

##                  Label Type Missing Zeros PosInf NegInf  Min    Max
## 1                 ...1  int       0     0      0      0    2   7461
## 2 dismissal_dataset_id  int       0     0      0      0    1 559039
## 3               coname enum       0     1      0      0    0   1484
## 4        exec_fullname enum       0     1      0      0    0   1839
## 5        ceo_dismissal enum       0   371      0      0    0      1
## 6      tenure_no_ceodb  int       0     0      0      0    1      3
## 7     max_tenure_ceodb  int       0     0      0      0    1      3
## 8           fyear_gone  int       0     0      0      0 1990   2021
##           Mean        Sigma Cardinality
## 1 3703.5324397 2.195641e+03          NA
## 2 5298.8632708 2.238609e+04          NA
## 3           NA           NA        1485
## 4           NA           NA        1840
## 5    0.8010724 3.993005e-01           2
## 6    1.0235925 1.553100e-01          NA
## 7    1.0466488 2.208809e-01          NA
## 8 2006.2482574 7.468390e+00          NA

# Hold out 15% of the training frame as a validation frame.
# `ratios` is spelled out in full: the previous `ratio =` only worked through
# R's partial argument matching.
split.h2o <- h2o.splitFrame(data = train_h2o, ratios = 0.85, seed = 2345)
train_h2o <- split.h2o[[1]]
valid_h2o <- split.h2o[[2]]

# Outcome column, and predictors = every other column of train_tbl.
# NOTE(review): x therefore includes `...1` (looks like a row index),
# dismissal_dataset_id, and the high-cardinality name columns coname and
# exec_fullname -- confirm these are intended predictors. The rendered output
# shows a large gap between training and cross-validated AUC.
y <- "ceo_dismissal"
x <- setdiff(names(train_tbl), y)

# Run AutoML: at most 10 models, DeepLearning excluded, 5-fold CV, fixed seed.
# NOTE(review): with nfolds = 5 the AutoML log reports that validation_frame is
# used for informative metrics only.
# NOTE(review): test_h2o is both the leaderboard frame here and the frame the
# leader is evaluated on later -- confirm this is intended.
models_h2o <- h2o.automl(
    x = x,
    y = y,
    training_frame    = train_h2o,
    validation_frame  = valid_h2o,
    leaderboard_frame = test_h2o,
    # max_runtime_secs  = 30,
    max_models        = 10,
    exclude_algos     = "DeepLearning",
    nfolds            = 5,
    seed              = 3456
)

##   |                                                                              |                                                                      |   0%  |                                                                              |==                                                                    |   3%
## 20:25:53.782: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.
## 20:25:53.786: AutoML: XGBoost is not available; skipping it.  |                                                                              |===                                                                   |   4%  |                                                                              |=======                                                               |  10%  |                                                                              |=========                                                             |  12%  |                                                                              |===============                                                       |  21%  |                                                                              |==================                                                    |  26%  |                                                                              |====================                                                  |  29%  |                                                                              |=======================                                               |  33%  |                                                                              |===============================================                       |  67%  |                                                                              |======================================================================| 100%

Examine the output of h2o.automl

# Internal storage type of the AutoML result object.
typeof(models_h2o)

## [1] "S4"

# List the slots available on the AutoML result object.
slotNames(models_h2o)

## [1] "project_name"   "leader"         "leaderboard"    "event_log"     
## [5] "modeling_steps" "training_info"

# Show the leaderboard of trained models.
slot(models_h2o, "leaderboard")

##                                                  model_id       auc   logloss
## 1                          GBM_1_AutoML_3_20250505_202553 0.5656065 0.5256068
## 2 StackedEnsemble_BestOfFamily_1_AutoML_3_20250505_202553 0.5584215 0.4949310
## 3    StackedEnsemble_AllModels_1_AutoML_3_20250505_202553 0.5430356 0.4991453
## 4                          GBM_3_AutoML_3_20250505_202553 0.5420252 0.5085978
## 5             GBM_grid_1_AutoML_3_20250505_202553_model_1 0.5369934 0.5072509
## 6                          GBM_2_AutoML_3_20250505_202553 0.5348348 0.5133798
##       aucpr mean_per_class_error      rmse       mse
## 1 0.8254422            0.5000000 0.4055355 0.1644591
## 2 0.8373919            0.5000000 0.3976640 0.1581367
## 3 0.8286244            0.5000000 0.3995318 0.1596256
## 4 0.8189387            0.5000000 0.4025896 0.1620784
## 5 0.8104962            0.4973046 0.4019058 0.1615283
## 6 0.8126084            0.5000000 0.4049437 0.1639794
## 
## [12 rows x 7 columns]

# Show the details and metrics of the leader model.
slot(models_h2o, "leader")

## Model Details:
## ==============
## 
## H2OBinomialModel: gbm
## Model ID:  GBM_1_AutoML_3_20250505_202553 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              20                       20               24694         8
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1        15   10.90000         28         32    29.65000
## 
## 
## H2OBinomialMetrics: gbm
## ** Reported on training data. **
## 
## MSE:  0.0727113
## RMSE:  0.2696503
## LogLoss:  0.2641725
## Mean Per-Class Error:  0.06332186
## AUC:  0.9902714
## AUCPR:  0.9975053
## Gini:  0.9805428
## R^2:  0.5446184
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##           dismissed not_dis    Error       Rate
## dismissed       847     100 0.105597   =100/947
## not_dis          80    3721 0.021047   =80/3801
## Totals          927    3821 0.037911  =180/4748
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.711267    0.976384 190
## 2                       max f2  0.657988    0.983756 215
## 3                 max f0point5  0.780842    0.981108 154
## 4                 max accuracy  0.715440    0.962089 188
## 5                max precision  0.940063    1.000000   0
## 6                   max recall  0.435188    1.000000 319
## 7              max specificity  0.940063    1.000000   0
## 8             max absolute_mcc  0.715440    0.881009 188
## 9   max min_per_class_accuracy  0.763147    0.950370 163
## 10 max mean_per_class_accuracy  0.780842    0.954696 154
## 11                     max tns  0.940063  947.000000   0
## 12                     max fns  0.940063 3798.000000   0
## 13                     max fps  0.147692  947.000000 399
## 14                     max tps  0.435188 3801.000000 319
## 15                     max tnr  0.940063    1.000000   0
## 16                     max fnr  0.940063    0.999211   0
## 17                     max fpr  0.147692    1.000000 399
## 18                     max tpr  0.435188    1.000000 319
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: gbm
## ** Reported on validation data. **
## ** Validation metrics **
## 
## MSE:  0.1610166
## RMSE:  0.4012687
## LogLoss:  0.5169004
## Mean Per-Class Error:  0.4969512
## AUC:  0.5693743
## AUCPR:  0.8339982
## Gini:  0.1387486
## R^2:  -0.0294209
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##           dismissed not_dis    Error      Rate
## dismissed         1     163 0.993902  =163/164
## not_dis           0     681 0.000000    =0/681
## Totals            1     844 0.192899  =163/845
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold      value idx
## 1                       max f1  0.503794   0.893115 398
## 2                       max f2  0.503794   0.954316 398
## 3                 max f0point5  0.503794   0.839290 398
## 4                 max accuracy  0.503794   0.807101 398
## 5                max precision  0.957102   1.000000   0
## 6                   max recall  0.503794   1.000000 398
## 7              max specificity  0.957102   1.000000   0
## 8             max absolute_mcc  0.874719   0.124872 220
## 9   max min_per_class_accuracy  0.897750   0.548780 161
## 10 max mean_per_class_accuracy  0.878856   0.572445 214
## 11                     max tns  0.957102 164.000000   0
## 12                     max fns  0.957102 680.000000   0
## 13                     max fps  0.495691 164.000000 399
## 14                     max tps  0.503794 681.000000 398
## 15                     max tnr  0.957102   1.000000   0
## 16                     max fnr  0.957102   0.998532   0
## 17                     max fpr  0.495691   1.000000 399
## 18                     max tpr  0.503794   1.000000 398
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: gbm
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  0.1634281
## RMSE:  0.4042625
## LogLoss:  0.5201853
## Mean Per-Class Error:  0.4985476
## AUC:  0.5747655
## AUCPR:  0.8313301
## Gini:  0.149531
## R^2:  -0.02352956
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##           dismissed not_dis    Error       Rate
## dismissed         3     944 0.996832   =944/947
## not_dis           1    3800 0.000263    =1/3801
## Totals            4    4744 0.199031  =945/4748
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.420150    0.889409 396
## 2                       max f2  0.385576    0.952584 398
## 3                 max f0point5  0.475385    0.834250 385
## 4                 max accuracy  0.420150    0.800969 396
## 5                max precision  0.961763    1.000000   0
## 6                   max recall  0.385576    1.000000 398
## 7              max specificity  0.961763    1.000000   0
## 8             max absolute_mcc  0.842146    0.118493 189
## 9   max min_per_class_accuracy  0.890740    0.546990 115
## 10 max mean_per_class_accuracy  0.866376    0.563691 156
## 11                     max tns  0.961763  947.000000   0
## 12                     max fns  0.961763 3799.000000   0
## 13                     max fps  0.350050  947.000000 399
## 14                     max tps  0.385576 3801.000000 398
## 15                     max tnr  0.961763    1.000000   0
## 16                     max fnr  0.961763    0.999474   0
## 17                     max fpr  0.350050    1.000000 399
## 18                     max tpr  0.385576    1.000000 398
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary: 
##                               mean       sd cv_1_valid cv_2_valid cv_3_valid
## accuracy                  0.801390 0.001197   0.803158   0.800000   0.801053
## auc                       0.580013 0.019646   0.569574   0.575384   0.562185
## err                       0.198610 0.001197   0.196842   0.200000   0.198947
## err_count               188.600000 1.140176 187.000000 190.000000 189.000000
## f0point5                  0.834469 0.000957   0.835903   0.833333   0.834247
## f1                        0.889618 0.000542   0.890323   0.888889   0.889538
## f2                        0.952572 0.000221   0.952321   0.952381   0.952679
## lift_top_group            1.149263 0.137340   1.250000   1.250000   0.998686
## logloss                   0.541275 0.006080   0.533277   0.542740   0.544452
## max_per_class_error       0.994731 0.009116   0.978947   1.000000   1.000000
## mcc                       0.087119 0.031099   0.109109         NA         NA
## mean_per_class_accuracy   0.502503 0.004274   0.509868   0.500000   0.500000
## mean_per_class_error      0.497497 0.004274   0.490132   0.500000   0.500000
## mse                       0.166832 0.000786   0.166187   0.167723   0.167471
## pr_auc                    0.837060 0.012549   0.840352   0.840541   0.818671
## precision                 0.801352 0.001184   0.803175   0.800000   0.801053
## r2                       -0.044853 0.005228  -0.038670  -0.048267  -0.050851
## recall                    0.999737 0.000588   0.998684   1.000000   1.000000
## rmse                      0.408450 0.000963   0.407661   0.409540   0.409233
## specificity               0.005269 0.009116   0.021053   0.000000   0.000000
##                         cv_4_valid cv_5_valid
## accuracy                  0.801897   0.800843
## auc                       0.579811   0.613113
## err                       0.198103   0.199157
## err_count               188.000000 189.000000
## f0point5                  0.834798   0.834065
## f1                        0.889930   0.889409
## f2                        0.952859   0.952620
## lift_top_group            0.998947   1.248684
## logloss                   0.548699   0.537206
## max_per_class_error       0.994709   1.000000
## mcc                       0.065129         NA
## mean_per_class_accuracy   0.502645   0.500000
## mean_per_class_error      0.497354   0.500000
## mse                       0.166872   0.165908
## pr_auc                    0.832832   0.852902
## precision                 0.801688   0.800843
## r2                       -0.046261  -0.040217
## recall                    1.000000   1.000000
## rmse                      0.408500   0.407318
## specificity               0.005291   0.000000

Save and Load

# Open the help page for retrieving a model by id.
help("h2o.getModel")

## starting httpd help server ... done

# Open the help pages for saving a model and loading it back.
help("h2o.saveModel")
help("h2o.loadModel")

# Keep the AutoML leader as the model used for prediction and evaluation.
best_model <- slot(models_h2o, "leader")

Make Predictions

# Score the testing frame with the leader model.
# NOTE(review): the rendered output shows warnings that 'coname' and
# 'exec_fullname' in the test frame contain levels not seen during training.
predictions <- h2o.predict(best_model, newdata = test_h2o)

##   |                                                                              |                                                                      |   0%  |                                                                              |======================================================================| 100%

## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'coname' has levels not trained on: ["ABIOMED INC", "ACUSON
## CORP", "ADESA INC", "ADTRAN INC", "ADVANCE CIRCUITS INC", "ADVANCED MARKETING
## SERVICES", "AEROFLEX INC", "AEROVIRONMENT INC", "ALASKA AIR GROUP INC", "ALBANY
## MOLECULAR RESH INC", ...403 not listed..., "WEATHERFORD ENTERRA INC", "WEBEX
## COMMUNICATIONS INC", "WELLMAN INC", "WESTERN GAS RESOURCES INC", "WESTROCK CO",
## "WEX INC", "WHEELABRATOR TECHNOLOGIES", "WYNDHAM DESTINATIONS INC", "WYNN
## RESORTS LTD", "YOUNKERS INC"]

## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'exec_fullname' has levels not trained on: ["A. Frederick
## Gerstell", "A. George (Skip) Battle", "A. James Dearlove", "A. Lorne Weil", "A.
## William Reynolds", "Abe J. Gustin Jr.", "Adam D. Singer", "Adrian Adams",
## "Ahmad R. Chatila", "Alain C. Viry", ...1644 not listed..., "William Thomas
## Dillard", "William V. Hickey", "William W. Sprague Jr.", "William White Adams",
## "William Wrigley Jr.", "Willliam T. Jensen", "Wilson B. Sexton", "Woodson M.
## Hobbs", "Ying Lu", "Zan Guerry"]

# Bring the H2O predictions back into R as a tibble.
predictions_tbl <- as_tibble(predictions)

# Show the predictions side by side with the test rows (printed, not stored).
bind_cols(predictions_tbl, test_tbl)

## New names:
## • `...1` -> `...4`

## # A tibble: 1,865 × 11
##    predict dismissed not_dis  ...4 dismissal_dataset_id coname     exec_fullname
##    <fct>       <dbl>   <dbl> <dbl>                <dbl> <fct>      <fct>        
##  1 not_dis    0.0992   0.901     2                   12 AMERICAN … Robert L. Cr…
##  2 not_dis    0.103    0.897     3                   13 AMERICAN … Donald J. Ca…
##  3 not_dis    0.241    0.759     9                   65 AIR PRODU… Harold A. Wa…
##  4 not_dis    0.116    0.884    12                   78 ALBERTSON… Gary Glenn M…
##  5 not_dis    0.127    0.873    13                   80 ALCAN INC  David Morton 
##  6 not_dis    0.116    0.884    14                   81 ALCAN INC  Jacques Boug…
##  7 not_dis    0.129    0.871    17                   88 ALEXANDER… Tinsley H. I…
##  8 not_dis    0.142    0.858    18                   99 HONEYWELL… Lawrence A. …
##  9 not_dis    0.119    0.881    23                  117 HESS CORP  Leon Hess    
## 10 not_dis    0.111    0.889    26                  121 BEAM INC   William J. A…
## # ℹ 1,855 more rows
## # ℹ 4 more variables: ceo_dismissal <fct>, tenure_no_ceodb <dbl>,
## #   max_tenure_ceodb <dbl>, fyear_gone <dbl>

Evaluate Model

# Open the help page for h2o.performance.
help("h2o.performance")

# Compute the leader model's metrics on the testing frame, then extract and
# print its confusion matrix.
performance_h2o <- best_model %>%
  h2o.performance(newdata = test_h2o)
confusion_matrix <- performance_h2o %>%
  h2o.confusionMatrix()
print(confusion_matrix)

## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.229895324126849:
##           dismissed not_dis    Error       Rate
## dismissed         0     371 1.000000   =371/371
## not_dis           0    1494 0.000000    =0/1494
## Totals            0    1865 0.198928  =371/1865

# Optional inspection of the performance object (left disabled):
# typeof(performance_h2o)
# slotNames(performance_h2o)
# performance_h2o@metrics

# AUC of the leader model on the testing frame.
auc <- performance_h2o %>%
  h2o.auc()
print(paste("AUC:", auc, sep = " "))

## [1] "AUC: 0.565606541169169"

# Pull the full metrics list out of the performance object and print it.
metrics <- slot(performance_h2o, "metrics")
print(metrics)

## $model
## $model$`__meta`
## $model$`__meta`$schema_version
## [1] 3
## 
## $model$`__meta`$schema_name
## [1] "ModelKeyV3"
## 
## $model$`__meta`$schema_type
## [1] "Key<Model>"
## 
## 
## $model$name
## [1] "GBM_1_AutoML_3_20250505_202553"
## 
## $model$type
## [1] "Key<Model>"
## 
## $model$URL
## [1] "/3/Models/GBM_1_AutoML_3_20250505_202553"
## 
## 
## $model_checksum
## [1] "447411804817538224"
## 
## $frame
## $frame$name
## [1] "RTMP_sid_bc65_6"
## 
## 
## $frame_checksum
## [1] "9011093136619684768"
## 
## $description
## NULL
## 
## $scoring_time
## [1] 1.746491e+12
## 
## $predictions
## NULL
## 
## $MSE
## [1] 0.1644591
## 
## $RMSE
## [1] 0.4055355
## 
## $nobs
## [1] 1865
## 
## $custom_metric_name
## NULL
## 
## $custom_metric_value
## [1] 0
## 
## $r2
## [1] -0.0320269
## 
## $logloss
## [1] 0.5256068
## 
## $AUC
## [1] 0.5656065
## 
## $pr_auc
## [1] 0.8254422
## 
## $Gini
## [1] 0.1312131
## 
## $mean_per_class_error
## [1] 0.5
## 
## $domain
## [1] "dismissed" "not_dis"  
## 
## $cm
## $cm$`__meta`
## $cm$`__meta`$schema_version
## [1] 3
## 
## $cm$`__meta`$schema_name
## [1] "ConfusionMatrixV3"
## 
## $cm$`__meta`$schema_type
## [1] "ConfusionMatrix"
## 
## 
## $cm$table
## Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
##           dismissed not_dis  Error          Rate
## dismissed         0     371 1.0000 =   371 / 371
## not_dis           0    1494 0.0000 =   0 / 1,494
## Totals            0    1865 0.1989 = 371 / 1,865
## 
## 
## $thresholds_and_metric_scores
## Metrics for Thresholds: Binomial metrics as a function of classification thresholds
##   threshold       f1       f2 f0point5 accuracy precision   recall specificity
## 1  0.953882 0.001337 0.000836 0.003329 0.198928  0.500000 0.000669    0.997305
## 2  0.952959 0.005337 0.003344 0.013210 0.200536  0.800000 0.002677    0.997305
## 3  0.951831 0.007995 0.005014 0.019711 0.201609  0.857143 0.004016    0.997305
## 4  0.950652 0.009315 0.005848 0.022876 0.201609  0.777778 0.004685    0.994609
## 5  0.949261 0.010631 0.006681 0.026008 0.201609  0.727273 0.005355    0.991914
##   absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns  fns fps tps
## 1     0.024711               0.000669                0.498987 370 1493   1   1
## 2     0.000139               0.002677                0.499991 370 1490   1   4
## 3     0.008621               0.004016                0.500660 370 1488   1   6
## 4     0.004064               0.004685                0.499647 369 1487   2   7
## 5     0.014240               0.005355                0.498634 368 1486   3   8
##        tnr      fnr      fpr      tpr idx
## 1 0.997305 0.999331 0.002695 0.000669   0
## 2 0.997305 0.997323 0.002695 0.002677   1
## 3 0.997305 0.995984 0.002695 0.004016   2
## 4 0.994609 0.995315 0.005391 0.004685   3
## 5 0.991914 0.994645 0.008086 0.005355   4
## 
## ---
##     threshold       f1       f2 f0point5 accuracy precision   recall
## 395  0.447303 0.888425 0.950345 0.834080 0.799464  0.801399 0.996653
## 396  0.435700 0.889088 0.951378 0.834453 0.800536  0.801613 0.997992
## 397  0.389037 0.889154 0.951773 0.834265 0.800536  0.801289 0.998661
## 398  0.380633 0.889485 0.952290 0.834451 0.801072  0.801396 0.999331
## 399  0.362542 0.889220 0.952168 0.834078 0.800536  0.800966 0.999331
## 400  0.229895 0.889550 0.952685 0.834264 0.801072  0.801072 1.000000
##     specificity absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns
## 395    0.005391     0.013344               0.005391                0.501022   2
## 396    0.005391     0.026115               0.005391                0.501691   2
## 397    0.002695     0.013515               0.002695                0.500678   1
## 398    0.002695     0.024711               0.002695                0.501013   1
## 399    0.000000     0.011542               0.000000                0.499665   0
## 400    0.000000     0.000000               0.000000                0.500000   0
##     fns fps  tps      tnr      fnr      fpr      tpr idx
## 395   5 369 1489 0.005391 0.003347 0.994609 0.996653 394
## 396   3 369 1491 0.005391 0.002008 0.994609 0.997992 395
## 397   2 370 1492 0.002695 0.001339 0.997305 0.998661 396
## 398   1 370 1493 0.002695 0.000669 0.997305 0.999331 397
## 399   1 371 1493 0.000000 0.000669 1.000000 0.999331 398
## 400   0 371 1494 0.000000 0.000000 1.000000 1.000000 399
## 
## $max_criteria_and_metric_scores
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.229895    0.889550 399
## 2                       max f2  0.229895    0.952685 399
## 3                 max f0point5  0.716851    0.835679 316
## 4                 max accuracy  0.380633    0.801072 397
## 5                max precision  0.937262    0.880597  18
## 6                   max recall  0.229895    1.000000 399
## 7              max specificity  0.953882    0.997305   0
## 8             max absolute_mcc  0.790605    0.107165 263
## 9   max min_per_class_accuracy  0.898800    0.555556 118
## 10 max mean_per_class_accuracy  0.899547    0.563519 116
## 11                     max tns  0.953882  370.000000   0
## 12                     max fns  0.953882 1493.000000   0
## 13                     max fps  0.362542  371.000000 398
## 14                     max tps  0.229895 1494.000000 399
## 15                     max tnr  0.953882    0.997305   0
## 16                     max fnr  0.953882    0.999331   0
## 17                     max fpr  0.362542    1.000000 398
## 18                     max tpr  0.229895    1.000000 399
## 
## $gains_lift_table
## Gains/Lift Table: Avg response rate: 80.11 %, avg score: 87.30 %
##    group cumulative_data_fraction lower_threshold     lift cumulative_lift
## 1      1               0.01018767        0.944808 0.919820        0.919820
## 2      2               0.02037534        0.942024 1.182625        1.051222
## 3      3               0.03056300        0.939078 1.116924        1.073123
## 4      4               0.04021448        0.936127 1.109624        1.081883
## 5      5               0.05040214        0.934275 0.985521        1.062406
## 6      6               0.10026810        0.927785 0.953024        1.008007
## 7      7               0.15013405        0.923770 1.060407        1.025411
## 8      8               0.20000000        0.920008 1.100675        1.044177
## 9      9               0.30026810        0.913764 1.041385        1.043244
## 10    10               0.40053619        0.908046 1.034709        1.041108
## 11    11               0.50026810        0.900822 1.067118        1.046293
## 12    12               0.60000000        0.891748 0.953024        1.030790
## 13    13               0.69973190        0.879236 1.000004        1.026402
## 14    14               0.80000000        0.852719 0.981305        1.020750
## 15    15               0.89973190        0.776666 0.986581        1.016962
## 16    16               1.00000000        0.229895 0.847794        1.000000
##    response_rate    score cumulative_response_rate cumulative_score
## 1       0.736842 0.949343                 0.736842         0.949343
## 2       0.947368 0.943407                 0.842105         0.946375
## 3       0.894737 0.940356                 0.859649         0.944369
## 4       0.888889 0.937267                 0.866667         0.942664
## 5       0.789474 0.935080                 0.851064         0.941131
## 6       0.763441 0.930628                 0.807487         0.935908
## 7       0.849462 0.925622                 0.821429         0.932492
## 8       0.881720 0.921697                 0.836461         0.929800
## 9       0.834225 0.916837                 0.835714         0.925472
## 10      0.828877 0.910654                 0.834003         0.921762
## 11      0.854839 0.904460                 0.838156         0.918313
## 12      0.763441 0.896665                 0.825737         0.914714
## 13      0.801075 0.885866                 0.822222         0.910603
## 14      0.786096 0.868780                 0.817694         0.905361
## 15      0.790323 0.816990                 0.814660         0.895565
## 16      0.679144 0.670164                 0.801072         0.872965
##    capture_rate cumulative_capture_rate       gain cumulative_gain
## 1      0.009371                0.009371  -8.018037       -8.018037
## 2      0.012048                0.021419  18.262524        5.122243
## 3      0.011379                0.032798  11.692384        7.312290
## 4      0.010710                0.043507  10.962368        8.188309
## 5      0.010040                0.053548  -1.447897        6.240565
## 6      0.047523                0.101071  -4.697644        0.800707
## 7      0.052878                0.153949   6.040650        2.541117
## 8      0.054886                0.208835  10.067510        4.417671
## 9      0.104418                0.313253   4.138479        4.324441
## 10     0.103748                0.417001   3.470925        4.110776
## 11     0.106426                0.523427   6.711793        4.629307
## 12     0.095047                0.618474  -4.697644        3.078983
## 13     0.099732                0.718206   0.000360        2.640190
## 14     0.098394                0.816600  -1.869510        2.074967
## 15     0.098394                0.914993  -1.341927        1.696217
## 16     0.085007                1.000000 -15.220597        0.000000
##    kolmogorov_smirnov
## 1           -0.004106
## 2            0.005247
## 3            0.011235
## 4            0.016553
## 5            0.015812
## 6            0.004036
## 7            0.019178
## 8            0.044415
## 9            0.065275
## 10           0.082770
## 11           0.116419
## 12           0.092867
## 13           0.092869
## 14           0.083446
## 15           0.076718
## 16           0.000000

Apply 11

Nils Skogestig

2025-04-30

Set up

Import Data

Split Data

Recipes

Model

Save and Load

Make Predictions

Evaluate Model