Set up

Import data

Import the cleaned data from Module 7.

library(h2o)
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## The following objects are masked from 'package:base':
## 
##     &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::day()   masks h2o::day()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ lubridate::hour()  masks h2o::hour()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ lubridate::month() masks h2o::month()
## ✖ lubridate::week()  masks h2o::week()
## ✖ lubridate::year()  masks h2o::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom        1.0.7     ✔ rsample      1.2.1
## ✔ dials        1.2.1     ✔ tune         1.2.1
## ✔ infer        1.0.7     ✔ workflows    1.1.4
## ✔ modeldata    1.4.0     ✔ workflowsets 1.1.0
## ✔ parsnip      1.2.1     ✔ yardstick    1.3.1
## ✔ recipes      1.1.0
## Warning: package 'broom' was built under R version 4.3.3
## Warning: package 'modeldata' was built under R version 4.3.3
## Warning: package 'recipes' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Dig deeper into tidy modeling with R at https://www.tmwr.org
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## 
## Attaching package: 'TTR'
## 
## The following object is masked from 'package:dials':
## 
##     momentum
## 
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Read the wrangled CSV and turn every character column into a factor
# (h2o requires all variables to be either numeric or factors).
data <- mutate(
    read.csv("../00_data/data_wrangled/data.csv"),
    across(where(is.character), factor)
)

Split data

# Fix the RNG seed so the random split below is repeatable
set.seed(1234)

# Split into training and testing sets, stratified on the outcome column
data_split <- data %>% initial_split(strata = "accreditation")
train_tbl <- data_split %>% training()
test_tbl <- data_split %>% testing()

Recipes

# Preprocessing recipe: model accreditation on all other columns and drop
# zero-variance predictors.
# NOTE(review): recipe_obj is not referenced again in the visible part of
# this document — confirm whether it is meant to be applied before modeling.
recipe_obj <- step_zv(
    recipe(accreditation ~ ., data = train_tbl),
    all_predictors()
)

Model

# Initialize h2o: connect this R session to the H2O cluster. Every h2o.*
# call below talks to that cluster (the log that follows shows the
# connection details, here localhost:54321).
h2o.init()
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 days 24 minutes 
##     H2O cluster timezone:       America/New_York 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.44.0.3 
##     H2O cluster version age:    11 months 
##     H2O cluster name:           H2O_started_from_R_jordanlanowy_fhp551 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   1.44 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.3.2 (2023-10-31)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (11 months) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# Convert the training data to an H2OFrame and split it 85% / 15%
# with a fixed seed so the partition is repeatable.
split_h2o <- h2o.splitFrame(
    data   = as.h2o(train_tbl),
    ratios = 0.85,
    seed   = 2345
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# The first piece of the split (ratios = 0.85) becomes the training frame,
# the remaining piece the validation frame.
train_h2o <- split_h2o[[1]]
valid_h2o <- split_h2o[[2]]
# Convert the held-out test data to an H2OFrame as well
test_h2o <- as.h2o(test_tbl)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Outcome column, and predictors = every other column of train_tbl
y <- "accreditation"
x <- setdiff(names(train_tbl), y)

# Run H2O AutoML: at most 10 models or 30 seconds, 5-fold cross-validation,
# deep learning excluded, fixed seed.
# NOTE(review): x includes identifier-like columns (X, name_of_museum,
# postcode); h2o.predict() later warns about levels not seen in training —
# consider dropping them from x.
# NOTE(review): test_h2o is used here to rank the leaderboard and again
# later in h2o.performance(), so the reported test metrics may be
# optimistic — confirm this is intended.
# NOTE(review): with nfolds = 5, H2O reports that validation_frame is used
# for informative metrics only (see the message in the log below).
models_h2o <- h2o.automl(
    y = y,
    x = x,
    training_frame    = train_h2o,
    validation_frame  = valid_h2o,
    leaderboard_frame = test_h2o,
    nfolds            = 5,
    max_models        = 10,
    max_runtime_secs  = 30,
    exclude_algos     = "DeepLearning",
    seed              = 3456
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
## 13:24:45.800: User specified a validation frame with cross-validation still enabled. Please note that the models will still be validated using cross-validation only, the validation frame will be used to provide purely informative validation metrics on the trained models.
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |======================================================================| 100%

Examine the output of h2o.automl

# Storage type of the AutoML result object
typeof(models_h2o)
## [1] "S4"
# Names of the slots available on the AutoML result object
slotNames(models_h2o)
## [1] "project_name"   "leader"         "leaderboard"    "event_log"     
## [5] "modeling_steps" "training_info"
# Leaderboard of the trained models with their metrics
slot(models_h2o, "leaderboard")
##                                                   model_id       auc   logloss
## 1    StackedEnsemble_AllModels_1_AutoML_15_20241121_132445 0.9371310 0.3047063
## 2 StackedEnsemble_BestOfFamily_1_AutoML_15_20241121_132445 0.9366797 0.3060339
## 3                      XGBoost_3_AutoML_15_20241121_132445 0.9353839 0.3133164
## 4                          GLM_1_AutoML_15_20241121_132445 0.9336554 0.3132786
## 5                      XGBoost_2_AutoML_15_20241121_132445 0.9237338 0.3484276
## 6                      XGBoost_1_AutoML_15_20241121_132445 0.9200615 0.3491579
##       aucpr mean_per_class_error      rmse        mse
## 1 0.9594277            0.1095772 0.3045837 0.09277121
## 2 0.9584524            0.1168927 0.3042431 0.09256386
## 3 0.9577012            0.1218670 0.3076886 0.09467225
## 4 0.9575205            0.1210038 0.3083793 0.09509777
## 5 0.9472094            0.1242517 0.3229280 0.10428252
## 6 0.9484182            0.1280523 0.3255031 0.10595228
## 
## [12 rows x 7 columns]
# Details and metrics of the top-ranked (leader) model
slot(models_h2o, "leader")
## Model Details:
## ==============
## 
## H2OBinomialModel: stackedensemble
## Model ID:  StackedEnsemble_AllModels_1_AutoML_15_20241121_132445 
## Model Summary for Stacked Ensemble: 
##                                     key            value
## 1                     Stacking strategy cross_validation
## 2  Number of base models (used / total)             4/10
## 3      # GBM base models (used / total)              1/4
## 4  # XGBoost base models (used / total)              1/3
## 5      # GLM base models (used / total)              1/1
## 6      # DRF base models (used / total)              1/2
## 7                 Metalearner algorithm              GLM
## 8    Metalearner fold assignment scheme           Random
## 9                    Metalearner nfolds                5
## 10              Metalearner fold_column               NA
## 11   Custom metalearner hyperparameters             None
## 
## 
## H2OBinomialMetrics: stackedensemble
## ** Reported on training data. **
## 
## MSE:  0.03770232
## RMSE:  0.1941709
## LogLoss:  0.1437038
## Mean Per-Class Error:  0.0459837
## AUC:  0.993834
## AUCPR:  0.995356
## Gini:  0.987668
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##              Accredited Unaccredited    Error       Rate
## Accredited         1004           81 0.074654   =81/1085
## Unaccredited         25         1419 0.017313   =25/1444
## Totals             1029         1500 0.041914  =106/2529
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.317940    0.963995 237
## 2                       max f2  0.249712    0.981583 257
## 3                 max f0point5  0.612197    0.968843 154
## 4                 max accuracy  0.317940    0.958086 237
## 5                max precision  0.999674    1.000000   0
## 6                   max recall  0.174263    1.000000 285
## 7              max specificity  0.999674    1.000000   0
## 8             max absolute_mcc  0.317940    0.914843 237
## 9   max min_per_class_accuracy  0.447828    0.949309 202
## 10 max mean_per_class_accuracy  0.317940    0.954016 237
## 11                     max tns  0.999674 1085.000000   0
## 12                     max fns  0.999674 1307.000000   0
## 13                     max fps  0.005000 1085.000000 399
## 14                     max tps  0.174263 1444.000000 285
## 15                     max tnr  0.999674    1.000000   0
## 16                     max fnr  0.999674    0.905125   0
## 17                     max fpr  0.005000    1.000000 399
## 18                     max tpr  0.174263    1.000000 285
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: stackedensemble
## ** Reported on validation data. **
## 
## MSE:  0.08736751
## RMSE:  0.29558
## LogLoss:  0.2921939
## Mean Per-Class Error:  0.1110858
## AUC:  0.9472648
## AUCPR:  0.9653165
## Gini:  0.8945296
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##              Accredited Unaccredited    Error     Rate
## Accredited          162           28 0.147368  =28/190
## Unaccredited         19          235 0.074803  =19/254
## Totals              181          263 0.105856  =47/444
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold      value idx
## 1                       max f1  0.590911   0.909091 225
## 2                       max f2  0.234776   0.928189 254
## 3                 max f0point5  0.879082   0.927788 172
## 4                 max accuracy  0.614111   0.894144 223
## 5                max precision  0.999863   1.000000   0
## 6                   max recall  0.043830   1.000000 387
## 7              max specificity  0.999863   1.000000   0
## 8             max absolute_mcc  0.614111   0.783198 223
## 9   max min_per_class_accuracy  0.755629   0.885827 207
## 10 max mean_per_class_accuracy  0.755629   0.890282 207
## 11                     max tns  0.999863 190.000000   0
## 12                     max fns  0.999863 252.000000   0
## 13                     max fps  0.007441 190.000000 399
## 14                     max tps  0.043830 254.000000 387
## 15                     max tnr  0.999863   1.000000   0
## 16                     max fnr  0.999863   0.992126   0
## 17                     max fpr  0.007441   1.000000 399
## 18                     max tpr  0.043830   1.000000 387
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: stackedensemble
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  0.08671589
## RMSE:  0.2944756
## LogLoss:  0.290198
## Mean Per-Class Error:  0.1057492
## AUC:  0.941625
## AUCPR:  0.9623053
## Gini:  0.8832499
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##              Accredited Unaccredited    Error       Rate
## Accredited          978          107 0.098618  =107/1085
## Unaccredited        163         1281 0.112881  =163/1444
## Totals             1141         1388 0.106762  =270/2529
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.446059    0.904661 200
## 2                       max f2  0.170718    0.909633 298
## 3                 max f0point5  0.719219    0.924312 143
## 4                 max accuracy  0.519811    0.894029 187
## 5                max precision  0.999726    1.000000   0
## 6                   max recall  0.026721    1.000000 392
## 7              max specificity  0.999726    1.000000   0
## 8             max absolute_mcc  0.519811    0.787407 187
## 9   max min_per_class_accuracy  0.413299    0.891244 208
## 10 max mean_per_class_accuracy  0.519811    0.896547 187
## 11                     max tns  0.999726 1085.000000   0
## 12                     max fns  0.999726 1380.000000   0
## 13                     max fps  0.004505 1085.000000 399
## 14                     max tps  0.026721 1444.000000 392
## 15                     max tnr  0.999726    1.000000   0
## 16                     max fnr  0.999726    0.955679   0
## 17                     max fpr  0.004505    1.000000 399
## 18                     max tpr  0.026721    1.000000 392
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary: 
##                mean       sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid
## accuracy   0.897202 0.022957   0.931481   0.910506   0.882235   0.881092
## auc        0.942244 0.010531   0.952546   0.953077   0.940730   0.928876
## err        0.102798 0.022957   0.068519   0.089494   0.117764   0.118908
## err_count 51.600000 9.989995  37.000000  46.000000  59.000000  61.000000
## f0point5   0.922940 0.014994   0.947622   0.922598   0.908767   0.913534
##           cv_5_valid
## accuracy    0.880694
## auc         0.935989
## err         0.119306
## err_count  55.000000
## f0point5    0.922179
## 
## ---
##                         mean        sd cv_1_valid cv_2_valid cv_3_valid
## precision           0.933503  0.014724   0.954861   0.923875   0.917266
## r2                  0.643981  0.054988   0.721783   0.680706   0.619601
## recall              0.883759  0.033342   0.919732   0.917526   0.876289
## residual_deviance 293.146200 23.289720 268.940920 278.529720 302.449900
## rmse                0.294251  0.022277   0.262206   0.280047   0.304325
## specificity         0.915413  0.021191   0.946058   0.901345   0.890476
##                   cv_4_valid cv_5_valid
## precision           0.931034   0.940476
## r2                  0.603117   0.594699
## recall              0.849650   0.855596
## residual_deviance 328.495180 287.315340
## rmse                0.312903   0.311772
## specificity         0.920705   0.918478

Save and load

# Look up the help pages for the functions used to fetch, save and load models
help("h2o.getModel")
help("h2o.saveModel")
help("h2o.loadModel")

# Fetch the chosen model from the cluster by ID and write it to h2o_models/.
# h2o.saveModel() returns the full path of the file it wrote; keep that path
# and reload from it, so the load location always matches the save location
# instead of relying on a hand-typed path.
# NOTE(review): this ID belongs to an earlier AutoML run ("AutoML_13") than
# the leaderboard printed above ("AutoML_15"); h2o.getModel() can only find
# it while that model still exists in the running cluster. Consider using
# models_h2o@leader@model_id — TODO confirm which model is intended.
model_id <- "StackedEnsemble_AllModels_1_AutoML_13_20241121_130635"
model_path <- h2o.getModel(model_id) %>%
    h2o.saveModel(path = "h2o_models/")
model_path
## [1] "/Users/jordanlanowy/Desktop/PSU_DAT3100/11_module13/h2o_models/StackedEnsemble_AllModels_1_AutoML_13_20241121_130635"
best_model <- h2o.loadModel(model_path)
# Print the reloaded model's summary and metrics
best_model
## Model Details:
## ==============
## 
## H2OBinomialModel: stackedensemble
## Model ID:  StackedEnsemble_AllModels_1_AutoML_13_20241121_130635 
## Model Summary for Stacked Ensemble: 
##                                     key            value
## 1                     Stacking strategy cross_validation
## 2  Number of base models (used / total)             4/10
## 3      # GBM base models (used / total)              1/4
## 4  # XGBoost base models (used / total)              1/3
## 5      # GLM base models (used / total)              1/1
## 6      # DRF base models (used / total)              1/2
## 7                 Metalearner algorithm              GLM
## 8    Metalearner fold assignment scheme           Random
## 9                    Metalearner nfolds                5
## 10              Metalearner fold_column               NA
## 11   Custom metalearner hyperparameters             None
## 
## 
## H2OBinomialMetrics: stackedensemble
## ** Reported on training data. **
## 
## MSE:  0.03770232
## RMSE:  0.1941709
## LogLoss:  0.1437038
## Mean Per-Class Error:  0.0459837
## AUC:  0.993834
## AUCPR:  0.995356
## Gini:  0.987668
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##              Accredited Unaccredited    Error       Rate
## Accredited         1004           81 0.074654   =81/1085
## Unaccredited         25         1419 0.017313   =25/1444
## Totals             1029         1500 0.041914  =106/2529
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.317940    0.963995 237
## 2                       max f2  0.249712    0.981583 257
## 3                 max f0point5  0.612197    0.968843 154
## 4                 max accuracy  0.317940    0.958086 237
## 5                max precision  0.999674    1.000000   0
## 6                   max recall  0.174263    1.000000 285
## 7              max specificity  0.999674    1.000000   0
## 8             max absolute_mcc  0.317940    0.914843 237
## 9   max min_per_class_accuracy  0.447828    0.949309 202
## 10 max mean_per_class_accuracy  0.317940    0.954016 237
## 11                     max tns  0.999674 1085.000000   0
## 12                     max fns  0.999674 1307.000000   0
## 13                     max fps  0.005000 1085.000000 399
## 14                     max tps  0.174263 1444.000000 285
## 15                     max tnr  0.999674    1.000000   0
## 16                     max fnr  0.999674    0.905125   0
## 17                     max fpr  0.005000    1.000000 399
## 18                     max tpr  0.174263    1.000000 285
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: stackedensemble
## ** Reported on validation data. **
## 
## MSE:  0.08736751
## RMSE:  0.29558
## LogLoss:  0.2921939
## Mean Per-Class Error:  0.1110858
## AUC:  0.9472648
## AUCPR:  0.9653165
## Gini:  0.8945296
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##              Accredited Unaccredited    Error     Rate
## Accredited          162           28 0.147368  =28/190
## Unaccredited         19          235 0.074803  =19/254
## Totals              181          263 0.105856  =47/444
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold      value idx
## 1                       max f1  0.590911   0.909091 225
## 2                       max f2  0.234776   0.928189 254
## 3                 max f0point5  0.879082   0.927788 172
## 4                 max accuracy  0.614111   0.894144 223
## 5                max precision  0.999863   1.000000   0
## 6                   max recall  0.043830   1.000000 387
## 7              max specificity  0.999863   1.000000   0
## 8             max absolute_mcc  0.614111   0.783198 223
## 9   max min_per_class_accuracy  0.755629   0.885827 207
## 10 max mean_per_class_accuracy  0.755629   0.890282 207
## 11                     max tns  0.999863 190.000000   0
## 12                     max fns  0.999863 252.000000   0
## 13                     max fps  0.007441 190.000000 399
## 14                     max tps  0.043830 254.000000 387
## 15                     max tnr  0.999863   1.000000   0
## 16                     max fnr  0.999863   0.992126   0
## 17                     max fpr  0.007441   1.000000 399
## 18                     max tpr  0.043830   1.000000 387
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: stackedensemble
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  0.08671589
## RMSE:  0.2944756
## LogLoss:  0.290198
## Mean Per-Class Error:  0.1057492
## AUC:  0.941625
## AUCPR:  0.9623053
## Gini:  0.8832499
## 
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
##              Accredited Unaccredited    Error       Rate
## Accredited          978          107 0.098618  =107/1085
## Unaccredited        163         1281 0.112881  =163/1444
## Totals             1141         1388 0.106762  =270/2529
## 
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold       value idx
## 1                       max f1  0.446059    0.904661 200
## 2                       max f2  0.170718    0.909633 298
## 3                 max f0point5  0.719219    0.924312 143
## 4                 max accuracy  0.519811    0.894029 187
## 5                max precision  0.999726    1.000000   0
## 6                   max recall  0.026721    1.000000 392
## 7              max specificity  0.999726    1.000000   0
## 8             max absolute_mcc  0.519811    0.787407 187
## 9   max min_per_class_accuracy  0.413299    0.891244 208
## 10 max mean_per_class_accuracy  0.519811    0.896547 187
## 11                     max tns  0.999726 1085.000000   0
## 12                     max fns  0.999726 1380.000000   0
## 13                     max fps  0.004505 1085.000000 399
## 14                     max tps  0.026721 1444.000000 392
## 15                     max tnr  0.999726    1.000000   0
## 16                     max fnr  0.999726    0.955679   0
## 17                     max fpr  0.004505    1.000000 399
## 18                     max tpr  0.026721    1.000000 392
## 
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary: 
##                mean       sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid
## accuracy   0.897202 0.022957   0.931481   0.910506   0.882235   0.881092
## auc        0.942244 0.010531   0.952546   0.953077   0.940730   0.928876
## err        0.102798 0.022957   0.068519   0.089494   0.117764   0.118908
## err_count 51.600000 9.989995  37.000000  46.000000  59.000000  61.000000
## f0point5   0.922940 0.014994   0.947622   0.922598   0.908767   0.913534
##           cv_5_valid
## accuracy    0.880694
## auc         0.935989
## err         0.119306
## err_count  55.000000
## f0point5    0.922179
## 
## ---
##                         mean        sd cv_1_valid cv_2_valid cv_3_valid
## precision           0.933503  0.014724   0.954861   0.923875   0.917266
## r2                  0.643981  0.054988   0.721783   0.680706   0.619601
## recall              0.883759  0.033342   0.919732   0.917526   0.876289
## residual_deviance 293.146200 23.289720 268.940920 278.529720 302.449900
## rmse                0.294251  0.022277   0.262206   0.280047   0.304325
## specificity         0.915413  0.021191   0.946058   0.901345   0.890476
##                   cv_4_valid cv_5_valid
## precision           0.931034   0.940476
## r2                  0.603117   0.594699
## recall              0.849650   0.855596
## residual_deviance 328.495180 287.315340
## rmse                0.312903   0.311772
## specificity         0.920705   0.918478

Make predictions

# Score the held-out test frame with the reloaded model
predictions <- best_model %>%
    h2o.predict(newdata = test_h2o)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'name_of_museum' has levels not trained on: ["1066 Battle Of
## Hastings, Battle Abbey And Battlefield", "13th/18th Royal Hussars Museum",
## "1620s House And Garden", "Abbotsford", "Aberaeron Toy Museum", "Aberdeen
## Science Centre", "Aberdeenshire Farming Museum", "Abergavenny Museum",
## "Abertillery And District Museum", "Abriachan Museum", ...973 not listed...,
## "World Of Glass", "Wrexham County Borough Museum", "Wymondham Abbey", "Yelde
## Hall Museum", "Yelverton Paperweight Centre", "York City Art Gallery", "York
## Cold War Bunker", "Yorkshire Museum Of Farming", "Yorkshire Sculpture Park",
## "Young Gallery Salisbury"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'village_town_or_city' has levels not trained on: ["Aberaeron",
## "Abriachan", "Alexandria", "Antrim", "Appleby Magna", "Argyll", "Arisaig",
## "Ash", "Ashby-de-la-Zouch", "Ballater", ...243 not listed..., "nr Okehampton",
## "nr Prestonpans", "nr Skipton", "nr Spilsby", "nr Wakefield", "nr Westbury",
## "nr. Cheltenham", "nr. Kilwinning", "nr. Rugeley", "nr. Yeovil"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'postcode' has levels not trained on: ["AB10 1JX", "AB11 6EQ",
## "AB15 7XH", "AB24 3EE", "AB32 6RX", "AB35 5TB", "AB41 7GX", "AB41 7PD", "AB42
## 2UP", "AB42 5FQ", ...876 not listed..., "YO61 1DP", "YO61 4AD", "YO62 5LJ",
## "ZE1 0EL", "ZE1 0UL", "ZE2 9AY", "ZE2 9DJ", "ZE2 9HL", "ZE2 9QD", "ZE2 9SB"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'admin_area' has levels not trained on: ["/England/South East
## (English Region)/Bracknell Forest (English UA)", "/England/South West (English
## Region)/Isles of Scilly (English UA)"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'subject_matter' has levels not trained on: ["Communications"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'year_opened' has levels not trained on: ["1660:1660",
## "1707:1707", "1750:1759", "1761:1761", "1807:1807", "1816:1816", "1818:1818",
## "1823:1823", "1843:1843", "1854:1854", ...15 not listed..., "1960:1979",
## "1960:1984", "1960:1997", "1960:2000", "1974:1979", "1983:1985", "1989:2004",
## "1995:1996", "1996:1998", "2000:2005"]
## Warning in doTryCatch(return(expr), name, parentenv, handler): Test/Validation
## dataset column 'year_closed' has levels not trained on: ["1936:2017",
## "1965:2017", "1971:1999", "1978:1978", "1979:1981", "1981:2017", "1984:1985",
## "1984:2004", "1985:2002", "1985:2008", "1985:2015", "1990:2000", "1991:2017",
## "2006:2017", "2007:2017", "2011:2017", "2017:2018"]
# Convert the H2OFrame of predictions into a tibble.
# as_tibble() is the supported replacement for the deprecated as.tibble().
predictions_tbl <- predictions %>%
    as_tibble()
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Put the predicted class and class probabilities next to the test rows
bind_cols(predictions_tbl, test_tbl)
## # A tibble: 993 × 29
##    predict     Accredited Unaccredited     X name_of_museum village_town_or_city
##    <fct>            <dbl>        <dbl> <int> <fct>          <fct>               
##  1 Unaccredit…   0.00199        0.998      1 The Woodland … nr Westbury         
##  2 Unaccredit…   0.126          0.874      3 Jet Age Museum Cheltenham          
##  3 Unaccredit…   0.306          0.694      6 Prestongrange… nr Prestonpans      
##  4 Accredited    0.955          0.0451     7 National Muse… Edinburgh           
##  5 Accredited    0.782          0.218      9 Baird Institu… Cumnock             
##  6 Accredited    0.853          0.147     11 Taigh-tasgaid… Kildonan            
##  7 Unaccredit…   0.0398         0.960     15 Clayton Hall … Manchester          
##  8 Accredited    0.930          0.0700    20 National War … Edinburgh           
##  9 Unaccredit…   0.00593        0.994     21 Woodlands Art… London              
## 10 Unaccredit…   0.000306       1.00      25 London Gas Mu… London              
## # ℹ 983 more rows
## # ℹ 23 more variables: postcode <fct>, latitude <dbl>, longitude <dbl>,
## #   admin_area <fct>, accreditation <fct>, governance <fct>, size <fct>,
## #   size_provenance <fct>, subject_matter <fct>, year_opened <fct>,
## #   year_closed <fct>, primary_provenance_of_data <fct>,
## #   area_deprivation_index <int>, area_deprivation_index_crime <int>,
## #   area_deprivation_index_education <int>, …

Evaluate model

# Open the help page for h2o.performance().
help("h2o.performance")
# Compute performance metrics for `best_model` on the `test_h2o` frame.
performance_h2o <- best_model %>%
    h2o.performance(newdata = test_h2o)
# Inspect the storage type of the returned metrics object.
performance_h2o %>%
    typeof()
## [1] "S4"
# List the slots available on the performance object.
performance_h2o %>%
    slotNames()
## [1] "algorithm" "on_train"  "on_valid"  "on_xval"   "metrics"
# Print the full contents of the `metrics` slot.
slot(performance_h2o, "metrics")
## $model
## $model$`__meta`
## $model$`__meta`$schema_version
## [1] 3
## 
## $model$`__meta`$schema_name
## [1] "ModelKeyV3"
## 
## $model$`__meta`$schema_type
## [1] "Key<Model>"
## 
## 
## $model$name
## [1] "StackedEnsemble_AllModels_1_AutoML_13_20241121_130635"
## 
## $model$type
## [1] "Key<Model>"
## 
## $model$URL
## [1] "/3/Models/StackedEnsemble_AllModels_1_AutoML_13_20241121_130635"
## 
## 
## $model_checksum
## [1] "-7071169776820997318"
## 
## $frame
## $frame$name
## [1] "test_tbl_sid_b0ec_3"
## 
## 
## $frame_checksum
## [1] "2942151053878311765"
## 
## $description
## NULL
## 
## $scoring_time
## [1] 1.732214e+12
## 
## $predictions
## NULL
## 
## $MSE
## [1] 0.09277121
## 
## $RMSE
## [1] 0.3045837
## 
## $nobs
## [1] 993
## 
## $custom_metric_name
## NULL
## 
## $custom_metric_value
## [1] 0
## 
## $r2
## [1] 0.6212793
## 
## $logloss
## [1] 0.3047063
## 
## $AUC
## [1] 0.937131
## 
## $pr_auc
## [1] 0.9594277
## 
## $Gini
## [1] 0.874262
## 
## $mean_per_class_error
## [1] 0.1095772
## 
## $domain
## [1] "Accredited"   "Unaccredited"
## 
## $cm
## $cm$`__meta`
## $cm$`__meta`$schema_version
## [1] 3
## 
## $cm$`__meta`$schema_name
## [1] "ConfusionMatrixV3"
## 
## $cm$`__meta`$schema_type
## [1] "ConfusionMatrix"
## 
## 
## $cm$table
## Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
##              Accredited Unaccredited  Error        Rate
## Accredited          395           31 0.0728 =  31 / 426
## Unaccredited         83          484 0.1464 =  83 / 567
## Totals              478          515 0.1148 = 114 / 993
## 
## 
## $thresholds_and_metric_scores
## Metrics for Thresholds: Binomial metrics as a function of classification thresholds
##   threshold       f1       f2 f0point5 accuracy precision   recall specificity
## 1  0.999679 0.103679 0.067421 0.224313 0.460222  1.000000 0.054674    1.000000
## 2  0.999337 0.173913 0.116279 0.344828 0.483384  1.000000 0.095238    1.000000
## 3  0.999091 0.233645 0.160051 0.432526 0.504532  1.000000 0.132275    1.000000
## 4  0.998800 0.276596 0.192878 0.488722 0.520645  1.000000 0.160494    1.000000
## 5  0.998473 0.302395 0.213170 0.520082 0.530715  1.000000 0.178131    1.000000
##   absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns fns fps tps
## 1     0.155599               0.054674                0.527337 426 536   0  31
## 2     0.207863               0.095238                0.547619 426 513   0  54
## 3     0.247755               0.132275                0.566138 426 492   0  75
## 4     0.275316               0.160494                0.580247 426 476   0  91
## 5     0.291670               0.178131                0.589065 426 466   0 101
##        tnr      fnr      fpr      tpr idx
## 1 1.000000 0.945326 0.000000 0.054674   0
## 2 1.000000 0.904762 0.000000 0.095238   1
## 3 1.000000 0.867725 0.000000 0.132275   2
## 4 1.000000 0.839506 0.000000 0.160494   3
## 5 1.000000 0.821869 0.000000 0.178131   4
## 
## ---
##     threshold       f1       f2 f0point5 accuracy precision   recall
## 395  0.015793 0.730200 0.871235 0.628464 0.578046  0.575051 1.000000
## 396  0.013890 0.729260 0.870700 0.627351 0.576032  0.573887 1.000000
## 397  0.011867 0.728792 0.870433 0.626796 0.575025  0.573306 1.000000
## 398  0.010070 0.728324 0.870166 0.626243 0.574018  0.572727 1.000000
## 399  0.007493 0.727856 0.869899 0.625690 0.573011  0.572149 1.000000
## 400  0.004727 0.726923 0.869365 0.624587 0.570997  0.570997 1.000000
##     specificity absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns
## 395    0.016432     0.097207               0.016432                0.508216   7
## 396    0.011737     0.082072               0.011737                0.505869   5
## 397    0.009390     0.073370               0.009390                0.504695   4
## 398    0.007042     0.063508               0.007042                0.503521   3
## 399    0.004695     0.051828               0.004695                0.502347   2
## 400    0.000000     0.000000               0.000000                0.500000   0
##     fns fps tps      tnr      fnr      fpr      tpr idx
## 395   0 419 567 0.016432 0.000000 0.983568 1.000000 394
## 396   0 421 567 0.011737 0.000000 0.988263 1.000000 395
## 397   0 422 567 0.009390 0.000000 0.990610 1.000000 396
## 398   0 423 567 0.007042 0.000000 0.992958 1.000000 397
## 399   0 424 567 0.004695 0.000000 0.995305 1.000000 398
## 400   0 426 567 0.000000 0.000000 1.000000 1.000000 399
## 
## $max_criteria_and_metric_scores
## Maximum Metrics: Maximum metrics at their respective thresholds
##                         metric threshold      value idx
## 1                       max f1  0.666006   0.894640 162
## 2                       max f2  0.150210   0.903614 297
## 3                 max f0point5  0.752670   0.922985 143
## 4                 max accuracy  0.666006   0.885196 162
## 5                max precision  0.999679   1.000000   0
## 6                   max recall  0.024562   1.000000 391
## 7              max specificity  0.999679   1.000000   0
## 8             max absolute_mcc  0.666006   0.773471 162
## 9   max min_per_class_accuracy  0.436560   0.876543 191
## 10 max mean_per_class_accuracy  0.666006   0.890423 162
## 11                     max tns  0.999679 426.000000   0
## 12                     max fns  0.999679 536.000000   0
## 13                     max fps  0.004727 426.000000 399
## 14                     max tps  0.024562 567.000000 391
## 15                     max tnr  0.999679   1.000000   0
## 16                     max fnr  0.999679   0.945326   0
## 17                     max fpr  0.004727   1.000000 399
## 18                     max tpr  0.024562   1.000000 391
## 
## $gains_lift_table
## Gains/Lift Table: Avg response rate: 57.10 %, avg score: 57.01 %
##    group cumulative_data_fraction lower_threshold     lift cumulative_lift
## 1      1               0.01007049        0.999738 1.751323        1.751323
## 2      2               0.02014099        0.999599 1.751323        1.751323
## 3      3               0.03021148        0.999542 1.751323        1.751323
## 4      4               0.04028197        0.999354 1.751323        1.751323
## 5      5               0.05035247        0.999247 1.751323        1.751323
## 6      6               0.10070493        0.998372 1.751323        1.751323
## 7      7               0.15005035        0.994811 1.751323        1.751323
## 8      8               0.20040282        0.989992 1.751323        1.751323
## 9      9               0.30010070        0.965191 1.698252        1.733692
## 10    10               0.39979859        0.897588 1.645182        1.711620
## 11    11               0.50050352        0.722766 1.436085        1.656180
## 12    12               0.60020141        0.276711 0.778366        1.510369
## 13    13               0.69989930        0.172740 0.336112        1.343101
## 14    14               0.79959718        0.108928 0.300732        1.213133
## 15    15               0.89929507        0.065965 0.194591        1.100215
## 16    16               1.00000000        0.004407 0.105079        1.000000
##    response_rate    score cumulative_response_rate cumulative_score
## 1       1.000000 0.999806                 1.000000         0.999806
## 2       1.000000 0.999677                 1.000000         0.999742
## 3       1.000000 0.999576                 1.000000         0.999687
## 4       1.000000 0.999465                 1.000000         0.999631
## 5       1.000000 0.999277                 1.000000         0.999560
## 6       1.000000 0.998900                 1.000000         0.999230
## 7       1.000000 0.996624                 1.000000         0.998373
## 8       1.000000 0.992514                 1.000000         0.996901
## 9       0.969697 0.979834                 0.989933         0.991231
## 10      0.939394 0.937904                 0.977330         0.977933
## 11      0.820000 0.825460                 0.945674         0.947254
## 12      0.444444 0.482238                 0.862416         0.870012
## 13      0.191919 0.214721                 0.766906         0.776668
## 14      0.171717 0.136078                 0.692695         0.696796
## 15      0.111111 0.087457                 0.628219         0.629243
## 16      0.060000 0.041806                 0.570997         0.570085
##    capture_rate cumulative_capture_rate       gain cumulative_gain
## 1      0.017637                0.017637  75.132275       75.132275
## 2      0.017637                0.035273  75.132275       75.132275
## 3      0.017637                0.052910  75.132275       75.132275
## 4      0.017637                0.070547  75.132275       75.132275
## 5      0.017637                0.088183  75.132275       75.132275
## 6      0.088183                0.176367  75.132275       75.132275
## 7      0.086420                0.262787  75.132275       75.132275
## 8      0.088183                0.350970  75.132275       75.132275
## 9      0.169312                0.520282  69.825236       73.369199
## 10     0.164021                0.684303  64.518198       71.162022
## 11     0.144621                0.828924  43.608466       65.618047
## 12     0.077601                0.906526 -22.163433       51.036895
## 13     0.033510                0.940035 -66.388755       34.310076
## 14     0.029982                0.970018 -69.926781       21.313289
## 15     0.019400                0.989418 -80.540858       10.021508
## 16     0.010582                1.000000 -89.492063        0.000000
##    kolmogorov_smirnov
## 1            0.017637
## 2            0.035273
## 3            0.052910
## 4            0.070547
## 5            0.088183
## 6            0.176367
## 7            0.262787
## 8            0.350970
## 9            0.513240
## 10           0.663177
## 11           0.765544
## 12           0.714037
## 13           0.559754
## 14           0.397248
## 15           0.210075
## 16           0.000000
## 
## $residual_deviance
## [1] 605.1466
## 
## $null_deviance
## [1] 1356.501
## 
## $AIC
## [1] 615.1466
## 
## $loglikelihood
## [1] 0
## 
## $null_degrees_of_freedom
## [1] 992
## 
## $residual_degrees_of_freedom
## [1] 988
# Extract the AUC from the performance object.
performance_h2o %>%
    h2o.auc()
## [1] 0.937131
# Print the confusion matrix for the performance object; no threshold is
# passed, so h2o's default choice applies.
performance_h2o %>%
    h2o.confusionMatrix()
## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.666005878614975:
##              Accredited Unaccredited    Error      Rate
## Accredited          395           31 0.072770   =31/426
## Unaccredited         83          484 0.146384   =83/567
## Totals              478          515 0.114804  =114/993
# Print the table of binomial metrics across classification thresholds.
performance_h2o %>%
    h2o.metric()
## Metrics for Thresholds: Binomial metrics as a function of classification thresholds
##   threshold       f1       f2 f0point5 accuracy precision   recall specificity
## 1  0.999679 0.103679 0.067421 0.224313 0.460222  1.000000 0.054674    1.000000
## 2  0.999337 0.173913 0.116279 0.344828 0.483384  1.000000 0.095238    1.000000
## 3  0.999091 0.233645 0.160051 0.432526 0.504532  1.000000 0.132275    1.000000
## 4  0.998800 0.276596 0.192878 0.488722 0.520645  1.000000 0.160494    1.000000
## 5  0.998473 0.302395 0.213170 0.520082 0.530715  1.000000 0.178131    1.000000
##   absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns fns fps tps
## 1     0.155599               0.054674                0.527337 426 536   0  31
## 2     0.207863               0.095238                0.547619 426 513   0  54
## 3     0.247755               0.132275                0.566138 426 492   0  75
## 4     0.275316               0.160494                0.580247 426 476   0  91
## 5     0.291670               0.178131                0.589065 426 466   0 101
##        tnr      fnr      fpr      tpr idx
## 1 1.000000 0.945326 0.000000 0.054674   0
## 2 1.000000 0.904762 0.000000 0.095238   1
## 3 1.000000 0.867725 0.000000 0.132275   2
## 4 1.000000 0.839506 0.000000 0.160494   3
## 5 1.000000 0.821869 0.000000 0.178131   4
## 
## ---
##     threshold       f1       f2 f0point5 accuracy precision   recall
## 395  0.015793 0.730200 0.871235 0.628464 0.578046  0.575051 1.000000
## 396  0.013890 0.729260 0.870700 0.627351 0.576032  0.573887 1.000000
## 397  0.011867 0.728792 0.870433 0.626796 0.575025  0.573306 1.000000
## 398  0.010070 0.728324 0.870166 0.626243 0.574018  0.572727 1.000000
## 399  0.007493 0.727856 0.869899 0.625690 0.573011  0.572149 1.000000
## 400  0.004727 0.726923 0.869365 0.624587 0.570997  0.570997 1.000000
##     specificity absolute_mcc min_per_class_accuracy mean_per_class_accuracy tns
## 395    0.016432     0.097207               0.016432                0.508216   7
## 396    0.011737     0.082072               0.011737                0.505869   5
## 397    0.009390     0.073370               0.009390                0.504695   4
## 398    0.007042     0.063508               0.007042                0.503521   3
## 399    0.004695     0.051828               0.004695                0.502347   2
## 400    0.000000     0.000000               0.000000                0.500000   0
##     fns fps tps      tnr      fnr      fpr      tpr idx
## 395   0 419 567 0.016432 0.000000 0.983568 1.000000 394
## 396   0 421 567 0.011737 0.000000 0.988263 1.000000 395
## 397   0 422 567 0.009390 0.000000 0.990610 1.000000 396
## 398   0 423 567 0.007042 0.000000 0.992958 1.000000 397
## 399   0 424 567 0.004695 0.000000 0.995305 1.000000 398
## 400   0 426 567 0.000000 0.000000 1.000000 1.000000 399