# Load required libraries
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
## Warning: package 'tidymodels' was built under R version 4.3.1
## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
## ✔ broom        1.0.6      ✔ rsample      1.2.1 
## ✔ dials        1.2.1      ✔ tune         1.2.1 
## ✔ infer        1.0.7      ✔ workflows    1.1.4 
## ✔ modeldata    1.4.0      ✔ workflowsets 1.1.0 
## ✔ parsnip      1.2.1      ✔ yardstick    1.3.1 
## ✔ recipes      1.0.10
## Warning: package 'broom' was built under R version 4.3.3
## Warning: package 'dials' was built under R version 4.3.1
## Warning: package 'scales' was built under R version 4.3.1
## Warning: package 'infer' was built under R version 4.3.1
## Warning: package 'modeldata' was built under R version 4.3.3
## Warning: package 'parsnip' was built under R version 4.3.1
## Warning: package 'recipes' was built under R version 4.3.1
## Warning: package 'rsample' was built under R version 4.3.1
## Warning: package 'tune' was built under R version 4.3.1
## Warning: package 'workflows' was built under R version 4.3.1
## Warning: package 'workflowsets' was built under R version 4.3.1
## Warning: package 'yardstick' was built under R version 4.3.1
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ recipes::fixed()  masks stringr::fixed()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step()   masks stats::step()
## • Search for functions across packages at https://www.tidymodels.org/find/
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(h2o)
## Warning: package 'h2o' was built under R version 4.3.1
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit https://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## 
## Attaching package: 'h2o'
## 
## The following objects are masked from 'package:lubridate':
## 
##     day, hour, month, week, year
## 
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## 
## The following objects are masked from 'package:base':
## 
##     &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
library(readr)
# Initialize H2O Cluster
h2o.init(max_mem_size = "4G")
## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     /var/folders/54/8ptbpppx6yl7x4zf4sjpnr6r0000gn/T//Rtmp780MBp/file1587c4eaedd5e/h2o_jobboonstoppel_started_from_r.out
##     /var/folders/54/8ptbpppx6yl7x4zf4sjpnr6r0000gn/T//Rtmp780MBp/file1587c3a476ead/h2o_jobboonstoppel_started_from_r.err
## 
## 
## Starting H2O JVM and connecting: .. Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 210 milliseconds 
##     H2O cluster timezone:       America/New_York 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.44.0.3 
##     H2O cluster version age:    11 months and 19 days 
##     H2O cluster name:           H2O_started_from_R_jobboonstoppel_qhw407 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   3.99 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.3.0 (2023-04-21)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (11 months and 19 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# Ensure cluster information is displayed
h2o.clusterInfo()
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 261 milliseconds 
##     H2O cluster timezone:       America/New_York 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.44.0.3 
##     H2O cluster version age:    11 months and 19 days 
##     H2O cluster name:           H2O_started_from_R_jobboonstoppel_qhw407 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   3.99 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.3.0 (2023-04-21)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (11 months and 19 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# Load dataset
attrition_raw_tbl <- read_csv("../00_data/WA_Fn-UseC_-HR-Employee-Attrition.csv")
## Rows: 1470 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (26): Age, DailyRate, DistanceFromHome, Education, EmployeeCount, Employ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Clean and Prepare the Data
attrition_raw_tbl <- attrition_raw_tbl %>%
    clean_names() %>%                                 # Clean column names for easier reference
    mutate(attrition = as.factor(attrition)) %>%      # Convert Attrition to a factor (response variable)
    select(-over18, -employee_count, -standard_hours) %>% # Remove columns with no variance
    drop_na()                                         # Handle missing values (if any)

# Convert to H2O Frame
attrition_h2o <- as.h2o(attrition_raw_tbl)
# Split the Data into Training and Testing Sets
splits <- h2o.splitFrame(attrition_h2o, ratios = 0.8, seed = 123)
train_data_h2o <- splits[[1]]
test_data_h2o <- splits[[2]]
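
Note that h2o.splitFrame() samples rows at random, so with an imbalanced outcome like attrition the class ratio can drift between the splits. If you prefer a stratified split, rsample (already loaded via tidymodels) can do it before the H2O conversion; a minimal sketch:

# Optional: stratified 80/20 split with rsample, done before as.h2o()
set.seed(123)
split_obj      <- initial_split(attrition_raw_tbl, prop = 0.8, strata = attrition)
train_data_h2o <- as.h2o(training(split_obj))
test_data_h2o  <- as.h2o(testing(split_obj))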
# Run H2O AutoML
attrition_automl <- h2o.automl(
    x = setdiff(names(attrition_raw_tbl), "attrition"), # Predictor columns
    y = "attrition",                                   # Response column
    training_frame = train_data_h2o,
    leaderboard_frame = test_data_h2o,                 # Use test data for leaderboard
    max_runtime_secs = 30,                             # 30 seconds runtime; increase for a more thorough search
    nfolds = 5,                                        # Cross-validation folds
    seed = 123
)
## 14:24:25.234: AutoML: XGBoost is not available; skipping it.
## 14:24:25.250: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## (the same "Dropping bad and constant columns" / "Dropping unused columns" message repeats for each model trained; progress bars omitted)
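
Aside: the repeated "Dropping bad and constant columns" messages show that all of the categorical predictors (marital_status, job_role, gender, business_travel, education_field, department, over_time) were skipped by every model. The likely cause is that read_csv() imported them as character columns, which H2O treats as strings rather than categoricals. A minimal fix, applied before as.h2o() (a sketch; re-run the conversion and split afterwards):

# Convert character columns to factors so H2O parses them as categoricals
attrition_raw_tbl <- attrition_raw_tbl %>%
    mutate(across(where(is.character), factor))

attrition_h2o <- as.h2o(attrition_raw_tbl)

# Alternatively, recode a column on the H2O frame directly:
# attrition_h2o[["job_role"]] <- h2o.asfactor(attrition_h2o[["job_role"]])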
# Display the AutoML leaderboard
leaderboard <- h2o.get_leaderboard(attrition_automl, extra_columns = "ALL")
print(leaderboard)
##                                                  model_id       auc   logloss
## 1            GBM_grid_1_AutoML_1_20241210_142425_model_16 0.7662590 0.3502607
## 2 StackedEnsemble_BestOfFamily_4_AutoML_1_20241210_142425 0.7643135 0.3489582
## 3             GBM_grid_1_AutoML_1_20241210_142425_model_2 0.7584769 0.3490854
## 4 StackedEnsemble_BestOfFamily_1_AutoML_1_20241210_142425 0.7569946 0.3509802
## 5    StackedEnsemble_AllModels_4_AutoML_1_20241210_142425 0.7536594 0.3544772
## 6    StackedEnsemble_AllModels_3_AutoML_1_20241210_142425 0.7532889 0.3535866
##       aucpr mean_per_class_error      rmse       mse training_time_ms
## 1 0.3572338            0.3447749 0.3243110 0.1051776               51
## 2 0.3925606            0.2607467 0.3222077 0.1038178              124
## 3 0.4118369            0.2812210 0.3215199 0.1033751               67
## 4 0.4161036            0.3085974 0.3229948 0.1043256              125
## 5 0.3832035            0.3022976 0.3263881 0.1065292              145
## 6 0.3972768            0.2981749 0.3253880 0.1058774              132
##   predict_time_per_row_ms            algo
## 1                0.021552             GBM
## 2                0.018534 StackedEnsemble
## 3                0.007438             GBM
## 4                0.008407 StackedEnsemble
## 5                0.033633 StackedEnsemble
## 6                0.026656 StackedEnsemble
## 
## [50 rows x 10 columns]
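
If you want to slice, join, or plot the leaderboard in R, it converts cleanly to a regular data frame; for example:

# Pull the leaderboard into R for further inspection
leaderboard_tbl <- as.data.frame(leaderboard)
leaderboard_tbl %>% slice_head(n = 5)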
# Evaluate the Best Model
best_model <- attrition_automl@leader
performance <- h2o.performance(best_model, newdata = test_data_h2o)

# Display metrics
cat("AUC: ", h2o.auc(performance), "\n")
## AUC:  0.766259
cat("Log Loss: ", h2o.logloss(performance), "\n")
## Log Loss:  0.3502607
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(h2o.confusionMatrix(performance))
## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.332685217703168:
##         No Yes    Error     Rate
## No     245  12 0.046693  =12/257
## Yes     27  15 0.642857   =27/42
## Totals 272  27 0.130435  =39/299
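
The confusion matrix points to a class-imbalance problem: at the max-F1 threshold, about 64% of the actual "Yes" (attrition) cases are misclassified. One option worth trying is h2o.automl()'s balance_classes argument, which oversamples the minority class during training; a sketch of the re-run (results will vary):

# Re-run AutoML with minority-class oversampling (same setup as above, plus balance_classes)
attrition_automl_balanced <- h2o.automl(
    x = setdiff(names(attrition_raw_tbl), "attrition"),
    y = "attrition",
    training_frame = train_data_h2o,
    leaderboard_frame = test_data_h2o,
    balance_classes = TRUE,    # oversample the minority ("Yes") class
    max_runtime_secs = 30,
    nfolds = 5,
    seed = 123
)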
# Make Predictions
predictions <- h2o.predict(best_model, test_data_h2o)
predictions
##   predict        No        Yes
## 1     Yes 0.7377179 0.26228206
## 2      No 0.9188792 0.08112078
## 3     Yes 0.6673148 0.33268522
## 4      No 0.9230013 0.07699872
## 5      No 0.8474860 0.15251396
## 6      No 0.9469701 0.05302990
## 
## [299 rows x 3 columns]
# Convert predictions back to an R data frame (head() keeps the output short)
predictions_df <- as.data.frame(predictions)
head(predictions_df)
##   predict        No        Yes
## 1     Yes 0.7377179 0.26228206
## 2      No 0.9188792 0.08112078
## 3     Yes 0.6673148 0.33268522
## 4      No 0.9230013 0.07699872
## 5      No 0.8474860 0.15251396
## 6      No 0.9469701 0.05302990
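
To see which employees the model flags as the highest attrition risks, bind the predictions back to the test rows; a small sketch:

# Join predictions to the test observations and rank by predicted risk
results_tbl <- test_data_h2o %>%
    as.data.frame() %>%
    bind_cols(predictions_df) %>%
    arrange(desc(Yes))         # highest predicted attrition probability first

results_tbl %>% select(attrition, predict, Yes) %>% slice_head(n = 10)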
# Shut Down H2O
h2o.shutdown(prompt = FALSE)
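
One caveat: h2o.shutdown() discards everything in the cluster, including trained models. If you want to reuse the leader model in a later session, save it to disk before shutting down (the path here is just an example):

# Save the leader model to disk (run before h2o.shutdown())
model_path <- h2o.saveModel(best_model, path = "h2o_models", force = TRUE)

# Later, after h2o.init() in a fresh session:
# best_model <- h2o.loadModel(model_path)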

Key Changes

  1. H2O Initialization: Added h2o.init() in its own chunk so the cluster is active throughout the session.
  2. Cluster Info: Verified the connection with h2o.clusterInfo().
  3. Memory Allocation: Increased the H2O memory allocation to 4 GB via max_mem_size = "4G".
  4. Chunk Structure: Split tasks into separate chunks for modular execution.

Additional Notes

This approach should resolve the connection error and let the R Markdown document knit end to end. Let me know if you need further adjustments!