# Initialize H2O Cluster
h2o.init(max_mem_size = "4G")    # Start (or connect to) a local H2O cluster with a 4 GB heap limit
##  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         13 hours 3 minutes 
##     H2O cluster timezone:       America/New_York 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.44.0.3 
##     H2O cluster version age:    1 year, 4 months and 10 days 
##     H2O cluster name:           H2O_started_from_R_User_fut320 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   3.68 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     R Version:                  R version 4.4.1 (2024-06-14 ucrt)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is (1 year, 4 months and 10 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
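# h2o.init() accepts further options when you need more control over the local
# cluster; a sketch (not run here, values illustrative): nthreads = -1 uses all
# available cores, and 54321 is H2O's default REST port.
# h2o.init(nthreads = -1, max_mem_size = "4G", port = 54321)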
# Cluster information can be re-displayed at any time (same summary h2o.init() prints)
h2o.clusterInfo()
# Load dataset
attrition_raw_tbl <- read_csv("../00_data/WA_Fn-UseC_-HR-Employee-Attrition.csv")
## Rows: 1470 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (26): Age, DailyRate, DistanceFromHome, Education, EmployeeCount, Employ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
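# To silence the column-type message on future reads, follow readr's own
# suggestion and pass show_col_types = FALSE (alternative call, not run here):
# attrition_raw_tbl <- read_csv(
#     "../00_data/WA_Fn-UseC_-HR-Employee-Attrition.csv",
#     show_col_types = FALSE
# )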
# Clean and Prepare the Data
attrition_raw_tbl <- attrition_raw_tbl %>%
    clean_names() %>%                                 # Clean column names for easier reference
    mutate(attrition = as.factor(attrition)) %>%      # Convert Attrition to a factor (response variable)
    select(-over18, -employee_count, -standard_hours) %>% # Remove columns with no variance
    drop_na()                                         # Handle missing values (if any)

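# Note: the AutoML log below reports the character predictors (department,
# job_role, over_time, etc.) being dropped as "bad and constant columns". A
# likely cause is that they reach H2O as strings rather than enums; converting
# them to factors before as.h2o() should let AutoML use them (a sketch, not
# run in this session):
# attrition_raw_tbl <- attrition_raw_tbl %>%
#     mutate(across(where(is.character), as.factor))
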
# Convert to H2O Frame
attrition_h2o <- as.h2o(attrition_raw_tbl)
# Split the Data into Training and Testing Sets
splits <- h2o.splitFrame(attrition_h2o, ratios = 0.8, seed = 123)
train_data_h2o <- splits[[1]]
test_data_h2o <- splits[[2]]
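# Sanity-check the split sizes; h2o.splitFrame() splits approximately, so the
# counts vary slightly around 80/20 (in this run the test frame has 299 rows)
h2o.nrow(train_data_h2o)
h2o.nrow(test_data_h2o)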
# Run H2O AutoML
attrition_automl <- h2o.automl(
    x = setdiff(names(attrition_raw_tbl), "attrition"), # Predictor columns
    y = "attrition",                                   # Response column
    training_frame = train_data_h2o,
    leaderboard_frame = test_data_h2o,                 # Use test data for leaderboard
    max_runtime_secs = 30,                             # Limit total AutoML runtime to 30 seconds
    nfolds = 5,                                        # Cross-validation folds
    seed = 123
)
## 09:43:16.29: AutoML: XGBoost is not available; skipping it.
## 09:43:16.42: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:17.314: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## (both messages repeat for each candidate model; progress bars omitted)
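# Inspect the leaderboard: with a binomial response, models are ranked by AUC
# computed on the leaderboard (test) frame; extra_columns = "ALL" adds
# training and per-row prediction timing columns.
lb <- h2o.get_leaderboard(attrition_automl, extra_columns = "ALL")
print(head(lb))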
# Evaluate the Best Model

best_model <- attrition_automl@leader
performance <- h2o.performance(best_model, newdata = test_data_h2o)

# Display metrics
cat("AUC: ", h2o.auc(performance), "\n") 
## AUC:  0.766259
cat("Log Loss: ", h2o.logloss(performance), "\n")
## Log Loss:  0.3502607
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(h2o.confusionMatrix(performance))
## Confusion Matrix (vertical: actual; across: predicted)  for max f1 @ threshold = 0.332685217703168:
##         No Yes    Error     Rate
## No     245  12 0.046693  =12/257
## Yes     27  15 0.642857   =27/42
## Totals 272  27 0.130435  =39/299
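# Variable importance for the leader, where supported: tree-based models and
# GLMs report it directly, while a Stacked Ensemble leader would require
# inspecting its base models or metalearner instead.
h2o.varimp_plot(best_model)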
# Make Predictions
predictions <- h2o.predict(best_model, test_data_h2o)
predictions
##   predict        No        Yes
## 1     Yes 0.7377179 0.26228206
## 2      No 0.9188792 0.08112078
## 3     Yes 0.6673148 0.33268522
## 4      No 0.9230013 0.07699872
## 5      No 0.8474860 0.15251396
## 6      No 0.9469701 0.05302990
## 
## [299 rows x 3 columns]
# Convert predictions back to an R data frame; preview the first rows rather
# than printing all 299
predictions_df <- as.data.frame(predictions)
head(predictions_df)
##   predict        No        Yes
## 1     Yes 0.7377179 0.26228206
## 2      No 0.9188792 0.08112078
## 3     Yes 0.6673148 0.33268522
## 4      No 0.9230013 0.07699872
## 5      No 0.8474860 0.15251396
## 6      No 0.9469701 0.05302990
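# Combine the actual outcomes with the predictions for downstream analysis
# (names follow the objects created above)
results_tbl <- test_data_h2o %>%
    as.data.frame() %>%
    select(attrition) %>%
    bind_cols(predictions_df)
head(results_tbl)

# Optionally persist the leader model before shutting the cluster down
# (the "models" directory is illustrative)
model_path <- h2o.saveModel(best_model, path = "models", force = TRUE)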
# Shut Down H2O
h2o.shutdown(prompt = FALSE)