# Initialize H2O Cluster
h2o.init(max_mem_size = "4G")
## Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 13 hours 3 minutes
## H2O cluster timezone: America/New_York
## H2O data parsing timezone: UTC
## H2O cluster version: 3.44.0.3
## H2O cluster version age: 1 year, 4 months and 10 days
## H2O cluster name: H2O_started_from_R_User_fut320
## H2O cluster total nodes: 1
## H2O cluster total memory: 3.68 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 4.4.1 (2024-06-14 ucrt)
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (1 year, 4 months and 10 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# Ensure cluster information is displayed
h2o.clusterInfo()
## R is connected to the H2O cluster:
## H2O cluster uptime: 13 hours 3 minutes
## H2O cluster timezone: America/New_York
## H2O data parsing timezone: UTC
## H2O cluster version: 3.44.0.3
## H2O cluster version age: 1 year, 4 months and 10 days
## H2O cluster name: H2O_started_from_R_User_fut320
## H2O cluster total nodes: 1
## H2O cluster total memory: 3.68 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## R Version: R version 4.4.1 (2024-06-14 ucrt)
## Warning in h2o.clusterInfo():
## Your H2O cluster version is (1 year, 4 months and 10 days) old. There may be a newer version available.
## Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html
# Load dataset
attrition_raw_tbl <- read_csv("../00_data/WA_Fn-UseC_-HR-Employee-Attrition.csv")
## Rows: 1470 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Attrition, BusinessTravel, Department, EducationField, Gender, Job...
## dbl (26): Age, DailyRate, DistanceFromHome, Education, EmployeeCount, Employ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Clean and Prepare the Data
attrition_raw_tbl <- attrition_raw_tbl %>%
  clean_names() %>%                                      # Clean column names for easier reference
  mutate(attrition = as.factor(attrition)) %>%           # Convert Attrition to a factor (response variable)
  select(-over18, -employee_count, -standard_hours) %>%  # Remove constant columns (no variance)
  drop_na()                                              # Handle missing values (if any)
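# Optional step (not run in this report): the AutoML log further below shows H2O
# dropping the character columns (marital_status, job_role, gender, business_travel,
# education_field, department, over_time) as "bad and constant". Converting them to
# factors before as.h2o() should let H2O encode them as categorical predictors
# rather than drop them:
#
# attrition_raw_tbl <- attrition_raw_tbl %>%
#   mutate(across(where(is.character), as.factor))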
# Convert to H2O Frame
attrition_h2o <- as.h2o(attrition_raw_tbl)
# Split the Data into Training and Testing Sets
splits <- h2o.splitFrame(attrition_h2o, ratios = 0.8, seed = 123)
train_data_h2o <- splits[[1]]
test_data_h2o <- splits[[2]]
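# Optionally verify the split sizes (not printed in this report): the 80/20 split
# with seed = 123 leaves 299 of the 1,470 rows in the test frame, as the prediction
# output below confirms.
# h2o.nrow(train_data_h2o)
# h2o.nrow(test_data_h2o)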
# Run H2O AutoML
attrition_automl <- h2o.automl(
  x = setdiff(names(attrition_raw_tbl), "attrition"),  # Predictor columns
  y = "attrition",                                      # Response column
  training_frame = train_data_h2o,
  leaderboard_frame = test_data_h2o,  # Use test data for leaderboard
  max_runtime_secs = 30,              # Limit the AutoML run to 30 seconds
  nfolds = 5,                         # Cross-validation folds
  seed = 123
)
## 09:43:16.29: AutoML: XGBoost is not available; skipping it.
## 09:43:16.42: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:16.339: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:17.314: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:17.670: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:19.63: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:19.532: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:20.62: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:20.680: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:20.850: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:21.131: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:22.60: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:22.392: _train param, Dropping bad and constant columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:22.750: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:22.927: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:41.273: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:41.451: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:45.584: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
## 09:43:45.877: _train param, Dropping unused columns: [marital_status, job_role, gender, business_travel, education_field, department, over_time]
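# The AutoML leaderboard ranks every candidate model on the leaderboard_frame (the
# test split). With only 30 seconds of runtime the ranking varies from run to run,
# so it is not reproduced here, but it can be inspected with:
# attrition_automl@leaderboard
# h2o.get_leaderboard(attrition_automl, extra_columns = "ALL")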
# Evaluate the Best Model
best_model <- attrition_automl@leader
performance <- h2o.performance(best_model, newdata = test_data_h2o)
# Display metrics
cat("AUC: ", h2o.auc(performance), "\n")
## AUC: 0.766259
cat("Log Loss: ", h2o.logloss(performance), "\n")
## Log Loss: 0.3502607
cat("Confusion Matrix:\n")
## Confusion Matrix:
print(h2o.confusionMatrix(performance))
## Confusion Matrix (vertical: actual; across: predicted) for max f1 @ threshold = 0.332685217703168:
##         No Yes    Error     Rate
## No     245  12 0.046693  =12/257
## Yes     27  15 0.642857   =27/42
## Totals 272  27 0.130435  =39/299
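# At the max-F1 threshold the model misses 27 of the 42 employees who actually left
# (about 64% error on the "Yes" class). If recall on leavers matters more than
# precision, the confusion matrix can be recomputed at a lower cutoff, e.g.
# (values not shown here):
# h2o.confusionMatrix(performance, thresholds = 0.2)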
# Make Predictions
predictions <- h2o.predict(best_model, test_data_h2o)
predictions
##   predict        No        Yes
## 1     Yes 0.7377179 0.26228206
## 2      No 0.9188792 0.08112078
## 3     Yes 0.6673148 0.33268522
## 4      No 0.9230013 0.07699872
## 5      No 0.8474860 0.15251396
## 6      No 0.9469701 0.05302990
##
## [299 rows x 3 columns]
# Convert predictions back to R data frame
predictions_df <- as.data.frame(predictions)
head(predictions_df)  # Preview the first rows of the 299 predictions
##   predict        No        Yes
## 1     Yes 0.7377179 0.26228206
## 2      No 0.9188792 0.08112078
## 3     Yes 0.6673148 0.33268522
## 4      No 0.9230013 0.07699872
## 5      No 0.8474860 0.15251396
## 6      No 0.9469701 0.05302990
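# A sketch for inspecting predictions against the actual labels (printout omitted
# here); H2O preserves row order, so bind_cols() lines the test rows up correctly
results_tbl <- test_data_h2o %>%
  as.data.frame() %>%
  select(attrition) %>%
  bind_cols(predictions_df)
head(results_tbl)

# To keep the leader model beyond the shutdown below, it could be saved to disk
# first (the path is illustrative):
# h2o.saveModel(best_model, path = "models", force = TRUE)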
# Shut Down H2O
h2o.shutdown(prompt = FALSE)